Refactor spreadsheet handling to track corrections and defaults
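Improved the backend's value cleaning to differentiate between corrections and defaults: `_clean_value` now returns the cleaned value together with a `default_applied` flag, and each raw-data row records its corrected and defaulted columns, with the resulting metadata logged for clearer traceability. Updated the frontend to display corrected/defaulted fields with visual cues (light yellow for corrected, light green for default-assigned) and tooltips for better user feedback. Enhanced the data models and response structures (a new `addinfo` field on `SpreadsheetResponse`) to support this richer metadata.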
This commit is contained in:
parent
f6c19cc4da
commit
c0951292d0
@ -96,9 +96,9 @@ async def upload_file(file: UploadFile = File(...)):
|
||||
cleaned_value = importer._clean_value(
|
||||
original_value, expected_type, col_name
|
||||
)
|
||||
|
||||
corrected = False
|
||||
# Check if a correction was applied
|
||||
if cleaned_value != original_value:
|
||||
if cleaned_value[0] != original_value:
|
||||
corrected = True
|
||||
corrected_columns.append(col_name)
|
||||
|
||||
@ -156,10 +156,78 @@ async def upload_file(file: UploadFile = File(...)):
|
||||
f"Rows corrected: {sum(1 for r in updated_raw_data if r.get('corrected'))}"
|
||||
)
|
||||
|
||||
updated_addinfo = [
|
||||
{
|
||||
"row_num": row["row_num"], # Identify row for the frontend
|
||||
"corrected_columns": row.get("corrected_columns", []),
|
||||
"default_set": [
|
||||
col_name
|
||||
for col_name in row.get("corrected_columns", [])
|
||||
if row.get("default_set", False) and col_name == "directory"
|
||||
], # Specify which keys are explicitly `default_set`
|
||||
}
|
||||
for row in updated_raw_data
|
||||
if row.get("corrected")
|
||||
or row.get("default_set") # Only include rows with changes
|
||||
]
|
||||
logger.debug(f"Constructed addinfo: {updated_addinfo}")
|
||||
# Clean updated raw data in place
|
||||
for row in updated_raw_data:
|
||||
# Remove unwanted metadata fields
|
||||
row.pop("corrected", None)
|
||||
row.pop("corrected_columns", None)
|
||||
row.pop("default_set", None)
|
||||
row.pop("defaulted_columns", None)
|
||||
row.pop("directory", None)
|
||||
|
||||
# Sanitize nested data (e.g., replace directory tuples with strings)
|
||||
if "data" in row:
|
||||
for idx, value in enumerate(row["data"]):
|
||||
if isinstance(value, tuple):
|
||||
row["data"][idx] = value[0] # Extract the first item (string)
|
||||
|
||||
# Confirm cleanup worked
|
||||
for row in updated_raw_data:
|
||||
unexpected_keys = [
|
||||
k
|
||||
for k in row.keys()
|
||||
if k
|
||||
in [
|
||||
"corrected",
|
||||
"corrected_columns",
|
||||
"default_set",
|
||||
"defaulted_columns",
|
||||
"directory",
|
||||
]
|
||||
]
|
||||
if unexpected_keys:
|
||||
logger.error(f"Unexpected keys persist: {unexpected_keys}")
|
||||
|
||||
# Construct stripped_raw_data from the cleaned updated_raw_data
|
||||
stripped_raw_data = [
|
||||
{
|
||||
k: v
|
||||
for k, v in row.items()
|
||||
if k
|
||||
not in [
|
||||
"corrected",
|
||||
"corrected_columns",
|
||||
"default_set",
|
||||
"defaulted_columns",
|
||||
"directory",
|
||||
]
|
||||
}
|
||||
for row in updated_raw_data
|
||||
]
|
||||
|
||||
# Verify the final stripped raw data before returning
|
||||
# logger.debug(f"Sanitized raw_data for response: {stripped_raw_data}")
|
||||
|
||||
response_data = SpreadsheetResponse(
|
||||
data=validated_model,
|
||||
errors=errors,
|
||||
raw_data=updated_raw_data,
|
||||
raw_data=stripped_raw_data, # Final submission data
|
||||
addinfo=updated_addinfo, # Metadata for frontend display
|
||||
dewars_count=len(dewars),
|
||||
dewars=list(dewars),
|
||||
pucks_count=len(pucks),
|
||||
@ -168,7 +236,6 @@ async def upload_file(file: UploadFile = File(...)):
|
||||
samples=list(samples),
|
||||
headers=headers,
|
||||
)
|
||||
|
||||
logger.debug(f"Final updated_raw_data sent in response: {updated_raw_data}")
|
||||
# Store row data for future use
|
||||
for idx, row in enumerate(validated_model):
|
||||
|
@ -110,7 +110,10 @@ class SpreadsheetModel(BaseModel):
|
||||
class SpreadsheetResponse(BaseModel):
|
||||
data: List[SpreadsheetModel] # Validated data rows as SpreadsheetModel instances
|
||||
errors: List[Dict[str, Any]] # Errors encountered during validation
|
||||
raw_data: List[Dict[str, Any]] # Raw data extracted from the spreadsheet
|
||||
raw_data: List[
|
||||
Dict[str, Any]
|
||||
] # perhaps this has to be changed to use the actual model!
|
||||
addinfo: List[Dict[str, Any]]
|
||||
dewars_count: int
|
||||
dewars: List[str]
|
||||
pucks_count: int
|
||||
|
@ -65,37 +65,33 @@ class SampleSpreadsheetImporter:
|
||||
def _clean_value(self, value, expected_type=None, column_name=None):
|
||||
"""
|
||||
Cleans and validates the given value based on its expected type.
|
||||
Different behavior is applied to specific columns if needed.
|
||||
Tracks corrections and defaults applied separately.
|
||||
"""
|
||||
default_applied = False
|
||||
|
||||
# If the value is None or empty string
|
||||
if value is None or (isinstance(value, str) and value.strip() == ""):
|
||||
# Handle empty or None values
|
||||
if column_name == "directory":
|
||||
logger.warning("Directory value is empty. Assigning default value.")
|
||||
self.default_set = True # Flag to indicate a default value is set.
|
||||
return "{sgPuck}/{sgPosition}" # Default directory
|
||||
self.default_set = False
|
||||
return None
|
||||
default_applied = True
|
||||
return "{sgPuck}/{sgPosition}", default_applied
|
||||
|
||||
# Convert to string and strip whitespaces
|
||||
return None, default_applied
|
||||
|
||||
# Clean up the value
|
||||
cleaned_value = str(value).strip()
|
||||
|
||||
# Handle specific column behaviors
|
||||
# Handle `type` casting logic
|
||||
if expected_type == str:
|
||||
if expected_type == str:
|
||||
if column_name is None:
|
||||
logger.warning(f"Missing column_name for value: {value}")
|
||||
elif column_name == "comments":
|
||||
return " ".join(cleaned_value.split()) # Normalize excessive spaces
|
||||
|
||||
else:
|
||||
# Replace spaces with underscores for general string columns
|
||||
return cleaned_value.replace(" ", "_")
|
||||
if column_name == "comments":
|
||||
return " ".join(cleaned_value.split()), default_applied
|
||||
if " " in cleaned_value:
|
||||
cleaned_value = cleaned_value.replace(" ", "_")
|
||||
|
||||
elif expected_type in [int, float]:
|
||||
try:
|
||||
# Remove any invalid characters and cast to the expected type
|
||||
cleaned_value = re.sub(r"[^\d.]", "", cleaned_value)
|
||||
return expected_type(cleaned_value)
|
||||
cleaned_value = expected_type(cleaned_value)
|
||||
except (ValueError, TypeError) as e:
|
||||
logger.error(
|
||||
f"Failed to cast value '{value}' to {expected_type}. Error: {e}"
|
||||
@ -104,8 +100,15 @@ class SampleSpreadsheetImporter:
|
||||
f"Invalid value: '{value}'. Expected type: {expected_type}."
|
||||
)
|
||||
|
||||
# Return cleaned value for other types
|
||||
return cleaned_value
|
||||
# Avoid marking `None -> None` as a correction
|
||||
if cleaned_value == value:
|
||||
default_applied = (
|
||||
False # Ensure default_applied stays False for unchanged `value`.
|
||||
)
|
||||
|
||||
if not isinstance(cleaned_value, (str, int, float)):
|
||||
raise TypeError(f"Unexpected type for cleaned value: {type(cleaned_value)}")
|
||||
return cleaned_value, default_applied
|
||||
|
||||
def import_spreadsheet(self, file):
|
||||
return self.import_spreadsheet_with_errors(file)
|
||||
@ -200,30 +203,49 @@ class SampleSpreadsheetImporter:
|
||||
logger.debug(f"Skipping empty row at index {index}")
|
||||
continue
|
||||
|
||||
# Record raw data for later use
|
||||
raw_data.append({"row_num": index + 4, "data": list(row)})
|
||||
|
||||
# Ensure row has the expected number of columns
|
||||
if len(row) < expected_columns:
|
||||
row = list(row) + [None] * (expected_columns - len(row))
|
||||
|
||||
# Prepare the record dynamically based on headers
|
||||
# Reset flags for the current row
|
||||
self.default_set = False
|
||||
corrected = False
|
||||
defaulted_columns = []
|
||||
corrected_columns = []
|
||||
record = {}
|
||||
|
||||
for col_idx, column_name in enumerate(headers):
|
||||
original_value = row[col_idx] if col_idx < len(row) else None
|
||||
expected_type = self.get_expected_type(column_name)
|
||||
|
||||
# Call _clean_value dynamically with the correct column_name
|
||||
try:
|
||||
cleaned_value = self._clean_value(
|
||||
# Call `_clean_value` to clean the value and extract
|
||||
# cleaning-related indicators
|
||||
cleaned_value, default_applied = self._clean_value(
|
||||
original_value, expected_type, column_name
|
||||
)
|
||||
|
||||
# Check if the cleaned value is meaningfully different from the
|
||||
# original value
|
||||
is_corrected = cleaned_value != original_value
|
||||
|
||||
# Append column to corrected columns only if the value was corrected
|
||||
if is_corrected:
|
||||
corrected = True
|
||||
corrected_columns.append(column_name)
|
||||
|
||||
# Track default columns separately if a default was applied
|
||||
if default_applied:
|
||||
corrected = True
|
||||
defaulted_columns.append(column_name)
|
||||
|
||||
# Update the record with cleaned value (store only the cleaned part,
|
||||
# not the tuple)
|
||||
record[column_name] = cleaned_value
|
||||
except (ValueError, TypeError) as e:
|
||||
logger.error(
|
||||
f"Validation error for row {index + 4},"
|
||||
f" column '{column_name}': "
|
||||
f"{str(e)}"
|
||||
f"Validation error for row {index + 4}"
|
||||
f", column '{column_name}': {str(e)}"
|
||||
)
|
||||
errors.append(
|
||||
{
|
||||
@ -234,63 +256,71 @@ class SampleSpreadsheetImporter:
|
||||
}
|
||||
)
|
||||
|
||||
# Build metadata for the row
|
||||
raw_data.append(
|
||||
{
|
||||
"row_num": index + 4,
|
||||
"data": list(row), # Original data
|
||||
"default_set": bool(
|
||||
defaulted_columns
|
||||
), # True if any defaults were applied
|
||||
"corrected": corrected, # True if any value was corrected
|
||||
# List of corrected columns (if any)
|
||||
"corrected_columns": corrected_columns,
|
||||
# List of defaulted columns (if any)
|
||||
"defaulted_columns": defaulted_columns,
|
||||
}
|
||||
)
|
||||
|
||||
# Nested processing for data_collection_parameters
|
||||
record["data_collection_parameters"] = {
|
||||
"directory": record.get("directory"),
|
||||
"oscillation": record.get("oscillation"),
|
||||
"aperture": record.get("aperture"),
|
||||
"exposure": record.get("exposure"),
|
||||
"totalrange": record.get("totalrange"),
|
||||
"transmission": record.get("transmission"),
|
||||
"dose": record.get("dose"),
|
||||
"targetresolution": record.get("targetresolution"),
|
||||
"datacollectiontype": record.get("datacollectiontype"),
|
||||
"processingpipeline": record.get("processingpipeline"),
|
||||
"spacegroupnumber": record.get("spacegroupnumber"),
|
||||
"cellparameters": record.get("cellparameters"),
|
||||
"rescutkey": record.get("rescutkey"),
|
||||
"rescutvalue": record.get("rescutvalue"),
|
||||
"userresolution": record.get("userresolution"),
|
||||
"pdbid": record.get("pdbid"),
|
||||
"autoprocfull": record.get("autoprocfull"),
|
||||
"procfull": record.get("procfull"),
|
||||
"adpenabled": record.get("adpenabled"),
|
||||
"noano": record.get("noano"),
|
||||
"ffcscampaign": record.get("ffcscampaign"),
|
||||
"trustedhigh": record.get("trustedhigh"),
|
||||
"autoprocextraparams": record.get("autoprocextraparams"),
|
||||
"chiphiangles": record.get("chiphiangles"),
|
||||
"directory": record.get("directory", ""),
|
||||
"oscillation": record.get("oscillation", 0.0),
|
||||
"aperture": record.get("aperture", None),
|
||||
"exposure": record.get("exposure", 0.0),
|
||||
"totalrange": record.get("totalrange", 0),
|
||||
"transmission": record.get("transmission", 0),
|
||||
"dose": record.get("dose", None),
|
||||
"targetresolution": record.get("targetresolution", 0.0),
|
||||
"datacollectiontype": record.get("datacollectiontype", None),
|
||||
"processingpipeline": record.get("processingpipeline", None),
|
||||
"spacegroupnumber": record.get("spacegroupnumber", None),
|
||||
"cellparameters": record.get("cellparameters", None),
|
||||
"rescutkey": record.get("rescutkey", None),
|
||||
"rescutvalue": record.get("rescutvalue", 0.0),
|
||||
"userresolution": record.get("userresolution", 0.0),
|
||||
"pdbid": record.get("pdbid", ""),
|
||||
"autoprocfull": record.get("autoprocfull", False),
|
||||
"procfull": record.get("procfull", False),
|
||||
"adpenabled": record.get("adpenabled", False),
|
||||
"noano": record.get("noano", False),
|
||||
"ffcscampaign": record.get("ffcscampaign", False),
|
||||
"trustedhigh": record.get("trustedhigh", 0.0),
|
||||
"autoprocextraparams": record.get("autoprocextraparams", None),
|
||||
"chiphiangles": record.get("chiphiangles", 0.0),
|
||||
}
|
||||
|
||||
try:
|
||||
# Validate the record
|
||||
validated_record = SpreadsheetModel(**record)
|
||||
|
||||
# Add validated record to the model
|
||||
model.append(validated_record)
|
||||
except ValidationError as e:
|
||||
logger.error(f"Validation error in row {index + 4}: {e}")
|
||||
for error in e.errors():
|
||||
field_path = error["loc"]
|
||||
msg = error["msg"]
|
||||
|
||||
if field_path[0] == "data_collection_parameters":
|
||||
subfield = field_path[1]
|
||||
column_index = headers.index(subfield)
|
||||
else:
|
||||
field = field_path[0]
|
||||
column_index = headers.index(field)
|
||||
column_name = headers[field_path[0]]
|
||||
|
||||
error_info = {
|
||||
"row": index + 4,
|
||||
"cell": column_index,
|
||||
"value": row[column_index],
|
||||
"column": column_name,
|
||||
"value": row[col_idx],
|
||||
"message": msg,
|
||||
}
|
||||
errors.append(error_info)
|
||||
|
||||
self.model = model
|
||||
logger.info(
|
||||
f"Finished processing {len(model)} records with {len(errors)} errors"
|
||||
)
|
||||
|
||||
return self.model, errors, raw_data, headers # Include headers in the response
|
||||
|
@ -29,7 +29,8 @@ const SpreadsheetTable = ({
|
||||
setRawData,
|
||||
onCancel,
|
||||
fileBlob,
|
||||
selectedShipment
|
||||
selectedShipment,
|
||||
addinfo,
|
||||
}) => {
|
||||
const [localErrors, setLocalErrors] = useState(errors || []);
|
||||
const [editingCell, setEditingCell] = useState({});
|
||||
@ -38,7 +39,23 @@ const SpreadsheetTable = ({
|
||||
const [showUpdateDialog, setShowUpdateDialog] = useState(false);
|
||||
const [dewarsToReplace, setDewarsToReplace] = useState([]);
|
||||
const [dewarsToCreate, setDewarsToCreate] = useState(new Map());
|
||||
const [correctionMetadata, setCorrectionMetadata] = useState(addinfo || []); // Store addinfo
|
||||
|
||||
const enhancedRawData = raw_data.map((row) => {
|
||||
const metadata = correctionMetadata.find((info) => info.row_num === row.row_num) || {};
|
||||
|
||||
// Combine original row data with metadata
|
||||
return {
|
||||
...row,
|
||||
corrected_columns: metadata.corrected_columns || [], // Columns corrected
|
||||
default_set_columns: metadata.default_set || [], // Specific columns default-assigned
|
||||
};
|
||||
});
|
||||
|
||||
useEffect(() => {
|
||||
console.log("Correction Metadata:", correctionMetadata);
|
||||
console.log("Addinfo:", addinfo);
|
||||
}, [correctionMetadata, addinfo]);
|
||||
const initialNewDewarState = {
|
||||
number_of_pucks: 0,
|
||||
number_of_samples: 0,
|
||||
@ -461,6 +478,14 @@ const SpreadsheetTable = ({
|
||||
|
||||
return (
|
||||
<TableContainer component={Paper}>
|
||||
<Box display="flex" justifyContent="space-between" mb={2}>
|
||||
<Typography variant="body2" style={{ backgroundColor: "#e6fbe6", padding: "4px 8px", borderRadius: "4px" }}>
|
||||
Default Assigned (Light Green)
|
||||
</Typography>
|
||||
<Typography variant="body2" style={{ backgroundColor: "#fff8e1", padding: "4px 8px", borderRadius: "4px" }}>
|
||||
Corrected (Light Yellow)
|
||||
</Typography>
|
||||
</Box>
|
||||
<Table>
|
||||
<TableHead>
|
||||
<TableRow>
|
||||
@ -483,7 +508,7 @@ const SpreadsheetTable = ({
|
||||
</Button>
|
||||
</Box>
|
||||
<TableBody>
|
||||
{raw_data.map((row, rowIndex) => (
|
||||
{enhancedRawData.map((row, rowIndex) => (
|
||||
<TableRow key={rowIndex}>
|
||||
{headers.map((header, colIndex) => {
|
||||
const key = `${row.row_num}-${header}`;
|
||||
@ -491,37 +516,30 @@ const SpreadsheetTable = ({
|
||||
const isInvalid = !!errorMessage;
|
||||
const cellValue = row.data[colIndex];
|
||||
const editingValue = editingCell[`${rowIndex}-${colIndex}`];
|
||||
const isCellCorrected = row.corrected_columns?.includes(header); // Check if this column is marked as corrected
|
||||
const isDefaultAssigned = colIndex === 7 && row.default_set; // Default-assigned field exists and is true
|
||||
|
||||
|
||||
// Dynamic styles for corrected cells
|
||||
const cellStyle = {
|
||||
backgroundColor:
|
||||
isDefaultAssigned
|
||||
? "#e6fbe6" // Light green for default values
|
||||
: isCellCorrected
|
||||
? "#fff8e1" // Light yellow for corrected values
|
||||
: "transparent", // Default for others
|
||||
color: isDefaultAssigned
|
||||
? "#1b5e20" // Dark green for default values
|
||||
: "inherit", // Default for others
|
||||
fontWeight: (isCellCorrected || isDefaultAssigned) ? "bold" : "normal", // Bold text for any change
|
||||
cursor: isInvalid ? "pointer" : "default", // Mouse pointer indicates interactive error cells
|
||||
};
|
||||
const isCellCorrected = row.corrected_columns?.includes(header); // Use corrected metadata
|
||||
const isDefaultAssigned = row.default_set_columns?.includes(header); // Dynamically match header name
|
||||
|
||||
return (
|
||||
<TableCell
|
||||
key={colIndex}
|
||||
align="center"
|
||||
style={cellStyle}
|
||||
style={{
|
||||
backgroundColor: isDefaultAssigned
|
||||
? "#e6fbe6" // Light green
|
||||
: isCellCorrected
|
||||
? "#fff8e1" // Light yellow
|
||||
: "transparent",
|
||||
color: isDefaultAssigned ? "#1b5e20" : "inherit",
|
||||
fontWeight: isCellCorrected || isDefaultAssigned ? "bold" : "normal",
|
||||
cursor: isInvalid ? "pointer" : "default",
|
||||
}}
|
||||
>
|
||||
<Tooltip
|
||||
title={
|
||||
isDefaultAssigned
|
||||
? "This value was automatically assigned by the system as a default."
|
||||
? "This value was automatically assigned as a default."
|
||||
: isCellCorrected
|
||||
? "Value corrected automatically by the system."
|
||||
? `Field "${header}" was auto-corrected.`
|
||||
: errorMessage || ""
|
||||
}
|
||||
arrow
|
||||
|
@ -180,6 +180,7 @@ const UploadDialog: React.FC<UploadDialogProps> = ({ open, onClose, selectedShip
|
||||
onCancel={handleCancel}
|
||||
fileBlob={fileBlob}
|
||||
selectedShipment={selectedShipment}
|
||||
addinfo={fileSummary.addinfo}
|
||||
/>
|
||||
</Modal>
|
||||
)}
|
||||
|
Loading…
x
Reference in New Issue
Block a user