Refactor spreadsheet handling to track corrections and defaults

Improved the backend's value cleaning to differentiate between corrections and defaults, and to log metadata for clearer traceability. Updated the frontend to display corrected/defaulted fields with visual cues and tooltips for better user feedback. Enhanced data models and response structures to support this richer metadata.
This commit is contained in:
GotthardG 2025-01-14 21:46:11 +01:00
parent f6c19cc4da
commit c0951292d0
5 changed files with 213 additions and 94 deletions

View File

@ -96,9 +96,9 @@ async def upload_file(file: UploadFile = File(...)):
cleaned_value = importer._clean_value( cleaned_value = importer._clean_value(
original_value, expected_type, col_name original_value, expected_type, col_name
) )
corrected = False
# Check if a correction was applied # Check if a correction was applied
if cleaned_value != original_value: if cleaned_value[0] != original_value:
corrected = True corrected = True
corrected_columns.append(col_name) corrected_columns.append(col_name)
@ -156,10 +156,78 @@ async def upload_file(file: UploadFile = File(...)):
f"Rows corrected: {sum(1 for r in updated_raw_data if r.get('corrected'))}" f"Rows corrected: {sum(1 for r in updated_raw_data if r.get('corrected'))}"
) )
updated_addinfo = [
{
"row_num": row["row_num"], # Identify row for the frontend
"corrected_columns": row.get("corrected_columns", []),
"default_set": [
col_name
for col_name in row.get("corrected_columns", [])
if row.get("default_set", False) and col_name == "directory"
], # Specify which keys are explicitly `default_set`
}
for row in updated_raw_data
if row.get("corrected")
or row.get("default_set") # Only include rows with changes
]
logger.debug(f"Constructed addinfo: {updated_addinfo}")
# Clean updated raw data in place
for row in updated_raw_data:
# Remove unwanted metadata fields
row.pop("corrected", None)
row.pop("corrected_columns", None)
row.pop("default_set", None)
row.pop("defaulted_columns", None)
row.pop("directory", None)
# Sanitize nested data (e.g., replace directory tuples with strings)
if "data" in row:
for idx, value in enumerate(row["data"]):
if isinstance(value, tuple):
row["data"][idx] = value[0] # Extract the first item (string)
# Confirm cleanup worked
for row in updated_raw_data:
unexpected_keys = [
k
for k in row.keys()
if k
in [
"corrected",
"corrected_columns",
"default_set",
"defaulted_columns",
"directory",
]
]
if unexpected_keys:
logger.error(f"Unexpected keys persist: {unexpected_keys}")
# Construct stripped_raw_data from the cleaned updated_raw_data
stripped_raw_data = [
{
k: v
for k, v in row.items()
if k
not in [
"corrected",
"corrected_columns",
"default_set",
"defaulted_columns",
"directory",
]
}
for row in updated_raw_data
]
# Verify the final stripped raw data before returning
# logger.debug(f"Sanitized raw_data for response: {stripped_raw_data}")
response_data = SpreadsheetResponse( response_data = SpreadsheetResponse(
data=validated_model, data=validated_model,
errors=errors, errors=errors,
raw_data=updated_raw_data, raw_data=stripped_raw_data, # Final submission data
addinfo=updated_addinfo, # Metadata for frontend display
dewars_count=len(dewars), dewars_count=len(dewars),
dewars=list(dewars), dewars=list(dewars),
pucks_count=len(pucks), pucks_count=len(pucks),
@ -168,7 +236,6 @@ async def upload_file(file: UploadFile = File(...)):
samples=list(samples), samples=list(samples),
headers=headers, headers=headers,
) )
logger.debug(f"Final updated_raw_data sent in response: {updated_raw_data}") logger.debug(f"Final updated_raw_data sent in response: {updated_raw_data}")
# Store row data for future use # Store row data for future use
for idx, row in enumerate(validated_model): for idx, row in enumerate(validated_model):

View File

@ -110,7 +110,10 @@ class SpreadsheetModel(BaseModel):
class SpreadsheetResponse(BaseModel): class SpreadsheetResponse(BaseModel):
data: List[SpreadsheetModel] # Validated data rows as SpreadsheetModel instances data: List[SpreadsheetModel] # Validated data rows as SpreadsheetModel instances
errors: List[Dict[str, Any]] # Errors encountered during validation errors: List[Dict[str, Any]] # Errors encountered during validation
raw_data: List[Dict[str, Any]] # Raw data extracted from the spreadsheet raw_data: List[
Dict[str, Any]
] # perhaps this has to be changed with the actual model!
addinfo: List[Dict[str, Any]]
dewars_count: int dewars_count: int
dewars: List[str] dewars: List[str]
pucks_count: int pucks_count: int

View File

@ -65,37 +65,33 @@ class SampleSpreadsheetImporter:
def _clean_value(self, value, expected_type=None, column_name=None): def _clean_value(self, value, expected_type=None, column_name=None):
""" """
Cleans and validates the given value based on its expected type. Cleans and validates the given value based on its expected type.
Different behavior is applied to specific columns if needed. Tracks corrections and defaults applied separately.
""" """
default_applied = False
# If the value is None or empty string
if value is None or (isinstance(value, str) and value.strip() == ""): if value is None or (isinstance(value, str) and value.strip() == ""):
# Handle empty or None values
if column_name == "directory": if column_name == "directory":
logger.warning("Directory value is empty. Assigning default value.") logger.warning("Directory value is empty. Assigning default value.")
self.default_set = True # Flag to indicate a default value is set. default_applied = True
return "{sgPuck}/{sgPosition}" # Default directory return "{sgPuck}/{sgPosition}", default_applied
self.default_set = False
return None
# Convert to string and strip whitespaces return None, default_applied
# Clean up the value
cleaned_value = str(value).strip() cleaned_value = str(value).strip()
# Handle specific column behaviors # Handle `type` casting logic
if expected_type == str: if expected_type == str:
if expected_type == str: if column_name == "comments":
if column_name is None: return " ".join(cleaned_value.split()), default_applied
logger.warning(f"Missing column_name for value: {value}") if " " in cleaned_value:
elif column_name == "comments": cleaned_value = cleaned_value.replace(" ", "_")
return " ".join(cleaned_value.split()) # Normalize excessive spaces
else:
# Replace spaces with underscores for general string columns
return cleaned_value.replace(" ", "_")
elif expected_type in [int, float]: elif expected_type in [int, float]:
try: try:
# Remove any invalid characters and cast to the expected type
cleaned_value = re.sub(r"[^\d.]", "", cleaned_value) cleaned_value = re.sub(r"[^\d.]", "", cleaned_value)
return expected_type(cleaned_value) cleaned_value = expected_type(cleaned_value)
except (ValueError, TypeError) as e: except (ValueError, TypeError) as e:
logger.error( logger.error(
f"Failed to cast value '{value}' to {expected_type}. Error: {e}" f"Failed to cast value '{value}' to {expected_type}. Error: {e}"
@ -104,8 +100,15 @@ class SampleSpreadsheetImporter:
f"Invalid value: '{value}'. Expected type: {expected_type}." f"Invalid value: '{value}'. Expected type: {expected_type}."
) )
# Return cleaned value for other types # Avoid marking `None -> None` as a correction
return cleaned_value if cleaned_value == value:
default_applied = (
False # Ensure default_applied stays False for unchanged `value`.
)
if not isinstance(cleaned_value, (str, int, float)):
raise TypeError(f"Unexpected type for cleaned value: {type(cleaned_value)}")
return cleaned_value, default_applied
def import_spreadsheet(self, file): def import_spreadsheet(self, file):
return self.import_spreadsheet_with_errors(file) return self.import_spreadsheet_with_errors(file)
@ -200,30 +203,49 @@ class SampleSpreadsheetImporter:
logger.debug(f"Skipping empty row at index {index}") logger.debug(f"Skipping empty row at index {index}")
continue continue
# Record raw data for later use
raw_data.append({"row_num": index + 4, "data": list(row)})
# Ensure row has the expected number of columns # Ensure row has the expected number of columns
if len(row) < expected_columns: if len(row) < expected_columns:
row = list(row) + [None] * (expected_columns - len(row)) row = list(row) + [None] * (expected_columns - len(row))
# Prepare the record dynamically based on headers # Reset flags for the current row
self.default_set = False
corrected = False
defaulted_columns = []
corrected_columns = []
record = {} record = {}
for col_idx, column_name in enumerate(headers): for col_idx, column_name in enumerate(headers):
original_value = row[col_idx] if col_idx < len(row) else None original_value = row[col_idx] if col_idx < len(row) else None
expected_type = self.get_expected_type(column_name) expected_type = self.get_expected_type(column_name)
# Call _clean_value dynamically with the correct column_name
try: try:
cleaned_value = self._clean_value( # Call `_clean_value` to clean the value and extract
# cleaning-related indicators
cleaned_value, default_applied = self._clean_value(
original_value, expected_type, column_name original_value, expected_type, column_name
) )
# Check if the cleaned value is meaningfully different from the
# original value
is_corrected = cleaned_value != original_value
# Append column to corrected columns only if the value was corrected
if is_corrected:
corrected = True
corrected_columns.append(column_name)
# Track default columns separately if a default was applied
if default_applied:
corrected = True
defaulted_columns.append(column_name)
# Update the record with cleaned value (store only the cleaned part,
# not the tuple)
record[column_name] = cleaned_value record[column_name] = cleaned_value
except (ValueError, TypeError) as e: except (ValueError, TypeError) as e:
logger.error( logger.error(
f"Validation error for row {index + 4}," f"Validation error for row {index + 4}"
f" column '{column_name}': " f", column '{column_name}': {str(e)}"
f"{str(e)}"
) )
errors.append( errors.append(
{ {
@ -234,63 +256,71 @@ class SampleSpreadsheetImporter:
} }
) )
# Build metadata for the row
raw_data.append(
{
"row_num": index + 4,
"data": list(row), # Original data
"default_set": bool(
defaulted_columns
), # True if any defaults were applied
"corrected": corrected, # True if any value was corrected
# List of corrected columns (if any)
"corrected_columns": corrected_columns,
# List of defaulted columns (if any)
"defaulted_columns": defaulted_columns,
}
)
# Nested processing for data_collection_parameters # Nested processing for data_collection_parameters
record["data_collection_parameters"] = { record["data_collection_parameters"] = {
"directory": record.get("directory"), "directory": record.get("directory", ""),
"oscillation": record.get("oscillation"), "oscillation": record.get("oscillation", 0.0),
"aperture": record.get("aperture"), "aperture": record.get("aperture", None),
"exposure": record.get("exposure"), "exposure": record.get("exposure", 0.0),
"totalrange": record.get("totalrange"), "totalrange": record.get("totalrange", 0),
"transmission": record.get("transmission"), "transmission": record.get("transmission", 0),
"dose": record.get("dose"), "dose": record.get("dose", None),
"targetresolution": record.get("targetresolution"), "targetresolution": record.get("targetresolution", 0.0),
"datacollectiontype": record.get("datacollectiontype"), "datacollectiontype": record.get("datacollectiontype", None),
"processingpipeline": record.get("processingpipeline"), "processingpipeline": record.get("processingpipeline", None),
"spacegroupnumber": record.get("spacegroupnumber"), "spacegroupnumber": record.get("spacegroupnumber", None),
"cellparameters": record.get("cellparameters"), "cellparameters": record.get("cellparameters", None),
"rescutkey": record.get("rescutkey"), "rescutkey": record.get("rescutkey", None),
"rescutvalue": record.get("rescutvalue"), "rescutvalue": record.get("rescutvalue", 0.0),
"userresolution": record.get("userresolution"), "userresolution": record.get("userresolution", 0.0),
"pdbid": record.get("pdbid"), "pdbid": record.get("pdbid", ""),
"autoprocfull": record.get("autoprocfull"), "autoprocfull": record.get("autoprocfull", False),
"procfull": record.get("procfull"), "procfull": record.get("procfull", False),
"adpenabled": record.get("adpenabled"), "adpenabled": record.get("adpenabled", False),
"noano": record.get("noano"), "noano": record.get("noano", False),
"ffcscampaign": record.get("ffcscampaign"), "ffcscampaign": record.get("ffcscampaign", False),
"trustedhigh": record.get("trustedhigh"), "trustedhigh": record.get("trustedhigh", 0.0),
"autoprocextraparams": record.get("autoprocextraparams"), "autoprocextraparams": record.get("autoprocextraparams", None),
"chiphiangles": record.get("chiphiangles"), "chiphiangles": record.get("chiphiangles", 0.0),
} }
try: try:
# Validate the record # Validate the record
validated_record = SpreadsheetModel(**record) validated_record = SpreadsheetModel(**record)
# Add validated record to the model
model.append(validated_record) model.append(validated_record)
except ValidationError as e: except ValidationError as e:
logger.error(f"Validation error in row {index + 4}: {e}") logger.error(f"Validation error in row {index + 4}: {e}")
for error in e.errors(): for error in e.errors():
field_path = error["loc"] field_path = error["loc"]
msg = error["msg"] msg = error["msg"]
column_name = headers[field_path[0]]
if field_path[0] == "data_collection_parameters":
subfield = field_path[1]
column_index = headers.index(subfield)
else:
field = field_path[0]
column_index = headers.index(field)
error_info = { error_info = {
"row": index + 4, "row": index + 4,
"cell": column_index, "column": column_name,
"value": row[column_index], "value": row[col_idx],
"message": msg, "message": msg,
} }
errors.append(error_info) errors.append(error_info)
self.model = model
logger.info( logger.info(
f"Finished processing {len(model)} records with {len(errors)} errors" f"Finished processing {len(model)} records with {len(errors)} errors"
) )
return self.model, errors, raw_data, headers # Include headers in the response return self.model, errors, raw_data, headers # Include headers in the response

View File

@ -29,7 +29,8 @@ const SpreadsheetTable = ({
setRawData, setRawData,
onCancel, onCancel,
fileBlob, fileBlob,
selectedShipment selectedShipment,
addinfo,
}) => { }) => {
const [localErrors, setLocalErrors] = useState(errors || []); const [localErrors, setLocalErrors] = useState(errors || []);
const [editingCell, setEditingCell] = useState({}); const [editingCell, setEditingCell] = useState({});
@ -38,7 +39,23 @@ const SpreadsheetTable = ({
const [showUpdateDialog, setShowUpdateDialog] = useState(false); const [showUpdateDialog, setShowUpdateDialog] = useState(false);
const [dewarsToReplace, setDewarsToReplace] = useState([]); const [dewarsToReplace, setDewarsToReplace] = useState([]);
const [dewarsToCreate, setDewarsToCreate] = useState(new Map()); const [dewarsToCreate, setDewarsToCreate] = useState(new Map());
const [correctionMetadata, setCorrectionMetadata] = useState(addinfo || []); // Store addinfo
const enhancedRawData = raw_data.map((row) => {
const metadata = correctionMetadata.find((info) => info.row_num === row.row_num) || {};
// Combine original row data with metadata
return {
...row,
corrected_columns: metadata.corrected_columns || [], // Columns corrected
default_set_columns: metadata.default_set || [], // Specific columns default-assigned
};
});
useEffect(() => {
console.log("Correction Metadata:", correctionMetadata);
console.log("Addinfo:", addinfo);
}, [correctionMetadata, addinfo]);
const initialNewDewarState = { const initialNewDewarState = {
number_of_pucks: 0, number_of_pucks: 0,
number_of_samples: 0, number_of_samples: 0,
@ -461,6 +478,14 @@ const SpreadsheetTable = ({
return ( return (
<TableContainer component={Paper}> <TableContainer component={Paper}>
<Box display="flex" justifyContent="space-between" mb={2}>
<Typography variant="body2" style={{ backgroundColor: "#e6fbe6", padding: "4px 8px", borderRadius: "4px" }}>
Default Assigned (Light Green)
</Typography>
<Typography variant="body2" style={{ backgroundColor: "#fff8e1", padding: "4px 8px", borderRadius: "4px" }}>
Corrected (Light Yellow)
</Typography>
</Box>
<Table> <Table>
<TableHead> <TableHead>
<TableRow> <TableRow>
@ -483,7 +508,7 @@ const SpreadsheetTable = ({
</Button> </Button>
</Box> </Box>
<TableBody> <TableBody>
{raw_data.map((row, rowIndex) => ( {enhancedRawData.map((row, rowIndex) => (
<TableRow key={rowIndex}> <TableRow key={rowIndex}>
{headers.map((header, colIndex) => { {headers.map((header, colIndex) => {
const key = `${row.row_num}-${header}`; const key = `${row.row_num}-${header}`;
@ -491,37 +516,30 @@ const SpreadsheetTable = ({
const isInvalid = !!errorMessage; const isInvalid = !!errorMessage;
const cellValue = row.data[colIndex]; const cellValue = row.data[colIndex];
const editingValue = editingCell[`${rowIndex}-${colIndex}`]; const editingValue = editingCell[`${rowIndex}-${colIndex}`];
const isCellCorrected = row.corrected_columns?.includes(header); // Check if this column is marked as corrected const isCellCorrected = row.corrected_columns?.includes(header); // Use corrected metadata
const isDefaultAssigned = colIndex === 7 && row.default_set; // Default-assigned field exists and is true const isDefaultAssigned = row.default_set_columns?.includes(header); // Dynamically match header name
// Dynamic styles for corrected cells
const cellStyle = {
backgroundColor:
isDefaultAssigned
? "#e6fbe6" // Light green for default values
: isCellCorrected
? "#fff8e1" // Light yellow for corrected values
: "transparent", // Default for others
color: isDefaultAssigned
? "#1b5e20" // Dark green for default values
: "inherit", // Default for others
fontWeight: (isCellCorrected || isDefaultAssigned) ? "bold" : "normal", // Bold text for any change
cursor: isInvalid ? "pointer" : "default", // Mouse pointer indicates interactive error cells
};
return ( return (
<TableCell <TableCell
key={colIndex} key={colIndex}
align="center" align="center"
style={cellStyle} style={{
backgroundColor: isDefaultAssigned
? "#e6fbe6" // Light green
: isCellCorrected
? "#fff8e1" // Light yellow
: "transparent",
color: isDefaultAssigned ? "#1b5e20" : "inherit",
fontWeight: isCellCorrected || isDefaultAssigned ? "bold" : "normal",
cursor: isInvalid ? "pointer" : "default",
}}
> >
<Tooltip <Tooltip
title={ title={
isDefaultAssigned isDefaultAssigned
? "This value was automatically assigned by the system as a default." ? "This value was automatically assigned as a default."
: isCellCorrected : isCellCorrected
? "Value corrected automatically by the system." ? `Field "${header}" was auto-corrected.`
: errorMessage || "" : errorMessage || ""
} }
arrow arrow

View File

@ -180,6 +180,7 @@ const UploadDialog: React.FC<UploadDialogProps> = ({ open, onClose, selectedShip
onCancel={handleCancel} onCancel={handleCancel}
fileBlob={fileBlob} fileBlob={fileBlob}
selectedShipment={selectedShipment} selectedShipment={selectedShipment}
addinfo={fileSummary.addinfo}
/> />
</Modal> </Modal>
)} )}