Refactor spreadsheet handling to track corrections and defaults

Improved the backend's value cleaning to differentiate between corrections and defaults, logging metadata for clearer traceability. Updated frontend to display corrected/defaulted fields with visual cues and tooltips for better user feedback. Enhanced data models and response structures to support this richer metadata.
This commit is contained in:
GotthardG 2025-01-14 21:46:11 +01:00
parent f6c19cc4da
commit c0951292d0
5 changed files with 213 additions and 94 deletions

View File

@ -96,9 +96,9 @@ async def upload_file(file: UploadFile = File(...)):
cleaned_value = importer._clean_value(
original_value, expected_type, col_name
)
corrected = False
# Check if a correction was applied
if cleaned_value != original_value:
if cleaned_value[0] != original_value:
corrected = True
corrected_columns.append(col_name)
@ -156,10 +156,78 @@ async def upload_file(file: UploadFile = File(...)):
f"Rows corrected: {sum(1 for r in updated_raw_data if r.get('corrected'))}"
)
updated_addinfo = [
{
"row_num": row["row_num"], # Identify row for the frontend
"corrected_columns": row.get("corrected_columns", []),
"default_set": [
col_name
for col_name in row.get("corrected_columns", [])
if row.get("default_set", False) and col_name == "directory"
], # Specify which keys are explicitly `default_set`
}
for row in updated_raw_data
if row.get("corrected")
or row.get("default_set") # Only include rows with changes
]
logger.debug(f"Constructed addinfo: {updated_addinfo}")
# Clean updated raw data in place
for row in updated_raw_data:
# Remove unwanted metadata fields
row.pop("corrected", None)
row.pop("corrected_columns", None)
row.pop("default_set", None)
row.pop("defaulted_columns", None)
row.pop("directory", None)
# Sanitize nested data (e.g., replace directory tuples with strings)
if "data" in row:
for idx, value in enumerate(row["data"]):
if isinstance(value, tuple):
row["data"][idx] = value[0] # Extract the first item (string)
# Confirm cleanup worked
for row in updated_raw_data:
unexpected_keys = [
k
for k in row.keys()
if k
in [
"corrected",
"corrected_columns",
"default_set",
"defaulted_columns",
"directory",
]
]
if unexpected_keys:
logger.error(f"Unexpected keys persist: {unexpected_keys}")
# Construct stripped_raw_data from the cleaned updated_raw_data
stripped_raw_data = [
{
k: v
for k, v in row.items()
if k
not in [
"corrected",
"corrected_columns",
"default_set",
"defaulted_columns",
"directory",
]
}
for row in updated_raw_data
]
# Verify the final stripped raw data before returning
# logger.debug(f"Sanitized raw_data for response: {stripped_raw_data}")
response_data = SpreadsheetResponse(
data=validated_model,
errors=errors,
raw_data=updated_raw_data,
raw_data=stripped_raw_data, # Final submission data
addinfo=updated_addinfo, # Metadata for frontend display
dewars_count=len(dewars),
dewars=list(dewars),
pucks_count=len(pucks),
@ -168,7 +236,6 @@ async def upload_file(file: UploadFile = File(...)):
samples=list(samples),
headers=headers,
)
logger.debug(f"Final updated_raw_data sent in response: {updated_raw_data}")
# Store row data for future use
for idx, row in enumerate(validated_model):

View File

@ -110,7 +110,10 @@ class SpreadsheetModel(BaseModel):
class SpreadsheetResponse(BaseModel):
data: List[SpreadsheetModel] # Validated data rows as SpreadsheetModel instances
errors: List[Dict[str, Any]] # Errors encountered during validation
raw_data: List[Dict[str, Any]] # Raw data extracted from the spreadsheet
raw_data: List[
Dict[str, Any]
] # perhaps this has to be changed to the actual model!
addinfo: List[Dict[str, Any]]
dewars_count: int
dewars: List[str]
pucks_count: int

View File

@ -65,37 +65,33 @@ class SampleSpreadsheetImporter:
def _clean_value(self, value, expected_type=None, column_name=None):
"""
Cleans and validates the given value based on its expected type.
Different behavior is applied to specific columns if needed.
Tracks corrections and defaults applied separately.
"""
default_applied = False
# If the value is None or empty string
if value is None or (isinstance(value, str) and value.strip() == ""):
# Handle empty or None values
if column_name == "directory":
logger.warning("Directory value is empty. Assigning default value.")
self.default_set = True # Flag to indicate a default value is set.
return "{sgPuck}/{sgPosition}" # Default directory
self.default_set = False
return None
default_applied = True
return "{sgPuck}/{sgPosition}", default_applied
# Convert to string and strip whitespaces
return None, default_applied
# Clean up the value
cleaned_value = str(value).strip()
# Handle specific column behaviors
# Handle `type` casting logic
if expected_type == str:
if expected_type == str:
if column_name is None:
logger.warning(f"Missing column_name for value: {value}")
elif column_name == "comments":
return " ".join(cleaned_value.split()) # Normalize excessive spaces
else:
# Replace spaces with underscores for general string columns
return cleaned_value.replace(" ", "_")
if column_name == "comments":
return " ".join(cleaned_value.split()), default_applied
if " " in cleaned_value:
cleaned_value = cleaned_value.replace(" ", "_")
elif expected_type in [int, float]:
try:
# Remove any invalid characters and cast to the expected type
cleaned_value = re.sub(r"[^\d.]", "", cleaned_value)
return expected_type(cleaned_value)
cleaned_value = expected_type(cleaned_value)
except (ValueError, TypeError) as e:
logger.error(
f"Failed to cast value '{value}' to {expected_type}. Error: {e}"
@ -104,8 +100,15 @@ class SampleSpreadsheetImporter:
f"Invalid value: '{value}'. Expected type: {expected_type}."
)
# Return cleaned value for other types
return cleaned_value
# Avoid marking `None -> None` as a correction
if cleaned_value == value:
default_applied = (
False # Ensure default_applied stays False for unchanged `value`.
)
if not isinstance(cleaned_value, (str, int, float)):
raise TypeError(f"Unexpected type for cleaned value: {type(cleaned_value)}")
return cleaned_value, default_applied
def import_spreadsheet(self, file):
    """Import *file* by delegating to import_spreadsheet_with_errors.

    Thin pass-through wrapper: returns exactly what
    import_spreadsheet_with_errors returns (per its final return
    statement: model, errors, raw_data, headers).
    """
    return self.import_spreadsheet_with_errors(file)
@ -200,30 +203,49 @@ class SampleSpreadsheetImporter:
logger.debug(f"Skipping empty row at index {index}")
continue
# Record raw data for later use
raw_data.append({"row_num": index + 4, "data": list(row)})
# Ensure row has the expected number of columns
if len(row) < expected_columns:
row = list(row) + [None] * (expected_columns - len(row))
# Prepare the record dynamically based on headers
# Reset flags for the current row
self.default_set = False
corrected = False
defaulted_columns = []
corrected_columns = []
record = {}
for col_idx, column_name in enumerate(headers):
original_value = row[col_idx] if col_idx < len(row) else None
expected_type = self.get_expected_type(column_name)
# Call _clean_value dynamically with the correct column_name
try:
cleaned_value = self._clean_value(
# Call `_clean_value` to clean the value and extract
# cleaning-related indicators
cleaned_value, default_applied = self._clean_value(
original_value, expected_type, column_name
)
# Check if the cleaned value is meaningfully different from the
# original value
is_corrected = cleaned_value != original_value
# Append column to corrected columns only if the value was corrected
if is_corrected:
corrected = True
corrected_columns.append(column_name)
# Track default columns separately if a default was applied
if default_applied:
corrected = True
defaulted_columns.append(column_name)
# Update the record with cleaned value (store only the cleaned part,
# not the tuple)
record[column_name] = cleaned_value
except (ValueError, TypeError) as e:
logger.error(
f"Validation error for row {index + 4},"
f" column '{column_name}': "
f"{str(e)}"
f"Validation error for row {index + 4}"
f", column '{column_name}': {str(e)}"
)
errors.append(
{
@ -234,63 +256,71 @@ class SampleSpreadsheetImporter:
}
)
# Build metadata for the row
raw_data.append(
{
"row_num": index + 4,
"data": list(row), # Original data
"default_set": bool(
defaulted_columns
), # True if any defaults were applied
"corrected": corrected, # True if any value was corrected
# List of corrected columns (if any)
"corrected_columns": corrected_columns,
# List of defaulted columns (if any)
"defaulted_columns": defaulted_columns,
}
)
# Nested processing for data_collection_parameters
record["data_collection_parameters"] = {
"directory": record.get("directory"),
"oscillation": record.get("oscillation"),
"aperture": record.get("aperture"),
"exposure": record.get("exposure"),
"totalrange": record.get("totalrange"),
"transmission": record.get("transmission"),
"dose": record.get("dose"),
"targetresolution": record.get("targetresolution"),
"datacollectiontype": record.get("datacollectiontype"),
"processingpipeline": record.get("processingpipeline"),
"spacegroupnumber": record.get("spacegroupnumber"),
"cellparameters": record.get("cellparameters"),
"rescutkey": record.get("rescutkey"),
"rescutvalue": record.get("rescutvalue"),
"userresolution": record.get("userresolution"),
"pdbid": record.get("pdbid"),
"autoprocfull": record.get("autoprocfull"),
"procfull": record.get("procfull"),
"adpenabled": record.get("adpenabled"),
"noano": record.get("noano"),
"ffcscampaign": record.get("ffcscampaign"),
"trustedhigh": record.get("trustedhigh"),
"autoprocextraparams": record.get("autoprocextraparams"),
"chiphiangles": record.get("chiphiangles"),
"directory": record.get("directory", ""),
"oscillation": record.get("oscillation", 0.0),
"aperture": record.get("aperture", None),
"exposure": record.get("exposure", 0.0),
"totalrange": record.get("totalrange", 0),
"transmission": record.get("transmission", 0),
"dose": record.get("dose", None),
"targetresolution": record.get("targetresolution", 0.0),
"datacollectiontype": record.get("datacollectiontype", None),
"processingpipeline": record.get("processingpipeline", None),
"spacegroupnumber": record.get("spacegroupnumber", None),
"cellparameters": record.get("cellparameters", None),
"rescutkey": record.get("rescutkey", None),
"rescutvalue": record.get("rescutvalue", 0.0),
"userresolution": record.get("userresolution", 0.0),
"pdbid": record.get("pdbid", ""),
"autoprocfull": record.get("autoprocfull", False),
"procfull": record.get("procfull", False),
"adpenabled": record.get("adpenabled", False),
"noano": record.get("noano", False),
"ffcscampaign": record.get("ffcscampaign", False),
"trustedhigh": record.get("trustedhigh", 0.0),
"autoprocextraparams": record.get("autoprocextraparams", None),
"chiphiangles": record.get("chiphiangles", 0.0),
}
try:
# Validate the record
validated_record = SpreadsheetModel(**record)
# Add validated record to the model
model.append(validated_record)
except ValidationError as e:
logger.error(f"Validation error in row {index + 4}: {e}")
for error in e.errors():
field_path = error["loc"]
msg = error["msg"]
if field_path[0] == "data_collection_parameters":
subfield = field_path[1]
column_index = headers.index(subfield)
else:
field = field_path[0]
column_index = headers.index(field)
column_name = headers[field_path[0]]
error_info = {
"row": index + 4,
"cell": column_index,
"value": row[column_index],
"column": column_name,
"value": row[col_idx],
"message": msg,
}
errors.append(error_info)
self.model = model
logger.info(
f"Finished processing {len(model)} records with {len(errors)} errors"
)
return self.model, errors, raw_data, headers # Include headers in the response

View File

@ -29,7 +29,8 @@ const SpreadsheetTable = ({
setRawData,
onCancel,
fileBlob,
selectedShipment
selectedShipment,
addinfo,
}) => {
const [localErrors, setLocalErrors] = useState(errors || []);
const [editingCell, setEditingCell] = useState({});
@ -38,7 +39,23 @@ const SpreadsheetTable = ({
const [showUpdateDialog, setShowUpdateDialog] = useState(false);
const [dewarsToReplace, setDewarsToReplace] = useState([]);
const [dewarsToCreate, setDewarsToCreate] = useState(new Map());
const [correctionMetadata, setCorrectionMetadata] = useState(addinfo || []); // Store addinfo
const enhancedRawData = raw_data.map((row) => {
const metadata = correctionMetadata.find((info) => info.row_num === row.row_num) || {};
// Combine original row data with metadata
return {
...row,
corrected_columns: metadata.corrected_columns || [], // Columns corrected
default_set_columns: metadata.default_set || [], // Specific columns default-assigned
};
});
useEffect(() => {
console.log("Correction Metadata:", correctionMetadata);
console.log("Addinfo:", addinfo);
}, [correctionMetadata, addinfo]);
const initialNewDewarState = {
number_of_pucks: 0,
number_of_samples: 0,
@ -461,6 +478,14 @@ const SpreadsheetTable = ({
return (
<TableContainer component={Paper}>
<Box display="flex" justifyContent="space-between" mb={2}>
<Typography variant="body2" style={{ backgroundColor: "#e6fbe6", padding: "4px 8px", borderRadius: "4px" }}>
Default Assigned (Light Green)
</Typography>
<Typography variant="body2" style={{ backgroundColor: "#fff8e1", padding: "4px 8px", borderRadius: "4px" }}>
Corrected (Light Yellow)
</Typography>
</Box>
<Table>
<TableHead>
<TableRow>
@ -483,7 +508,7 @@ const SpreadsheetTable = ({
</Button>
</Box>
<TableBody>
{raw_data.map((row, rowIndex) => (
{enhancedRawData.map((row, rowIndex) => (
<TableRow key={rowIndex}>
{headers.map((header, colIndex) => {
const key = `${row.row_num}-${header}`;
@ -491,37 +516,30 @@ const SpreadsheetTable = ({
const isInvalid = !!errorMessage;
const cellValue = row.data[colIndex];
const editingValue = editingCell[`${rowIndex}-${colIndex}`];
const isCellCorrected = row.corrected_columns?.includes(header); // Check if this column is marked as corrected
const isDefaultAssigned = colIndex === 7 && row.default_set; // Default-assigned field exists and is true
// Dynamic styles for corrected cells
const cellStyle = {
backgroundColor:
isDefaultAssigned
? "#e6fbe6" // Light green for default values
: isCellCorrected
? "#fff8e1" // Light yellow for corrected values
: "transparent", // Default for others
color: isDefaultAssigned
? "#1b5e20" // Dark green for default values
: "inherit", // Default for others
fontWeight: (isCellCorrected || isDefaultAssigned) ? "bold" : "normal", // Bold text for any change
cursor: isInvalid ? "pointer" : "default", // Mouse pointer indicates interactive error cells
};
const isCellCorrected = row.corrected_columns?.includes(header); // Use corrected metadata
const isDefaultAssigned = row.default_set_columns?.includes(header); // Dynamically match header name
return (
<TableCell
key={colIndex}
align="center"
style={cellStyle}
style={{
backgroundColor: isDefaultAssigned
? "#e6fbe6" // Light green
: isCellCorrected
? "#fff8e1" // Light yellow
: "transparent",
color: isDefaultAssigned ? "#1b5e20" : "inherit",
fontWeight: isCellCorrected || isDefaultAssigned ? "bold" : "normal",
cursor: isInvalid ? "pointer" : "default",
}}
>
<Tooltip
title={
isDefaultAssigned
? "This value was automatically assigned by the system as a default."
? "This value was automatically assigned as a default."
: isCellCorrected
? "Value corrected automatically by the system."
? `Field "${header}" was auto-corrected.`
: errorMessage || ""
}
arrow

View File

@ -180,6 +180,7 @@ const UploadDialog: React.FC<UploadDialogProps> = ({ open, onClose, selectedShip
onCancel={handleCancel}
fileBlob={fileBlob}
selectedShipment={selectedShipment}
addinfo={fileSummary.addinfo}
/>
</Modal>
)}