diff --git a/backend/app/routers/spreadsheet.py b/backend/app/routers/spreadsheet.py index f1be500..f32fa5f 100644 --- a/backend/app/routers/spreadsheet.py +++ b/backend/app/routers/spreadsheet.py @@ -40,7 +40,6 @@ async def download_template(): @router.post("/upload", response_model=SpreadsheetResponse) async def upload_file(file: UploadFile = File(...)): """Process the uploaded spreadsheet and return validation results.""" - from app.schemas import DataCollectionParameters try: logger.info(f"Received file: {file.filename}") @@ -70,25 +69,92 @@ async def upload_file(file: UploadFile = File(...)): # Construct the response model with the processed data # Update raw_data with corrected directory values + # Iterate through raw_data rows updated_raw_data = [] - for row in raw_data: - directory_value = row.get("directory") or row["data"][7] - try: - corrected_directory = DataCollectionParameters( - directory=directory_value - ).directory - corrected = ( - directory_value != corrected_directory - ) # Check if a correction was made - row["data"][7] = corrected_directory - row["default_set"] = corrected_directory == "{sgPuck}/{sgPosition}" - row["corrected"] = corrected # Mark the row as corrected or not - updated_raw_data.append(row) - except ValidationError as e: - logger.error( - f"[Row {row['row_num']}] Error validating directory: {e.errors()}" + for row in raw_data: + corrected = False # Tracks changes made in this row. + corrected_columns = [] # Stores names of columns corrected. + default_set = row.get("default_set", False) + + # Ensure raw data rows are padded to match the headers length. + if len(row["data"]) < len(headers): + padding_length = len(headers) - len(row["data"]) + logger.info( + f"Padding row {row.get('row_num')} with " + f"{padding_length} None values." ) + row["data"].extend([None] * padding_length) + + # Validate data and apply corrections column by column. + for col_index, col_name in enumerate(headers): + original_value = row["data"][col_index] + expected_type = importer.get_expected_type(col_name) + + try: + # Pass col_name explicitly to _clean_value + cleaned_value = importer._clean_value( + original_value, expected_type, col_name + ) + + # Check if a correction was applied + if cleaned_value != original_value: + corrected = True + corrected_columns.append(col_name) + + # Update "directory" metadata explicitly, if applicable + if col_name == "directory": + row["directory"] = cleaned_value + + # Update the raw data with the corrected value + row["data"][col_index] = cleaned_value + + # Log the correction + logger.info( + f"Corrected field '{col_name}' in row {row['row_num']}: " + f"Original='{original_value}', Corrected='{cleaned_value}'" + ) + + except (ValueError, TypeError) as e: + # Handle and log validation errors specific to this column + logger.error( + f"Validation failed for row " + f"{row['row_num']}, column '{col_name}': " + f"{str(e)}" + ) + errors.append( + { + "row": row["row_num"], + "column": col_name, + "value": original_value, + "message": str(e), + } + ) + + # Special case: Check and handle if "directory" was auto-corrected. + if ( + row.get("directory") + and len(row["data"]) > 7 + and row["data"][7] != row["directory"] + ): + corrected = True + corrected_columns.append("directory") + row["data"][7] = row["directory"] + + # Add correction metadata to the row if changes were made. + if corrected: + row["corrected"] = True + row["corrected_columns"] = corrected_columns + row["default_set"] = default_set + + # Add the processed row to the updated data list. + updated_raw_data.append(row) + + logger.info( + "Processing completed. " + f"Total rows processed: {len(raw_data)}, " + f"Rows corrected: {sum(1 for r in updated_raw_data if r.get('corrected'))}" + ) response_data = SpreadsheetResponse( data=validated_model, diff --git a/backend/app/sample_models.py b/backend/app/sample_models.py index 9f53f33..c63e3b5 100644 --- a/backend/app/sample_models.py +++ b/backend/app/sample_models.py @@ -88,6 +88,14 @@ class SpreadsheetModel(BaseModel): ) from e return v + # if not v: # Handles None or empty cases + # default_value = "{sgPuck}/{sgPosition}" + # logger.warning( + # f"'directory' field is empty or None. Assigning default value: " + # f"{default_value}" + # ) + # return default_value + # class TELLModel(SpreadsheetModel): # input_order: int # samplemountcount: int = 0 diff --git a/backend/app/services/spreadsheet_service.py b/backend/app/services/spreadsheet_service.py index 9e6c476..798eab2 100644 --- a/backend/app/services/spreadsheet_service.py +++ b/backend/app/services/spreadsheet_service.py @@ -1,5 +1,6 @@ import logging import openpyxl +import re from pydantic import ValidationError from typing import List, Tuple from io import BytesIO @@ -61,14 +62,40 @@ class SampleSpreadsheetImporter: # Return type if column exists, else default to str return column_type_mapping.get(column_name, str) - def _clean_value(self, value, expected_type=None): - if value is None: + def _clean_value(self, value, expected_type=None, column_name=None): + """ + Cleans and validates the given value based on its expected type. + Different behavior is applied to specific columns if needed. + """ + if value is None or (isinstance(value, str) and value.strip() == ""): + # Handle empty or None values + if column_name == "directory": + logger.warning("Directory value is empty. Assigning default value.") + self.default_set = True # Flag to indicate a default value is set. + return "{sgPuck}/{sgPosition}" # Default directory + self.default_set = False return None + + # Convert to string and strip whitespaces + cleaned_value = str(value).strip() + + # Handle specific column behaviors if expected_type == str: - return str(value).strip() - if expected_type in [float, int]: + if expected_type == str: + if column_name is None: + logger.warning(f"Missing column_name for value: {value}") + elif column_name == "comments": + return " ".join(cleaned_value.split()) # Normalize excessive spaces + + else: + # Replace spaces with underscores for general string columns + return cleaned_value.replace(" ", "_") + + elif expected_type in [int, float]: try: - return expected_type(value) + # Remove any invalid characters and cast to the expected type + cleaned_value = re.sub(r"[^\d.]", "", cleaned_value) + return expected_type(cleaned_value) except (ValueError, TypeError) as e: logger.error( f"Failed to cast value '{value}' to {expected_type}. Error: {e}" @@ -76,9 +103,9 @@ class SampleSpreadsheetImporter: raise ValueError( f"Invalid value: '{value}'. Expected type: {expected_type}." ) - # Fallback for unhandled types - logger.warning(f"Unhandled type for value: '{value}'. Returning as-is.") - return value + + # Return cleaned value for other types + return cleaned_value def import_spreadsheet(self, file): return self.import_spreadsheet_with_errors(file) @@ -180,67 +207,67 @@ class SampleSpreadsheetImporter: if len(row) < expected_columns: row = list(row) + [None] * (expected_columns - len(row)) - # Prepare the record with cleaned values - record = { - "dewarname": self._clean_value(row[0], str), - "puckname": self._clean_value(row[1], str), - "pucktype": self._clean_value(row[2], str), - "crystalname": self._clean_value(row[3], str), - "positioninpuck": self._clean_value(row[4], int), - "priority": self._clean_value(row[5], int), - "comments": self._clean_value(row[6], str), - "proteinname": self._clean_value(row[8], str), - } + # Prepare the record dynamically based on headers + record = {} + for col_idx, column_name in enumerate(headers): + original_value = row[col_idx] if col_idx < len(row) else None + expected_type = self.get_expected_type(column_name) + + # Call _clean_value dynamically with the correct column_name + try: + cleaned_value = self._clean_value( + original_value, expected_type, column_name + ) + record[column_name] = cleaned_value + except (ValueError, TypeError) as e: + logger.error( + f"Validation error for row {index + 4}," + f" column '{column_name}': " + f"{str(e)}" + ) + errors.append( + { + "row": index + 4, + "column": column_name, + "value": original_value, + "message": str(e), + } + ) + + # Nested processing for data_collection_parameters record["data_collection_parameters"] = { - "directory": self._clean_value(row[7], str), - "oscillation": self._clean_value(row[9], float), - "aperture": self._clean_value(row[10], str), - "exposure": self._clean_value(row[11], float), - "totalrange": self._clean_value(row[12], float), - "transmission": self._clean_value(row[13], int), - "dose": self._clean_value(row[14], float), - "targetresolution": self._clean_value(row[15], float), - "datacollectiontype": self._clean_value(row[16], str), - "processingpipeline": self._clean_value(row[17], str), - "spacegroupnumber": self._clean_value(row[18], int), - "cellparameters": self._clean_value(row[19], str), - "rescutkey": self._clean_value(row[20], str), - "rescutvalue": self._clean_value(row[21], str), - "userresolution": self._clean_value(row[22], str), - "pdbid": self._clean_value(row[23], str), - "autoprocfull": self._clean_value(row[24], str), - "procfull": self._clean_value(row[25], str), - "adpenabled": self._clean_value(row[26], str), - "noano": self._clean_value(row[27], str), - "ffcscampaign": self._clean_value(row[28], str), - "trustedhigh": self._clean_value(row[29], str), - "autoprocextraparams": self._clean_value(row[30], str), - "chiphiangles": self._clean_value(row[31], str), + "directory": record.get("directory"), + "oscillation": record.get("oscillation"), + "aperture": record.get("aperture"), + "exposure": record.get("exposure"), + "totalrange": record.get("totalrange"), + "transmission": record.get("transmission"), + "dose": record.get("dose"), + "targetresolution": record.get("targetresolution"), + "datacollectiontype": record.get("datacollectiontype"), + "processingpipeline": record.get("processingpipeline"), + "spacegroupnumber": record.get("spacegroupnumber"), + "cellparameters": record.get("cellparameters"), + "rescutkey": record.get("rescutkey"), + "rescutvalue": record.get("rescutvalue"), + "userresolution": record.get("userresolution"), + "pdbid": record.get("pdbid"), + "autoprocfull": record.get("autoprocfull"), + "procfull": record.get("procfull"), + "adpenabled": record.get("adpenabled"), + "noano": record.get("noano"), + "ffcscampaign": record.get("ffcscampaign"), + "trustedhigh": record.get("trustedhigh"), + "autoprocextraparams": record.get("autoprocextraparams"), + "chiphiangles": record.get("chiphiangles"), } try: # Validate the record validated_record = SpreadsheetModel(**record) - # Get the corrected `directory` - corrected_directory = ( - validated_record.data_collection_parameters.directory - ) - - # Update `raw_data` to reflect the corrected value - raw_data[-1]["data"][ - 7 - ] = corrected_directory # Replace directory in raw data - raw_data[-1][ - "directory" - ] = corrected_directory # Add a top-level "directory" key - raw_data[-1]["default_set"] = ( - corrected_directory == "{sgPuck}/{sgPosition}" - ) - # Add validated record to the model model.append(validated_record) - except ValidationError as e: logger.error(f"Validation error in row {index + 4}: {e}") for error in e.errors(): diff --git a/frontend/src/components/SpreadsheetTable.tsx b/frontend/src/components/SpreadsheetTable.tsx index 152e4c9..817ae5c 100644 --- a/frontend/src/components/SpreadsheetTable.tsx +++ b/frontend/src/components/SpreadsheetTable.tsx @@ -488,41 +488,48 @@ const SpreadsheetTable = ({ {headers.map((header, colIndex) => { const key = `${row.row_num}-${header}`; const errorMessage = errorMap.get(key); - const isInvalid = !!errorMap.get(`${row.row_num}-${headers[colIndex]}`); + const isInvalid = !!errorMessage; const cellValue = row.data[colIndex]; const editingValue = editingCell[`${rowIndex}-${colIndex}`]; - const isReadonly = !isInvalid && editingCell[`${rowIndex}-${colIndex}`] === undefined; - const isCorrected = colIndex === 7 && row.corrected; // Corrected field exists and is true + const isCellCorrected = row.corrected_columns?.includes(header); // Check if this column is marked as corrected const isDefaultAssigned = colIndex === 7 && row.default_set; // Default-assigned field exists and is true + + // Dynamic styles for corrected cells + const cellStyle = { + backgroundColor: + isDefaultAssigned + ? "#e6fbe6" // Light green for default values + : isCellCorrected + ? "#fff8e1" // Light yellow for corrected values + : "transparent", // Default for others + color: isDefaultAssigned + ? "#1b5e20" // Dark green for default values + : "inherit", // Default for others + fontWeight: (isCellCorrected || isDefaultAssigned) ? "bold" : "normal", // Bold text for any change + cursor: isInvalid ? "pointer" : "default", // Mouse pointer indicates interactive error cells + }; + return ( {isInvalid ? ( setEditingCell({ ...editingCell, @@ -534,10 +541,9 @@ const SpreadsheetTable = ({ fullWidth variant="outlined" size="small" - disabled={!isInvalid} /> ) : ( - cellValue // This should reflect the updated 'raw_data' + cellValue )} diff --git a/frontend/src/components/UploadDialog.tsx b/frontend/src/components/UploadDialog.tsx index 8e8690e..925053b 100644 --- a/frontend/src/components/UploadDialog.tsx +++ b/frontend/src/components/UploadDialog.tsx @@ -69,22 +69,26 @@ const UploadDialog: React.FC = ({ open, onClose, selectedShip try { const response = await SpreadsheetService.uploadFileUploadPost(formData); - const { headers, raw_data, errors, dewars_count, pucks_count, samples_count } = response; setFileSummary({ - data: raw_data, - errors: errors, - raw_data: raw_data, - headers: headers, - dewars_count: dewars_count, - pucks_count: pucks_count, - samples_count: samples_count, + ...response, }); - - setIsLoading(false); setIsModalOpen(true); - } catch (error) { - setUploadError('Failed to upload file. Please try again.'); + } catch (error: any) { + if (error.response?.status === 400 && error.response.data?.errors) { + // Backend provided detailed error messages + const detailedErrors = error.response.data.errors; + setUploadError( + "Validation errors detected in the file. Please review and correct the following issues:\n" + + detailedErrors + .map((err: any) => `Row ${err.row}: ${err.errors.map((e: any) => e.msg).join(", ")}`) + .join("\n") + ); + } else { + // Fallback to generic error message + setUploadError('Failed to upload file. Please try again.'); + } + } finally { setIsLoading(false); } };