diff --git a/backend/app/routers/spreadsheet.py b/backend/app/routers/spreadsheet.py
index f1be500..f32fa5f 100644
--- a/backend/app/routers/spreadsheet.py
+++ b/backend/app/routers/spreadsheet.py
@@ -40,7 +40,6 @@ async def download_template():
@router.post("/upload", response_model=SpreadsheetResponse)
async def upload_file(file: UploadFile = File(...)):
"""Process the uploaded spreadsheet and return validation results."""
- from app.schemas import DataCollectionParameters
try:
logger.info(f"Received file: {file.filename}")
@@ -70,25 +69,92 @@ async def upload_file(file: UploadFile = File(...)):
# Construct the response model with the processed data
# Update raw_data with corrected directory values
+ # Iterate through raw_data rows
updated_raw_data = []
- for row in raw_data:
- directory_value = row.get("directory") or row["data"][7]
- try:
- corrected_directory = DataCollectionParameters(
- directory=directory_value
- ).directory
- corrected = (
- directory_value != corrected_directory
- ) # Check if a correction was made
- row["data"][7] = corrected_directory
- row["default_set"] = corrected_directory == "{sgPuck}/{sgPosition}"
- row["corrected"] = corrected # Mark the row as corrected or not
- updated_raw_data.append(row)
- except ValidationError as e:
- logger.error(
- f"[Row {row['row_num']}] Error validating directory: {e.errors()}"
+ for row in raw_data:
+ corrected = False # Tracks changes made in this row.
+ corrected_columns = [] # Stores names of columns corrected.
+ default_set = row.get("default_set", False)
+
+ # Ensure raw data rows are padded to match the headers length.
+ if len(row["data"]) < len(headers):
+ padding_length = len(headers) - len(row["data"])
+ logger.info(
+ f"Padding row {row.get('row_num')} with "
+ f"{padding_length} None values."
)
+ row["data"].extend([None] * padding_length)
+
+ # Validate data and apply corrections column by column.
+ for col_index, col_name in enumerate(headers):
+ original_value = row["data"][col_index]
+ expected_type = importer.get_expected_type(col_name)
+
+ try:
+ # Pass col_name explicitly to _clean_value
+ cleaned_value = importer._clean_value(
+ original_value, expected_type, col_name
+ )
+
+ # Check if a correction was applied
+ if cleaned_value != original_value:
+ corrected = True
+ corrected_columns.append(col_name)
+
+ # Update "directory" metadata explicitly, if applicable
+ if col_name == "directory":
+ row["directory"] = cleaned_value
+
+ # Update the raw data with the corrected value
+ row["data"][col_index] = cleaned_value
+
+ # Log the correction
+ logger.info(
+ f"Corrected field '{col_name}' in row {row['row_num']}: "
+ f"Original='{original_value}', Corrected='{cleaned_value}'"
+ )
+
+ except (ValueError, TypeError) as e:
+ # Handle and log validation errors specific to this column
+ logger.error(
+ f"Validation failed for row "
+ f"{row['row_num']}, column '{col_name}': "
+ f"{str(e)}"
+ )
+ errors.append(
+ {
+ "row": row["row_num"],
+ "column": col_name,
+ "value": original_value,
+ "message": str(e),
+ }
+ )
+
+ # Special case: Check and handle if "directory" was auto-corrected.
+            # Use the actual header position of "directory" instead of a
+            # hard-coded column index 7, which silently breaks if the
+            # template's column order ever changes.
+            dir_idx = headers.index("directory") if "directory" in headers else 7
+            if row.get("directory") and len(row["data"]) > dir_idx and row["data"][dir_idx] != row["directory"]:
+                corrected = True
+                corrected_columns.append("directory")
+                row["data"][dir_idx] = row["directory"]
+
+ # Add correction metadata to the row if changes were made.
+ if corrected:
+ row["corrected"] = True
+ row["corrected_columns"] = corrected_columns
+ row["default_set"] = default_set
+
+ # Add the processed row to the updated data list.
+ updated_raw_data.append(row)
+
+ logger.info(
+ "Processing completed. "
+ f"Total rows processed: {len(raw_data)}, "
+ f"Rows corrected: {sum(1 for r in updated_raw_data if r.get('corrected'))}"
+ )
response_data = SpreadsheetResponse(
data=validated_model,
diff --git a/backend/app/sample_models.py b/backend/app/sample_models.py
index 9f53f33..c63e3b5 100644
--- a/backend/app/sample_models.py
+++ b/backend/app/sample_models.py
@@ -88,6 +88,14 @@ class SpreadsheetModel(BaseModel):
) from e
return v
+ # if not v: # Handles None or empty cases
+ # default_value = "{sgPuck}/{sgPosition}"
+ # logger.warning(
+ # f"'directory' field is empty or None. Assigning default value: "
+ # f"{default_value}"
+ # )
+ # return default_value
+
# class TELLModel(SpreadsheetModel):
# input_order: int
# samplemountcount: int = 0
diff --git a/backend/app/services/spreadsheet_service.py b/backend/app/services/spreadsheet_service.py
index 9e6c476..798eab2 100644
--- a/backend/app/services/spreadsheet_service.py
+++ b/backend/app/services/spreadsheet_service.py
@@ -1,5 +1,6 @@
import logging
import openpyxl
+import re
from pydantic import ValidationError
from typing import List, Tuple
from io import BytesIO
@@ -61,14 +62,40 @@ class SampleSpreadsheetImporter:
# Return type if column exists, else default to str
return column_type_mapping.get(column_name, str)
- def _clean_value(self, value, expected_type=None):
- if value is None:
+ def _clean_value(self, value, expected_type=None, column_name=None):
+ """
+ Cleans and validates the given value based on its expected type.
+ Different behavior is applied to specific columns if needed.
+ """
+ if value is None or (isinstance(value, str) and value.strip() == ""):
+ # Handle empty or None values
+ if column_name == "directory":
+ logger.warning("Directory value is empty. Assigning default value.")
+ self.default_set = True # Flag to indicate a default value is set.
+ return "{sgPuck}/{sgPosition}" # Default directory
+        self.default_set = False  # NOTE(review): this also clobbers the flag set by an earlier empty "directory" cell in the same row — confirm intended
return None
+
+ # Convert to string and strip whitespaces
+ cleaned_value = str(value).strip()
+
+ # Handle specific column behaviors
if expected_type == str:
- return str(value).strip()
- if expected_type in [float, int]:
+            # Redundant duplicate "if expected_type == str" check removed.
+            if column_name is None:
+                logger.warning(f"Missing column_name for value: {value}")
+            elif column_name == "comments":
+                # Normalize excessive internal whitespace in comments
+                return " ".join(cleaned_value.split())
+            else:
+                # Replace spaces with underscores for general string columns
+                return cleaned_value.replace(" ", "_")
+
+ elif expected_type in [int, float]:
try:
- return expected_type(value)
+            # Strip stray characters but keep digits, decimal point and sign
+            cleaned_value = re.sub(r"[^\d.\-]", "", cleaned_value)
+            return expected_type(cleaned_value)
except (ValueError, TypeError) as e:
logger.error(
f"Failed to cast value '{value}' to {expected_type}. Error: {e}"
@@ -76,9 +103,9 @@ class SampleSpreadsheetImporter:
raise ValueError(
f"Invalid value: '{value}'. Expected type: {expected_type}."
)
- # Fallback for unhandled types
- logger.warning(f"Unhandled type for value: '{value}'. Returning as-is.")
- return value
+
+ # Return cleaned value for other types
+ return cleaned_value
def import_spreadsheet(self, file):
return self.import_spreadsheet_with_errors(file)
@@ -180,67 +207,67 @@ class SampleSpreadsheetImporter:
if len(row) < expected_columns:
row = list(row) + [None] * (expected_columns - len(row))
- # Prepare the record with cleaned values
- record = {
- "dewarname": self._clean_value(row[0], str),
- "puckname": self._clean_value(row[1], str),
- "pucktype": self._clean_value(row[2], str),
- "crystalname": self._clean_value(row[3], str),
- "positioninpuck": self._clean_value(row[4], int),
- "priority": self._clean_value(row[5], int),
- "comments": self._clean_value(row[6], str),
- "proteinname": self._clean_value(row[8], str),
- }
+ # Prepare the record dynamically based on headers
+ record = {}
+ for col_idx, column_name in enumerate(headers):
+ original_value = row[col_idx] if col_idx < len(row) else None
+ expected_type = self.get_expected_type(column_name)
+
+ # Call _clean_value dynamically with the correct column_name
+ try:
+ cleaned_value = self._clean_value(
+ original_value, expected_type, column_name
+ )
+ record[column_name] = cleaned_value
+ except (ValueError, TypeError) as e:
+ logger.error(
+ f"Validation error for row {index + 4},"
+ f" column '{column_name}': "
+ f"{str(e)}"
+ )
+ errors.append(
+ {
+ "row": index + 4,
+ "column": column_name,
+ "value": original_value,
+ "message": str(e),
+ }
+ )
+
+ # Nested processing for data_collection_parameters
record["data_collection_parameters"] = {
- "directory": self._clean_value(row[7], str),
- "oscillation": self._clean_value(row[9], float),
- "aperture": self._clean_value(row[10], str),
- "exposure": self._clean_value(row[11], float),
- "totalrange": self._clean_value(row[12], float),
- "transmission": self._clean_value(row[13], int),
- "dose": self._clean_value(row[14], float),
- "targetresolution": self._clean_value(row[15], float),
- "datacollectiontype": self._clean_value(row[16], str),
- "processingpipeline": self._clean_value(row[17], str),
- "spacegroupnumber": self._clean_value(row[18], int),
- "cellparameters": self._clean_value(row[19], str),
- "rescutkey": self._clean_value(row[20], str),
- "rescutvalue": self._clean_value(row[21], str),
- "userresolution": self._clean_value(row[22], str),
- "pdbid": self._clean_value(row[23], str),
- "autoprocfull": self._clean_value(row[24], str),
- "procfull": self._clean_value(row[25], str),
- "adpenabled": self._clean_value(row[26], str),
- "noano": self._clean_value(row[27], str),
- "ffcscampaign": self._clean_value(row[28], str),
- "trustedhigh": self._clean_value(row[29], str),
- "autoprocextraparams": self._clean_value(row[30], str),
- "chiphiangles": self._clean_value(row[31], str),
+ "directory": record.get("directory"),
+ "oscillation": record.get("oscillation"),
+ "aperture": record.get("aperture"),
+ "exposure": record.get("exposure"),
+ "totalrange": record.get("totalrange"),
+ "transmission": record.get("transmission"),
+ "dose": record.get("dose"),
+ "targetresolution": record.get("targetresolution"),
+ "datacollectiontype": record.get("datacollectiontype"),
+ "processingpipeline": record.get("processingpipeline"),
+ "spacegroupnumber": record.get("spacegroupnumber"),
+ "cellparameters": record.get("cellparameters"),
+ "rescutkey": record.get("rescutkey"),
+ "rescutvalue": record.get("rescutvalue"),
+ "userresolution": record.get("userresolution"),
+ "pdbid": record.get("pdbid"),
+ "autoprocfull": record.get("autoprocfull"),
+ "procfull": record.get("procfull"),
+ "adpenabled": record.get("adpenabled"),
+ "noano": record.get("noano"),
+ "ffcscampaign": record.get("ffcscampaign"),
+ "trustedhigh": record.get("trustedhigh"),
+ "autoprocextraparams": record.get("autoprocextraparams"),
+ "chiphiangles": record.get("chiphiangles"),
}
try:
# Validate the record
validated_record = SpreadsheetModel(**record)
- # Get the corrected `directory`
- corrected_directory = (
- validated_record.data_collection_parameters.directory
- )
-
- # Update `raw_data` to reflect the corrected value
- raw_data[-1]["data"][
- 7
- ] = corrected_directory # Replace directory in raw data
- raw_data[-1][
- "directory"
- ] = corrected_directory # Add a top-level "directory" key
- raw_data[-1]["default_set"] = (
- corrected_directory == "{sgPuck}/{sgPosition}"
- )
-
# Add validated record to the model
model.append(validated_record)
-
except ValidationError as e:
logger.error(f"Validation error in row {index + 4}: {e}")
for error in e.errors():
diff --git a/frontend/src/components/SpreadsheetTable.tsx b/frontend/src/components/SpreadsheetTable.tsx
index 152e4c9..817ae5c 100644
--- a/frontend/src/components/SpreadsheetTable.tsx
+++ b/frontend/src/components/SpreadsheetTable.tsx
@@ -488,41 +488,48 @@ const SpreadsheetTable = ({
{headers.map((header, colIndex) => {
const key = `${row.row_num}-${header}`;
const errorMessage = errorMap.get(key);
- const isInvalid = !!errorMap.get(`${row.row_num}-${headers[colIndex]}`);
+ const isInvalid = !!errorMessage;
const cellValue = row.data[colIndex];
const editingValue = editingCell[`${rowIndex}-${colIndex}`];
- const isReadonly = !isInvalid && editingCell[`${rowIndex}-${colIndex}`] === undefined;
- const isCorrected = colIndex === 7 && row.corrected; // Corrected field exists and is true
+ const isCellCorrected = row.corrected_columns?.includes(header); // Check if this column is marked as corrected
const isDefaultAssigned = colIndex === 7 && row.default_set; // Default-assigned field exists and is true
+
+ // Dynamic styles for corrected cells
+ const cellStyle = {
+ backgroundColor:
+ isDefaultAssigned
+ ? "#e6fbe6" // Light green for default values
+ : isCellCorrected
+ ? "#fff8e1" // Light yellow for corrected values
+ : "transparent", // Default for others
+ color: isDefaultAssigned
+ ? "#1b5e20" // Dark green for default values
+ : "inherit", // Default for others
+ fontWeight: (isCellCorrected || isDefaultAssigned) ? "bold" : "normal", // Bold text for any change
+ cursor: isInvalid ? "pointer" : "default", // Mouse pointer indicates interactive error cells
+ };
+
return (
{isInvalid ? (
setEditingCell({
...editingCell,
@@ -534,10 +541,9 @@ const SpreadsheetTable = ({
fullWidth
variant="outlined"
size="small"
- disabled={!isInvalid}
/>
) : (
- cellValue // This should reflect the updated 'raw_data'
+ cellValue
)}
diff --git a/frontend/src/components/UploadDialog.tsx b/frontend/src/components/UploadDialog.tsx
index 8e8690e..925053b 100644
--- a/frontend/src/components/UploadDialog.tsx
+++ b/frontend/src/components/UploadDialog.tsx
@@ -69,22 +69,26 @@ const UploadDialog: React.FC = ({ open, onClose, selectedShip
try {
const response = await SpreadsheetService.uploadFileUploadPost(formData);
- const { headers, raw_data, errors, dewars_count, pucks_count, samples_count } = response;
setFileSummary({
- data: raw_data,
- errors: errors,
- raw_data: raw_data,
- headers: headers,
- dewars_count: dewars_count,
- pucks_count: pucks_count,
- samples_count: samples_count,
+ ...response,
});
-
- setIsLoading(false);
setIsModalOpen(true);
- } catch (error) {
- setUploadError('Failed to upload file. Please try again.');
+ } catch (error: any) {
+ if (error.response?.status === 400 && error.response.data?.errors) {
+ // Backend provided detailed error messages
+ const detailedErrors = error.response.data.errors;
+ setUploadError(
+ "Validation errors detected in the file. Please review and correct the following issues:\n" +
+ detailedErrors
+              .map((err: any) => `Row ${err.row}: ${(err.errors ?? [err]).map((e: any) => e.msg ?? e.message).join(", ")}`)
+ .join("\n")
+ );
+ } else {
+ // Fallback to generic error message
+ setUploadError('Failed to upload file. Please try again.');
+ }
+ } finally {
setIsLoading(false);
}
};