# aaredb/backend/app/routers/spreadsheet.py
#
# Commit f6c19cc4da (GotthardG, 2025-01-13 21:55:15 +01:00):
# "Refactor spreadsheet processing to improve validation logic"
# Enhanced value cleaning and validation for spreadsheet data with dynamic
# handling of columns and corrections. Improved feedback for users with
# detailed error messages and visual indicators for corrected or defaulted
# values. Simplified backend and frontend logic for better maintainability
# and usability.

from app.sample_models import SpreadsheetResponse
from app.schemas import DataCollectionParameters
from fastapi import APIRouter, UploadFile, File, HTTPException
import logging
from app.services.spreadsheet_service import (
SampleSpreadsheetImporter,
SpreadsheetImportError,
)
from fastapi.responses import FileResponse
import os
from pydantic import ValidationError # Import ValidationError here
from app.row_storage import row_storage # Import the RowStorage instance
router = APIRouter()
logger = logging.getLogger(__name__)
importer = (
SampleSpreadsheetImporter()
) # assuming this is a singleton or manageable instance
@router.get("/download-template", response_class=FileResponse)
async def download_template():
    """Return the bundled sample-spreadsheet template as a download.

    Raises:
        HTTPException: 404 when the template file is missing on disk.
    """
    template_path = os.path.join(
        os.path.dirname(__file__),
        "../../downloads/V7_TELLSamplesSpreadsheetTemplate.xlsx",
    )
    if not os.path.exists(template_path):
        raise HTTPException(status_code=404, detail="Template file not found.")
    # MIME type registered for .xlsx (Office Open XML) workbooks.
    xlsx_media_type = (
        "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
    )
    return FileResponse(
        template_path,
        filename="template.xlsx",
        media_type=xlsx_media_type,
    )
def _process_row(row, headers, errors):
    """Pad, clean and annotate one raw spreadsheet row in place.

    Pads ``row["data"]`` to the header length, runs each cell through the
    importer's cleaning logic, records per-column validation failures into
    *errors*, and — when any value was corrected — attaches ``corrected``,
    ``corrected_columns`` and ``default_set`` metadata to the row.

    Returns the (mutated) row for convenience.
    """
    corrected = False  # Tracks whether any cell in this row was changed.
    corrected_columns = []  # Names of the columns that were corrected.
    default_set = row.get("default_set", False)

    # Ensure the raw data row is padded to match the headers length.
    if len(row["data"]) < len(headers):
        padding_length = len(headers) - len(row["data"])
        logger.info(
            f"Padding row {row.get('row_num')} with "
            f"{padding_length} None values."
        )
        row["data"].extend([None] * padding_length)

    # Validate data and apply corrections column by column.
    for col_index, col_name in enumerate(headers):
        original_value = row["data"][col_index]
        expected_type = importer.get_expected_type(col_name)
        try:
            # Pass col_name explicitly so column-specific cleaning applies.
            cleaned_value = importer._clean_value(
                original_value, expected_type, col_name
            )
            if cleaned_value != original_value:
                corrected = True
                corrected_columns.append(col_name)
                # Update "directory" metadata explicitly, if applicable.
                if col_name == "directory":
                    row["directory"] = cleaned_value
                row["data"][col_index] = cleaned_value
                logger.info(
                    f"Corrected field '{col_name}' in row {row['row_num']}: "
                    f"Original='{original_value}', Corrected='{cleaned_value}'"
                )
        except (ValueError, TypeError) as e:
            # Record the failure for this cell; other cells still processed.
            logger.error(
                f"Validation failed for row "
                f"{row['row_num']}, column '{col_name}': "
                f"{str(e)}"
            )
            errors.append(
                {
                    "row": row["row_num"],
                    "column": col_name,
                    "value": original_value,
                    "message": str(e),
                }
            )

    # Special case: keep data column 7 in sync with an auto-corrected
    # "directory" value (column 7 is assumed to be the directory column —
    # TODO confirm against the template layout).
    if (
        row.get("directory")
        and len(row["data"]) > 7
        and row["data"][7] != row["directory"]
    ):
        corrected = True
        corrected_columns.append("directory")
        row["data"][7] = row["directory"]

    # Attach correction metadata only when something actually changed.
    if corrected:
        row["corrected"] = True
        row["corrected_columns"] = corrected_columns
        row["default_set"] = default_set

    return row


@router.post("/upload", response_model=SpreadsheetResponse)
async def upload_file(file: UploadFile = File(...)):
    """Process an uploaded .xlsx spreadsheet and return validation results.

    Validates the file extension, imports and cleans every row, collects
    per-cell errors and correction metadata, stores validated rows for
    later cell-level edits, and returns a :class:`SpreadsheetResponse`.

    Raises:
        HTTPException: 400 for a non-.xlsx upload or an import failure,
            500 for any unexpected error.
    """
    try:
        logger.info(f"Received file: {file.filename}")

        # Validate file format before doing any work.
        if not file.filename.endswith(".xlsx"):
            logger.error("Invalid file format")
            raise HTTPException(
                status_code=400,
                detail="Invalid file format. Please upload an .xlsx file.",
            )

        # Process the spreadsheet via the shared importer instance.
        (
            validated_model,
            errors,
            raw_data,
            headers,
        ) = importer.import_spreadsheet_with_errors(file)

        # Extract unique values for dewars, pucks, and samples.
        dewars = {sample.dewarname for sample in validated_model if sample.dewarname}
        pucks = {sample.puckname for sample in validated_model if sample.puckname}
        samples = {
            sample.crystalname for sample in validated_model if sample.crystalname
        }

        # Clean every raw row and collect correction metadata.
        updated_raw_data = [_process_row(row, headers, errors) for row in raw_data]

        logger.info(
            "Processing completed. "
            f"Total rows processed: {len(raw_data)}, "
            f"Rows corrected: {sum(1 for r in updated_raw_data if r.get('corrected'))}"
        )

        response_data = SpreadsheetResponse(
            data=validated_model,
            errors=errors,
            raw_data=updated_raw_data,
            dewars_count=len(dewars),
            dewars=list(dewars),
            pucks_count=len(pucks),
            pucks=list(pucks),
            samples_count=len(samples),
            samples=list(samples),
            headers=headers,
        )
        logger.debug(f"Final updated_raw_data sent in response: {updated_raw_data}")

        # Store row data for future cell-level validation requests.
        for idx, row in enumerate(validated_model):
            row_num = idx + 4  # Spreadsheet data starts at row 4 — TODO confirm.
            # model_dump() is the pydantic-v2 API (matches usage elsewhere
            # in this module); .dict() is the deprecated v1 spelling.
            row_storage.set_row(row_num, row.model_dump())

        logger.info(
            f"Returning response with {len(validated_model)} "
            f"records and {len(errors)} errors."
        )
        return response_data

    except HTTPException:
        # Re-raise deliberate HTTP errors (e.g. the 400 above) unchanged;
        # otherwise the generic handler below would convert them into 500s.
        raise
    except SpreadsheetImportError as e:
        logger.error(f"Spreadsheet import error: {str(e)}")
        raise HTTPException(
            status_code=400, detail=f"Error processing spreadsheet: {str(e)}"
        )
    except Exception as e:
        logger.error(f"Unexpected error occurred: {str(e)}")
        raise HTTPException(
            status_code=500,
            detail=f"Failed to upload file. Please try again. Error: {str(e)}",
        )
@router.post("/validate-cell")
async def validate_cell(data: dict):
    """Validate and clean a single edited spreadsheet cell.

    Expects ``data`` with keys ``row`` (stored row number), ``column``
    (field name) and ``value`` (the edited value). Returns a dict with
    ``is_valid``, ``message`` and — on success — ``corrected_value``.

    Raises:
        HTTPException: 404 when the row is unknown, 400 on a value/type
            error, 500 on unexpected failures.
    """
    row_num = data.get("row")
    col_name = data.get("column")
    value = data.get("value")
    logger.info(f"Validating cell row {row_num}, column {col_name}, value {value}")

    # Retrieve the full data for the row.
    current_row_data = row_storage.get_row(row_num)
    if not current_row_data:
        logger.error(f"No data found for row {row_num}")
        # Explicitly return a 404 error if the row is missing.
        raise HTTPException(status_code=404, detail=f"No data found for row {row_num}")

    try:
        # Determine the expected type for the column.
        expected_type = importer.get_expected_type(col_name)

        # Clean and validate the specific field. Pass col_name explicitly
        # so column-specific corrections match the upload path's behavior.
        cleaned_value = importer._clean_value(value, expected_type, col_name)
        current_row_data[col_name] = cleaned_value  # Update raw data

        # Nested parameter handling for `DataCollectionParameters`.
        if col_name in DataCollectionParameters.model_fields:
            nested_data = current_row_data.get("data_collection_parameters")
            if isinstance(nested_data, dict):
                # Convert dict to Pydantic model.
                current_nested = DataCollectionParameters(**nested_data)
            elif isinstance(nested_data, DataCollectionParameters):
                # Already a valid model.
                current_nested = nested_data
            else:
                current_nested = DataCollectionParameters()

            # Update the nested model's field and reapply validation.
            nested_params = current_nested.model_dump()
            nested_params[col_name] = cleaned_value
            current_row_data["data_collection_parameters"] = DataCollectionParameters(
                **nested_params
            )

        return {"is_valid": True, "message": "", "corrected_value": cleaned_value}
    except ValidationError as e:
        # Surface the error for the edited column if pydantic reported one.
        logger.error(f"Validation error details: {e.errors()}")
        column_error = next(
            (
                err
                for err in e.errors()
                # Guard: "loc" may be absent or empty; indexing it blindly
                # would raise and mask the real validation failure.
                if err.get("loc") and err["loc"][0] == col_name
            ),
            None,
        )
        message = column_error["msg"] if column_error else "Validation failed."
        logger.error(
            f"Validation failed for row {row_num}, column {col_name}. Error: {message}"
        )
        return {"is_valid": False, "message": message}
    except ValueError as e:
        # Handle expected typecasting or value errors specifically.
        error_message = str(e)
        logger.warning(
            f"Failed to validate value '{value}' for row "
            f"{row_num}, column {col_name}: {error_message}"
        )
        raise HTTPException(
            status_code=400,
            detail=f"Validation failed for row "
            f"{row_num}, column {col_name}: {error_message}",
        )
    except Exception as e:
        # Log unexpected issues and re-raise HTTP 500.
        logger.error(f"Unexpected error during validation: {str(e)}")
        raise HTTPException(status_code=500, detail=f"Error validating cell: {str(e)}")