aaredb/backend/app/routers/spreadsheet.py
GotthardG 35369fd13c Add column type mapping and enhance validation
Introduced a backend mapping for column expected types, improving validation and error handling. Updated UI to highlight default and corrected values, with additional detailed validation for data collection parameters.
2025-01-07 16:07:13 +01:00

207 lines
7.6 KiB
Python

from app.sample_models import SpreadsheetResponse
from app.schemas import DataCollectionParameters
from fastapi import APIRouter, UploadFile, File, HTTPException
import logging
from app.services.spreadsheet_service import (
SampleSpreadsheetImporter,
SpreadsheetImportError,
)
from fastapi.responses import FileResponse
import os
from pydantic import ValidationError # Import ValidationError here
from app.row_storage import row_storage # Import the RowStorage instance
# Module-level singletons shared by all endpoints in this router.
router = APIRouter()
logger = logging.getLogger(__name__)
importer = (
    SampleSpreadsheetImporter()
)  # assuming this is a singleton or manageable instance
@router.get("/download-template", response_class=FileResponse)
async def download_template():
    """Serve the Excel template used for sample spreadsheet uploads.

    Raises:
        HTTPException: 404 when the template file is absent on disk.
    """
    template_path = os.path.join(
        os.path.dirname(__file__),
        "../../downloads/V7_TELLSamplesSpreadsheetTemplate.xlsx",
    )
    if not os.path.exists(template_path):
        raise HTTPException(status_code=404, detail="Template file not found.")
    # Serve under a generic download name with the official .xlsx MIME type.
    return FileResponse(
        template_path,
        filename="template.xlsx",
        media_type="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
    )
@router.post("/upload", response_model=SpreadsheetResponse)
async def upload_file(file: UploadFile = File(...)):
    """Process an uploaded .xlsx sample spreadsheet and return validation results.

    Steps:
      1. Reject non-.xlsx uploads with a 400.
      2. Run the importer, which yields validated rows, per-row errors,
         the raw cell data, and the header row.
      3. Re-validate the ``directory`` column per raw row so the UI can
         highlight defaulted and corrected values.
      4. Cache each validated row in ``row_storage`` for later cell edits.

    Raises:
        HTTPException: 400 for a bad file format or import error,
            500 for any unexpected failure.
    """
    try:
        logger.info(f"Received file: {file.filename}")

        # Validate file format
        if not file.filename.endswith(".xlsx"):
            logger.error("Invalid file format")
            raise HTTPException(
                status_code=400,
                detail="Invalid file format. Please upload an .xlsx file.",
            )

        # Initialize the importer and process the spreadsheet
        (
            validated_model,
            errors,
            raw_data,
            headers,
        ) = importer.import_spreadsheet_with_errors(file)

        # Unique dewar/puck/sample identifiers across all validated rows.
        dewars = {sample.dewarname for sample in validated_model if sample.dewarname}
        pucks = {sample.puckname for sample in validated_model if sample.puckname}
        samples = {
            sample.crystalname for sample in validated_model if sample.crystalname
        }

        # Update raw_data with corrected directory values.
        # NOTE(review): the directory is assumed to live at index 7 of each
        # row's "data" list — confirm against the importer's column order.
        updated_raw_data = []
        for row in raw_data:
            directory_value = row.get("directory") or row["data"][7]
            try:
                corrected_directory = DataCollectionParameters(
                    directory=directory_value
                ).directory
                # Check if a correction was made
                corrected = directory_value != corrected_directory
                row["data"][7] = corrected_directory
                row["default_set"] = corrected_directory == "{sgPuck}/{sgPosition}"
                row["corrected"] = corrected  # Mark the row as corrected or not
                updated_raw_data.append(row)
            except ValidationError as e:
                # NOTE(review): rows whose directory fails validation are
                # silently dropped from raw_data — confirm this is intended.
                logger.error(
                    f"[Row {row['row_num']}] Error validating directory: {e.errors()}"
                )

        # Construct the response model with the processed data
        response_data = SpreadsheetResponse(
            data=validated_model,
            errors=errors,
            raw_data=updated_raw_data,
            dewars_count=len(dewars),
            dewars=list(dewars),
            pucks_count=len(pucks),
            pucks=list(pucks),
            samples_count=len(samples),
            samples=list(samples),
            headers=headers,
        )
        logger.debug(f"Final updated_raw_data sent in response: {updated_raw_data}")

        # Store row data for future cell-level validation requests.
        for idx, row in enumerate(validated_model):
            row_num = idx + 4  # Spreadsheet data rows start at row 4.
            row_storage.set_row(row_num, row.model_dump())

        logger.info(
            f"Returning response with {len(validated_model)} "
            f"records and {len(errors)} errors."
        )
        return response_data

    except SpreadsheetImportError as e:
        logger.error(f"Spreadsheet import error: {str(e)}")
        raise HTTPException(
            status_code=400, detail=f"Error processing spreadsheet: {str(e)}"
        )
    except HTTPException:
        # Re-raise deliberate HTTP errors (e.g. the 400 for a bad extension)
        # unchanged; the generic handler below would otherwise mask them as 500s.
        raise
    except Exception as e:
        logger.error(f"Unexpected error occurred: {str(e)}")
        raise HTTPException(
            status_code=500,
            detail=f"Failed to upload file. Please try again. Error: {str(e)}",
        )
@router.post("/validate-cell")
async def validate_cell(data: dict):
    """Validate a single edited spreadsheet cell against its expected type.

    Expects ``data`` with keys ``row`` (int), ``column`` (str) and ``value``.
    On success returns ``{"is_valid": True, "message": "", "corrected_value": ...}``;
    on a Pydantic validation failure returns ``{"is_valid": False, "message": ...}``.

    Raises:
        HTTPException: 404 if the row is unknown, 400 on typecasting errors,
            500 on unexpected failures.
    """
    row_num = data.get("row")
    col_name = data.get("column")
    value = data.get("value")

    logger.info(f"Validating cell row {row_num}, column {col_name}, value {value}")

    # Retrieve the full data for the row
    current_row_data = row_storage.get_row(row_num)
    if not current_row_data:
        logger.error(f"No data found for row {row_num}")
        # Explicitly return a 404 error if the row is missing
        raise HTTPException(status_code=404, detail=f"No data found for row {row_num}")

    try:
        # Determine the expected type for the column
        expected_type = importer.get_expected_type(col_name)

        # Clean and validate the specific field
        cleaned_value = importer._clean_value(value, expected_type)
        current_row_data[col_name] = cleaned_value  # Update raw data

        # Nested parameter handling for `DataCollectionParameters`
        if col_name in DataCollectionParameters.model_fields:
            nested_data = current_row_data.get("data_collection_parameters")
            if isinstance(nested_data, dict):
                # Convert dict to Pydantic model
                current_nested = DataCollectionParameters(**nested_data)
            elif isinstance(nested_data, DataCollectionParameters):
                # Already a valid model
                current_nested = nested_data
            else:
                current_nested = DataCollectionParameters()

            # Update the nested model's field and reapply validation
            nested_params = current_nested.model_dump()
            nested_params[col_name] = cleaned_value
            current_row_data["data_collection_parameters"] = DataCollectionParameters(
                **nested_params
            )

        return {"is_valid": True, "message": "", "corrected_value": cleaned_value}
    except ValidationError as e:
        # Handle validation errors
        logger.error(f"Validation error details: {e.errors()}")
        # Guard against a missing or empty `loc` (e.g. model-level errors)
        # before indexing — blindly doing `err.get("loc")[0]` raises and
        # turns a validation report into a 500.
        column_error = next(
            (
                err
                for err in e.errors()
                if err.get("loc") and err["loc"][0] == col_name
            ),
            None,
        )
        message = column_error["msg"] if column_error else "Validation failed."
        logger.error(
            f"Validation failed for row {row_num}, column {col_name}. Error: {message}"
        )
        return {"is_valid": False, "message": message}
    except ValueError as e:
        # Handle expected typecasting or value errors specifically
        error_message = str(e)
        logger.warning(
            f"Failed to validate value '{value}' for row "
            f"{row_num}, column {col_name}: {error_message}"
        )
        raise HTTPException(
            status_code=400,
            detail=f"Validation failed for row "
            f"{row_num}, column {col_name}: {error_message}",
        )
    except Exception as e:
        # Log unexpected issues and re-raise HTTP 500
        logger.error(f"Unexpected error during validation: {str(e)}")
        raise HTTPException(status_code=500, detail=f"Error validating cell: {str(e)}")