Add column type mapping and enhance validation

Introduced a backend mapping of spreadsheet columns to their expected types, improving validation and error handling. Updated the UI to highlight default and corrected values, and added more detailed validation for data collection parameters.
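
For context, the new validation path leans on two importer helpers, importer.get_expected_type() and importer._clean_value(). A minimal sketch of the column-to-expected-type mapping they imply (the column names and types here are illustrative assumptions, not part of this commit):

    # Hypothetical column -> expected-type mapping behind get_expected_type().
    # Column names and types are illustrative assumptions.
    EXPECTED_COLUMN_TYPES = {
        "dewarname": str,
        "puckname": str,
        "positioninpuck": int,
        "priority": int,
        "oscillation": float,
        "directory": str,
    }

    def get_expected_type(col_name: str) -> type:
        # Unknown columns fall back to str rather than raising
        return EXPECTED_COLUMN_TYPES.get(col_name, str)

With a mapping like this, _clean_value() can coerce each raw cell to its expected type before the Pydantic models ever see it.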
GotthardG
2025-01-07 15:45:08 +01:00
parent 54975b5919
commit 92306fcfa6
5 changed files with 503 additions and 401 deletions


@@ -1,4 +1,5 @@
-from app.sample_models import SpreadsheetModel, SpreadsheetResponse
+from app.sample_models import SpreadsheetResponse
+from app.schemas import DataCollectionParameters
 from fastapi import APIRouter, UploadFile, File, HTTPException
 import logging
 from app.services.spreadsheet_service import (
@@ -39,6 +40,8 @@ async def download_template():

 @router.post("/upload", response_model=SpreadsheetResponse)
 async def upload_file(file: UploadFile = File(...)):
     """Process the uploaded spreadsheet and return validation results."""
+    from app.schemas import DataCollectionParameters
+
     try:
         logger.info(f"Received file: {file.filename}")
@@ -66,19 +69,41 @@ async def upload_file(file: UploadFile = File(...)):
         }

     # Construct the response model with the processed data
+    # Update raw_data with corrected directory values
+    updated_raw_data = []
+    for row in raw_data:
+        directory_value = row.get("directory") or row["data"][7]
+        try:
+            corrected_directory = DataCollectionParameters(
+                directory=directory_value
+            ).directory
+            corrected = (
+                directory_value != corrected_directory
+            )  # Check if a correction was made
+            row["data"][7] = corrected_directory
+            row["default_set"] = corrected_directory == "{sgPuck}/{sgPosition}"
+            row["corrected"] = corrected  # Mark the row as corrected or not
+            updated_raw_data.append(row)
+        except ValidationError as e:
+            logger.error(
+                f"[Row {row['row_num']}] Error validating directory: {e.errors()}"
+            )
+
     response_data = SpreadsheetResponse(
         data=validated_model,
         errors=errors,
-        raw_data=raw_data,
+        raw_data=updated_raw_data,
         dewars_count=len(dewars),
         dewars=list(dewars),
         pucks_count=len(pucks),
         pucks=list(pucks),
         samples_count=len(samples),
         samples=list(samples),
-        headers=headers,  # Include headers in the response
+        headers=headers,
     )

+    logger.debug(f"Final updated_raw_data sent in response: {updated_raw_data}")
+
     # Store row data for future use
     for idx, row in enumerate(validated_model):
         row_num = idx + 4  # Adjust row numbering if necessary
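
Note on the directory correction above: it round-trips each raw value through DataCollectionParameters and compares input to output. A minimal sketch of a schema that would behave this way, assuming a Pydantic v2 before-validator supplies the template default (the real model in app/schemas.py is not shown in this diff):

    from pydantic import BaseModel, field_validator

    class DataCollectionParameters(BaseModel):
        directory: str | None = None

        @field_validator("directory", mode="before")
        @classmethod
        def set_default_directory(cls, v):
            # Assumed behavior: empty cells fall back to the template default,
            # and whitespace is stripped so the corrected-value check is stable.
            if v is None or str(v).strip() == "":
                return "{sgPuck}/{sgPosition}"
            return str(v).strip()

Under that assumption, row["corrected"] flags values the validator changed, and row["default_set"] flags rows that ended up with the literal {sgPuck}/{sgPosition} default.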
@@ -110,30 +135,60 @@ async def validate_cell(data: dict):
     col_name = data.get("column")
     value = data.get("value")

-    logger.info(f"Validating cell row {row_num}, column {col_name}, value {value}")
+    logger.info(f"Validating cell: row {row_num}, column {col_name}, value {value}")

     # Get the full data for the row
     current_row_data = row_storage.get_row(row_num)

-    # Update the cell value
-    current_row_data[col_name] = importer._clean_value(
-        value, importer.get_expected_type(col_name)
-    )
-
-    # Temporarily store the updated row data
-    row_storage.set_row(row_num, current_row_data)
+    if not current_row_data:
+        logger.error(f"No data found for row {row_num}")
+        raise HTTPException(status_code=404, detail=f"No data found for row {row_num}")

     try:
-        # Ensure we're using the full row data context for validation
-        SpreadsheetModel(
-            **current_row_data
-        )  # Instantiates the Pydantic model, performing validation
-        logger.info(f"Validation succeeded for row {row_num}, column {col_name}")
-        return {"is_valid": True, "message": ""}
+        # Determine the expected type for the column
+        expected_type = importer.get_expected_type(col_name)
+
+        # Clean and validate the specific field
+        cleaned_value = importer._clean_value(value, expected_type)
+        current_row_data[col_name] = cleaned_value  # Update raw data
+
+        # If the column belongs to the nested `data_collection_parameters`
+        if col_name in DataCollectionParameters.model_fields:
+            # Ensure current_nested is a Pydantic model
+            nested_data = current_row_data.get("data_collection_parameters")
+            if isinstance(
+                nested_data, dict
+            ):  # If it's a dict, convert it to a Pydantic model
+                current_nested = DataCollectionParameters(**nested_data)
+            elif isinstance(
+                nested_data, DataCollectionParameters
+            ):  # Already a valid model
+                current_nested = nested_data
+            else:  # If it's None or anything else, create a new instance
+                current_nested = DataCollectionParameters()
+
+            # Convert the model to a dictionary, update the specific field, and
+            # re-create the Pydantic model
+            nested_params = current_nested.model_dump()
+            nested_params[col_name] = cleaned_value  # Update the nested field
+            current_row_data["data_collection_parameters"] = DataCollectionParameters(
+                **nested_params
+            )
+
+        return {"is_valid": True, "message": "", "corrected_value": cleaned_value}
     except ValidationError as e:
-        # Extract the first error message
-        message = e.errors()[0]["msg"]
+        # Handle and log errors
+        logger.error(f"Validation error details: {e.errors()}")
+        column_error = next(
+            (err for err in e.errors() if err.get("loc")[0] == col_name), None
+        )
+        message = column_error["msg"] if column_error else "Validation failed."
         logger.error(
-            f"Validation failed for row {row_num}, column {col_name}: {message}"
+            f"Validation failed for row {row_num}, column {col_name}. Error: {message}"
         )
         return {"is_valid": False, "message": message}
     except Exception as e:
+        # Log unexpected issues
         logger.error(f"Unexpected error during validation: {str(e)}")
         raise HTTPException(status_code=500, detail=f"Error validating cell: {str(e)}")
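
For reference, a hypothetical client call against this cell-validation endpoint; the route path and the row-number key are assumptions (only the "column" and "value" keys are read in the hunk above):

    import httpx

    # Route path and "row_num" key are assumed, not confirmed by this diff.
    resp = httpx.post(
        "http://localhost:8000/spreadsheet/validate-cell",
        json={"row_num": 4, "column": "directory", "value": " {sgPuck}/{sgPosition} "},
    )
    result = resp.json()
    if result["is_valid"]:
        # The backend may have cleaned the value; prefer its corrected form.
        value = result.get("corrected_value")

The corrected_value field added in this commit is what lets the UI display the cleaned value (and highlight it) instead of the raw user input.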