
Enhanced value cleaning and validation for spreadsheet data, with dynamic handling of columns and corrections. Improved user feedback through detailed error messages and visual indicators for corrected or defaulted values. Simplified backend and frontend logic for better maintainability and usability.
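
For illustration (hypothetical values), a corrected row in the raw_data returned
by /upload carries the metadata the frontend reads to render those indicators:

    {
        "row_num": 4,
        "data": [...],
        "corrected": True,
        "corrected_columns": ["directory"],
        "default_set": False,
    }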
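"""Routes for sample spreadsheet handling: serving the upload template
(/download-template), importing and validating spreadsheets (/upload), and
re-validating single edited cells (/validate-cell)."""
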
import logging
import os

from fastapi import APIRouter, File, HTTPException, UploadFile
from fastapi.responses import FileResponse
from pydantic import ValidationError

from app.row_storage import row_storage  # shared RowStorage instance
from app.sample_models import SpreadsheetResponse
from app.schemas import DataCollectionParameters
from app.services.spreadsheet_service import (
    SampleSpreadsheetImporter,
    SpreadsheetImportError,
)

router = APIRouter()
logger = logging.getLogger(__name__)

# Assumed to be a singleton or otherwise manageable instance.
importer = SampleSpreadsheetImporter()


@router.get("/download-template", response_class=FileResponse)
async def download_template():
    """Serve a template file for spreadsheet upload."""
    current_dir = os.path.dirname(__file__)
    template_path = os.path.join(
        current_dir, "../../downloads/V7_TELLSamplesSpreadsheetTemplate.xlsx"
    )

    if not os.path.exists(template_path):
        raise HTTPException(status_code=404, detail="Template file not found.")

    return FileResponse(
        template_path,
        filename="template.xlsx",
        media_type="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
    )


@router.post("/upload", response_model=SpreadsheetResponse)
async def upload_file(file: UploadFile = File(...)):
    """Process the uploaded spreadsheet and return validation results."""
    try:
        logger.info(f"Received file: {file.filename}")

        # Validate file format
        if not file.filename.endswith(".xlsx"):
            logger.error("Invalid file format")
            raise HTTPException(
                status_code=400,
                detail="Invalid file format. Please upload an .xlsx file.",
            )

        # Process the spreadsheet with the module-level importer
        (
            validated_model,
            errors,
            raw_data,
            headers,
        ) = importer.import_spreadsheet_with_errors(file)

        # Extract unique values for dewars, pucks, and samples
        dewars = {sample.dewarname for sample in validated_model if sample.dewarname}
        pucks = {sample.puckname for sample in validated_model if sample.puckname}
        samples = {
            sample.crystalname for sample in validated_model if sample.crystalname
        }

        # Re-validate raw_data row by row and apply corrections in place
        updated_raw_data = []

        for row in raw_data:
            corrected = False  # Tracks changes made in this row.
            corrected_columns = []  # Stores names of columns corrected.
            default_set = row.get("default_set", False)

            # Ensure raw data rows are padded to match the headers length.
            if len(row["data"]) < len(headers):
                padding_length = len(headers) - len(row["data"])
                logger.info(
                    f"Padding row {row.get('row_num')} with "
                    f"{padding_length} None values."
                )
                row["data"].extend([None] * padding_length)

            # Validate data and apply corrections column by column.
            for col_index, col_name in enumerate(headers):
                original_value = row["data"][col_index]
                expected_type = importer.get_expected_type(col_name)

                try:
                    # Pass col_name explicitly to _clean_value
                    cleaned_value = importer._clean_value(
                        original_value, expected_type, col_name
                    )

                    # Check if a correction was applied
                    if cleaned_value != original_value:
                        corrected = True
                        corrected_columns.append(col_name)

                        # Update "directory" metadata explicitly, if applicable
                        if col_name == "directory":
                            row["directory"] = cleaned_value

                        # Update the raw data with the corrected value
                        row["data"][col_index] = cleaned_value

                        # Log the correction
                        logger.info(
                            f"Corrected field '{col_name}' in row {row['row_num']}: "
                            f"Original='{original_value}', Corrected='{cleaned_value}'"
                        )

                except (ValueError, TypeError) as e:
                    # Handle and log validation errors specific to this column
                    logger.error(
                        f"Validation failed for row {row['row_num']}, "
                        f"column '{col_name}': {str(e)}"
                    )
                    errors.append(
                        {
                            "row": row["row_num"],
                            "column": col_name,
                            "value": original_value,
                            "message": str(e),
                        }
                    )

            # Special case: check whether "directory" was auto-corrected.
            # NOTE: index 7 is assumed to be the "directory" column in the
            # template layout.
            if (
                row.get("directory")
                and len(row["data"]) > 7
                and row["data"][7] != row["directory"]
            ):
                corrected = True
                corrected_columns.append("directory")
                row["data"][7] = row["directory"]

            # Add correction metadata to the row if changes were made.
            if corrected:
                row["corrected"] = True
                row["corrected_columns"] = corrected_columns
                row["default_set"] = default_set

            # Add the processed row to the updated data list.
            updated_raw_data.append(row)

        logger.info(
            "Processing completed. "
            f"Total rows processed: {len(raw_data)}, "
            f"Rows corrected: {sum(1 for r in updated_raw_data if r.get('corrected'))}"
        )

        # Construct the response model with the processed data
        response_data = SpreadsheetResponse(
            data=validated_model,
            errors=errors,
            raw_data=updated_raw_data,
            dewars_count=len(dewars),
            dewars=list(dewars),
            pucks_count=len(pucks),
            pucks=list(pucks),
            samples_count=len(samples),
            samples=list(samples),
            headers=headers,
        )

        logger.debug(f"Final updated_raw_data sent in response: {updated_raw_data}")

        # Store row data so /validate-cell can look it up later
        for idx, row in enumerate(validated_model):
            row_num = idx + 4  # Data rows are assumed to start at row 4 of the template.
            row_storage.set_row(row_num, row.model_dump())

        logger.info(
            f"Returning response with {len(validated_model)} "
            f"records and {len(errors)} errors."
        )
        return response_data

    except HTTPException:
        # Let deliberate HTTP errors (like the 400 for a bad file extension)
        # propagate instead of being converted to a 500 below.
        raise

    except SpreadsheetImportError as e:
        logger.error(f"Spreadsheet import error: {str(e)}")
        raise HTTPException(
            status_code=400, detail=f"Error processing spreadsheet: {str(e)}"
        )

    except Exception as e:
        logger.error(f"Unexpected error occurred: {str(e)}")
        raise HTTPException(
            status_code=500,
            detail=f"Failed to upload file. Please try again. Error: {str(e)}",
        )
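

# NOTE: /validate-cell relies on rows previously stored by /upload via
# row_storage.set_row; rows are keyed by their spreadsheet row number.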
@router.post("/validate-cell")
async def validate_cell(data: dict):
    """Re-validate a single edited cell.

    Expects a JSON body like {"row": 4, "column": "directory", "value": ...}.
    """
    row_num = data.get("row")
    col_name = data.get("column")
    value = data.get("value")

    logger.info(f"Validating cell row {row_num}, column {col_name}, value {value}")

    # Retrieve the full data for the row
    current_row_data = row_storage.get_row(row_num)

    if not current_row_data:
        logger.error(f"No data found for row {row_num}")
        # Explicitly return a 404 error if the row is missing
        raise HTTPException(status_code=404, detail=f"No data found for row {row_num}")

    try:
        # Determine the expected type for the column
        expected_type = importer.get_expected_type(col_name)

        # Clean and validate the specific field, passing col_name as in /upload
        cleaned_value = importer._clean_value(value, expected_type, col_name)
        current_row_data[col_name] = cleaned_value  # Update raw data

        # Nested parameter handling for `DataCollectionParameters`
        if col_name in DataCollectionParameters.model_fields:
            nested_data = current_row_data.get("data_collection_parameters")

            if isinstance(nested_data, dict):
                # Convert dict to Pydantic model
                current_nested = DataCollectionParameters(**nested_data)
            elif isinstance(nested_data, DataCollectionParameters):
                # Already a valid model
                current_nested = nested_data
            else:
                current_nested = DataCollectionParameters()

            # Update the nested model's field and rebuild the model so its
            # validators run against the new value
            nested_params = current_nested.model_dump()
            nested_params[col_name] = cleaned_value
            current_row_data["data_collection_parameters"] = DataCollectionParameters(
                **nested_params
            )

        return {"is_valid": True, "message": "", "corrected_value": cleaned_value}

    except ValidationError as e:
        # Surface the error message for the affected column, if present
        logger.error(f"Validation error details: {e.errors()}")
        column_error = next(
            (err for err in e.errors() if (err.get("loc") or [None])[0] == col_name),
            None,
        )
        message = column_error["msg"] if column_error else "Validation failed."
        logger.error(
            f"Validation failed for row {row_num}, column {col_name}. Error: {message}"
        )
        return {"is_valid": False, "message": message}

    except ValueError as e:
        # Handle expected typecasting or value errors specifically
        error_message = str(e)
        logger.warning(
            f"Failed to validate value '{value}' for row "
            f"{row_num}, column {col_name}: {error_message}"
        )
        raise HTTPException(
            status_code=400,
            detail=f"Validation failed for row "
            f"{row_num}, column {col_name}: {error_message}",
        )

    except Exception as e:
        # Log unexpected issues and convert them to an HTTP 500
        logger.error(f"Unexpected error during validation: {str(e)}")
        raise HTTPException(status_code=500, detail=f"Error validating cell: {str(e)}")