
Added logic to assign a default value of 1 to empty "priority" fields in the spreadsheet service. Adjusted the router to correctly track columns explicitly marked as defaulted.
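
For context, below is a minimal sketch of the service-side behaviour the router now depends on. The real implementation lives in app/services/spreadsheet_service.py and is not part of this file; the name _clean_value is real (it is called from the router), but its body, return shape, and the defaulted_columns bookkeeping are assumptions inferred from how the router consumes the results:

def _clean_value(value, expected_type, col_name=None):
    """Sketch only: return (cleaned_value, default_applied) for one cell."""
    if col_name == "priority" and value in (None, ""):
        # Empty "priority" cells fall back to 1 and are flagged as defaulted.
        return 1, True
    cleaned = expected_type(value) if expected_type and value is not None else value
    return cleaned, False

# Per row, the importer is then assumed to record the flags the router reads:
#   row["default_set"] = True
#   row.setdefault("defaulted_columns", []).append(col_name)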
341 lines · 13 KiB · Python

import logging
import os

from fastapi import APIRouter, File, HTTPException, UploadFile
from fastapi.responses import FileResponse
from pydantic import ValidationError

from app.row_storage import row_storage  # Shared RowStorage instance
from app.sample_models import SpreadsheetResponse
from app.schemas import DataCollectionParameters
from app.services.spreadsheet_service import (
    SampleSpreadsheetImporter,
    SpreadsheetImportError,
)

router = APIRouter()
logger = logging.getLogger(__name__)

# Assumed to be stateless and therefore safe to share across requests.
importer = SampleSpreadsheetImporter()
@router.get("/download-template", response_class=FileResponse)
|
|
async def download_template():
|
|
"""Serve a template file for spreadsheet upload."""
|
|
current_dir = os.path.dirname(__file__)
|
|
template_path = os.path.join(
|
|
current_dir, "../../downloads/V7_TELLSamplesSpreadsheetTemplate.xlsx"
|
|
)
|
|
|
|
if not os.path.exists(template_path):
|
|
raise HTTPException(status_code=404, detail="Template file not found.")
|
|
|
|
return FileResponse(
|
|
template_path,
|
|
filename="template.xlsx",
|
|
media_type="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
|
|
)
|
|
|
|
|
|
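
# Example use of the template endpoint (local dev URL assumed):
#   curl -OJ http://localhost:8000/download-template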
@router.post("/upload", response_model=SpreadsheetResponse)
|
|
async def upload_file(file: UploadFile = File(...)):
|
|
"""Process the uploaded spreadsheet and return validation results."""
|
|
|
|
try:
|
|
logger.info(f"Received file: {file.filename}")
|
|
|
|
# Validate file format
|
|
if not file.filename.endswith(".xlsx"):
|
|
logger.error("Invalid file format")
|
|
raise HTTPException(
|
|
status_code=400,
|
|
detail="Invalid file format. Please upload an .xlsx file.",
|
|
)
|
|
|
|
# Initialize the importer and process the spreadsheet
|
|
(
|
|
validated_model,
|
|
errors,
|
|
raw_data,
|
|
headers,
|
|
) = importer.import_spreadsheet_with_errors(file)
|
|
|
|
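
        # Return contract (inferred from the usage below; the importer itself
        # is not shown in this file): validated_model is a list of validated
        # sample models, errors a list of {"row", "column", "value", "message"}
        # dicts, raw_data a list of per-row dicts carrying "row_num" and
        # "data", and headers the list of column names.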

        # Extract unique values for dewars, pucks, and samples
        dewars = {sample.dewarname for sample in validated_model if sample.dewarname}
        pucks = {sample.puckname for sample in validated_model if sample.puckname}
        samples = {
            sample.crystalname for sample in validated_model if sample.crystalname
        }

        # Update raw_data with corrected values, iterating row by row
        updated_raw_data = []

        for row in raw_data:
            corrected = False  # Tracks whether any column in this row changed.
            corrected_columns = []  # Names of the columns that were corrected.
            default_set = row.get("default_set", False)

            # Pad raw data rows so they match the headers length.
            if len(row["data"]) < len(headers):
                padding_length = len(headers) - len(row["data"])
                logger.info(
                    f"Padding row {row.get('row_num')} with "
                    f"{padding_length} None values."
                )
                row["data"].extend([None] * padding_length)
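
            # NOTE (assumption, inferred from usage): importer._clean_value()
            # returns an indexable pair such as (cleaned_value, default_applied),
            # so only its first element is the user-facing value. For an empty
            # "priority" cell the service is expected to yield (1, True).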

            # Validate data and apply corrections column by column.
            for col_index, col_name in enumerate(headers):
                original_value = row["data"][col_index]
                expected_type = importer.get_expected_type(col_name)

                try:
                    # Pass col_name explicitly so column-specific rules
                    # (such as the "priority" default) can be applied.
                    cleaned_value = importer._clean_value(
                        original_value, expected_type, col_name
                    )

                    # Check whether a correction was applied. Do not reset
                    # `corrected` here: it must accumulate across columns,
                    # otherwise only a change in the last column would mark
                    # the row as corrected.
                    if cleaned_value[0] != original_value:
                        corrected = True
                        corrected_columns.append(col_name)

                        # Update "directory" metadata explicitly, if applicable
                        if col_name == "directory":
                            row["directory"] = cleaned_value

                        # Update the raw data with the corrected value
                        row["data"][col_index] = cleaned_value

                        # Log the correction
                        logger.info(
                            f"Corrected field '{col_name}' in row {row['row_num']}: "
                            f"Original='{original_value}', Corrected='{cleaned_value}'"
                        )

                except (ValueError, TypeError) as e:
                    # Record validation errors specific to this column
                    logger.error(
                        f"Validation failed for row "
                        f"{row['row_num']}, column '{col_name}': {e}"
                    )
                    errors.append(
                        {
                            "row": row["row_num"],
                            "column": col_name,
                            "value": original_value,
                            "message": str(e),
                        }
                    )

            # Special case: check whether "directory" was auto-corrected
            # (column index 7 is assumed to be the directory column).
            if (
                row.get("directory")
                and len(row["data"]) > 7
                and row["data"][7] != row["directory"]
            ):
                corrected = True
                corrected_columns.append("directory")
                row["data"][7] = row["directory"]

            # Attach correction metadata to the row if anything changed.
            if corrected:
                row["corrected"] = True
                row["corrected_columns"] = corrected_columns
                row["default_set"] = default_set

            # Add the processed row to the updated data list.
            updated_raw_data.append(row)

        logger.info(
            "Processing completed. "
            f"Total rows processed: {len(raw_data)}, "
            f"Rows corrected: {sum(1 for r in updated_raw_data if r.get('corrected'))}"
        )

        updated_addinfo = [
            {
                "row_num": row["row_num"],  # Identifies the row for the frontend
                "corrected_columns": row.get("corrected_columns", []),
                # Columns whose values were explicitly set to defaults
                "default_set": [
                    col_name
                    for col_name in row.get("corrected_columns", [])
                    if row.get("default_set", False)
                    and col_name in row.get("defaulted_columns", [])
                ],
            }
            for row in updated_raw_data
            # Only include rows with changes
            if row.get("corrected") or row.get("default_set")
        ]
        logger.debug(f"Constructed addinfo: {updated_addinfo}")

        # Clean updated raw data in place
        for row in updated_raw_data:
            # Remove metadata fields that should not reach the frontend
            row.pop("corrected", None)
            row.pop("corrected_columns", None)
            row.pop("default_set", None)
            row.pop("defaulted_columns", None)
            row.pop("directory", None)

            # Sanitize nested data (e.g., replace directory tuples with strings)
            if "data" in row:
                for idx, value in enumerate(row["data"]):
                    if isinstance(value, tuple):
                        row["data"][idx] = value[0]  # Keep only the cleaned string

        # Confirm the cleanup worked
        for row in updated_raw_data:
            unexpected_keys = [
                k
                for k in row
                if k
                in (
                    "corrected",
                    "corrected_columns",
                    "default_set",
                    "defaulted_columns",
                    "directory",
                )
            ]
            if unexpected_keys:
                logger.error(f"Unexpected keys persist: {unexpected_keys}")

        # Build stripped_raw_data from the cleaned rows. The metadata keys
        # were already popped above, so this filter is a defensive copy.
        stripped_raw_data = [
            {
                k: v
                for k, v in row.items()
                if k
                not in (
                    "corrected",
                    "corrected_columns",
                    "default_set",
                    "defaulted_columns",
                    "directory",
                )
            }
            for row in updated_raw_data
        ]

        response_data = SpreadsheetResponse(
            data=validated_model,
            errors=errors,
            raw_data=stripped_raw_data,  # Final submission data
            addinfo=updated_addinfo,  # Metadata for frontend display
            dewars_count=len(dewars),
            dewars=list(dewars),
            pucks_count=len(pucks),
            pucks=list(pucks),
            samples_count=len(samples),
            samples=list(samples),
            headers=headers,
        )
        logger.debug(f"Final updated_raw_data sent in response: {updated_raw_data}")

        # Store row data for later per-cell validation
        for idx, row in enumerate(validated_model):
            row_num = idx + 4  # Data rows are assumed to start at row 4
            row_storage.set_row(row_num, row.dict())

        logger.info(
            f"Returning response with {len(validated_model)} "
            f"records and {len(errors)} errors."
        )
        return response_data

    except SpreadsheetImportError as e:
        logger.error(f"Spreadsheet import error: {str(e)}")
        raise HTTPException(
            status_code=400, detail=f"Error processing spreadsheet: {str(e)}"
        )

    except HTTPException:
        # Re-raise deliberate HTTP errors (e.g. the 400 for a wrong file
        # extension) so the generic handler below does not turn them into 500s.
        raise

    except Exception as e:
        logger.error(f"Unexpected error occurred: {str(e)}")
        raise HTTPException(
            status_code=500,
            detail=f"Failed to upload file. Please try again. Error: {str(e)}",
        )
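
# Example upload (local dev URL assumed; template.xlsx is a filled-in copy):
#   curl -X POST http://localhost:8000/upload -F "file=@template.xlsx"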
@router.post("/validate-cell")
|
|
async def validate_cell(data: dict):
|
|
row_num = data.get("row")
|
|
col_name = data.get("column")
|
|
value = data.get("value")
|
|
|
|
logger.info(f"Validating cell row {row_num}, column {col_name}, value {value}")
|
|
|
|
# Retrieve the full data for the row
|
|
current_row_data = row_storage.get_row(row_num)
|
|
|
|
if not current_row_data:
|
|
logger.error(f"No data found for row {row_num}")
|
|
# Explicitly return a 404 error if the row is missing
|
|
raise HTTPException(status_code=404, detail=f"No data found for row {row_num}")
|
|
|
|

    try:
        # Determine the expected type for the column
        expected_type = importer.get_expected_type(col_name)

        # Clean and validate the specific field, passing col_name so the
        # same column-specific rules apply as in /upload.
        cleaned_value = importer._clean_value(value, expected_type, col_name)
        if isinstance(cleaned_value, tuple):
            # Mirror the tuple handling in /upload: keep only the value.
            cleaned_value = cleaned_value[0]
        current_row_data[col_name] = cleaned_value  # Update raw data

        # Nested parameter handling for `DataCollectionParameters`
        if col_name in DataCollectionParameters.model_fields:
            nested_data = current_row_data.get("data_collection_parameters")

            if isinstance(nested_data, dict):
                # Convert dict to Pydantic model
                current_nested = DataCollectionParameters(**nested_data)
            elif isinstance(nested_data, DataCollectionParameters):
                # Already a valid model
                current_nested = nested_data
            else:
                current_nested = DataCollectionParameters()

            # Update the nested model's field and revalidate
            nested_params = current_nested.model_dump()
            nested_params[col_name] = cleaned_value
            current_row_data["data_collection_parameters"] = DataCollectionParameters(
                **nested_params
            )

        return {"is_valid": True, "message": "", "corrected_value": cleaned_value}

    except ValidationError as e:
        # Handle Pydantic validation errors
        logger.error(f"Validation error details: {e.errors()}")
        column_error = next(
            (
                err
                for err in e.errors()
                if err.get("loc") and err["loc"][0] == col_name
            ),
            None,
        )
        message = column_error["msg"] if column_error else "Validation failed."
        logger.error(
            f"Validation failed for row {row_num}, column {col_name}. Error: {message}"
        )
        return {"is_valid": False, "message": message}

    except ValueError as e:
        # Handle expected typecasting or value errors specifically
        error_message = str(e)
        logger.warning(
            f"Failed to validate value '{value}' for row "
            f"{row_num}, column {col_name}: {error_message}"
        )
        raise HTTPException(
            status_code=400,
            detail=f"Validation failed for row "
            f"{row_num}, column {col_name}: {error_message}",
        )

    except Exception as e:
        # Log unexpected issues and return an HTTP 500
        logger.error(f"Unexpected error during validation: {str(e)}")
        raise HTTPException(status_code=500, detail=f"Error validating cell: {str(e)}")
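
# Example round-trip for /validate-cell (hypothetical values, local dev URL):
#   curl -X POST http://localhost:8000/validate-cell \
#        -H "Content-Type: application/json" \
#        -d '{"row": 4, "column": "priority", "value": ""}'
# With the priority default applied in the service, this should return:
#   {"is_valid": true, "message": "", "corrected_value": 1}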