import logging
import os

from fastapi import APIRouter, File, HTTPException, UploadFile
from fastapi.responses import FileResponse
from pydantic import ValidationError

from app.row_storage import row_storage  # The shared RowStorage instance
from app.sample_models import SpreadsheetResponse
from app.schemas import DataCollectionParameters
from app.services.spreadsheet_service import (
    SampleSpreadsheetImporter,
    SpreadsheetImportError,
)

router = APIRouter()
logger = logging.getLogger(__name__)
importer = (
    SampleSpreadsheetImporter()
)  # Assuming this is a singleton or manageable instance


@router.get("/download-template", response_class=FileResponse)
async def download_template():
    """Serve a template file for spreadsheet upload."""
    current_dir = os.path.dirname(__file__)
    template_path = os.path.join(
        current_dir, "../../downloads/V7_TELLSamplesSpreadsheetTemplate.xlsx"
    )

    if not os.path.exists(template_path):
        raise HTTPException(status_code=404, detail="Template file not found.")

    return FileResponse(
        template_path,
        filename="template.xlsx",
        media_type="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
    )


@router.post("/upload", response_model=SpreadsheetResponse)
async def upload_file(file: UploadFile = File(...)):
    """Process the uploaded spreadsheet and return validation results."""
    try:
        logger.info(f"Received file: {file.filename}")

        # Validate the file format before any processing.
        if not file.filename.endswith(".xlsx"):
            logger.error("Invalid file format")
            raise HTTPException(
                status_code=400,
                detail="Invalid file format. Please upload an .xlsx file.",
            )

        # Process the spreadsheet with the shared importer.
        (
            validated_model,
            errors,
            raw_data,
            headers,
        ) = importer.import_spreadsheet_with_errors(file)

        # Extract unique values for dewars, pucks, and samples.
        dewars = {sample.dewarname for sample in validated_model if sample.dewarname}
        pucks = {sample.puckname for sample in validated_model if sample.puckname}
        samples = {
            sample.crystalname for sample in validated_model if sample.crystalname
        }

        # Update raw_data with corrected values (e.g. directory) row by row
        # before constructing the response model.
        updated_raw_data = []

        for row in raw_data:
            corrected = False  # Tracks whether any change was made in this row.
            corrected_columns = []  # Stores the names of corrected columns.
            default_set = row.get("default_set", False)

            # Ensure raw data rows are padded to match the headers length.
            if len(row["data"]) < len(headers):
                padding_length = len(headers) - len(row["data"])
                logger.info(
                    f"Padding row {row.get('row_num')} with "
                    f"{padding_length} None values."
                )
                row["data"].extend([None] * padding_length)

            # Validate data and apply corrections column by column.
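            # NOTE: judging from its call sites, `_clean_value` appears to
            # return a tuple whose first element is the cleaned value (hence
            # the `cleaned_value[0]` comparison below and the tuple flattening
            # during sanitization). This is inferred from usage here, not from
            # a documented contract of the importer.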
            for col_index, col_name in enumerate(headers):
                original_value = row["data"][col_index]
                expected_type = importer.get_expected_type(col_name)

                try:
                    # Pass col_name explicitly to _clean_value.
                    cleaned_value = importer._clean_value(
                        original_value, expected_type, col_name
                    )

                    # Check if a correction was applied. `corrected` must
                    # accumulate across all columns of the row, so it is only
                    # ever set here, never reset.
                    if cleaned_value[0] != original_value:
                        corrected = True
                        corrected_columns.append(col_name)

                        # Update "directory" metadata explicitly, if applicable.
                        if col_name == "directory":
                            row["directory"] = cleaned_value

                        # Update the raw data with the corrected value.
                        row["data"][col_index] = cleaned_value

                        # Log the correction.
                        logger.info(
                            f"Corrected field '{col_name}' in row {row['row_num']}: "
                            f"Original='{original_value}', Corrected='{cleaned_value}'"
                        )
                except (ValueError, TypeError) as e:
                    # Handle and log validation errors specific to this column.
                    logger.error(
                        f"Validation failed for row "
                        f"{row['row_num']}, column '{col_name}': "
                        f"{str(e)}"
                    )
                    errors.append(
                        {
                            "row": row["row_num"],
                            "column": col_name,
                            "value": original_value,
                            "message": str(e),
                        }
                    )

            # Special case: check and handle if "directory" was auto-corrected.
            if (
                row.get("directory")
                and len(row["data"]) > 7
                and row["data"][7] != row["directory"]
            ):
                corrected = True
                corrected_columns.append("directory")
                row["data"][7] = row["directory"]

            # Add correction metadata to the row if changes were made.
            if corrected:
                row["corrected"] = True
                row["corrected_columns"] = corrected_columns
            row["default_set"] = default_set

            # Add the processed row to the updated data list.
            updated_raw_data.append(row)

        logger.info(
            "Processing completed. "
            f"Total rows processed: {len(raw_data)}, "
            f"Rows corrected: {sum(1 for r in updated_raw_data if r.get('corrected'))}"
        )

        updated_addinfo = [
            {
                "row_num": row["row_num"],  # Identifies the row for the frontend.
                "corrected_columns": row.get("corrected_columns", []),
                "default_set": [
                    col_name
                    for col_name in row.get("corrected_columns", [])
                    if row.get("default_set", False) and col_name == "directory"
                ],  # Specifies which keys are explicitly `default_set`.
            }
            for row in updated_raw_data
            # Only include rows with changes.
            if row.get("corrected") or row.get("default_set")
        ]
        logger.debug(f"Constructed addinfo: {updated_addinfo}")

        # Clean the updated raw data in place.
        for row in updated_raw_data:
            # Remove unwanted metadata fields.
            row.pop("corrected", None)
            row.pop("corrected_columns", None)
            row.pop("default_set", None)
            row.pop("defaulted_columns", None)
            row.pop("directory", None)

            # Sanitize nested data (e.g. replace directory tuples with strings).
            if "data" in row:
                for idx, value in enumerate(row["data"]):
                    if isinstance(value, tuple):
                        row["data"][idx] = value[0]  # Keep the first item (the string).

        # Confirm that the cleanup worked.
        for row in updated_raw_data:
            unexpected_keys = [
                k
                for k in row.keys()
                if k
                in [
                    "corrected",
                    "corrected_columns",
                    "default_set",
                    "defaulted_columns",
                    "directory",
                ]
            ]
            if unexpected_keys:
                logger.error(f"Unexpected keys persist: {unexpected_keys}")

        # Defensively strip the metadata keys once more when constructing
        # stripped_raw_data from the cleaned updated_raw_data.
        stripped_raw_data = [
            {
                k: v
                for k, v in row.items()
                if k
                not in [
                    "corrected",
                    "corrected_columns",
                    "default_set",
                    "defaulted_columns",
                    "directory",
                ]
            }
            for row in updated_raw_data
        ]

        # Verify the final stripped raw data before returning.
        # logger.debug(f"Sanitized raw_data for response: {stripped_raw_data}")

        response_data = SpreadsheetResponse(
            data=validated_model,
            errors=errors,
            raw_data=stripped_raw_data,  # Final submission data
            addinfo=updated_addinfo,  # Metadata for frontend display
            dewars_count=len(dewars),
            dewars=list(dewars),
            pucks_count=len(pucks),
            pucks=list(pucks),
            samples_count=len(samples),
            samples=list(samples),
            headers=headers,
        )

        logger.debug(f"Final updated_raw_data sent in response: {updated_raw_data}")

        # Store row data for future use.
        for idx, row in enumerate(validated_model):
            row_num = idx + 4  # Adjust row numbering if necessary.
            row_storage.set_row(row_num, row.model_dump())

        logger.info(
            f"Returning response with {len(validated_model)} "
            f"records and {len(errors)} errors."
        )
        return response_data

    except SpreadsheetImportError as e:
        logger.error(f"Spreadsheet import error: {str(e)}")
        raise HTTPException(
            status_code=400, detail=f"Error processing spreadsheet: {str(e)}"
        )

    except Exception as e:
        logger.error(f"Unexpected error occurred: {str(e)}")
        raise HTTPException(
            status_code=500,
            detail=f"Failed to upload file. Please try again. Error: {str(e)}",
        )


@router.post("/validate-cell")
async def validate_cell(data: dict):
    """Validate a single edited cell and return the corrected value."""
    row_num = data.get("row")
    col_name = data.get("column")
    value = data.get("value")

    logger.info(f"Validating cell row {row_num}, column {col_name}, value {value}")

    # Retrieve the full data for the row.
    current_row_data = row_storage.get_row(row_num)

    if not current_row_data:
        logger.error(f"No data found for row {row_num}")
        # Explicitly return a 404 error if the row is missing.
        raise HTTPException(status_code=404, detail=f"No data found for row {row_num}")

    try:
        # Determine the expected type for the column.
        expected_type = importer.get_expected_type(col_name)

        # Clean and validate the specific field.
        cleaned_value = importer._clean_value(value, expected_type)
        current_row_data[col_name] = cleaned_value  # Update the raw data.

        # Nested parameter handling for `DataCollectionParameters`.
        if col_name in DataCollectionParameters.model_fields:
            nested_data = current_row_data.get("data_collection_parameters")

            if isinstance(nested_data, dict):
                # Convert the dict to a Pydantic model.
                current_nested = DataCollectionParameters(**nested_data)
            elif isinstance(nested_data, DataCollectionParameters):
                # Already a valid model.
                current_nested = nested_data
            else:
                current_nested = DataCollectionParameters()

            # Update the nested model's field and reapply validation.
            nested_params = current_nested.model_dump()
            nested_params[col_name] = cleaned_value
            current_row_data["data_collection_parameters"] = DataCollectionParameters(
                **nested_params
            )

        return {"is_valid": True, "message": "", "corrected_value": cleaned_value}

    except ValidationError as e:
        # Surface the message for the offending column, if present.
        logger.error(f"Validation error details: {e.errors()}")
        column_error = next(
            (err for err in e.errors() if err.get("loc")[0] == col_name), None
        )
        message = column_error["msg"] if column_error else "Validation failed."
        logger.error(
            f"Validation failed for row {row_num}, column {col_name}. Error: {message}"
        )
        return {"is_valid": False, "message": message}

    except ValueError as e:
        # Handle expected typecasting or value errors specifically.
        error_message = str(e)
        logger.warning(
            f"Failed to validate value '{value}' for row "
            f"{row_num}, column {col_name}: {error_message}"
        )
        raise HTTPException(
            status_code=400,
            detail=f"Validation failed for row "
            f"{row_num}, column {col_name}: {error_message}",
        )

    except Exception as e:
        # Log unexpected issues and re-raise as HTTP 500.
        logger.error(f"Unexpected error during validation: {str(e)}")
        raise HTTPException(status_code=500, detail=f"Error validating cell: {str(e)}")
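

# Example client calls for the two POST endpoints above: a minimal smoke-test
# sketch using FastAPI's TestClient, assuming this router is included in an
# application without a prefix. `app.main` and "samples.xlsx" are hypothetical
# placeholders; substitute the real application module and a spreadsheet based
# on the downloadable template.
if __name__ == "__main__":
    from fastapi.testclient import TestClient

    from app.main import app  # Hypothetical application entry point.

    client = TestClient(app)

    # Upload a filled-in template; the response carries validated rows,
    # per-cell errors, and correction metadata (`addinfo`).
    with open("samples.xlsx", "rb") as fh:
        upload_resp = client.post("/upload", files={"file": ("samples.xlsx", fh)})
    print(upload_resp.json())

    # Re-validate a single cell the way the frontend would after an edit.
    cell_resp = client.post(
        "/validate-cell",
        json={"row": 4, "column": "directory", "value": "my_directory"},
    )
    print(cell_resp.json())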