added error recognition in spreadsheet
This commit is contained in:
@@ -4,40 +4,47 @@ import logging
|
||||
from app.services.spreadsheet_service import SampleSpreadsheetImporter, SpreadsheetImportError
|
||||
from fastapi.responses import FileResponse
|
||||
import os
|
||||
from pydantic import ValidationError # Import ValidationError here
|
||||
|
||||
|
||||
router = APIRouter()
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
@router.get("/download-template", response_class=FileResponse)
async def download_template():
    """Serve the template file for spreadsheet upload.

    The file is always offered to the client under the name
    ``template.xlsx``.

    Raises:
        HTTPException: 404 when the template file is not present on disk.
    """
    # The template is resolved relative to this module, in a "downloads"
    # directory two levels up.
    current_dir = os.path.dirname(__file__)
    template_path = os.path.join(current_dir, "../../downloads/V7_TELLSamplesSpreadsheetTemplate.xlsx")

    # isfile() rather than exists(): a directory at that path must also be
    # reported as "not found" instead of failing later inside FileResponse.
    if not os.path.isfile(template_path):
        raise HTTPException(status_code=404, detail="Template file not found.")

    return FileResponse(
        template_path,
        filename="template.xlsx",
        media_type="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
    )
|
||||
|
||||
|
||||
@router.post("/upload", response_model=SpreadsheetResponse)
|
||||
async def upload_file(file: UploadFile = File(...)):
|
||||
"""Process the uploaded spreadsheet and return validation results."""
|
||||
try:
|
||||
logger.info(f"Received file: {file.filename}")
|
||||
|
||||
# Validate file type
|
||||
# Validate file format
|
||||
if not file.filename.endswith('.xlsx'):
|
||||
logger.error("Invalid file format")
|
||||
raise HTTPException(status_code=400, detail="Invalid file format. Please upload an .xlsx file.")
|
||||
|
||||
# Process spreadsheet
|
||||
# Initialize the importer and process the spreadsheet
|
||||
importer = SampleSpreadsheetImporter()
|
||||
validated_model, errors, raw_data = importer.import_spreadsheet_with_errors(file)
|
||||
validated_model, errors, raw_data, headers = importer.import_spreadsheet_with_errors(file)
|
||||
|
||||
# Collect dewar, puck, and sample names
|
||||
# Extract unique values for dewars, pucks, and samples
|
||||
dewars = {sample.dewarname for sample in validated_model if sample.dewarname}
|
||||
pucks = {sample.puckname for sample in validated_model if sample.puckname}
|
||||
samples = {sample.crystalname for sample in validated_model if sample.crystalname}
|
||||
|
||||
# Construct response data
|
||||
# Construct the response model with the processed data
|
||||
response_data = SpreadsheetResponse(
|
||||
data=validated_model,
|
||||
errors=errors,
|
||||
@@ -47,14 +54,42 @@ async def upload_file(file: UploadFile = File(...)):
|
||||
pucks_count=len(pucks),
|
||||
pucks=list(pucks),
|
||||
samples_count=len(samples),
|
||||
samples=list(samples)
|
||||
samples=list(samples),
|
||||
headers=headers # Include headers in the response
|
||||
)
|
||||
|
||||
logger.info(f"Returning response: {response_data.dict()}")
|
||||
logger.info(f"Returning response with {len(validated_model)} records and {len(errors)} errors.")
|
||||
return response_data
|
||||
|
||||
except SpreadsheetImportError as e:
|
||||
logger.error(f"Spreadsheet import error: {str(e)}")
|
||||
raise HTTPException(status_code=400, detail=str(e))
|
||||
raise HTTPException(status_code=400, detail=f"Error processing spreadsheet: {str(e)}")
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to process file: {str(e)}")
|
||||
raise HTTPException(status_code=500, detail=f"Failed to upload file. Please try again. {str(e)}")
|
||||
logger.error(f"Unexpected error occurred: {str(e)}")
|
||||
raise HTTPException(status_code=500, detail=f"Failed to upload file. Please try again. Error: {str(e)}")
|
||||
|
||||
|
||||
@router.post("/validate-cell")
async def validate_cell(data: dict):
    """Validate a single cell value based on the expected column type.

    The request body is read as a plain mapping: ``"column"`` names the field
    to validate and ``"value"`` carries the raw cell content. A ``"row"`` key
    may be sent as well; it is not needed for the validation itself.

    Returns:
        ``{"is_valid": bool, "message": str}``. ``message`` is empty when the
        value is accepted, otherwise it holds the first error for the column.
    """
    col_name = data.get("column")
    value = data.get("value")

    # A missing or non-string column name cannot be used as a keyword
    # argument for the model below; report it instead of failing with a 500.
    if not isinstance(col_name, str) or not col_name:
        return {"is_valid": False, "message": "Missing or invalid column name."}

    importer = SampleSpreadsheetImporter()

    # Determine the expected type based on column name
    expected_type = importer.get_expected_type(col_name)

    try:
        # Cleaning happens inside the try block so that a failed conversion
        # is reported as an invalid cell rather than an unhandled error.
        cleaned_value = importer._clean_value(value, expected_type)

        # Validate the cleaned value using the SpreadsheetModel (Pydantic validation)
        SpreadsheetModel(**{col_name: cleaned_value})
    except ValidationError as e:
        # Only one field is supplied, so the model may also complain about
        # other fields that are simply absent here. Keep only the errors
        # whose location refers to the column being validated.
        column_errors = [err for err in e.errors() if col_name in err.get("loc", ())]
        if not column_errors:
            return {"is_valid": True, "message": ""}
        return {"is_valid": False, "message": column_errors[0]["msg"]}
    except (TypeError, ValueError) as e:
        # NOTE(review): assumes _clean_value signals a failed conversion with
        # TypeError/ValueError — confirm against its implementation.
        return {"is_valid": False, "message": str(e)}

    return {"is_valid": True, "message": ""}
|
||||
@@ -280,5 +280,7 @@ class SpreadsheetResponse(BaseModel):
|
||||
pucks: List[str]
|
||||
samples_count: int
|
||||
samples: List[str]
|
||||
headers: Optional[List[str]] = None # Add headers if needed
|
||||
|
||||
|
||||
__all__ = ['SpreadsheetModel', 'SpreadsheetResponse']
|
||||
|
||||
@@ -1,5 +1,3 @@
|
||||
# sample_spreadsheet_importer.py
|
||||
|
||||
import logging
|
||||
import openpyxl
|
||||
from pydantic import ValidationError
|
||||
@@ -10,11 +8,9 @@ from app.sample_models import SpreadsheetModel
|
||||
logging.basicConfig(level=logging.DEBUG)
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class SpreadsheetImportError(Exception):
    """Raised when a spreadsheet cannot be read or imported."""
|
||||
|
||||
|
||||
class SampleSpreadsheetImporter:
|
||||
def __init__(self):
|
||||
self.filename = None
|
||||
@@ -44,7 +40,18 @@ class SampleSpreadsheetImporter:
|
||||
def import_spreadsheet(self, file):
    """Import *file* by delegating to ``import_spreadsheet_with_errors``.

    The result of ``import_spreadsheet_with_errors(file)`` is returned
    unchanged.
    """
    return self.import_spreadsheet_with_errors(file)
|
||||
|
||||
def import_spreadsheet_with_errors(self, file) -> Tuple[List[SpreadsheetModel], List[dict], List[dict]]:
|
||||
def get_expected_type(self, col_name):
    """Return the Python type expected for values in column *col_name*.

    Only the columns listed in the mapping below have an explicit type;
    any other column name falls back to ``str``.
    """
    type_mapping = {
        'dewarname': str,
        'puckname': str,
        'positioninpuck': int,
        'priority': int,
        'oscillation': float,
        # TODO: add all other mappings based on model requirements
    }
    # Default to `str` for columns without an explicit entry
    return type_mapping.get(col_name, str)
|
||||
|
||||
def import_spreadsheet_with_errors(self, file) -> Tuple[List[SpreadsheetModel], List[dict], List[dict], List[str]]:
|
||||
self.model = []
|
||||
self.filename = file.filename
|
||||
logger.info(f"Importing spreadsheet from .xlsx file: {self.filename}")
|
||||
@@ -67,12 +74,17 @@ class SampleSpreadsheetImporter:
|
||||
logger.error(f"Failed to read the file: {str(e)}")
|
||||
raise SpreadsheetImportError(f"Failed to read the file: {str(e)}")
|
||||
|
||||
return self.process_spreadsheet(sheet)
|
||||
# Unpack four values from the process_spreadsheet method
|
||||
model, errors, raw_data, headers = self.process_spreadsheet(sheet)
|
||||
|
||||
def process_spreadsheet(self, sheet) -> Tuple[List[SpreadsheetModel], List[dict], List[dict]]:
|
||||
# Now, return the values correctly
|
||||
return model, errors, raw_data, headers
|
||||
|
||||
def process_spreadsheet(self, sheet) -> Tuple[List[SpreadsheetModel], List[dict], List[dict], List[str]]:
|
||||
model = []
|
||||
errors = []
|
||||
raw_data = []
|
||||
headers = []
|
||||
|
||||
# Skip the first 3 rows
|
||||
rows = list(sheet.iter_rows(min_row=4, values_only=True))
|
||||
@@ -84,6 +96,16 @@ class SampleSpreadsheetImporter:
|
||||
|
||||
expected_columns = 32 # Number of columns expected based on the model
|
||||
|
||||
# Add the headers (the first row in the spreadsheet or map them explicitly)
|
||||
headers = [
|
||||
'dewarname', 'puckname', 'pucktype', 'crystalname', 'positioninpuck', 'priority',
|
||||
'comments', 'directory', 'proteinname', 'oscillation', 'aperture', 'exposure',
|
||||
'totalrange', 'transmission', 'dose', 'targetresolution', 'datacollectiontype',
|
||||
'processingpipeline', 'spacegroupnumber', 'cellparameters', 'rescutkey', 'rescutvalue',
|
||||
'userresolution', 'pdbid', 'autoprocfull', 'procfull', 'adpenabled', 'noano',
|
||||
'ffcscampaign', 'trustedhigh', 'autoprocextraparams', 'chiphiangles'
|
||||
]
|
||||
|
||||
for index, row in enumerate(rows):
|
||||
if not any(row):
|
||||
logger.debug(f"Skipping empty row at index {index}")
|
||||
@@ -96,6 +118,7 @@ class SampleSpreadsheetImporter:
|
||||
if len(row) < expected_columns:
|
||||
row = list(row) + [None] * (expected_columns - len(row))
|
||||
|
||||
# Prepare the record with the cleaned values
|
||||
record = {
|
||||
'dewarname': self._clean_value(row[0], str),
|
||||
'puckname': self._clean_value(row[1], str),
|
||||
@@ -186,4 +209,4 @@ class SampleSpreadsheetImporter:
|
||||
|
||||
self.model = model
|
||||
logger.info(f"Finished processing {len(model)} records with {len(errors)} errors")
|
||||
return self.model, errors, raw_data
|
||||
return self.model, errors, raw_data, headers # Include headers in the response
|
||||
|
||||
Reference in New Issue
Block a user