added error recognition in spreadsheet

GotthardG
2024-11-07 10:10:53 +01:00
parent eed50aa942
commit 8f82a3b7fe
5 changed files with 165 additions and 59 deletions

View File

@@ -1,3 +1,4 @@
from app.sample_models import SpreadsheetModel, SpreadsheetResponse
from fastapi import APIRouter, UploadFile, File, HTTPException
import logging
from app.services.spreadsheet_service import SampleSpreadsheetImporter, SpreadsheetImportError
@@ -7,56 +8,50 @@ import os
router = APIRouter()
logger = logging.getLogger(__name__)
@router.get("/download-template", response_class=FileResponse)
async def download_template():
    # Constructing the absolute path
    # No changes here; just serves a static file
    current_dir = os.path.dirname(__file__)
    template_path = os.path.join(current_dir, "../../downloads/V7_TELLSamplesSpreadsheetTemplate.xlsx")
    if not os.path.exists(template_path):
        raise HTTPException(status_code=404, detail="Template file not found.")
    return FileResponse(template_path, filename="V7_TELLSamplesSpreadsheetTemplate.xlsx",
    return FileResponse(template_path, filename="template.xlsx",
                        media_type="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet")
@router.post("/upload")
@router.post("/upload", response_model=SpreadsheetResponse)
async def upload_file(file: UploadFile = File(...)):
    try:
        logger.info(f"Received file: {file.filename}")
        # File type check
        # Validate file type
        if not file.filename.endswith('.xlsx'):
            logger.error("Invalid file format")
            raise HTTPException(status_code=400, detail="Invalid file format. Please upload an .xlsx file.")
        # Reading file
        # Process spreadsheet
        importer = SampleSpreadsheetImporter()
        validated_model = importer.import_spreadsheet(file)
        logger.info(f"Validated model: {validated_model}")
        validated_model, errors, raw_data = importer.import_spreadsheet_with_errors(file)
        # Collect dewar, puck, and sample names
        dewars = {sample.dewarname for sample in validated_model if sample.dewarname}
        pucks = {sample.puckname for sample in validated_model if sample.puckname}
        samples = {sample.crystalname for sample in validated_model if sample.crystalname}
        # Logging the sets of names
        logger.info(f"Dewar Names: {dewars}")
        logger.info(f"Puck Names: {pucks}")
        logger.info(f"Sample Names: {samples}")
        # Construct response data
        response_data = SpreadsheetResponse(
            data=validated_model,
            errors=errors,
            raw_data=raw_data,
            dewars_count=len(dewars),
            dewars=list(dewars),
            pucks_count=len(pucks),
            pucks=list(pucks),
            samples_count=len(samples),
            samples=list(samples)
        )
        # Forming structured response
        response = {
            "dewars_count": len(dewars),
            "dewars": list(dewars),
            "pucks_count": len(pucks),
            "pucks": list(pucks),
            "samples_count": len(samples),
            "samples": list(samples)
        }
        logger.info(f"Returning response: {response}")
        return response
        logger.info(f"Returning response: {response_data.dict()}")
        return response_data
    except SpreadsheetImportError as e:
        logger.error(f"Spreadsheet import error: {str(e)}")
        raise HTTPException(status_code=400, detail=str(e))
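
To see the new error-reporting behaviour end to end, the endpoint could be exercised with FastAPI's `TestClient`. This is a minimal sketch, assuming the router is mounted on an app in `app.main` and that a deliberately broken spreadsheet exists at `tests/data/bad_samples.xlsx` (both names are hypothetical):

```python
# Sketch only: app module, route prefix, and fixture path are assumptions.
from fastapi.testclient import TestClient
from app.main import app  # hypothetical application entry point

client = TestClient(app)

def test_upload_reports_row_errors():
    # Upload a spreadsheet known to contain invalid cells.
    with open("tests/data/bad_samples.xlsx", "rb") as fh:
        response = client.post(
            "/upload",  # adjust if the router is included with a prefix
            files={"file": ("bad_samples.xlsx", fh,
                            "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet")},
        )
    assert response.status_code == 200
    body = response.json()
    # Each entry in `errors` points at the offending row and cell.
    for err in body["errors"]:
        print(err["row"], err["cell"], err["message"])
```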

View File

@@ -1,5 +1,6 @@
import re
from typing import Any, Optional
from typing import Any, Optional, List, Dict
from pydantic import BaseModel, Field, field_validator
from typing_extensions import Annotated
@@ -267,4 +268,17 @@ class SpreadsheetModel(BaseModel):
    username: str
    puck_number: int
    prefix: Optional[str]
    folder: Optional[str]
    folder: Optional[str]
class SpreadsheetResponse(BaseModel):
    data: List[SpreadsheetModel]  # Validated data rows as SpreadsheetModel instances
    errors: List[Dict[str, Any]]  # Errors encountered during validation
    raw_data: List[Dict[str, Any]]  # Raw data extracted from the spreadsheet
    dewars_count: int
    dewars: List[str]
    pucks_count: int
    pucks: List[str]
    samples_count: int
    samples: List[str]
__all__ = ['SpreadsheetModel', 'SpreadsheetResponse']
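
For illustration, a `SpreadsheetResponse` for an upload in which the single data row failed validation might look like this (a sketch with invented values, not output from a real upload):

```python
# Illustrative values only; no row survived validation in this example.
example = SpreadsheetResponse(
    data=[],
    errors=[{"row": 4, "cell": 4, "value": 17,
             "message": "positioninpuck must be between 1 and 16"}],
    raw_data=[{"row_num": 4,
               "data": ["Dewar1", "Puck1", "Unipuck", "Crystal1", 17]}],
    dewars_count=0, dewars=[],
    pucks_count=0, pucks=[],
    samples_count=0, samples=[],
)
print(example.errors[0]["message"])
```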

View File

@@ -1,7 +1,9 @@
# sample_spreadsheet_importer.py
import logging
import openpyxl
from pydantic import ValidationError
from typing import Union
from typing import Union, List, Tuple
from io import BytesIO
from app.sample_models import SpreadsheetModel
@@ -40,6 +42,9 @@ class SampleSpreadsheetImporter:
        return value
    def import_spreadsheet(self, file):
        return self.import_spreadsheet_with_errors(file)
    def import_spreadsheet_with_errors(self, file) -> Tuple[List[SpreadsheetModel], List[dict], List[dict]]:
        self.model = []
        self.filename = file.filename
        logger.info(f"Importing spreadsheet from .xlsx file: {self.filename}")
@@ -64,8 +69,10 @@
        return self.process_spreadsheet(sheet)
    def process_spreadsheet(self, sheet):
    def process_spreadsheet(self, sheet) -> Tuple[List[SpreadsheetModel], List[dict], List[dict]]:
        model = []
        errors = []
        raw_data = []
        # Skip the first 3 rows
        rows = list(sheet.iter_rows(min_row=4, values_only=True))
@@ -82,6 +89,9 @@
logger.debug(f"Skipping empty row at index {index}")
continue
# Record raw data for later use
raw_data.append({"row_num": index + 4, "data": row})
# Pad the row to ensure it has the expected number of columns
if len(row) < expected_columns:
row = list(row) + [None] * (expected_columns - len(row))
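
To make the padding step concrete: for a template with, say, 32 expected columns, a short row is extended with `None` placeholders (a toy illustration, not code from this commit):

```python
# Toy illustration of the padding rule used above.
expected_columns = 32
row = ("Dewar1", "Puck1", "Unipuck", "Crystal1", 1)  # only 5 of 32 cells filled
padded = list(row) + [None] * (expected_columns - len(row))
assert len(padded) == expected_columns  # trailing cells are None
```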
@@ -126,10 +136,54 @@
                model.append(validated_record)
                logger.debug(f"Row {index + 4} processed and validated successfully")
            except ValidationError as e:
                error_message = f"Validation error in row {index + 4}: {e}"
                logger.error(error_message)
                raise SpreadsheetImportError(error_message)
                logger.error(f"Validation error in row {index + 4}: {e}")
                for error in e.errors():
                    field = error['loc'][0]
                    msg = error['msg']
                    # Map field name (which is the key in `record`) to its index in the row
                    field_to_col = {
                        'dewarname': 0,
                        'puckname': 1,
                        'pucktype': 2,
                        'crystalname': 3,
                        'positioninpuck': 4,
                        'priority': 5,
                        'comments': 6,
                        'directory': 7,
                        'proteinname': 8,
                        'oscillation': 9,
                        'aperture': 10,
                        'exposure': 11,
                        'totalrange': 12,
                        'transmission': 13,
                        'dose': 14,
                        'targetresolution': 15,
                        'datacollectiontype': 16,
                        'processingpipeline': 17,
                        'spacegroupnumber': 18,
                        'cellparameters': 19,
                        'rescutkey': 20,
                        'rescutvalue': 21,
                        'userresolution': 22,
                        'pdbid': 23,
                        'autoprocfull': 24,
                        'procfull': 25,
                        'adpenabled': 26,
                        'noano': 27,
                        'ffcscampaign': 28,
                        'trustedhigh': 29,
                        'autoprocextraparams': 30,
                        'chiphiangles': 31
                    }
                    column_index = field_to_col[field]
                    error_info = {
                        'row': index + 4,
                        'cell': column_index,
                        'value': row[column_index],  # Value that caused the error
                        'message': msg
                    }
                    errors.append(error_info)
        self.model = model
        logger.info(f"Finished processing {len(model)} records")
        return self.model
        logger.info(f"Finished processing {len(model)} records with {len(errors)} errors")
        return self.model, errors, raw_data
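
The hard-coded `field_to_col` mapping mirrors the column order of the template; the same mapping could also be derived from a single ordered list of column names, for example (a sketch, not part of this commit):

```python
# Sketch: build the field-to-column mapping from one ordered list
# (same order as the hard-coded dict above).
SPREADSHEET_COLUMNS = [
    'dewarname', 'puckname', 'pucktype', 'crystalname', 'positioninpuck',
    'priority', 'comments', 'directory', 'proteinname', 'oscillation',
    'aperture', 'exposure', 'totalrange', 'transmission', 'dose',
    'targetresolution', 'datacollectiontype', 'processingpipeline',
    'spacegroupnumber', 'cellparameters', 'rescutkey', 'rescutvalue',
    'userresolution', 'pdbid', 'autoprocfull', 'procfull', 'adpenabled',
    'noano', 'ffcscampaign', 'trustedhigh', 'autoprocextraparams',
    'chiphiangles',
]
field_to_col = {name: idx for idx, name in enumerate(SPREADSHEET_COLUMNS)}
assert field_to_col['chiphiangles'] == 31
```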