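Added error recognition to the spreadsheet importer: validation errors are now collected per row and cell and returned alongside the raw row data, instead of aborting the import on the first invalid row.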

This commit is contained in:
GotthardG
2024-11-07 10:10:53 +01:00
parent eed50aa942
commit 8f82a3b7fe
5 changed files with 165 additions and 59 deletions

View File

@ -1,7 +1,9 @@
# sample_spreadsheet_importer.py
import logging
import openpyxl
from pydantic import ValidationError
from typing import Union
from typing import Union, List, Tuple
from io import BytesIO
from app.sample_models import SpreadsheetModel
@ -40,6 +42,9 @@ class SampleSpreadsheetImporter:
return value
def import_spreadsheet(self, file):
    """Import a spreadsheet by delegating to ``import_spreadsheet_with_errors``.

    This is a thin wrapper: ``file`` is passed through untouched and the
    delegate's result is returned unchanged, so the return value has the
    same shape as that of ``import_spreadsheet_with_errors``.
    """
    outcome = self.import_spreadsheet_with_errors(file)
    return outcome
def import_spreadsheet_with_errors(self, file) -> Tuple[List[SpreadsheetModel], List[dict], List[dict]]:
self.model = []
self.filename = file.filename
logger.info(f"Importing spreadsheet from .xlsx file: {self.filename}")
@ -64,8 +69,10 @@ class SampleSpreadsheetImporter:
return self.process_spreadsheet(sheet)
def process_spreadsheet(self, sheet):
def process_spreadsheet(self, sheet) -> Tuple[List[SpreadsheetModel], List[dict], List[dict]]:
model = []
errors = []
raw_data = []
# Skip the first 3 rows
rows = list(sheet.iter_rows(min_row=4, values_only=True))
@ -82,6 +89,9 @@ class SampleSpreadsheetImporter:
logger.debug(f"Skipping empty row at index {index}")
continue
# Record raw data for later use
raw_data.append({"row_num": index + 4, "data": row})
# Pad the row to ensure it has the expected number of columns
if len(row) < expected_columns:
row = list(row) + [None] * (expected_columns - len(row))
@ -126,10 +136,54 @@ class SampleSpreadsheetImporter:
model.append(validated_record)
logger.debug(f"Row {index + 4} processed and validated successfully")
except ValidationError as e:
error_message = f"Validation error in row {index + 4}: {e}"
logger.error(error_message)
raise SpreadsheetImportError(error_message)
logger.error(f"Validation error in row {index + 4}: {e}")
for error in e.errors():
field = error['loc'][0]
msg = error['msg']
# Map field name (which is the key in `record`) to its index in the row
field_to_col = {
'dewarname': 0,
'puckname': 1,
'pucktype': 2,
'crystalname': 3,
'positioninpuck': 4,
'priority': 5,
'comments': 6,
'directory': 7,
'proteinname': 8,
'oscillation': 9,
'aperture': 10,
'exposure': 11,
'totalrange': 12,
'transmission': 13,
'dose': 14,
'targetresolution': 15,
'datacollectiontype': 16,
'processingpipeline': 17,
'spacegroupnumber': 18,
'cellparameters': 19,
'rescutkey': 20,
'rescutvalue': 21,
'userresolution': 22,
'pdbid': 23,
'autoprocfull': 24,
'procfull': 25,
'adpenabled': 26,
'noano': 27,
'ffcscampaign': 28,
'trustedhigh': 29,
'autoprocextraparams': 30,
'chiphiangles': 31
}
column_index = field_to_col[field]
error_info = {
'row': index + 4,
'cell': column_index,
'value': row[column_index], # Value that caused the error
'message': msg
}
errors.append(error_info)
self.model = model
logger.info(f"Finished processing {len(model)} records")
return self.model
logger.info(f"Finished processing {len(model)} records with {len(errors)} errors")
return self.model, errors, raw_data