Added error recognition to the spreadsheet importer

This commit is contained in:
GotthardG
2024-11-07 14:27:49 +01:00
parent 8f82a3b7fe
commit 501d09e6aa
5 changed files with 274 additions and 102 deletions

View File

@ -1,5 +1,3 @@
# sample_spreadsheet_importer.py
import logging
import openpyxl
from pydantic import ValidationError
@ -10,11 +8,9 @@ from app.sample_models import SpreadsheetModel
logging.basicConfig(level=logging.DEBUG)
logger = logging.getLogger(__name__)
class SpreadsheetImportError(Exception):
    """Raised when a sample spreadsheet (.xlsx) cannot be read or parsed."""
class SampleSpreadsheetImporter:
def __init__(self):
self.filename = None
@ -44,7 +40,18 @@ class SampleSpreadsheetImporter:
def import_spreadsheet(self, file):
    """Import *file*, delegating to import_spreadsheet_with_errors.

    Thin compatibility wrapper: returns whatever tuple
    import_spreadsheet_with_errors returns for the same file.
    """
    return self.import_spreadsheet_with_errors(file)
def import_spreadsheet_with_errors(self, file) -> Tuple[List[SpreadsheetModel], List[dict], List[dict]]:
def get_expected_type(self, col_name):
    """Return the Python type expected for a spreadsheet column.

    Known integer and float columns are checked explicitly; every
    other column (including 'dewarname' and 'puckname') is treated
    as a string.
    """
    # NOTE(review): only a handful of columns are mapped so far;
    # remaining model fields fall through to `str` — extend as needed.
    int_columns = {'positioninpuck', 'priority'}
    float_columns = {'oscillation'}
    if col_name in int_columns:
        return int
    if col_name in float_columns:
        return float
    return str
def import_spreadsheet_with_errors(self, file) -> Tuple[List[SpreadsheetModel], List[dict], List[dict], List[str]]:
self.model = []
self.filename = file.filename
logger.info(f"Importing spreadsheet from .xlsx file: {self.filename}")
@ -67,12 +74,17 @@ class SampleSpreadsheetImporter:
logger.error(f"Failed to read the file: {str(e)}")
raise SpreadsheetImportError(f"Failed to read the file: {str(e)}")
return self.process_spreadsheet(sheet)
# Unpack four values from the process_spreadsheet method
model, errors, raw_data, headers = self.process_spreadsheet(sheet)
def process_spreadsheet(self, sheet) -> Tuple[List[SpreadsheetModel], List[dict], List[dict]]:
# Now, return the values correctly
return model, errors, raw_data, headers
def process_spreadsheet(self, sheet) -> Tuple[List[SpreadsheetModel], List[dict], List[dict], List[str]]:
model = []
errors = []
raw_data = []
headers = []
# Skip the first 3 rows
rows = list(sheet.iter_rows(min_row=4, values_only=True))
@ -84,6 +96,16 @@ class SampleSpreadsheetImporter:
expected_columns = 32 # Number of columns expected based on the model
# Add the headers (the first row in the spreadsheet or map them explicitly)
headers = [
'dewarname', 'puckname', 'pucktype', 'crystalname', 'positioninpuck', 'priority',
'comments', 'directory', 'proteinname', 'oscillation', 'aperture', 'exposure',
'totalrange', 'transmission', 'dose', 'targetresolution', 'datacollectiontype',
'processingpipeline', 'spacegroupnumber', 'cellparameters', 'rescutkey', 'rescutvalue',
'userresolution', 'pdbid', 'autoprocfull', 'procfull', 'adpenabled', 'noano',
'ffcscampaign', 'trustedhigh', 'autoprocextraparams', 'chiphiangles'
]
for index, row in enumerate(rows):
if not any(row):
logger.debug(f"Skipping empty row at index {index}")
@ -96,6 +118,7 @@ class SampleSpreadsheetImporter:
if len(row) < expected_columns:
row = list(row) + [None] * (expected_columns - len(row))
# Prepare the record with the cleaned values
record = {
'dewarname': self._clean_value(row[0], str),
'puckname': self._clean_value(row[1], str),
@ -186,4 +209,4 @@ class SampleSpreadsheetImporter:
self.model = model
logger.info(f"Finished processing {len(model)} records with {len(errors)} errors")
return self.model, errors, raw_data
return self.model, errors, raw_data, headers # Include headers in the response