# aaredb/backend/app/services/spreadsheet_service.py
# 2024-11-05 14:08:34 +01:00
#
# 107 lines
# 3.5 KiB
# Python

# app/services/spreadsheet_service.py
import logging
import openpyxl
from pydantic import ValidationError, parse_obj_as
from typing import List
from app.sample_models import SpreadsheetModel, TELLModel
from io import BytesIO
# Apply basic logging configuration at INFO level when this module is imported,
# then create the module-level logger used by the importer below.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Placeholder puck address; the importer appends it to its list of available
# puck positions.
UNASSIGNED_PUCKADDRESS = "---"
class SpreadsheetImportError(Exception):
    """Raised when a spreadsheet cannot be read or its contents fail validation."""
class SampleSpreadsheetImporter:
    """Read sample rows from the "Samples" worksheet of an uploaded .xlsx file.

    Typical use is ``SampleSpreadsheetImporter().import_spreadsheet(upload)``,
    which returns the validated samples as a list of dictionaries and raises
    ``SpreadsheetImportError`` when the file cannot be read or validated.
    """

    # 1-based worksheet row holding the first sample; the rows above are skipped.
    _FIRST_DATA_ROW = 4
    # Zero-based column index of each required field within a worksheet row.
    _REQUIRED_COLUMNS = {"dewarname": 0, "puckname": 1, "crystalname": 4}

    def __init__(self):
        """Create an importer with no spreadsheet loaded yet."""
        self.filename = None  # name of the most recently imported upload
        self.model = None  # raw (unvalidated) sample dicts from the last import
        self.available_puck_positions = []  # filled in by import_spreadsheet()

    def import_spreadsheet(self, file):
        """Load the "Samples" worksheet from *file* and return validated samples.

        Args:
            file: Upload object exposing ``filename`` and a binary file-like
                ``file`` attribute (presumably a FastAPI ``UploadFile`` --
                confirm against the caller). Its stream is rewound after
                reading so the caller can read it again.

        Returns:
            A list of dictionaries, one per validated sample.

        Raises:
            SpreadsheetImportError: If the file cannot be read as a workbook,
                has no "Samples" worksheet, or fails validation.
        """
        # Puck addresses "A1".."F5" plus the placeholder for unassigned pucks.
        self.available_puck_positions = [
            f"{letter}{number}" for letter in "ABCDEF" for number in range(1, 6)
        ]
        self.available_puck_positions.append(UNASSIGNED_PUCKADDRESS)
        self.filename = file.filename

        logger.info("Importing spreadsheet from .xlsx file: %s", self.filename)
        try:
            contents = file.file.read()  # read the whole upload into memory
            file.file.seek(0)  # rewind so the caller can re-read the upload
            workbook = openpyxl.load_workbook(BytesIO(contents))
        except Exception as err:
            raise SpreadsheetImportError(f"Failed to read the file: {err}") from err

        # Looked up separately from loading: a KeyError raised while opening the
        # archive must not be reported as a missing worksheet.
        try:
            sheet = workbook["Samples"]
        except KeyError as err:
            raise SpreadsheetImportError(
                "The file is missing 'Samples' worksheet."
            ) from err
        return self.process_spreadsheet(sheet)

    def process_spreadsheet(self, sheet):
        """Extract samples from *sheet*, store them on ``self.model`` and validate.

        Rows before ``_FIRST_DATA_ROW`` are skipped, as are rows in which any
        required field (dewar name, puck name, crystal name) is empty or absent.

        Args:
            sheet: Worksheet object providing
                ``iter_rows(min_row=..., values_only=True)``.

        Returns:
            A list of dictionaries, one per validated sample.

        Raises:
            SpreadsheetImportError: If validation of the extracted samples fails.
        """
        model = []
        for row in sheet.iter_rows(min_row=self._FIRST_DATA_ROW, values_only=True):
            sample = self._row_to_sample(row)
            if sample is None:
                continue  # incomplete row: a required field is missing
            model.append(sample)
            logger.info("Sample processed: %s", sample)
        logger.info("...finished import, got %d samples", len(model))
        self.model = model

        try:
            return self.validate()
        except SpreadsheetImportError as err:
            logger.error("Failed to validate spreadsheet: %s", err)
            raise

    def validate(self):
        """Validate ``self.model`` against ``SpreadsheetModel``.

        Returns:
            A list of dictionaries, one per validated sample.

        Raises:
            SpreadsheetImportError: If no spreadsheet has been processed yet or
                if any sample fails validation.
        """
        model = self.model
        if model is None:
            # Previously this crashed with a TypeError on len(None).
            raise SpreadsheetImportError("No spreadsheet has been imported yet.")

        logger.info("...validating %d samples", len(model))
        for sample in model:
            logger.info("Validating sample: %s", sample)
        validated_model = self.data_model_validation(SpreadsheetModel, model)
        for sample in validated_model:
            logger.info("Validated sample: %s", sample)
        return validated_model

    @staticmethod
    def data_model_validation(data_model, model):
        """Parse every item of *model* as *data_model* and return plain dicts.

        Args:
            data_model: Pydantic model class used to validate each item.
            model: List of raw sample dictionaries.

        Returns:
            A list with one ``dict`` per validated item.

        Raises:
            SpreadsheetImportError: Describing the first validation error as
                ``"<location> => <message>"``.
        """
        try:
            validated = parse_obj_as(List[data_model], model)
        except ValidationError as err:
            message = SampleSpreadsheetImporter._describe_validation_error(
                err.errors()[0]
            )
            raise SpreadsheetImportError(message) from err
        return [dict(value) for value in validated]

    @staticmethod
    def _describe_validation_error(error):
        """Format one pydantic error dict as ``"<location> => <message>"``.

        When the error location has at least three parts, the third one is
        reported (the position the original code always read). Shorter
        locations are reported in full, dot-joined, instead of raising
        ``IndexError`` and hiding the real validation problem.
        """
        loc = tuple(error.get("loc", ()))
        where = loc[2] if len(loc) > 2 else ".".join(str(part) for part in loc)
        return f"{where} => {error['msg']}"

    @classmethod
    def _row_to_sample(cls, row):
        """Build a sample dict from a worksheet *row*, or return ``None`` to skip.

        A row is skipped when any required field is falsy after cleaning or
        when the row is too short to contain the field's column.
        """
        sample = {
            field: cls._clean_value(row[index]) if index < len(row) else None
            for field, index in cls._REQUIRED_COLUMNS.items()
        }
        return sample if all(sample.values()) else None

    @staticmethod
    def _clean_value(value):
        """Strip surrounding whitespace from strings; return other values as is."""
        if isinstance(value, str):
            return value.strip()
        return value