diff --git a/backend/app/routers/spreadsheet.py b/backend/app/routers/spreadsheet.py
index b094e4b..65db3d3 100644
--- a/backend/app/routers/spreadsheet.py
+++ b/backend/app/routers/spreadsheet.py
@@ -1,5 +1,3 @@
-# app/routes/spreadsheet.py
-
 from fastapi import APIRouter, UploadFile, File, HTTPException
 import logging
 from app.services.spreadsheet_service import SampleSpreadsheetImporter, SpreadsheetImportError
@@ -7,6 +5,7 @@ from app.services.spreadsheet_service import SampleSpreadsheetImporter, Spreadsh
 router = APIRouter()
 logger = logging.getLogger(__name__)
 
+
 @router.post("/upload")
 async def upload_file(file: UploadFile = File(...)):
     try:
@@ -22,9 +21,9 @@ async def upload_file(file: UploadFile = File(...)):
         validated_model = importer.import_spreadsheet(file)
         logger.info(f"Validated model: {validated_model}")
 
-        dewars = {sample['dewarname'] for sample in validated_model if 'dewarname' in sample}
-        pucks = {sample['puckname'] for sample in validated_model if 'puckname' in sample}
-        samples = {sample['crystalname'] for sample in validated_model if 'crystalname' in sample}
+        dewars = {sample.dewarname for sample in validated_model if sample.dewarname}
+        pucks = {sample.puckname for sample in validated_model if sample.puckname}
+        samples = {sample.crystalname for sample in validated_model if sample.crystalname}
 
         # Logging the sets of names
         logger.info(f"Dewar Names: {dewars}")
@@ -48,4 +47,4 @@ async def upload_file(file: UploadFile = File(...)):
         raise HTTPException(status_code=400, detail=str(e))
     except Exception as e:
         logger.error(f"Failed to process file: {str(e)}")
-        raise HTTPException(status_code=500, detail=f"Failed to upload file. Please try again. {str(e)}")
\ No newline at end of file
+        raise HTTPException(status_code=500, detail=f"Failed to upload file. Please try again. {str(e)}")
diff --git a/backend/app/sample_models.py b/backend/app/sample_models.py
index a8d2c67..32aa9e7 100644
--- a/backend/app/sample_models.py
+++ b/backend/app/sample_models.py
@@ -1,65 +1,70 @@
 import re
-from typing import Any, Optional, Union
-from pydantic import BaseModel, Field, field_validator, AliasChoices
+from typing import Any, Optional
+from pydantic import BaseModel, Field, field_validator
 from typing_extensions import Annotated
 
 
 class SpreadsheetModel(BaseModel):
     dewarname: str = Field(..., alias='dewarname')
     puckname: str = Field(..., alias='puckname')
-    pucktype: Optional[str] = "unipuck"
-    pucklocationindewar: Optional[Union[int, str]]
+    pucktype: Optional[str] = Field(None, alias="pucktype")
     crystalname: Annotated[
         str,
         Field(...,
               max_length=64,
               title="Crystal Name",
-              description="""max_length imposed by MTZ file header format
-              https://www.ccp4.ac.uk/html/mtzformat.html""",
+              description="max_length imposed by MTZ file header format https://www.ccp4.ac.uk/html/mtzformat.html",
               alias='crystalname'
-        ),
+              ),
     ]
-    positioninpuck: int
+    positioninpuck: int  # Only accept positive integers between 1 and 16
     priority: Optional[int]
     comments: Optional[str]
-    pinbarcode: Optional[str]
    directory: Optional[str]
-    proteinname: Any = ""
-    oscillation: Any = ""
-    exposure: Any = ""
-    totalrange: Any = ""
-    transmission: Any = ""
-    targetresolution: Any = ""
-    aperture: Any = ""
-    datacollectiontype: Any = ""
-    processingpipeline: Any = ""
-    spacegroupnumber: Any = ""
-    cellparameters: Any = ""
-    rescutkey: Any = ""
-    rescutvalue: Any = ""
-    userresolution: Any = ""
-    pdbmodel: Any = ""
-    autoprocfull: Any = ""
-    procfull: Any = ""
-    adpenabled: Any = ""
-    noano: Any = ""
-    trustedhigh: Any = ""
-    ffcscampaign: Any = ""
-    autoprocextraparams: Any = ""
-    chiphiangles: Any = ""
+    proteinname: Optional[str] = ""  # Alphanumeric validation
+    oscillation: Optional[float] = None  # Only accept positive float
+    exposure: Optional[float] = None  # Only accept positive floats between 0 and 1
+    totalrange: Optional[int] = None  # Only accept positive integers between 0 and 360
+    transmission: Optional[int] = None  # Only accept positive integers between 0 and 100
+    targetresolution: Optional[float] = None  # Only accept positive float
+    aperture: Optional[str] = None  # Optional string field
+    datacollectiontype: Optional[str] = None  # Only accept "standard", other types might be added later
+    processingpipeline: Optional[str] = ""  # Only accept "gopy", "autoproc", "xia2dials"
+    spacegroupnumber: Optional[int] = None  # Only accept positive integers between 1 and 230
+    cellparameters: Optional[str] = None  # Must be a set of six positive floats or integers
+    rescutkey: Optional[str] = None  # Only accept "is" or "cchalf"
+    rescutvalue: Optional[float] = None  # Must be a positive float if rescutkey is provided
+    userresolution: Optional[float] = None
+    pdbid: Optional[str] = ""  # Accepts either the format of the protein data bank code or {provided}
+    autoprocfull: Optional[bool] = None
+    procfull: Optional[bool] = None
+    adpenabled: Optional[bool] = None
+    noano: Optional[bool] = None
+    ffcscampaign: Optional[bool] = None
+    trustedhigh: Optional[float] = None  # Should be a float between 0 and 2.0
+    autoprocextraparams: Optional[str] = None  # Optional string field
+    chiphiangles: Optional[float] = None  # Optional float field between 0 and 30
+    dose: Optional[float] = None  # Optional float field
+
+    # Add pucktype validation
+    @field_validator('pucktype', mode="before")
+    @classmethod
+    def validate_pucktype(cls, v):
+        # Allow the optional field to stay empty; only reject explicit non-unipuck values.
+        if v is not None and v != "unipuck":
+            raise ValueError(f"'{v}' is not valid. Pucktype must be 'unipuck'.")
+        return v
+
+    # Validators
     @field_validator('dewarname', 'puckname', mode="before")
     @classmethod
     def dewarname_puckname_characters(cls, v):
         if v:
-            assert (
-                len(str(v)) > 0
-            ), f"""" {v} " is not valid. Value must be provided for all samples in the spreadsheet."""
-            v = str(v).replace(" ", "_")
+            v = str(v).strip().replace(" ", "_").upper()
             if re.search("\n", v):
                 assert v.isalnum(), "is not valid. newline character detected."
             v = re.sub(r"\.0$", "", v)
-        return v.upper()
+            return v
+        raise ValueError("Value must be provided for dewarname and puckname.")
 
     @field_validator('crystalname', mode="before")
     @classmethod
@@ -68,9 +73,7 @@ class SpreadsheetModel(BaseModel):
         if re.search("\n", v):
             assert v.isalnum(), "is not valid. newline character detected."
         characters = re.sub("[._+-]", "", v)
-        assert characters.isalnum(), f"""" {v} " is not valid.
-        must contain only alphanumeric and . _ + - characters"""
-        v = re.sub(r"\.0$", "", v)
+        assert characters.isalnum(), f" '{v}' is not valid. Only alphanumeric and . _ + - characters allowed."
         return v
 
     @field_validator('directory', mode="before")
@@ -79,343 +82,182 @@ class SpreadsheetModel(BaseModel):
         if v:
             v = str(v).strip("/").replace(" ", "_")
             if re.search("\n", v):
-                raise ValueError(
-                    f"""" {v} " is not valid.
-                    newline character detected."""
-                )
-            ok = "[a-z0-9_.+-]"
-            directory_re = re.compile("^((%s*|{%s+})*/?)*$" % (ok, ok), re.IGNORECASE)
-            if not directory_re.match(v):
-                raise ValueError(
-                    f"' {v} ' is not valid. value must be a path or macro."
-                )
-            these_macros = re.findall(r"(\{[^}]+\})", v)
-            valid_macros = [
-                "{date}",
-                "{prefix}",
-                "{sgpuck}",
-                "{puck}",
-                "{beamline}",
-                "{sgprefix}",
-                "{sgpriority}",
-                "{sgposition}",
-                "{protein}",
-                "{method}",
-            ]
-            for m in these_macros:
-                if m.lower() not in valid_macros:
-                    raise ValueError(
-                        f"""" {m} " is not a valid macro, please re-check documentation;
-                        allowed macros: date, prefix, sgpuck, puck, beamline, sgprefix,
-                        sgpriority, sgposition, protein, method"""
-                    )
+                raise ValueError(f" '{v}' is not valid. newline character detected.")
+
+            valid_macros = ["{date}", "{prefix}", "{sgpuck}", "{puck}", "{beamline}", "{sgprefix}",
+                            "{sgpriority}", "{sgposition}", "{protein}", "{method}"]
+            pattern = re.compile("|".join(re.escape(macro) for macro in valid_macros))
+            # Check the path shape against a macro-free copy so the stored value keeps its macros intact.
+            checked = pattern.sub('macro', v)
+
+            allowed_chars = "[a-z0-9_.+-]"
+            directory_re = re.compile(f"^(({allowed_chars}*|{allowed_chars}+)*/*)*$", re.IGNORECASE)
+            if not directory_re.match(checked):
+                raise ValueError(f" '{v}' is not valid. Value must be a valid path or macro.")
         return v
 
     @field_validator('positioninpuck', mode="before")
     @classmethod
     def positioninpuck_possible(cls, v):
-        if v:
-            try:
-                v = int(float(v))
-                if v < 1 or v > 16:
-                    raise ValueError(
-                        f"""" {v} " is not valid. value must be from 1 to 16."""
-                    )
-            except (ValueError, TypeError) as e:
-                raise ValueError(
-                    f"""" {v} " is not valid.
-                    Value must be a numeric type and from 1 to 16."""
-                ) from e
-        else:
-            raise ValueError("Value must be provided. Value must be from 1 to 16.")
+        if not isinstance(v, int) or v < 1 or v > 16:
+            raise ValueError(f" '{v}' is not valid. Value must be an integer between 1 and 16.")
         return v
 
-    @field_validator('pucklocationindewar', mode="before")
-    @classmethod
-    def pucklocationindewar_convert_to_str(cls, v):
-        if v == "Unipuck":
-            return v
-        try:
-            return str(int(float(v)))
-        except ValueError:
-            raise ValueError(f"Value error, could not convert string to float: '{v}'")
-
     @field_validator('priority', mode="before")
     @classmethod
     def priority_positive(cls, v):
         if v is not None:
-            v = str(v).strip()
-            v = re.sub(r"\.0$", "", v)
             try:
-                if int(v) <= 0:
-                    raise ValueError(
-                        f" '{v}' is not valid. Value must be a positive integer."
-                    )
                 v = int(v)
+                if v <= 0:
+                    raise ValueError(f" '{v}' is not valid. Value must be a positive integer.")
             except (ValueError, TypeError) as e:
-                raise ValueError(
-                    f" '{v}' is not valid. Value must be a positive integer."
-                ) from e
+                raise ValueError(f" '{v}' is not valid. Value must be a positive integer.") from e
         return v
 
     @field_validator('aperture', mode="before")
     @classmethod
     def aperture_selection(cls, v):
-        if v:
+        if v is not None:
             try:
                 v = int(float(v))
-                if v not in [1, 2, 3]:
-                    raise ValueError(
-                        f"""" {v} " is not valid.
-                        value must be integer 1, 2 or 3."""
-                    )
+                if v not in {1, 2, 3}:
+                    raise ValueError(f" '{v}' is not valid. Value must be 1, 2, or 3.")
             except (ValueError, TypeError) as e:
-                raise ValueError(
-                    f"""" {v} " is not valid.
-                    value must be integer 1, 2 or 3."""
-                ) from e
+                raise ValueError(f" '{v}' is not valid. Value must be 1, 2, or 3.") from e
         return v
 
-    @field_validator(
-        "oscillation",
-        "exposure",
-        "totalrange",
-        "targetresolution",
-        "rescutvalue",
-        "userresolution",
-        mode="before"
-    )
+    @field_validator('oscillation', 'targetresolution', mode="before")
     @classmethod
-    def parameter_positive_float(cls, v):
-        if v:
+    def positive_float_validator(cls, v):
+        if v is not None:
             try:
                 v = float(v)
-                if not v > 0:
-                    raise ValueError(
-                        f"""" {v} " is not valid.
-                        value must be a positive float."""
-                    )
+                if v <= 0:
+                    raise ValueError(f" '{v}' is not valid. Value must be a positive float.")
             except (ValueError, TypeError) as e:
-                raise ValueError(
-                    f"""" {v} " is not valid.
-                    value must be a positive float."""
-                ) from e
+                raise ValueError(f" '{v}' is not valid. Value must be a positive float.") from e
+        return v
+
+    @field_validator('exposure', mode="before")
+    @classmethod
+    def exposure_in_range(cls, v):
+        if v is not None:
+            try:
+                v = float(v)
+                if not (0 <= v <= 1):
+                    raise ValueError(f" '{v}' is not valid. Value must be a float between 0 and 1.")
+            except (ValueError, TypeError) as e:
+                raise ValueError(f" '{v}' is not valid. Value must be a float between 0 and 1.") from e
+        return v
+
+    @field_validator('totalrange', mode="before")
+    @classmethod
+    def totalrange_in_range(cls, v):
+        if v is not None:
+            try:
+                v = int(v)
+                if not (0 <= v <= 360):
+                    raise ValueError(f" '{v}' is not valid. Value must be an integer between 0 and 360.")
+            except (ValueError, TypeError) as e:
+                raise ValueError(f" '{v}' is not valid. Value must be an integer between 0 and 360.") from e
        return v
 
     @field_validator('transmission', mode="before")
     @classmethod
-    def tranmission_fraction(cls, v):
-        if v:
+    def transmission_fraction(cls, v):
+        if v is not None:
             try:
-                v = float(v)
-                if 100 >= v > 0:
-                    v = v
-                else:
-                    raise ValueError(
-                        f"""" {v} " is not valid.
-                        value must be a float between 0 and 100."""
-                    )
+                v = int(v)
+                if not (0 <= v <= 100):
+                    raise ValueError(f" '{v}' is not valid. Value must be an integer between 0 and 100.")
             except (ValueError, TypeError) as e:
-                raise ValueError(
-                    f"""" {v} " is not valid.
-                    value must be a float between 0 and 100."""
-                ) from e
+                raise ValueError(f" '{v}' is not valid. Value must be an integer between 0 and 100.") from e
         return v
 
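+    # The enumerated-value validators below reject unknown entries rather than silently accepting them.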
     @field_validator('datacollectiontype', mode="before")
     @classmethod
     def datacollectiontype_allowed(cls, v):
-        if v:
-            v = v.lower()
-            allowed = ["standard", "serial-xtal", "multi-orientation"]
-            if str(v) not in allowed:
-                raise ValueError(
-                    f"""" {v} " is not valid.
-                    value must be one of" {allowed} "."""
-                )
+        allowed = {"standard"}  # Other types of data collection might be added later
+        if v and v.lower() not in allowed:
+            raise ValueError(f" '{v}' is not valid. Value must be one of {allowed}.")
         return v
 
     @field_validator('processingpipeline', mode="before")
     @classmethod
     def processingpipeline_allowed(cls, v):
-        if v:
-            v = v.lower()
-            allowed = ["gopy", "autoproc", "xia2dials"]
-            if str(v) not in allowed:
-                raise ValueError(
-                    f"""" {v} " is not valid.
-                    value must be one of " {allowed} "."""
-                )
+        allowed = {"gopy", "autoproc", "xia2dials"}
+        if v and v.lower() not in allowed:
+            raise ValueError(f" '{v}' is not valid. Value must be one of {allowed}.")
         return v
 
-    @field_validator('spacegroupnumber', mode="before")
-    @classmethod
-    def spacegroupnumber_integer(cls, v):
-        if v:
-            try:
-                v = int(float(v))
-                if not v > 0 or not v < 231:
-                    raise ValueError(
-                        f"""" {v} " is not valid.
-                        value must be a positive integer between 1 and 230."""
-                    )
-            except (ValueError, TypeError) as e:
-                raise ValueError(
-                    f"""" {v} " is not valid.
-                    value must be a positive integer between 1 and 230."""
-                ) from e
-        return v
-
-    @field_validator('cellparameters', mode="before")
-    @classmethod
-    def cellparameters_positive_floats(cls, v):
-        if v:
-            splitted = str(v).split(" ")
-            if len(splitted) != 6:
-                raise ValueError(
-                    f"' {v} ' is not valid. value must be a set of six numbers."
-                )
-            for el in splitted:
-                try:
-                    el = float(el)
-                    if not el > 0:
-                        raise ValueError(
-                            f"' {el} ' is not valid. value must be a positive float."
-                        )
-                except (ValueError, TypeError) as e:
-                    raise ValueError(
-                        f"' {el} ' is not valid. value must be a positive float."
-                    ) from e
-        return v
-
-    @field_validator('rescutkey', mode="before")
-    @classmethod
-    def rescutkey_allowed(cls, v):
-        if v:
-            v = v.lower()
-            allowed = ["is", "cchalf"]
-            if str(v) not in allowed:
-                raise ValueError(f"' {v} ' is not valid. value must be ' {allowed} '.")
-        return v
+    @field_validator('spacegroupnumber', mode="before")
+    @classmethod
+    def spacegroupnumber_allowed(cls, v):
+        if v is not None:
+            try:
+                v = int(v)
+                if not (1 <= v <= 230):
+                    raise ValueError(f" '{v}' is not valid. Value must be an integer between 1 and 230.")
+            except (ValueError, TypeError) as e:
+                raise ValueError(f" '{v}' is not valid. Value must be an integer between 1 and 230.") from e
+        return v
 
+    @field_validator('cellparameters', mode="before")
+    @classmethod
+    def cellparameters_format(cls, v):
+        if v:
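+            # Expects six comma-separated positive numbers (unit cell a, b, c, alpha, beta, gamma).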
+            values = [float(i) for i in v.split(",")]
+            if len(values) != 6 or any(val <= 0 for val in values):
+                raise ValueError(f" '{v}' is not valid. Value must be a set of six positive floats or integers.")
+        return v
 
-    @field_validator('autoprocfull', 'procfull', 'adpenabled', 'noano', 'ffcscampaign', mode="before")
-    @classmethod
-    def boolean_allowed(cls, v):
-        if v:
-            v = v.title()
-            allowed = ["False", "True"]
-            if str(v) not in allowed:
-                raise ValueError(
-                    f"""" {v} " is not valid.
-                    value must be ' {allowed} '."""
-                )
-        return v
+    @field_validator('rescutvalue', mode="after")
+    @classmethod
+    def rescutkey_value_pair(cls, v, info):
+        # Cross-field check: rescutkey is declared before rescutvalue, so it is
+        # already present in info.data by the time rescutvalue is validated.
+        rescutkey = info.data.get('rescutkey')
+        if rescutkey and v is not None:
+            if rescutkey not in {"is", "cchalf"}:
+                raise ValueError("Rescutkey must be either 'is' or 'cchalf'")
+            if v <= 0:
+                raise ValueError("Rescutvalue must be a positive float if rescutkey is provided")
+        return v
 
-    @field_validator('trustedhigh', mode="before")
-    @classmethod
-    def trusted_float(cls, v):
-        if v:
-            try:
-                v = float(v)
-                if 2.0 >= v > 0:
-                    v = v
-                else:
-                    raise ValueError(
-                        f"""" {v} " is not valid.
-                        value must be a float between 0 and 2.0."""
-                    )
-            except (ValueError, TypeError) as e:
-                raise ValueError(
-                    f"""" {v} " is not valid.
-                    value must be a float between 0 and 2.0."""
-                ) from e
-        return v
+    @field_validator('trustedhigh', mode="before")
+    @classmethod
+    def trustedhigh_allowed(cls, v):
+        if v is not None:
+            try:
+                v = float(v)
+                if not (0 <= v <= 2.0):
+                    raise ValueError(f" '{v}' is not valid. Value must be a float between 0 and 2.0.")
+            except (ValueError, TypeError) as e:
+                raise ValueError(f" '{v}' is not valid. Value must be a float between 0 and 2.0.") from e
+        return v
 
-    @field_validator('proteinname', mode="before")
-    @classmethod
-    def proteinname_characters(cls, v):
-        if v:
-            v = str(v).replace(" ", "_")
-            if re.search("\n", v):
-                assert v.isalnum(), "is not valid. newline character detected."
-            characters = re.sub("[._+-]", "", v)
-            assert characters.isalnum(), f"""" {v} " is not valid.
-            must contain only alphanumeric and . _ + - characters"""
-            v = re.sub(r"\.0$", "", v)
-        return v
+    @field_validator('chiphiangles', mode="before")
+    @classmethod
+    def chiphiangles_allowed(cls, v):
+        if v is not None:
+            try:
+                v = float(v)
+                if not (0 <= v <= 30):
+                    raise ValueError(f" '{v}' is not valid. Value must be a float between 0 and 30.")
+            except (ValueError, TypeError) as e:
+                raise ValueError(f" '{v}' is not valid. Value must be a float between 0 and 30.") from e
+        return v
 
-    @field_validator('chiphiangles', mode="before")
-    @classmethod
-    def chiphiangles_value(cls, v):
-        if v:
-            try:
-                v = str(v)
-                v = re.sub(r"(^\s*\[\s*|\s*\]\s*$)", "", v.strip())
-                list_of_strings = re.findall(r"\(.*?\)", v)
-                list_of_tuples = []
-                for el in list_of_strings:
-                    first = re.findall(r"\(.*?\,", el)[0].replace(" ", "")[1:-1]
-                    second = re.findall(r"\,.*?\)", el)[0].replace(" ", "")[1:-1]
-                    my_tuple = (float(first), float(second))
-                    list_of_tuples.append(my_tuple)
-                v = list_of_tuples
-            except (ValueError, TypeError) as e:
-                raise ValueError(
-                    f"""" {v} " is not valid. Example format is
-                    (0.0, 0.0), (20.0, 0.0), (30, 0.0)"""
-                ) from e
-        return v
+    @field_validator('dose', mode="before")
+    @classmethod
+    def dose_positive(cls, v):
+        if v is not None:
+            try:
+                v = float(v)
+                if v <= 0:
+                    raise ValueError(f" '{v}' is not valid. Value must be a positive float.")
+            except (ValueError, TypeError) as e:
+                raise ValueError(f" '{v}' is not valid. Value must be a positive float.") from e
+        return v
Value must be a positive float.") from e + return v - @field_validator( - "priority", - "comments", - "pinbarcode", - "directory", - "proteinname", - "oscillation", - "exposure", - "totalrange", - "transmission", - "targetresolution", - "aperture", - "datacollectiontype", - "processingpipeline", - "spacegroupnumber", - "cellparameters", - "rescutkey", - "rescutvalue", - "userresolution", - "pdbmodel", - "autoprocfull", - "procfull", - "adpenabled", - "noano", - "trustedhigh", - "ffcscampaign", - "autoprocextraparams", - "chiphiangles", - mode="before" - ) - @classmethod - def set_default_emptystring(cls, v): - return v or "" - - class Config: - str_strip_whitespace = True - aliases = { - 'dewarname': 'dewarname', - 'puckname': 'puckname', - 'crystalname': 'crystalname', - } - - -class TELLModel(SpreadsheetModel): - input_order: int - samplemountcount: int = 0 - samplestatus: str = "not present" - puckaddress: str = "---" - username: str - puck_number: int - prefix: Optional[str] - folder: Optional[str] + class TELLModel(SpreadsheetModel): + pass # Extend the SpreadsheetModel with TELL-specific fields if needed \ No newline at end of file diff --git a/backend/app/services/spreadsheet_service.py b/backend/app/services/spreadsheet_service.py index af4b3c7..0ef4562 100644 --- a/backend/app/services/spreadsheet_service.py +++ b/backend/app/services/spreadsheet_service.py @@ -1,39 +1,46 @@ import logging import openpyxl -from pydantic import ValidationError, parse_obj_as -from typing import List -from app.sample_models import SpreadsheetModel +from pydantic import ValidationError +from typing import Union from io import BytesIO +from app.sample_models import SpreadsheetModel -UNASSIGNED_PUCKADDRESS = "---" -logging.basicConfig(level=logging.DEBUG) # Change to DEBUG level to see more logs +logging.basicConfig(level=logging.DEBUG) logger = logging.getLogger(__name__) + class SpreadsheetImportError(Exception): pass + class SampleSpreadsheetImporter: def __init__(self): self.filename = None self.model = None - self.available_puck_positions = [] - def _clean_value(self, value): + def _clean_value(self, value, expected_type=None): """Clean value by converting it to the expected type and stripping whitespace for strings.""" + if value is None: + return None + if expected_type == str: + return str(value).strip() + if expected_type in [float, int]: + try: + return expected_type(value) + except ValueError: + return None if isinstance(value, str): - return value.strip() - elif isinstance(value, (float, int)): - return str(value) # Always return strings for priority field validation + try: + if '.' 
         if isinstance(value, str):
-            return value.strip()
-        elif isinstance(value, (float, int)):
-            return str(value)  # Always return strings for priority field validation
+            try:
+                if '.' in value:
+                    return float(value)
+                else:
+                    return int(value)
+            except ValueError:
+                return value.strip()
         return value
 
     def import_spreadsheet(self, file):
-        # Reinitialize state
-        self.available_puck_positions = [
-            f"{s}{p}" for s in list("ABCDEF") for p in range(1, 6)
-        ]
-        self.available_puck_positions.append(UNASSIGNED_PUCKADDRESS)
         self.model = []
-
         self.filename = file.filename
         logger.info(f"Importing spreadsheet from .xlsx file: {self.filename}")
@@ -68,73 +75,61 @@ class SampleSpreadsheetImporter:
             logger.error("The 'Samples' worksheet is empty.")
             raise SpreadsheetImportError("The 'Samples' worksheet is empty.")
 
+        expected_columns = 32  # Number of columns expected based on the model
+
         for index, row in enumerate(rows):
-            if not row or all(value is None for value in row):
-                logger.debug(f"Skipping empty row or row with all None values at index {index}.")
+            if not any(row):
+                logger.debug(f"Skipping empty row at index {index}")
                 continue
 
+            # Pad the row to ensure it has the expected number of columns
+            if len(row) < expected_columns:
+                row = list(row) + [None] * (expected_columns - len(row))
+
+            record = {
+                'dewarname': self._clean_value(row[0], str),
+                'puckname': self._clean_value(row[1], str),
+                'pucktype': self._clean_value(row[2], str),
+                'crystalname': self._clean_value(row[3], str),
+                'positioninpuck': self._clean_value(row[4], int),
+                'priority': self._clean_value(row[5], int),
+                'comments': self._clean_value(row[6], str),
+                'directory': self._clean_value(row[7], str),
+                'proteinname': self._clean_value(row[8], str),
+                'oscillation': self._clean_value(row[9], float),
+                'aperture': self._clean_value(row[10], str),
+                'exposure': self._clean_value(row[11], float),
+                'totalrange': self._clean_value(row[12], float),
+                'transmission': self._clean_value(row[13], int),
+                'dose': self._clean_value(row[14], float),
+                'targetresolution': self._clean_value(row[15], float),
+                'datacollectiontype': self._clean_value(row[16], str),
+                'processingpipeline': self._clean_value(row[17], str),
+                'spacegroupnumber': self._clean_value(row[18], int),
+                'cellparameters': self._clean_value(row[19], str),
+                'rescutkey': self._clean_value(row[20], str),
+                'rescutvalue': self._clean_value(row[21], str),
+                'userresolution': self._clean_value(row[22], str),
+                'pdbid': self._clean_value(row[23], str),
+                'autoprocfull': self._clean_value(row[24], str),
+                'procfull': self._clean_value(row[25], str),
+                'adpenabled': self._clean_value(row[26], str),
+                'noano': self._clean_value(row[27], str),
+                'ffcscampaign': self._clean_value(row[28], str),
+                'trustedhigh': self._clean_value(row[29], str),
+                'autoprocextraparams': self._clean_value(row[30], str),
+                'chiphiangles': self._clean_value(row[31], str)
+            }
+
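+            # Each record is validated against SpreadsheetModel; a single invalid row
+            # aborts the import with a row-specific error message.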
             try:
-                sample = {
-                    'dewarname': self._clean_value(row[0]),
-                    'puckname': self._clean_value(row[1]),
-                    'pucklocationindewar': self._clean_value(row[2]) if len(row) > 2 else None,
-                    'positioninpuck': self._clean_value(row[3]) if len(row) > 3 else None,
-                    'crystalname': self._clean_value(row[4]),
-                    'priority': self._clean_value(row[5]) if len(row) > 5 else None,
-                    'comments': self._clean_value(row[6]) if len(row) > 6 else None,
-                    'pinbarcode': self._clean_value(row[7]) if len(row) > 7 else None,
-                    'directory': self._clean_value(row[8]) if len(row) > 8 else None,
-                }
-            except IndexError:
-                logger.error(f"Index error processing row at index {index}: Row has missing values.")
-                raise SpreadsheetImportError(f"Index error processing row at index {index}: Row has missing values.")
+                validated_record = SpreadsheetModel(**record)
+                model.append(validated_record)
+                logger.debug(f"Row {index + 4} processed and validated successfully")
+            except ValidationError as e:
+                error_message = f"Validation error in row {index + 4}: {e}"
+                logger.error(error_message)
+                raise SpreadsheetImportError(error_message)
 
-            # Skip rows missing essential fields
-            if not sample['dewarname'] or not sample['puckname'] or not sample['crystalname']:
-                logger.debug(f"Skipping row due to missing essential fields: {row}")
-                continue
-
-            model.append(sample)
-            logger.info(f"Sample processed: {sample}")
-
-        if not model:
-            logger.error("No valid samples found in the spreadsheet.")
-            raise SpreadsheetImportError("No valid samples found in the spreadsheet.")
-
-        logger.info(f"...finished import, got {len(model)} samples")
-        logger.debug(f"Model data: {model}")
         self.model = model
-
-        try:
-            validated_model = self.validate()
-        except SpreadsheetImportError as e:
-            logger.error(f"Failed to validate spreadsheet: {str(e)}")
-            raise
-
-        return validated_model
-
-    def validate(self):
-        model = self.model
-        logger.info(f"...validating {len(model)} samples")
-
-        for sample in model:
-            logger.info(f"Validating sample: {sample}")
-
-        validated_model = self.data_model_validation(SpreadsheetModel, model)
-
-        for sample in validated_model:
-            logger.info(f"Validated sample: {sample}")
-
-        logger.debug(f"Validated model data: {validated_model}")
-        return validated_model
-
-    @staticmethod
-    def data_model_validation(data_model, model):
-        try:
-            validated = parse_obj_as(List[data_model], model)
-        except ValidationError as e:
-            logger.error(f"Validation error: {e.errors()}")
-            raise SpreadsheetImportError(f"{e.errors()[0]['loc']} => {e.errors()[0]['msg']}")
-
-        validated_model = [dict(value) for value in validated]
-        return validated_model
\ No newline at end of file
+        logger.info(f"Finished processing {len(model)} records")
+        return self.model