Validator functional

2024-11-06 15:54:09 +01:00
parent 91468da9ed
commit 3cf9c669b9
3 changed files with 247 additions and 411 deletions
--- a/backend/app/routers/spreadsheet.py
+++ b/backend/app/routers/spreadsheet.py
@ -1,5 +1,3 @@
-# app/routes/spreadsheet.py
-
 from fastapi import APIRouter, UploadFile, File, HTTPException
 import logging
 from app.services.spreadsheet_service import SampleSpreadsheetImporter, SpreadsheetImportError
@ -7,6 +5,7 @@ from app.services.spreadsheet_service import SampleSpreadsheetImporter, Spreadsh
 router = APIRouter()
 logger = logging.getLogger(__name__)

+
@router.post("/upload")
 async def upload_file(file: UploadFile = File(...)):
    try:
@ -22,9 +21,9 @@ async def upload_file(file: UploadFile = File(...)):
        validated_model = importer.import_spreadsheet(file)
        logger.info(f"Validated model: {validated_model}")

-        dewars = {sample['dewarname'] for sample in validated_model if 'dewarname' in sample}
-        pucks = {sample['puckname'] for sample in validated_model if 'puckname' in sample}
-        samples = {sample['crystalname'] for sample in validated_model if 'crystalname' in sample}
+        dewars = {sample.dewarname for sample in validated_model if sample.dewarname}
+        pucks = {sample.puckname for sample in validated_model if sample.puckname}
+        samples = {sample.crystalname for sample in validated_model if sample.crystalname}

        # Logging the sets of names
        logger.info(f"Dewar Names: {dewars}")
--- a/backend/app/sample_models.py
+++ b/backend/app/sample_models.py
@ -1,65 +1,70 @@
 import re
-from typing import Any, Optional, Union
-from pydantic import BaseModel, Field, field_validator, AliasChoices
+from typing import Any, Optional
+from pydantic import BaseModel, Field, field_validator
 from typing_extensions import Annotated


 class SpreadsheetModel(BaseModel):
    dewarname: str = Field(..., alias='dewarname')
    puckname: str = Field(..., alias='puckname')
-    pucktype: Optional[str] = "unipuck"
-    pucklocationindewar: Optional[Union[int, str]]
+    pucktype: Optional[str] = Field(None, alias="pucktype")
    crystalname: Annotated[
        str,
        Field(...,
              max_length=64,
              title="Crystal Name",
-              description="""max_length imposed by MTZ file header format
-                        https://www.ccp4.ac.uk/html/mtzformat.html""",
+              description="max_length imposed by MTZ file header format https://www.ccp4.ac.uk/html/mtzformat.html",
              alias='crystalname'
-              ),
+        ),
    ]
-    positioninpuck: int
+    positioninpuck: int  # Only accept positive integers between 1 and 16
    priority: Optional[int]
    comments: Optional[str]
-    pinbarcode: Optional[str]
    directory: Optional[str]
-    proteinname: Any = ""
-    oscillation: Any = ""
-    exposure: Any = ""
-    totalrange: Any = ""
-    transmission: Any = ""
-    targetresolution: Any = ""
-    aperture: Any = ""
-    datacollectiontype: Any = ""
-    processingpipeline: Any = ""
-    spacegroupnumber: Any = ""
-    cellparameters: Any = ""
-    rescutkey: Any = ""
-    rescutvalue: Any = ""
-    userresolution: Any = ""
-    pdbmodel: Any = ""
-    autoprocfull: Any = ""
-    procfull: Any = ""
-    adpenabled: Any = ""
-    noano: Any = ""
-    trustedhigh: Any = ""
-    ffcscampaign: Any = ""
-    autoprocextraparams: Any = ""
-    chiphiangles: Any = ""
+    proteinname: Optional[str] = ""  # Alphanumeric validation
+    oscillation: Optional[float] = None  # Only accept positive float
+    exposure: Optional[float] = None  # Only accept positive floats between 0 and 1
+    totalrange: Optional[int] = None  # Only accept positive integers between 0 and 360
+    transmission: Optional[int] = None  # Only accept positive integers between 0 and 100
+    targetresolution: Optional[float] = None  # Only accept positive float
+    aperture: Optional[str] = None  # Optional string field
+    datacollectiontype: Optional[str] = None  # Only accept "standard", other types might be added later
+    processingpipeline: Optional[str] = ""  # Only accept "gopy", "autoproc", "xia2dials"
+    spacegroupnumber: Optional[int] = None  # Only accept positive integers between 1 and 230
+    cellparameters: Optional[str] = None  # Must be a set of six positive floats or integers
+    rescutkey: Optional[str] = None  # Only accept "is" or "cchalf"
+    rescutvalue: Optional[float] = None  # Must be a positive float if rescutkey is provided
+    userresolution: Optional[float] = None
+    pdbid: Optional[str] = ""  # Accepts either the format of the protein data bank code or {provided}
+    autoprocfull: Optional[bool] = None
+    procfull: Optional[bool] = None
+    adpenabled: Optional[bool] = None
+    noano: Optional[bool] = None
+    ffcscampaign: Optional[bool] = None
+    trustedhigh: Optional[float] = None  # Should be a float between 0 and 2.0
+    autoprocextraparams: Optional[str] = None  # Optional string field
+    chiphiangles: Optional[float] = None  # Optional float field between 0 and 30
+    dose: Optional[float] = None  # Optional float field

+    # Add pucktype validation
+    @field_validator('pucktype', mode="before")
+    @classmethod
+    def validate_pucktype(cls, v):
+        if v != "unipuck":
+            raise ValueError(f"'{v}' is not valid. Pucktype must be 'unipuck'.")
+        return v
+
+    # Validators
    @field_validator('dewarname', 'puckname', mode="before")
    @classmethod
    def dewarname_puckname_characters(cls, v):
        if v:
-            assert (
-                    len(str(v)) > 0
-            ), f"""" {v} " is not valid. Value must be provided for all samples in the spreadsheet."""
-            v = str(v).replace(" ", "_")
+            v = str(v).strip().replace(" ", "_").upper()
            if re.search("\n", v):
                assert v.isalnum(), "is not valid. newline character detected."
            v = re.sub(r"\.0$", "", v)
-            return v.upper()
+            return v
+        raise ValueError("Value must be provided for dewarname and puckname.")

    @field_validator('crystalname', mode="before")
    @classmethod
@ -68,9 +73,7 @@ class SpreadsheetModel(BaseModel):
        if re.search("\n", v):
            assert v.isalnum(), "is not valid. newline character detected."
        characters = re.sub("[._+-]", "", v)
-        assert characters.isalnum(), f"""" {v} " is not valid.
-            must contain only alphanumeric and . _ + - characters"""
-        v = re.sub(r"\.0$", "", v)
+        assert characters.isalnum(), f" '{v}' is not valid. Only alphanumeric and . _ + - characters allowed."
        return v

    @field_validator('directory', mode="before")
@ -79,343 +82,182 @@ class SpreadsheetModel(BaseModel):
        if v:
            v = str(v).strip("/").replace(" ", "_")
            if re.search("\n", v):
-                raise ValueError(
-                    f"""" {v} " is not valid.
-                                 newline character detected."""
-                )
-            ok = "[a-z0-9_.+-]"
-            directory_re = re.compile("^((%s*|{%s+})*/?)*$" % (ok, ok), re.IGNORECASE)
-            if not directory_re.match(v):
-                raise ValueError(
-                    f"' {v} ' is not valid. value must be a path or macro."
-                )
+                raise ValueError(f" '{v}' is not valid. newline character detected.")

-            these_macros = re.findall(r"(\{[^}]+\})", v)
-            valid_macros = [
-                "{date}",
-                "{prefix}",
-                "{sgpuck}",
-                "{puck}",
-                "{beamline}",
-                "{sgprefix}",
-                "{sgpriority}",
-                "{sgposition}",
-                "{protein}",
-                "{method}",
-            ]
-            for m in these_macros:
-                if m.lower() not in valid_macros:
-                    raise ValueError(
-                        f"""" {m} " is not a valid macro, please re-check documentation;
-                        allowed macros: date, prefix, sgpuck, puck, beamline, sgprefix,
-                        sgpriority, sgposition, protein, method"""
-                    )
+            valid_macros = ["{date}", "{prefix}", "{sgpuck}", "{puck}", "{beamline}", "{sgprefix}",
+                            "{sgpriority}", "{sgposition}", "{protein}", "{method}"]
+            # Replace known macros in a scratch copy only: the value handed back to the
+            # model must keep its macros intact. Matching is case-insensitive, like the
+            # path check below.
+            pattern = re.compile("|".join(re.escape(macro) for macro in valid_macros), re.IGNORECASE)
+            without_macros = pattern.sub('macro', v)
+
+            # A single flat character class accepts the same strings as the former
+            # nested-quantifier pattern, but cannot backtrack exponentially on bad input.
+            # Any brace left after the substitution (unknown macro) fails this check.
+            directory_re = re.compile(r"[a-z0-9_.+/-]*", re.IGNORECASE)
+            if not directory_re.fullmatch(without_macros):
+                raise ValueError(f" '{v}' is not valid. Value must be a valid path or macro.")
        return v

    @field_validator('positioninpuck', mode="before")
    @classmethod
    def positioninpuck_possible(cls, v):
-        if v:
-            try:
-                v = int(float(v))
-                if v < 1 or v > 16:
-                    raise ValueError(
-                        f"""" {v} " is not valid. value must be from 1 to 16."""
-                    )
-            except (ValueError, TypeError) as e:
-                raise ValueError(
-                    f"""" {v} " is not valid.
-                    Value must be a numeric type and from 1 to 16."""
-                ) from e
-        else:
-            raise ValueError("Value must be provided. Value must be from 1 to 16.")
+        if not isinstance(v, int) or v < 1 or v > 16:
+            raise ValueError(f" '{v}' is not valid. Value must be an integer between 1 and 16.")
        return v

-    @field_validator('pucklocationindewar', mode="before")
-    @classmethod
-    def pucklocationindewar_convert_to_str(cls, v):
-        if v == "Unipuck":
-            return v
-        try:
-            return str(int(float(v)))
-        except ValueError:
-            raise ValueError(f"Value error, could not convert string to float: '{v}'")
-
    @field_validator('priority', mode="before")
    @classmethod
    def priority_positive(cls, v):
        if v is not None:
-            v = str(v).strip()
-            v = re.sub(r"\.0$", "", v)
            try:
-                if int(v) <= 0:
-                    raise ValueError(
-                        f" '{v}' is not valid. Value must be a positive integer."
-                    )
                v = int(v)
+                if v <= 0:
+                    raise ValueError(f" '{v}' is not valid. Value must be a positive integer.")
            except (ValueError, TypeError) as e:
-                raise ValueError(
-                    f" '{v}' is not valid. Value must be a positive integer."
-                ) from e
+                raise ValueError(f" '{v}' is not valid. Value must be a positive integer.") from e
        return v

    @field_validator('aperture', mode="before")
    @classmethod
    def aperture_selection(cls, v):
-        if v:
+        if v is not None:
            try:
                v = int(float(v))
-                if v not in [1, 2, 3]:
-                    raise ValueError(
-                        f"""" {v} " is not valid.
-                        value must be integer 1, 2 or 3."""
-                    )
+                if v not in {1, 2, 3}:
+                    raise ValueError(f" '{v}' is not valid. Value must be 1, 2, or 3.")
            except (ValueError, TypeError) as e:
-                raise ValueError(
-                    f"""" {v} " is not valid.
-                    value must be integer 1, 2 or 3."""
-                ) from e
+                raise ValueError(f" '{v}' is not valid. Value must be 1, 2, or 3.") from e
        return v

-    @field_validator(
-        "oscillation",
-        "exposure",
-        "totalrange",
-        "targetresolution",
-        "rescutvalue",
-        "userresolution",
-        mode="before"
-    )
+    @field_validator('oscillation', 'targetresolution', mode="before")
    @classmethod
-    def parameter_positive_float(cls, v):
-        if v:
+    def positive_float_validator(cls, v):
+        if v is not None:
            try:
                v = float(v)
-                if not v > 0:
-                    raise ValueError(
-                        f"""" {v} " is not valid.
-                        value must be a positive float."""
-                    )
+                if v <= 0:
+                    raise ValueError(f" '{v}' is not valid. Value must be a positive float.")
            except (ValueError, TypeError) as e:
-                raise ValueError(
-                    f"""" {v} " is not valid.
-                    value must be a positive float."""
-                ) from e
+                raise ValueError(f" '{v}' is not valid. Value must be a positive float.") from e
+        return v
+
+    @field_validator('exposure', mode="before")
+    @classmethod
+    def exposure_in_range(cls, v):
+        if v is not None:
+            try:
+                v = float(v)
+                if not (0 <= v <= 1):
+                    raise ValueError(f" '{v}' is not valid. Value must be a float between 0 and 1.")
+            except (ValueError, TypeError) as e:
+                raise ValueError(f" '{v}' is not valid. Value must be a float between 0 and 1.") from e
+        return v
+
+    @field_validator('totalrange', mode="before")
+    @classmethod
+    def totalrange_in_range(cls, v):
+        if v is not None:
+            try:
+                v = int(v)
+                if not (0 <= v <= 360):
+                    raise ValueError(f" '{v}' is not valid. Value must be an integer between 0 and 360.")
+            except (ValueError, TypeError) as e:
+                raise ValueError(f" '{v}' is not valid. Value must be an integer between 0 and 360.") from e
        return v

    @field_validator('transmission', mode="before")
    @classmethod
-    def tranmission_fraction(cls, v):
-        if v:
+    def transmission_fraction(cls, v):
+        if v is not None:
            try:
-                v = float(v)
-                if 100 >= v > 0:
-                    v = v
-                else:
-                    raise ValueError(
-                        f"""" {v} " is not valid.
-                        value must be a float between 0 and 100."""
-                    )
+                v = int(v)
+                if not (0 <= v <= 100):
+                    raise ValueError(f" '{v}' is not valid. Value must be an integer between 0 and 100.")
            except (ValueError, TypeError) as e:
-                raise ValueError(
-                    f"""" {v} " is not valid.
-                    value must be a float between 0 and 100."""
-                ) from e
+                raise ValueError(f" '{v}' is not valid. Value must be an integer between 0 and 100.") from e
        return v

    @field_validator('datacollectiontype', mode="before")
    @classmethod
    def datacollectiontype_allowed(cls, v):
-        if v:
-            v = v.lower()
-            allowed = ["standard", "serial-xtal", "multi-orientation"]
-            if str(v) not in allowed:
-                raise ValueError(
-                    f"""" {v} " is not valid.
-                                 value must be one of" {allowed} "."""
-                )
+        allowed = {"standard"}  # Other types of data collection might be added later
+        if v and v.lower() not in allowed:
+            raise ValueError(f" '{v}' is not valid. Value must be one of {allowed}.")
        return v

    @field_validator('processingpipeline', mode="before")
    @classmethod
    def processingpipeline_allowed(cls, v):
-        if v:
-            v = v.lower()
-            allowed = ["gopy", "autoproc", "xia2dials"]
-            if str(v) not in allowed:
-                raise ValueError(
-                    f"""" {v} " is not valid.
-                                 value must be one of " {allowed} "."""
-                )
+        allowed = {"gopy", "autoproc", "xia2dials"}
+        if v and v.lower() not in allowed:
+            raise ValueError(f" '{v}' is not valid. Value must be one of {allowed}.")
        return v

-    @field_validator('spacegroupnumber', mode="before")
-    @classmethod
-    def spacegroupnumber_integer(cls, v):
-        if v:
-            try:
-                v = int(float(v))
-                if not v > 0 or not v < 231:
-                    raise ValueError(
-                        f"""" {v} " is not valid.
-                        value must be a positive integer between 1 and 230."""
-                    )
-            except (ValueError, TypeError) as e:
-                raise ValueError(
-                    f"""" {v} " is not valid.
-                    value must be a positive integer between 1 and 230."""
-                ) from e
-        return v
-
-    @field_validator('cellparameters', mode="before")
-    @classmethod
-    def cellparameters_positive_floats(cls, v):
-        if v:
-            splitted = str(v).split(" ")
-            if len(splitted) != 6:
-                raise ValueError(
-                    f"' {v} ' is not valid. value must be a set of six numbers."
-                )
-            for el in splitted:
-                try:
-                    el = float(el)
-                    if not el > 0:
-                        raise ValueError(
-                            f"' {el} ' is not valid. value must be a positive float."
-                        )
-                except (ValueError, TypeError) as e:
-                    raise ValueError(
-                        f"' {el} ' is not valid. value must be a positive float."
-                    ) from e
-        return v
+    # Declared at class-body indentation so pydantic registers it; nested inside the
+    # previous method (after its return) it would be unreachable dead code.
+    @field_validator('spacegroupnumber', mode="before")
+    @classmethod
+    def spacegroupnumber_allowed(cls, v):
+        """Coerce spacegroupnumber to int and require 1 <= value <= 230; None passes through."""
+        if v is not None:
+            try:
+                v = int(v)
+                if not (1 <= v <= 230):
+                    raise ValueError(f" '{v}' is not valid. Value must be an integer between 1 and 230.")
+            except (ValueError, TypeError) as e:
+                # Covers both a failed int() conversion and the range error raised above.
+                raise ValueError(f" '{v}' is not valid. Value must be an integer between 1 and 230.") from e
+        return v

-    @field_validator('rescutkey', mode="before")
-    @classmethod
-    def rescutkey_allowed(cls, v):
-        if v:
-            v = v.lower()
-            allowed = ["is", "cchalf"]
-            if str(v) not in allowed:
-                raise ValueError(f"' {v} ' is not valid. value must be ' {allowed} '.")
-        return v
+    # Declared at class-body indentation so pydantic registers it as a validator.
+    @field_validator('cellparameters', mode="before")
+    @classmethod
+    def cellparameters_format(cls, v):
+        """Check that cellparameters holds exactly six positive numbers.
+
+        Values may be separated by commas and/or whitespace. The input is
+        returned unchanged when it is valid or empty; otherwise ValueError
+        is raised.
+        """
+        if v:
+            # str() guards against numeric cells; empty fragments come from
+            # leading/trailing or doubled separators.
+            parts = [part for part in re.split(r"[,\s]+", str(v).strip()) if part]
+            try:
+                values = [float(part) for part in parts]
+            except ValueError as e:
+                raise ValueError(f" '{v}' is not valid. Value must be a set of six positive floats or integers.") from e
+            # "not val > 0" also rejects NaN, which "val <= 0" would let through.
+            if len(values) != 6 or any(not val > 0 for val in values):
+                raise ValueError(f" '{v}' is not valid. Value must be a set of six positive floats or integers.")
+        return v

-    @field_validator('autoprocfull', 'procfull', 'adpenabled', 'noano', 'ffcscampaign', mode="before")
-    @classmethod
-    def boolean_allowed(cls, v):
-        if v:
-            v = v.title()
-            allowed = ["False", "True"]
-            if str(v) not in allowed:
-                raise ValueError(
-                    f"""" {v} " is not valid.
-                                 value must be ' {allowed} '."""
-                )
-        return v
+    # Declared at class-body indentation so pydantic registers it as a validator.
+    @field_validator('rescutkey', 'rescutvalue', mode="before")
+    @classmethod
+    def rescutkey_value_pair(cls, v, info):
+        """Validate the resolution-cutoff key and its accompanying value.
+
+        A field validator is called once per field with that field's raw
+        value (not with a dict of all fields), so the field being checked is
+        identified through ``info.field_name``. ``info.data`` holds the fields
+        already validated; rescutkey is declared before rescutvalue, so it is
+        available there when rescutvalue is checked.
+        """
+        if info.field_name == 'rescutkey':
+            if v and str(v).lower() not in {"is", "cchalf"}:
+                raise ValueError("Rescutkey must be either 'is' or 'cchalf'")
+            return v
+        # rescutvalue is only constrained when a rescutkey accompanies it.
+        if v not in (None, "") and info.data.get('rescutkey'):
+            try:
+                # The importer delivers this column as text, so convert rather
+                # than type-check.
+                v = float(v)
+            except (ValueError, TypeError) as e:
+                raise ValueError("Rescutvalue must be a positive float if rescutkey is provided") from e
+            if not v > 0:
+                raise ValueError("Rescutvalue must be a positive float if rescutkey is provided")
+        return v

-    @field_validator('trustedhigh', mode="before")
-    @classmethod
-    def trusted_float(cls, v):
-        if v:
-            try:
-                v = float(v)
-                if 2.0 >= v > 0:
-                    v = v
-                else:
-                    raise ValueError(
-                        f"""" {v} " is not valid.
-                        value must be a float between 0 and 2.0."""
-                    )
-            except (ValueError, TypeError) as e:
-                raise ValueError(
-                    f"""" {v} " is not valid.
-                    value must be a float between 0 and 2.0."""
-                ) from e
-        return v
+    # Declared at class-body indentation so pydantic registers it as a validator.
+    @field_validator('trustedhigh', mode="before")
+    @classmethod
+    def trustedhigh_allowed(cls, v):
+        """Coerce trustedhigh to float and require 0 <= value <= 2.0; None passes through."""
+        if v is not None:
+            try:
+                v = float(v)
+                if not (0 <= v <= 2.0):
+                    raise ValueError(f" '{v}' is not valid. Value must be a float between 0 and 2.0.")
+            except (ValueError, TypeError) as e:
+                # Covers both a failed float() conversion and the range error raised above.
+                raise ValueError(f" '{v}' is not valid. Value must be a float between 0 and 2.0.") from e
+        return v

-    @field_validator('proteinname', mode="before")
-    @classmethod
-    def proteinname_characters(cls, v):
-        if v:
-            v = str(v).replace(" ", "_")
-            if re.search("\n", v):
-                assert v.isalnum(), "is not valid. newline character detected."
-            characters = re.sub("[._+-]", "", v)
-            assert characters.isalnum(), f"""" {v} " is not valid.
-                must contain only alphanumeric and . _ + - characters"""
-            v = re.sub(r"\.0$", "", v)
-        return v
+    # Declared at class-body indentation so pydantic registers it as a validator.
+    @field_validator('chiphiangles', mode="before")
+    @classmethod
+    def chiphiangles_allowed(cls, v):
+        """Coerce chiphiangles to float and require 0 <= value <= 30; None passes through."""
+        if v is not None:
+            try:
+                v = float(v)
+                if not (0 <= v <= 30):
+                    raise ValueError(f" '{v}' is not valid. Value must be a float between 0 and 30.")
+            except (ValueError, TypeError) as e:
+                # Covers both a failed float() conversion and the range error raised above.
+                raise ValueError(f" '{v}' is not valid. Value must be a float between 0 and 30.") from e
+        return v

-    @field_validator('chiphiangles', mode="before")
-    @classmethod
-    def chiphiangles_value(cls, v):
-        if v:
-            try:
-                v = str(v)
-                v = re.sub(r"(^\s*\[\s*|\s*\]\s*$)", "", v.strip())
-                list_of_strings = re.findall(r"\(.*?\)", v)
-                list_of_tuples = []
-                for el in list_of_strings:
-                    first = re.findall(r"\(.*?\,", el)[0].replace(" ", "")[1:-1]
-                    second = re.findall(r"\,.*?\)", el)[0].replace(" ", "")[1:-1]
-                    my_tuple = (float(first), float(second))
-                    list_of_tuples.append(my_tuple)
-                v = list_of_tuples
-            except (ValueError, TypeError) as e:
-                raise ValueError(
-                    f"""" {v} " is not valid. Example format is
-                    (0.0, 0.0), (20.0, 0.0), (30, 0.0)"""
-                ) from e
-        return v
+    # Declared at class-body indentation so pydantic registers it as a validator.
+    @field_validator('dose', mode="before")
+    @classmethod
+    def dose_positive(cls, v):
+        """Coerce dose to float and require it to be strictly positive; None passes through."""
+        if v is not None:
+            try:
+                v = float(v)
+                if v <= 0:
+                    raise ValueError(f" '{v}' is not valid. Value must be a positive float.")
+            except (ValueError, TypeError) as e:
+                # Covers both a failed float() conversion and the range error raised above.
+                raise ValueError(f" '{v}' is not valid. Value must be a positive float.") from e
+        return v

-    @field_validator(
-        "priority",
-        "comments",
-        "pinbarcode",
-        "directory",
-        "proteinname",
-        "oscillation",
-        "exposure",
-        "totalrange",
-        "transmission",
-        "targetresolution",
-        "aperture",
-        "datacollectiontype",
-        "processingpipeline",
-        "spacegroupnumber",
-        "cellparameters",
-        "rescutkey",
-        "rescutvalue",
-        "userresolution",
-        "pdbmodel",
-        "autoprocfull",
-        "procfull",
-        "adpenabled",
-        "noano",
-        "trustedhigh",
-        "ffcscampaign",
-        "autoprocextraparams",
-        "chiphiangles",
-        mode="before"
-    )
-    @classmethod
-    def set_default_emptystring(cls, v):
-        return v or ""
-
-    class Config:
-        str_strip_whitespace = True
-        aliases = {
-            'dewarname': 'dewarname',
-            'puckname': 'puckname',
-            'crystalname': 'crystalname',
-        }
-
-
-class TELLModel(SpreadsheetModel):
-    input_order: int
-    samplemountcount: int = 0
-    samplestatus: str = "not present"
-    puckaddress: str = "---"
-    username: str
-    puck_number: int
-    prefix: Optional[str]
-    folder: Optional[str]
+
+# Module-level class: indented under SpreadsheetModel's last method it would be a
+# local class in unreachable code and could not be imported from this module.
+class TELLModel(SpreadsheetModel):
+    """SpreadsheetModel subclass reserved for TELL-specific fields; adds none yet."""
+    pass  # Extend the SpreadsheetModel with TELL-specific fields if needed
--- a/backend/app/services/spreadsheet_service.py
+++ b/backend/app/services/spreadsheet_service.py
@ -1,39 +1,46 @@
 import logging
 import openpyxl
-from pydantic import ValidationError, parse_obj_as
-from typing import List
-from app.sample_models import SpreadsheetModel
+from pydantic import ValidationError
+from typing import Union
 from io import BytesIO
+from app.sample_models import SpreadsheetModel

-UNASSIGNED_PUCKADDRESS = "---"
-logging.basicConfig(level=logging.DEBUG)  # Change to DEBUG level to see more logs
+logging.basicConfig(level=logging.DEBUG)
 logger = logging.getLogger(__name__)

+
 class SpreadsheetImportError(Exception):
    pass

+
 class SampleSpreadsheetImporter:
    def __init__(self):
        self.filename = None
        self.model = None
-        self.available_puck_positions = []

-    def _clean_value(self, value):
+    def _clean_value(self, value, expected_type=None):
         """Clean value by converting it to the expected type and stripping whitespace for strings."""
+        # Empty cells stay None so optional model fields fall back to "not provided".
+        if value is None:
+            return None
+        if expected_type == str:
+            return str(value).strip()
+        if expected_type in [float, int]:
+            try:
+                return expected_type(value)
+            except (ValueError, TypeError):
+                # Do not turn unparsable input into None: that would let garbage in an
+                # optional numeric column pass as "empty". Hand the raw value on so the
+                # model validation rejects it; only blank text counts as missing.
+                if isinstance(value, str):
+                    return value.strip() or None
+                return value
+        # No expected type given: best-effort numeric conversion of text cells.
         if isinstance(value, str):
-            return value.strip()
-        elif isinstance(value, (float, int)):
-            return str(value)  # Always return strings for priority field validation
+            try:
+                if '.' in value:
+                    return float(value)
+                else:
+                    return int(value)
+            except ValueError:
+                return value.strip()
         return value

    def import_spreadsheet(self, file):
-        # Reinitialize state
-        self.available_puck_positions = [
-            f"{s}{p}" for s in list("ABCDEF") for p in range(1, 6)
-        ]
-        self.available_puck_positions.append(UNASSIGNED_PUCKADDRESS)
        self.model = []
-
        self.filename = file.filename
        logger.info(f"Importing spreadsheet from .xlsx file: {self.filename}")

@ -68,73 +75,61 @@ class SampleSpreadsheetImporter:
            logger.error("The 'Samples' worksheet is empty.")
            raise SpreadsheetImportError("The 'Samples' worksheet is empty.")

+        expected_columns = 32  # Number of columns expected based on the model
+
        for index, row in enumerate(rows):
-            if not row or all(value is None for value in row):
-                logger.debug(f"Skipping empty row or row with all None values at index {index}.")
+            if not any(row):
+                logger.debug(f"Skipping empty row at index {index}")
                continue

+            # Pad the row to ensure it has the expected number of columns
+            if len(row) < expected_columns:
+                row = list(row) + [None] * (expected_columns - len(row))
+
+            record = {
+                'dewarname': self._clean_value(row[0], str),
+                'puckname': self._clean_value(row[1], str),
+                'pucktype': self._clean_value(row[2], str),
+                'crystalname': self._clean_value(row[3], str),
+                'positioninpuck': self._clean_value(row[4], int),
+                'priority': self._clean_value(row[5], int),
+                'comments': self._clean_value(row[6], str),
+                'directory': self._clean_value(row[7], str),
+                'proteinname': self._clean_value(row[8], str),
+                'oscillation': self._clean_value(row[9], float),
+                'aperture': self._clean_value(row[10], str),
+                'exposure': self._clean_value(row[11], float),
+                'totalrange': self._clean_value(row[12], float),
+                'transmission': self._clean_value(row[13], int),
+                'dose': self._clean_value(row[14], float),
+                'targetresolution': self._clean_value(row[15], float),
+                'datacollectiontype': self._clean_value(row[16], str),
+                'processingpipeline': self._clean_value(row[17], str),
+                'spacegroupnumber': self._clean_value(row[18], int),
+                'cellparameters': self._clean_value(row[19], str),
+                'rescutkey': self._clean_value(row[20], str),
+                'rescutvalue': self._clean_value(row[21], str),
+                'userresolution': self._clean_value(row[22], str),
+                'pdbid': self._clean_value(row[23], str),
+                'autoprocfull': self._clean_value(row[24], str),
+                'procfull': self._clean_value(row[25], str),
+                'adpenabled': self._clean_value(row[26], str),
+                'noano': self._clean_value(row[27], str),
+                'ffcscampaign': self._clean_value(row[28], str),
+                'trustedhigh': self._clean_value(row[29], str),
+                'autoprocextraparams': self._clean_value(row[30], str),
+                'chiphiangles': self._clean_value(row[31], str)
+            }
+
            try:
-                sample = {
-                    'dewarname': self._clean_value(row[0]),
-                    'puckname': self._clean_value(row[1]),
-                    'pucklocationindewar': self._clean_value(row[2]) if len(row) > 2 else None,
-                    'positioninpuck': self._clean_value(row[3]) if len(row) > 3 else None,
-                    'crystalname': self._clean_value(row[4]),
-                    'priority': self._clean_value(row[5]) if len(row) > 5 else None,
-                    'comments': self._clean_value(row[6]) if len(row) > 6 else None,
-                    'pinbarcode': self._clean_value(row[7]) if len(row) > 7 else None,
-                    'directory': self._clean_value(row[8]) if len(row) > 8 else None,
-                }
-            except IndexError:
-                logger.error(f"Index error processing row at index {index}: Row has missing values.")
-                raise SpreadsheetImportError(f"Index error processing row at index {index}: Row has missing values.")
+                validated_record = SpreadsheetModel(**record)
+                model.append(validated_record)
+                logger.debug(f"Row {index + 4} processed and validated successfully")
+            except ValidationError as e:
+                error_message = f"Validation error in row {index + 4}: {e}"
+                logger.error(error_message)
+                raise SpreadsheetImportError(error_message)

-            # Skip rows missing essential fields
-            if not sample['dewarname'] or not sample['puckname'] or not sample['crystalname']:
-                logger.debug(f"Skipping row due to missing essential fields: {row}")
-                continue
-
-            model.append(sample)
-            logger.info(f"Sample processed: {sample}")
-
-        if not model:
-            logger.error("No valid samples found in the spreadsheet.")
-            raise SpreadsheetImportError("No valid samples found in the spreadsheet.")
-
-        logger.info(f"...finished import, got {len(model)} samples")
-        logger.debug(f"Model data: {model}")
        self.model = model
-
-        try:
-            validated_model = self.validate()
-        except SpreadsheetImportError as e:
-            logger.error(f"Failed to validate spreadsheet: {str(e)}")
-            raise
-
-        return validated_model
-
-    def validate(self):
-        model = self.model
-        logger.info(f"...validating {len(model)} samples")
-
-        for sample in model:
-            logger.info(f"Validating sample: {sample}")
-
-        validated_model = self.data_model_validation(SpreadsheetModel, model)
-
-        for sample in validated_model:
-            logger.info(f"Validated sample: {sample}")
-
-        logger.debug(f"Validated model data: {validated_model}")
-        return validated_model
-
-    @staticmethod
-    def data_model_validation(data_model, model):
-        try:
-            validated = parse_obj_as(List[data_model], model)
-        except ValidationError as e:
-            logger.error(f"Validation error: {e.errors()}")
-            raise SpreadsheetImportError(f"{e.errors()[0]['loc']} => {e.errors()[0]['msg']}")
-
-        validated_model = [dict(value) for value in validated]
-        return validated_model
+        logger.info(f"Finished processing {len(model)} records")
+        return self.model