Validator functional

This commit is contained in:
GotthardG 2024-11-06 15:54:09 +01:00
parent 91468da9ed
commit 3cf9c669b9
3 changed files with 247 additions and 411 deletions

View File: app/routes/spreadsheet.py

@@ -1,5 +1,3 @@
from fastapi import APIRouter, UploadFile, File, HTTPException
import logging
from app.services.spreadsheet_service import SampleSpreadsheetImporter, SpreadsheetImportError
@@ -7,6 +5,7 @@ from app.services.spreadsheet_service import SampleSpreadsheetImporter, SpreadsheetImportError
router = APIRouter()
logger = logging.getLogger(__name__)


@router.post("/upload")
async def upload_file(file: UploadFile = File(...)):
    try:
@@ -22,9 +21,9 @@ async def upload_file(file: UploadFile = File(...)):
        validated_model = importer.import_spreadsheet(file)
        logger.info(f"Validated model: {validated_model}")

        dewars = {sample.dewarname for sample in validated_model if sample.dewarname}
        pucks = {sample.puckname for sample in validated_model if sample.puckname}
        samples = {sample.crystalname for sample in validated_model if sample.crystalname}

        # Logging the sets of names
        logger.info(f"Dewar Names: {dewars}")

View File: app/sample_models.py

@@ -1,65 +1,70 @@
import re
from typing import Any, Optional
from pydantic import BaseModel, Field, field_validator
from typing_extensions import Annotated


class SpreadsheetModel(BaseModel):
    dewarname: str = Field(..., alias='dewarname')
    puckname: str = Field(..., alias='puckname')
    pucktype: Optional[str] = Field(None, alias="pucktype")
    crystalname: Annotated[
        str,
        Field(...,
              max_length=64,
              title="Crystal Name",
              description="max_length imposed by MTZ file header format https://www.ccp4.ac.uk/html/mtzformat.html",
              alias='crystalname'
              ),
    ]
    positioninpuck: int  # Only accept positive integers between 1 and 16
    priority: Optional[int]
    comments: Optional[str]
    directory: Optional[str]
    proteinname: Optional[str] = ""  # Alphanumeric validation
    oscillation: Optional[float] = None  # Only accept positive float
    exposure: Optional[float] = None  # Only accept positive floats between 0 and 1
    totalrange: Optional[int] = None  # Only accept positive integers between 0 and 360
    transmission: Optional[int] = None  # Only accept positive integers between 0 and 100
    targetresolution: Optional[float] = None  # Only accept positive float
    aperture: Optional[str] = None  # Optional string field
    datacollectiontype: Optional[str] = None  # Only accept "standard", other types might be added later
    processingpipeline: Optional[str] = ""  # Only accept "gopy", "autoproc", "xia2dials"
    spacegroupnumber: Optional[int] = None  # Only accept positive integers between 1 and 230
    cellparameters: Optional[str] = None  # Must be a set of six positive floats or integers
    rescutkey: Optional[str] = None  # Only accept "is" or "cchalf"
    rescutvalue: Optional[float] = None  # Must be a positive float if rescutkey is provided
    userresolution: Optional[float] = None
    pdbid: Optional[str] = ""  # Accepts either the format of the protein data bank code or {provided}
    autoprocfull: Optional[bool] = None
    procfull: Optional[bool] = None
    adpenabled: Optional[bool] = None
    noano: Optional[bool] = None
    ffcscampaign: Optional[bool] = None
    trustedhigh: Optional[float] = None  # Should be a float between 0 and 2.0
    autoprocextraparams: Optional[str] = None  # Optional string field
    chiphiangles: Optional[float] = None  # Optional float field between 0 and 30
    dose: Optional[float] = None  # Optional float field

    # Add pucktype validation
    @field_validator('pucktype', mode="before")
    @classmethod
    def validate_pucktype(cls, v):
        if v != "unipuck":
            raise ValueError(f"'{v}' is not valid. Pucktype must be 'unipuck'.")
        return v

    # Validators
    @field_validator('dewarname', 'puckname', mode="before")
    @classmethod
    def dewarname_puckname_characters(cls, v):
        if v:
            v = str(v).strip().replace(" ", "_").upper()
            if re.search("\n", v):
                assert v.isalnum(), "is not valid. newline character detected."
            v = re.sub(r"\.0$", "", v)
            return v
        raise ValueError("Value must be provided for dewarname and puckname.")

    @field_validator('crystalname', mode="before")
    @classmethod
@@ -68,9 +73,7 @@ class SpreadsheetModel(BaseModel):
            if re.search("\n", v):
                assert v.isalnum(), "is not valid. newline character detected."
            characters = re.sub("[._+-]", "", v)
            assert characters.isalnum(), f" '{v}' is not valid. Only alphanumeric and . _ + - characters allowed."
        return v

    @field_validator('directory', mode="before")
@@ -79,343 +82,182 @@ class SpreadsheetModel(BaseModel):
        if v:
            v = str(v).strip("/").replace(" ", "_")
            if re.search("\n", v):
                raise ValueError(f" '{v}' is not valid. newline character detected.")
            valid_macros = ["{date}", "{prefix}", "{sgpuck}", "{puck}", "{beamline}", "{sgprefix}",
                            "{sgpriority}", "{sgposition}", "{protein}", "{method}"]
            pattern = re.compile("|".join(re.escape(macro) for macro in valid_macros))
            v = pattern.sub('macro', v)

            allowed_chars = "[a-z0-9_.+-]"
            directory_re = re.compile(f"^(({allowed_chars}*|{allowed_chars}+)*/*)*$", re.IGNORECASE)
            if not directory_re.match(v):
                raise ValueError(f" '{v}' is not valid. Value must be a valid path or macro.")
        return v

    @field_validator('positioninpuck', mode="before")
    @classmethod
    def positioninpuck_possible(cls, v):
        if not isinstance(v, int) or v < 1 or v > 16:
            raise ValueError(f" '{v}' is not valid. Value must be an integer between 1 and 16.")
        return v

    @field_validator('priority', mode="before")
    @classmethod
    def priority_positive(cls, v):
        if v is not None:
            try:
                v = int(v)
                if v <= 0:
                    raise ValueError(f" '{v}' is not valid. Value must be a positive integer.")
            except (ValueError, TypeError) as e:
                raise ValueError(f" '{v}' is not valid. Value must be a positive integer.") from e
        return v

    @field_validator('aperture', mode="before")
    @classmethod
    def aperture_selection(cls, v):
        if v is not None:
            try:
                v = int(float(v))
                if v not in {1, 2, 3}:
                    raise ValueError(f" '{v}' is not valid. Value must be 1, 2, or 3.")
            except (ValueError, TypeError) as e:
                raise ValueError(f" '{v}' is not valid. Value must be 1, 2, or 3.") from e
        return v

    @field_validator('oscillation', 'targetresolution', mode="before")
    @classmethod
    def positive_float_validator(cls, v):
        if v is not None:
            try:
                v = float(v)
                if v <= 0:
                    raise ValueError(f" '{v}' is not valid. Value must be a positive float.")
            except (ValueError, TypeError) as e:
                raise ValueError(f" '{v}' is not valid. Value must be a positive float.") from e
        return v

    @field_validator('exposure', mode="before")
    @classmethod
    def exposure_in_range(cls, v):
        if v is not None:
            try:
                v = float(v)
                if not (0 <= v <= 1):
                    raise ValueError(f" '{v}' is not valid. Value must be a float between 0 and 1.")
            except (ValueError, TypeError) as e:
                raise ValueError(f" '{v}' is not valid. Value must be a float between 0 and 1.") from e
        return v

    @field_validator('totalrange', mode="before")
    @classmethod
    def totalrange_in_range(cls, v):
        if v is not None:
            try:
                v = int(v)
                if not (0 <= v <= 360):
                    raise ValueError(f" '{v}' is not valid. Value must be an integer between 0 and 360.")
            except (ValueError, TypeError) as e:
                raise ValueError(f" '{v}' is not valid. Value must be an integer between 0 and 360.") from e
        return v

    @field_validator('transmission', mode="before")
    @classmethod
    def transmission_fraction(cls, v):
        if v is not None:
            try:
                v = int(v)
                if not (0 <= v <= 100):
                    raise ValueError(f" '{v}' is not valid. Value must be an integer between 0 and 100.")
            except (ValueError, TypeError) as e:
                raise ValueError(f" '{v}' is not valid. Value must be an integer between 0 and 100.") from e
        return v

    @field_validator('datacollectiontype', mode="before")
    @classmethod
    def datacollectiontype_allowed(cls, v):
        allowed = {"standard"}  # Other types of data collection might be added later
        if v and v.lower() not in allowed:
            raise ValueError(f" '{v}' is not valid. Value must be one of {allowed}.")
        return v

    @field_validator('processingpipeline', mode="before")
    @classmethod
    def processingpipeline_allowed(cls, v):
        allowed = {"gopy", "autoproc", "xia2dials"}
        if v and v.lower() not in allowed:
            raise ValueError(f" '{v}' is not valid. Value must be one of {allowed}.")
        return v

    @field_validator('spacegroupnumber', mode="before")
    @classmethod
    def spacegroupnumber_allowed(cls, v):
        if v is not None:
            try:
                v = int(v)
                if not (1 <= v <= 230):
                    raise ValueError(f" '{v}' is not valid. Value must be an integer between 1 and 230.")
            except (ValueError, TypeError) as e:
                raise ValueError(f" '{v}' is not valid. Value must be an integer between 1 and 230.") from e
        return v

    @field_validator('cellparameters', mode="before")
    @classmethod
    def cellparameters_format(cls, v):
        if v:
            values = [float(i) for i in v.split(",")]
            if len(values) != 6 or any(val <= 0 for val in values):
                raise ValueError(f" '{v}' is not valid. Value must be a set of six positive floats or integers.")
        return v

    @field_validator('rescutkey', 'rescutvalue', mode="before")
    @classmethod
    def rescutkey_value_pair(cls, values):
        rescutkey = values.get('rescutkey')
        rescutvalue = values.get('rescutvalue')
        if rescutkey and rescutvalue:
            if rescutkey not in {"is", "cchalf"}:
                raise ValueError("Rescutkey must be either 'is' or 'cchalf'")
            if not isinstance(rescutvalue, float) or rescutvalue <= 0:
                raise ValueError("Rescutvalue must be a positive float if rescutkey is provided")
        return values

    @field_validator('trustedhigh', mode="before")
    @classmethod
    def trustedhigh_allowed(cls, v):
        if v is not None:
            try:
                v = float(v)
                if not (0 <= v <= 2.0):
                    raise ValueError(f" '{v}' is not valid. Value must be a float between 0 and 2.0.")
            except (ValueError, TypeError) as e:
                raise ValueError(f" '{v}' is not valid. Value must be a float between 0 and 2.0.") from e
        return v

    @field_validator('chiphiangles', mode="before")
    @classmethod
    def chiphiangles_allowed(cls, v):
        if v is not None:
            try:
                v = float(v)
                if not (0 <= v <= 30):
                    raise ValueError(f" '{v}' is not valid. Value must be a float between 0 and 30.")
            except (ValueError, TypeError) as e:
                raise ValueError(f" '{v}' is not valid. Value must be a float between 0 and 30.") from e
        return v

    @field_validator('dose', mode="before")
    @classmethod
    def dose_positive(cls, v):
        if v is not None:
            try:
                v = float(v)
                if v <= 0:
                    raise ValueError(f" '{v}' is not valid. Value must be a positive float.")
            except (ValueError, TypeError) as e:
                raise ValueError(f" '{v}' is not valid. Value must be a positive float.") from e
        return v


class TELLModel(SpreadsheetModel):
    pass  # Extend the SpreadsheetModel with TELL-specific fields if needed
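
For reference, a minimal sketch of validating a single spreadsheet row against this model; the field values below are invented for illustration, and an out-of-range value would surface as a pydantic ValidationError:

# Illustrative record only; the values are made up and not taken from the commit.
from pydantic import ValidationError
from app.sample_models import SpreadsheetModel

record = {
    "dewarname": "dewar 01",
    "puckname": "puck 1",
    "pucktype": "unipuck",
    "crystalname": "lyso_1",
    "positioninpuck": 1,
    "priority": 1,
    "comments": None,
    "directory": "{date}/{prefix}",
}

try:
    sample = SpreadsheetModel(**record)
    print(sample.dewarname, sample.puckname)  # names are normalized, e.g. DEWAR_01 PUCK_1
except ValidationError as exc:
    print(exc)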

View File: app/services/spreadsheet_service.py

@@ -1,39 +1,46 @@
import logging
import openpyxl
from pydantic import ValidationError
from typing import Union
from io import BytesIO
from app.sample_models import SpreadsheetModel

logging.basicConfig(level=logging.DEBUG)
logger = logging.getLogger(__name__)


class SpreadsheetImportError(Exception):
    pass


class SampleSpreadsheetImporter:
    def __init__(self):
        self.filename = None
        self.model = None

    def _clean_value(self, value, expected_type=None):
        """Clean value by converting it to the expected type and stripping whitespace for strings."""
        if value is None:
            return None
        if expected_type == str:
            return str(value).strip()
        if expected_type in [float, int]:
            try:
                return expected_type(value)
            except ValueError:
                return None
        if isinstance(value, str):
            try:
                if '.' in value:
                    return float(value)
                else:
                    return int(value)
            except ValueError:
                return value.strip()
        return value

    def import_spreadsheet(self, file):
        self.model = []
        self.filename = file.filename
        logger.info(f"Importing spreadsheet from .xlsx file: {self.filename}")
@@ -68,73 +75,61 @@ class SampleSpreadsheetImporter:
            logger.error("The 'Samples' worksheet is empty.")
            raise SpreadsheetImportError("The 'Samples' worksheet is empty.")

        expected_columns = 32  # Number of columns expected based on the model

        for index, row in enumerate(rows):
            if not any(row):
                logger.debug(f"Skipping empty row at index {index}")
                continue

            # Pad the row to ensure it has the expected number of columns
            if len(row) < expected_columns:
                row = list(row) + [None] * (expected_columns - len(row))

            record = {
                'dewarname': self._clean_value(row[0], str),
                'puckname': self._clean_value(row[1], str),
                'pucktype': self._clean_value(row[2], str),
                'crystalname': self._clean_value(row[3], str),
                'positioninpuck': self._clean_value(row[4], int),
                'priority': self._clean_value(row[5], int),
                'comments': self._clean_value(row[6], str),
                'directory': self._clean_value(row[7], str),
                'proteinname': self._clean_value(row[8], str),
                'oscillation': self._clean_value(row[9], float),
                'aperture': self._clean_value(row[10], str),
                'exposure': self._clean_value(row[11], float),
                'totalrange': self._clean_value(row[12], float),
                'transmission': self._clean_value(row[13], int),
                'dose': self._clean_value(row[14], float),
                'targetresolution': self._clean_value(row[15], float),
                'datacollectiontype': self._clean_value(row[16], str),
                'processingpipeline': self._clean_value(row[17], str),
                'spacegroupnumber': self._clean_value(row[18], int),
                'cellparameters': self._clean_value(row[19], str),
                'rescutkey': self._clean_value(row[20], str),
                'rescutvalue': self._clean_value(row[21], str),
                'userresolution': self._clean_value(row[22], str),
                'pdbid': self._clean_value(row[23], str),
                'autoprocfull': self._clean_value(row[24], str),
                'procfull': self._clean_value(row[25], str),
                'adpenabled': self._clean_value(row[26], str),
                'noano': self._clean_value(row[27], str),
                'ffcscampaign': self._clean_value(row[28], str),
                'trustedhigh': self._clean_value(row[29], str),
                'autoprocextraparams': self._clean_value(row[30], str),
                'chiphiangles': self._clean_value(row[31], str)
            }

            try:
                validated_record = SpreadsheetModel(**record)
                model.append(validated_record)
                logger.debug(f"Row {index + 4} processed and validated successfully")
            except ValidationError as e:
                error_message = f"Validation error in row {index + 4}: {e}"
                logger.error(error_message)
                raise SpreadsheetImportError(error_message)

        self.model = model
        logger.info(f"Finished processing {len(model)} records")
        return self.model
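
For reference, a rough usage sketch of the importer outside the FastAPI route; wrapping a local .xlsx file in an UploadFile (whose constructor signature varies between FastAPI/Starlette versions) is an assumption made purely for illustration:

# Illustration only; in the application the UploadFile comes from the /upload endpoint.
from fastapi import UploadFile
from app.services.spreadsheet_service import SampleSpreadsheetImporter, SpreadsheetImportError

importer = SampleSpreadsheetImporter()
with open("samples.xlsx", "rb") as fh:
    upload = UploadFile(file=fh, filename="samples.xlsx")
    try:
        samples = importer.import_spreadsheet(upload)
        print(f"Imported {len(samples)} validated samples")
    except SpreadsheetImportError as err:
        print(f"Import failed: {err}")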