Validator functional
This commit is contained in:
parent
91468da9ed
commit
3cf9c669b9
@ -1,5 +1,3 @@
|
||||
# app/routes/spreadsheet.py
|
||||
|
||||
from fastapi import APIRouter, UploadFile, File, HTTPException
|
||||
import logging
|
||||
from app.services.spreadsheet_service import SampleSpreadsheetImporter, SpreadsheetImportError
|
||||
@ -7,6 +5,7 @@ from app.services.spreadsheet_service import SampleSpreadsheetImporter, Spreadsh
|
||||
router = APIRouter()
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
@router.post("/upload")
|
||||
async def upload_file(file: UploadFile = File(...)):
|
||||
try:
|
||||
@ -22,9 +21,9 @@ async def upload_file(file: UploadFile = File(...)):
|
||||
validated_model = importer.import_spreadsheet(file)
|
||||
logger.info(f"Validated model: {validated_model}")
|
||||
|
||||
dewars = {sample['dewarname'] for sample in validated_model if 'dewarname' in sample}
|
||||
pucks = {sample['puckname'] for sample in validated_model if 'puckname' in sample}
|
||||
samples = {sample['crystalname'] for sample in validated_model if 'crystalname' in sample}
|
||||
dewars = {sample.dewarname for sample in validated_model if sample.dewarname}
|
||||
pucks = {sample.puckname for sample in validated_model if sample.puckname}
|
||||
samples = {sample.crystalname for sample in validated_model if sample.crystalname}
|
||||
|
||||
# Logging the sets of names
|
||||
logger.info(f"Dewar Names: {dewars}")
|
||||
|
@ -1,65 +1,70 @@
|
||||
import re
|
||||
from typing import Any, Optional, Union
|
||||
from pydantic import BaseModel, Field, field_validator, AliasChoices
|
||||
from typing import Any, Optional
|
||||
from pydantic import BaseModel, Field, field_validator
|
||||
from typing_extensions import Annotated
|
||||
|
||||
|
||||
class SpreadsheetModel(BaseModel):
|
||||
dewarname: str = Field(..., alias='dewarname')
|
||||
puckname: str = Field(..., alias='puckname')
|
||||
pucktype: Optional[str] = "unipuck"
|
||||
pucklocationindewar: Optional[Union[int, str]]
|
||||
pucktype: Optional[str] = Field(None, alias="pucktype")
|
||||
crystalname: Annotated[
|
||||
str,
|
||||
Field(...,
|
||||
max_length=64,
|
||||
title="Crystal Name",
|
||||
description="""max_length imposed by MTZ file header format
|
||||
https://www.ccp4.ac.uk/html/mtzformat.html""",
|
||||
description="max_length imposed by MTZ file header format https://www.ccp4.ac.uk/html/mtzformat.html",
|
||||
alias='crystalname'
|
||||
),
|
||||
),
|
||||
]
|
||||
positioninpuck: int
|
||||
positioninpuck: int # Only accept positive integers between 1 and 16
|
||||
priority: Optional[int]
|
||||
comments: Optional[str]
|
||||
pinbarcode: Optional[str]
|
||||
directory: Optional[str]
|
||||
proteinname: Any = ""
|
||||
oscillation: Any = ""
|
||||
exposure: Any = ""
|
||||
totalrange: Any = ""
|
||||
transmission: Any = ""
|
||||
targetresolution: Any = ""
|
||||
aperture: Any = ""
|
||||
datacollectiontype: Any = ""
|
||||
processingpipeline: Any = ""
|
||||
spacegroupnumber: Any = ""
|
||||
cellparameters: Any = ""
|
||||
rescutkey: Any = ""
|
||||
rescutvalue: Any = ""
|
||||
userresolution: Any = ""
|
||||
pdbmodel: Any = ""
|
||||
autoprocfull: Any = ""
|
||||
procfull: Any = ""
|
||||
adpenabled: Any = ""
|
||||
noano: Any = ""
|
||||
trustedhigh: Any = ""
|
||||
ffcscampaign: Any = ""
|
||||
autoprocextraparams: Any = ""
|
||||
chiphiangles: Any = ""
|
||||
proteinname: Optional[str] = "" # Alphanumeric validation
|
||||
oscillation: Optional[float] = None # Only accept positive float
|
||||
exposure: Optional[float] = None # Only accept positive floats between 0 and 1
|
||||
totalrange: Optional[int] = None # Only accept positive integers between 0 and 360
|
||||
transmission: Optional[int] = None # Only accept positive integers between 0 and 100
|
||||
targetresolution: Optional[float] = None # Only accept positive float
|
||||
aperture: Optional[str] = None # Optional string field
|
||||
datacollectiontype: Optional[str] = None # Only accept "standard", other types might be added later
|
||||
processingpipeline: Optional[str] = "" # Only accept "gopy", "autoproc", "xia2dials"
|
||||
spacegroupnumber: Optional[int] = None # Only accept positive integers between 1 and 230
|
||||
cellparameters: Optional[str] = None # Must be a set of six positive floats or integers
|
||||
rescutkey: Optional[str] = None # Only accept "is" or "cchalf"
|
||||
rescutvalue: Optional[float] = None # Must be a positive float if rescutkey is provided
|
||||
userresolution: Optional[float] = None
|
||||
pdbid: Optional[str] = "" # Accepts either the format of the protein data bank code or {provided}
|
||||
autoprocfull: Optional[bool] = None
|
||||
procfull: Optional[bool] = None
|
||||
adpenabled: Optional[bool] = None
|
||||
noano: Optional[bool] = None
|
||||
ffcscampaign: Optional[bool] = None
|
||||
trustedhigh: Optional[float] = None # Should be a float between 0 and 2.0
|
||||
autoprocextraparams: Optional[str] = None # Optional string field
|
||||
chiphiangles: Optional[float] = None # Optional float field between 0 and 30
|
||||
dose: Optional[float] = None # Optional float field
|
||||
|
||||
# Add pucktype validation
|
||||
@field_validator('pucktype', mode="before")
|
||||
@classmethod
|
||||
def validate_pucktype(cls, v):
|
||||
if v != "unipuck":
|
||||
raise ValueError(f"'{v}' is not valid. Pucktype must be 'unipuck'.")
|
||||
return v
|
||||
|
||||
# Validators
|
||||
@field_validator('dewarname', 'puckname', mode="before")
|
||||
@classmethod
|
||||
def dewarname_puckname_characters(cls, v):
|
||||
if v:
|
||||
assert (
|
||||
len(str(v)) > 0
|
||||
), f"""" {v} " is not valid. Value must be provided for all samples in the spreadsheet."""
|
||||
v = str(v).replace(" ", "_")
|
||||
v = str(v).strip().replace(" ", "_").upper()
|
||||
if re.search("\n", v):
|
||||
assert v.isalnum(), "is not valid. newline character detected."
|
||||
v = re.sub(r"\.0$", "", v)
|
||||
return v.upper()
|
||||
return v
|
||||
raise ValueError("Value must be provided for dewarname and puckname.")
|
||||
|
||||
@field_validator('crystalname', mode="before")
|
||||
@classmethod
|
||||
@ -68,9 +73,7 @@ class SpreadsheetModel(BaseModel):
|
||||
if re.search("\n", v):
|
||||
assert v.isalnum(), "is not valid. newline character detected."
|
||||
characters = re.sub("[._+-]", "", v)
|
||||
assert characters.isalnum(), f"""" {v} " is not valid.
|
||||
must contain only alphanumeric and . _ + - characters"""
|
||||
v = re.sub(r"\.0$", "", v)
|
||||
assert characters.isalnum(), f" '{v}' is not valid. Only alphanumeric and . _ + - characters allowed."
|
||||
return v
|
||||
|
||||
@field_validator('directory', mode="before")
|
||||
@ -79,343 +82,182 @@ class SpreadsheetModel(BaseModel):
|
||||
if v:
|
||||
v = str(v).strip("/").replace(" ", "_")
|
||||
if re.search("\n", v):
|
||||
raise ValueError(
|
||||
f"""" {v} " is not valid.
|
||||
newline character detected."""
|
||||
)
|
||||
ok = "[a-z0-9_.+-]"
|
||||
directory_re = re.compile("^((%s*|{%s+})*/?)*$" % (ok, ok), re.IGNORECASE)
|
||||
if not directory_re.match(v):
|
||||
raise ValueError(
|
||||
f"' {v} ' is not valid. value must be a path or macro."
|
||||
)
|
||||
raise ValueError(f" '{v}' is not valid. newline character detected.")
|
||||
|
||||
these_macros = re.findall(r"(\{[^}]+\})", v)
|
||||
valid_macros = [
|
||||
"{date}",
|
||||
"{prefix}",
|
||||
"{sgpuck}",
|
||||
"{puck}",
|
||||
"{beamline}",
|
||||
"{sgprefix}",
|
||||
"{sgpriority}",
|
||||
"{sgposition}",
|
||||
"{protein}",
|
||||
"{method}",
|
||||
]
|
||||
for m in these_macros:
|
||||
if m.lower() not in valid_macros:
|
||||
raise ValueError(
|
||||
f"""" {m} " is not a valid macro, please re-check documentation;
|
||||
allowed macros: date, prefix, sgpuck, puck, beamline, sgprefix,
|
||||
sgpriority, sgposition, protein, method"""
|
||||
)
|
||||
valid_macros = ["{date}", "{prefix}", "{sgpuck}", "{puck}", "{beamline}", "{sgprefix}",
|
||||
"{sgpriority}", "{sgposition}", "{protein}", "{method}"]
|
||||
pattern = re.compile("|".join(re.escape(macro) for macro in valid_macros))
|
||||
v = pattern.sub('macro', v)
|
||||
|
||||
allowed_chars = "[a-z0-9_.+-]"
|
||||
directory_re = re.compile(f"^(({allowed_chars}*|{allowed_chars}+)*/*)*$", re.IGNORECASE)
|
||||
if not directory_re.match(v):
|
||||
raise ValueError(f" '{v}' is not valid. Value must be a valid path or macro.")
|
||||
return v
|
||||
|
||||
@field_validator('positioninpuck', mode="before")
|
||||
@classmethod
|
||||
def positioninpuck_possible(cls, v):
|
||||
if v:
|
||||
try:
|
||||
v = int(float(v))
|
||||
if v < 1 or v > 16:
|
||||
raise ValueError(
|
||||
f"""" {v} " is not valid. value must be from 1 to 16."""
|
||||
)
|
||||
except (ValueError, TypeError) as e:
|
||||
raise ValueError(
|
||||
f"""" {v} " is not valid.
|
||||
Value must be a numeric type and from 1 to 16."""
|
||||
) from e
|
||||
else:
|
||||
raise ValueError("Value must be provided. Value must be from 1 to 16.")
|
||||
if not isinstance(v, int) or v < 1 or v > 16:
|
||||
raise ValueError(f" '{v}' is not valid. Value must be an integer between 1 and 16.")
|
||||
return v
|
||||
|
||||
@field_validator('pucklocationindewar', mode="before")
|
||||
@classmethod
|
||||
def pucklocationindewar_convert_to_str(cls, v):
|
||||
if v == "Unipuck":
|
||||
return v
|
||||
try:
|
||||
return str(int(float(v)))
|
||||
except ValueError:
|
||||
raise ValueError(f"Value error, could not convert string to float: '{v}'")
|
||||
|
||||
@field_validator('priority', mode="before")
|
||||
@classmethod
|
||||
def priority_positive(cls, v):
|
||||
if v is not None:
|
||||
v = str(v).strip()
|
||||
v = re.sub(r"\.0$", "", v)
|
||||
try:
|
||||
if int(v) <= 0:
|
||||
raise ValueError(
|
||||
f" '{v}' is not valid. Value must be a positive integer."
|
||||
)
|
||||
v = int(v)
|
||||
if v <= 0:
|
||||
raise ValueError(f" '{v}' is not valid. Value must be a positive integer.")
|
||||
except (ValueError, TypeError) as e:
|
||||
raise ValueError(
|
||||
f" '{v}' is not valid. Value must be a positive integer."
|
||||
) from e
|
||||
raise ValueError(f" '{v}' is not valid. Value must be a positive integer.") from e
|
||||
return v
|
||||
|
||||
@field_validator('aperture', mode="before")
|
||||
@classmethod
|
||||
def aperture_selection(cls, v):
|
||||
if v:
|
||||
if v is not None:
|
||||
try:
|
||||
v = int(float(v))
|
||||
if v not in [1, 2, 3]:
|
||||
raise ValueError(
|
||||
f"""" {v} " is not valid.
|
||||
value must be integer 1, 2 or 3."""
|
||||
)
|
||||
if v not in {1, 2, 3}:
|
||||
raise ValueError(f" '{v}' is not valid. Value must be 1, 2, or 3.")
|
||||
except (ValueError, TypeError) as e:
|
||||
raise ValueError(
|
||||
f"""" {v} " is not valid.
|
||||
value must be integer 1, 2 or 3."""
|
||||
) from e
|
||||
raise ValueError(f" '{v}' is not valid. Value must be 1, 2, or 3.") from e
|
||||
return v
|
||||
|
||||
@field_validator(
|
||||
"oscillation",
|
||||
"exposure",
|
||||
"totalrange",
|
||||
"targetresolution",
|
||||
"rescutvalue",
|
||||
"userresolution",
|
||||
mode="before"
|
||||
)
|
||||
@field_validator('oscillation', 'targetresolution', mode="before")
|
||||
@classmethod
|
||||
def parameter_positive_float(cls, v):
|
||||
if v:
|
||||
def positive_float_validator(cls, v):
|
||||
if v is not None:
|
||||
try:
|
||||
v = float(v)
|
||||
if not v > 0:
|
||||
raise ValueError(
|
||||
f"""" {v} " is not valid.
|
||||
value must be a positive float."""
|
||||
)
|
||||
if v <= 0:
|
||||
raise ValueError(f" '{v}' is not valid. Value must be a positive float.")
|
||||
except (ValueError, TypeError) as e:
|
||||
raise ValueError(
|
||||
f"""" {v} " is not valid.
|
||||
value must be a positive float."""
|
||||
) from e
|
||||
raise ValueError(f" '{v}' is not valid. Value must be a positive float.") from e
|
||||
return v
|
||||
|
||||
@field_validator('exposure', mode="before")
|
||||
@classmethod
|
||||
def exposure_in_range(cls, v):
|
||||
if v is not None:
|
||||
try:
|
||||
v = float(v)
|
||||
if not (0 <= v <= 1):
|
||||
raise ValueError(f" '{v}' is not valid. Value must be a float between 0 and 1.")
|
||||
except (ValueError, TypeError) as e:
|
||||
raise ValueError(f" '{v}' is not valid. Value must be a float between 0 and 1.") from e
|
||||
return v
|
||||
|
||||
@field_validator('totalrange', mode="before")
|
||||
@classmethod
|
||||
def totalrange_in_range(cls, v):
|
||||
if v is not None:
|
||||
try:
|
||||
v = int(v)
|
||||
if not (0 <= v <= 360):
|
||||
raise ValueError(f" '{v}' is not valid. Value must be an integer between 0 and 360.")
|
||||
except (ValueError, TypeError) as e:
|
||||
raise ValueError(f" '{v}' is not valid. Value must be an integer between 0 and 360.") from e
|
||||
return v
|
||||
|
||||
@field_validator('transmission', mode="before")
|
||||
@classmethod
|
||||
def tranmission_fraction(cls, v):
|
||||
if v:
|
||||
def transmission_fraction(cls, v):
|
||||
if v is not None:
|
||||
try:
|
||||
v = float(v)
|
||||
if 100 >= v > 0:
|
||||
v = v
|
||||
else:
|
||||
raise ValueError(
|
||||
f"""" {v} " is not valid.
|
||||
value must be a float between 0 and 100."""
|
||||
)
|
||||
v = int(v)
|
||||
if not (0 <= v <= 100):
|
||||
raise ValueError(f" '{v}' is not valid. Value must be an integer between 0 and 100.")
|
||||
except (ValueError, TypeError) as e:
|
||||
raise ValueError(
|
||||
f"""" {v} " is not valid.
|
||||
value must be a float between 0 and 100."""
|
||||
) from e
|
||||
raise ValueError(f" '{v}' is not valid. Value must be an integer between 0 and 100.") from e
|
||||
return v
|
||||
|
||||
@field_validator('datacollectiontype', mode="before")
|
||||
@classmethod
|
||||
def datacollectiontype_allowed(cls, v):
|
||||
if v:
|
||||
v = v.lower()
|
||||
allowed = ["standard", "serial-xtal", "multi-orientation"]
|
||||
if str(v) not in allowed:
|
||||
raise ValueError(
|
||||
f"""" {v} " is not valid.
|
||||
value must be one of" {allowed} "."""
|
||||
)
|
||||
allowed = {"standard"} # Other types of data collection might be added later
|
||||
if v and v.lower() not in allowed:
|
||||
raise ValueError(f" '{v}' is not valid. Value must be one of {allowed}.")
|
||||
return v
|
||||
|
||||
@field_validator('processingpipeline', mode="before")
|
||||
@classmethod
|
||||
def processingpipeline_allowed(cls, v):
|
||||
if v:
|
||||
v = v.lower()
|
||||
allowed = ["gopy", "autoproc", "xia2dials"]
|
||||
if str(v) not in allowed:
|
||||
raise ValueError(
|
||||
f"""" {v} " is not valid.
|
||||
value must be one of " {allowed} "."""
|
||||
)
|
||||
allowed = {"gopy", "autoproc", "xia2dials"}
|
||||
if v and v.lower() not in allowed:
|
||||
raise ValueError(f" '{v}' is not valid. Value must be one of {allowed}.")
|
||||
return v
|
||||
|
||||
@field_validator('spacegroupnumber', mode="before")
|
||||
@classmethod
|
||||
def spacegroupnumber_integer(cls, v):
|
||||
if v:
|
||||
try:
|
||||
v = int(float(v))
|
||||
if not v > 0 or not v < 231:
|
||||
raise ValueError(
|
||||
f"""" {v} " is not valid.
|
||||
value must be a positive integer between 1 and 230."""
|
||||
)
|
||||
except (ValueError, TypeError) as e:
|
||||
raise ValueError(
|
||||
f"""" {v} " is not valid.
|
||||
value must be a positive integer between 1 and 230."""
|
||||
) from e
|
||||
return v
|
||||
|
||||
@field_validator('cellparameters', mode="before")
|
||||
@classmethod
|
||||
def cellparameters_positive_floats(cls, v):
|
||||
if v:
|
||||
splitted = str(v).split(" ")
|
||||
if len(splitted) != 6:
|
||||
raise ValueError(
|
||||
f"' {v} ' is not valid. value must be a set of six numbers."
|
||||
)
|
||||
for el in splitted:
|
||||
@field_validator('spacegroupnumber', mode="before")
|
||||
@classmethod
|
||||
def spacegroupnumber_allowed(cls, v):
|
||||
if v is not None:
|
||||
try:
|
||||
el = float(el)
|
||||
if not el > 0:
|
||||
raise ValueError(
|
||||
f"' {el} ' is not valid. value must be a positive float."
|
||||
)
|
||||
v = int(v)
|
||||
if not (1 <= v <= 230):
|
||||
raise ValueError(f" '{v}' is not valid. Value must be an integer between 1 and 230.")
|
||||
except (ValueError, TypeError) as e:
|
||||
raise ValueError(
|
||||
f"' {el} ' is not valid. value must be a positive float."
|
||||
) from e
|
||||
return v
|
||||
raise ValueError(f" '{v}' is not valid. Value must be an integer between 1 and 230.") from e
|
||||
return v
|
||||
|
||||
@field_validator('rescutkey', mode="before")
|
||||
@classmethod
|
||||
def rescutkey_allowed(cls, v):
|
||||
if v:
|
||||
v = v.lower()
|
||||
allowed = ["is", "cchalf"]
|
||||
if str(v) not in allowed:
|
||||
raise ValueError(f"' {v} ' is not valid. value must be ' {allowed} '.")
|
||||
return v
|
||||
@field_validator('cellparameters', mode="before")
|
||||
@classmethod
|
||||
def cellparameters_format(cls, v):
|
||||
if v:
|
||||
values = [float(i) for i in v.split(",")]
|
||||
if len(values) != 6 or any(val <= 0 for val in values):
|
||||
raise ValueError(f" '{v}' is not valid. Value must be a set of six positive floats or integers.")
|
||||
return v
|
||||
|
||||
@field_validator('autoprocfull', 'procfull', 'adpenabled', 'noano', 'ffcscampaign', mode="before")
|
||||
@classmethod
|
||||
def boolean_allowed(cls, v):
|
||||
if v:
|
||||
v = v.title()
|
||||
allowed = ["False", "True"]
|
||||
if str(v) not in allowed:
|
||||
raise ValueError(
|
||||
f"""" {v} " is not valid.
|
||||
value must be ' {allowed} '."""
|
||||
)
|
||||
return v
|
||||
@field_validator('rescutkey', 'rescutvalue', mode="before")
|
||||
@classmethod
|
||||
def rescutkey_value_pair(cls, values):
|
||||
rescutkey = values.get('rescutkey')
|
||||
rescutvalue = values.get('rescutvalue')
|
||||
if rescutkey and rescutvalue:
|
||||
if rescutkey not in {"is", "cchalf"}:
|
||||
raise ValueError("Rescutkey must be either 'is' or 'cchalf'")
|
||||
if not isinstance(rescutvalue, float) or rescutvalue <= 0:
|
||||
raise ValueError("Rescutvalue must be a positive float if rescutkey is provided")
|
||||
return values
|
||||
|
||||
@field_validator('trustedhigh', mode="before")
|
||||
@classmethod
|
||||
def trusted_float(cls, v):
|
||||
if v:
|
||||
try:
|
||||
v = float(v)
|
||||
if 2.0 >= v > 0:
|
||||
v = v
|
||||
else:
|
||||
raise ValueError(
|
||||
f"""" {v} " is not valid.
|
||||
value must be a float between 0 and 2.0."""
|
||||
)
|
||||
except (ValueError, TypeError) as e:
|
||||
raise ValueError(
|
||||
f"""" {v} " is not valid.
|
||||
value must be a float between 0 and 2.0."""
|
||||
) from e
|
||||
return v
|
||||
@field_validator('trustedhigh', mode="before")
|
||||
@classmethod
|
||||
def trustedhigh_allowed(cls, v):
|
||||
if v is not None:
|
||||
try:
|
||||
v = float(v)
|
||||
if not (0 <= v <= 2.0):
|
||||
raise ValueError(f" '{v}' is not valid. Value must be a float between 0 and 2.0.")
|
||||
except (ValueError, TypeError) as e:
|
||||
raise ValueError(f" '{v}' is not valid. Value must be a float between 0 and 2.0.") from e
|
||||
return v
|
||||
|
||||
@field_validator('proteinname', mode="before")
|
||||
@classmethod
|
||||
def proteinname_characters(cls, v):
|
||||
if v:
|
||||
v = str(v).replace(" ", "_")
|
||||
if re.search("\n", v):
|
||||
assert v.isalnum(), "is not valid. newline character detected."
|
||||
characters = re.sub("[._+-]", "", v)
|
||||
assert characters.isalnum(), f"""" {v} " is not valid.
|
||||
must contain only alphanumeric and . _ + - characters"""
|
||||
v = re.sub(r"\.0$", "", v)
|
||||
return v
|
||||
@field_validator('chiphiangles', mode="before")
|
||||
@classmethod
|
||||
def chiphiangles_allowed(cls, v):
|
||||
if v is not None:
|
||||
try:
|
||||
v = float(v)
|
||||
if not (0 <= v <= 30):
|
||||
raise ValueError(f" '{v}' is not valid. Value must be a float between 0 and 30.")
|
||||
except (ValueError, TypeError) as e:
|
||||
raise ValueError(f" '{v}' is not valid. Value must be a float between 0 and 30.") from e
|
||||
return v
|
||||
|
||||
@field_validator('chiphiangles', mode="before")
|
||||
@classmethod
|
||||
def chiphiangles_value(cls, v):
|
||||
if v:
|
||||
try:
|
||||
v = str(v)
|
||||
v = re.sub(r"(^\s*\[\s*|\s*\]\s*$)", "", v.strip())
|
||||
list_of_strings = re.findall(r"\(.*?\)", v)
|
||||
list_of_tuples = []
|
||||
for el in list_of_strings:
|
||||
first = re.findall(r"\(.*?\,", el)[0].replace(" ", "")[1:-1]
|
||||
second = re.findall(r"\,.*?\)", el)[0].replace(" ", "")[1:-1]
|
||||
my_tuple = (float(first), float(second))
|
||||
list_of_tuples.append(my_tuple)
|
||||
v = list_of_tuples
|
||||
except (ValueError, TypeError) as e:
|
||||
raise ValueError(
|
||||
f"""" {v} " is not valid. Example format is
|
||||
(0.0, 0.0), (20.0, 0.0), (30, 0.0)"""
|
||||
) from e
|
||||
return v
|
||||
@field_validator('dose', mode="before")
|
||||
@classmethod
|
||||
def dose_positive(cls, v):
|
||||
if v is not None:
|
||||
try:
|
||||
v = float(v)
|
||||
if v <= 0:
|
||||
raise ValueError(f" '{v}' is not valid. Value must be a positive float.")
|
||||
except (ValueError, TypeError) as e:
|
||||
raise ValueError(f" '{v}' is not valid. Value must be a positive float.") from e
|
||||
return v
|
||||
|
||||
@field_validator(
|
||||
"priority",
|
||||
"comments",
|
||||
"pinbarcode",
|
||||
"directory",
|
||||
"proteinname",
|
||||
"oscillation",
|
||||
"exposure",
|
||||
"totalrange",
|
||||
"transmission",
|
||||
"targetresolution",
|
||||
"aperture",
|
||||
"datacollectiontype",
|
||||
"processingpipeline",
|
||||
"spacegroupnumber",
|
||||
"cellparameters",
|
||||
"rescutkey",
|
||||
"rescutvalue",
|
||||
"userresolution",
|
||||
"pdbmodel",
|
||||
"autoprocfull",
|
||||
"procfull",
|
||||
"adpenabled",
|
||||
"noano",
|
||||
"trustedhigh",
|
||||
"ffcscampaign",
|
||||
"autoprocextraparams",
|
||||
"chiphiangles",
|
||||
mode="before"
|
||||
)
|
||||
@classmethod
|
||||
def set_default_emptystring(cls, v):
|
||||
return v or ""
|
||||
|
||||
class Config:
|
||||
str_strip_whitespace = True
|
||||
aliases = {
|
||||
'dewarname': 'dewarname',
|
||||
'puckname': 'puckname',
|
||||
'crystalname': 'crystalname',
|
||||
}
|
||||
|
||||
|
||||
class TELLModel(SpreadsheetModel):
|
||||
input_order: int
|
||||
samplemountcount: int = 0
|
||||
samplestatus: str = "not present"
|
||||
puckaddress: str = "---"
|
||||
username: str
|
||||
puck_number: int
|
||||
prefix: Optional[str]
|
||||
folder: Optional[str]
|
||||
class TELLModel(SpreadsheetModel):
|
||||
pass # Extend the SpreadsheetModel with TELL-specific fields if needed
|
@ -1,39 +1,46 @@
|
||||
import logging
|
||||
import openpyxl
|
||||
from pydantic import ValidationError, parse_obj_as
|
||||
from typing import List
|
||||
from app.sample_models import SpreadsheetModel
|
||||
from pydantic import ValidationError
|
||||
from typing import Union
|
||||
from io import BytesIO
|
||||
from app.sample_models import SpreadsheetModel
|
||||
|
||||
UNASSIGNED_PUCKADDRESS = "---"
|
||||
logging.basicConfig(level=logging.DEBUG) # Change to DEBUG level to see more logs
|
||||
logging.basicConfig(level=logging.DEBUG)
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class SpreadsheetImportError(Exception):
|
||||
pass
|
||||
|
||||
|
||||
class SampleSpreadsheetImporter:
|
||||
def __init__(self):
|
||||
self.filename = None
|
||||
self.model = None
|
||||
self.available_puck_positions = []
|
||||
|
||||
def _clean_value(self, value):
|
||||
def _clean_value(self, value, expected_type=None):
|
||||
"""Clean value by converting it to the expected type and stripping whitespace for strings."""
|
||||
if value is None:
|
||||
return None
|
||||
if expected_type == str:
|
||||
return str(value).strip()
|
||||
if expected_type in [float, int]:
|
||||
try:
|
||||
return expected_type(value)
|
||||
except ValueError:
|
||||
return None
|
||||
if isinstance(value, str):
|
||||
return value.strip()
|
||||
elif isinstance(value, (float, int)):
|
||||
return str(value) # Always return strings for priority field validation
|
||||
try:
|
||||
if '.' in value:
|
||||
return float(value)
|
||||
else:
|
||||
return int(value)
|
||||
except ValueError:
|
||||
return value.strip()
|
||||
return value
|
||||
|
||||
def import_spreadsheet(self, file):
|
||||
# Reinitialize state
|
||||
self.available_puck_positions = [
|
||||
f"{s}{p}" for s in list("ABCDEF") for p in range(1, 6)
|
||||
]
|
||||
self.available_puck_positions.append(UNASSIGNED_PUCKADDRESS)
|
||||
self.model = []
|
||||
|
||||
self.filename = file.filename
|
||||
logger.info(f"Importing spreadsheet from .xlsx file: {self.filename}")
|
||||
|
||||
@ -68,73 +75,61 @@ class SampleSpreadsheetImporter:
|
||||
logger.error("The 'Samples' worksheet is empty.")
|
||||
raise SpreadsheetImportError("The 'Samples' worksheet is empty.")
|
||||
|
||||
expected_columns = 32 # Number of columns expected based on the model
|
||||
|
||||
for index, row in enumerate(rows):
|
||||
if not row or all(value is None for value in row):
|
||||
logger.debug(f"Skipping empty row or row with all None values at index {index}.")
|
||||
if not any(row):
|
||||
logger.debug(f"Skipping empty row at index {index}")
|
||||
continue
|
||||
|
||||
# Pad the row to ensure it has the expected number of columns
|
||||
if len(row) < expected_columns:
|
||||
row = list(row) + [None] * (expected_columns - len(row))
|
||||
|
||||
record = {
|
||||
'dewarname': self._clean_value(row[0], str),
|
||||
'puckname': self._clean_value(row[1], str),
|
||||
'pucktype': self._clean_value(row[2], str),
|
||||
'crystalname': self._clean_value(row[3], str),
|
||||
'positioninpuck': self._clean_value(row[4], int),
|
||||
'priority': self._clean_value(row[5], int),
|
||||
'comments': self._clean_value(row[6], str),
|
||||
'directory': self._clean_value(row[7], str),
|
||||
'proteinname': self._clean_value(row[8], str),
|
||||
'oscillation': self._clean_value(row[9], float),
|
||||
'aperture': self._clean_value(row[10], str),
|
||||
'exposure': self._clean_value(row[11], float),
|
||||
'totalrange': self._clean_value(row[12], float),
|
||||
'transmission': self._clean_value(row[13], int),
|
||||
'dose': self._clean_value(row[14], float),
|
||||
'targetresolution': self._clean_value(row[15], float),
|
||||
'datacollectiontype': self._clean_value(row[16], str),
|
||||
'processingpipeline': self._clean_value(row[17], str),
|
||||
'spacegroupnumber': self._clean_value(row[18], int),
|
||||
'cellparameters': self._clean_value(row[19], str),
|
||||
'rescutkey': self._clean_value(row[20], str),
|
||||
'rescutvalue': self._clean_value(row[21], str),
|
||||
'userresolution': self._clean_value(row[22], str),
|
||||
'pdbid': self._clean_value(row[23], str),
|
||||
'autoprocfull': self._clean_value(row[24], str),
|
||||
'procfull': self._clean_value(row[25], str),
|
||||
'adpenabled': self._clean_value(row[26], str),
|
||||
'noano': self._clean_value(row[27], str),
|
||||
'ffcscampaign': self._clean_value(row[28], str),
|
||||
'trustedhigh': self._clean_value(row[29], str),
|
||||
'autoprocextraparams': self._clean_value(row[30], str),
|
||||
'chiphiangles': self._clean_value(row[31], str)
|
||||
}
|
||||
|
||||
try:
|
||||
sample = {
|
||||
'dewarname': self._clean_value(row[0]),
|
||||
'puckname': self._clean_value(row[1]),
|
||||
'pucklocationindewar': self._clean_value(row[2]) if len(row) > 2 else None,
|
||||
'positioninpuck': self._clean_value(row[3]) if len(row) > 3 else None,
|
||||
'crystalname': self._clean_value(row[4]),
|
||||
'priority': self._clean_value(row[5]) if len(row) > 5 else None,
|
||||
'comments': self._clean_value(row[6]) if len(row) > 6 else None,
|
||||
'pinbarcode': self._clean_value(row[7]) if len(row) > 7 else None,
|
||||
'directory': self._clean_value(row[8]) if len(row) > 8 else None,
|
||||
}
|
||||
except IndexError:
|
||||
logger.error(f"Index error processing row at index {index}: Row has missing values.")
|
||||
raise SpreadsheetImportError(f"Index error processing row at index {index}: Row has missing values.")
|
||||
validated_record = SpreadsheetModel(**record)
|
||||
model.append(validated_record)
|
||||
logger.debug(f"Row {index + 4} processed and validated successfully")
|
||||
except ValidationError as e:
|
||||
error_message = f"Validation error in row {index + 4}: {e}"
|
||||
logger.error(error_message)
|
||||
raise SpreadsheetImportError(error_message)
|
||||
|
||||
# Skip rows missing essential fields
|
||||
if not sample['dewarname'] or not sample['puckname'] or not sample['crystalname']:
|
||||
logger.debug(f"Skipping row due to missing essential fields: {row}")
|
||||
continue
|
||||
|
||||
model.append(sample)
|
||||
logger.info(f"Sample processed: {sample}")
|
||||
|
||||
if not model:
|
||||
logger.error("No valid samples found in the spreadsheet.")
|
||||
raise SpreadsheetImportError("No valid samples found in the spreadsheet.")
|
||||
|
||||
logger.info(f"...finished import, got {len(model)} samples")
|
||||
logger.debug(f"Model data: {model}")
|
||||
self.model = model
|
||||
|
||||
try:
|
||||
validated_model = self.validate()
|
||||
except SpreadsheetImportError as e:
|
||||
logger.error(f"Failed to validate spreadsheet: {str(e)}")
|
||||
raise
|
||||
|
||||
return validated_model
|
||||
|
||||
def validate(self):
|
||||
model = self.model
|
||||
logger.info(f"...validating {len(model)} samples")
|
||||
|
||||
for sample in model:
|
||||
logger.info(f"Validating sample: {sample}")
|
||||
|
||||
validated_model = self.data_model_validation(SpreadsheetModel, model)
|
||||
|
||||
for sample in validated_model:
|
||||
logger.info(f"Validated sample: {sample}")
|
||||
|
||||
logger.debug(f"Validated model data: {validated_model}")
|
||||
return validated_model
|
||||
|
||||
@staticmethod
|
||||
def data_model_validation(data_model, model):
|
||||
try:
|
||||
validated = parse_obj_as(List[data_model], model)
|
||||
except ValidationError as e:
|
||||
logger.error(f"Validation error: {e.errors()}")
|
||||
raise SpreadsheetImportError(f"{e.errors()[0]['loc']} => {e.errors()[0]['msg']}")
|
||||
|
||||
validated_model = [dict(value) for value in validated]
|
||||
return validated_model
|
||||
logger.info(f"Finished processing {len(model)} records")
|
||||
return self.model
|
||||
|
Loading…
x
Reference in New Issue
Block a user