aaredb/backend/app/sample_models.py
2024-12-16 22:50:04 +01:00

374 lines
14 KiB
Python

import re
from typing import Any, Optional, List, Dict
from pydantic import BaseModel, Field, field_validator
from typing_extensions import Annotated
class SpreadsheetModel(BaseModel):
dewarname: str = Field(..., alias="dewarname")
puckname: str = Field(..., alias="puckname")
pucktype: Optional[str] = Field(None, alias="pucktype")
crystalname: Annotated[
str,
Field(
...,
max_length=64,
title="Crystal Name",
description="max_length imposed by MTZ file header"
"format https://www.ccp4.ac.uk/html/mtzformat.html",
alias="crystalname",
),
]
positioninpuck: int # Only accept positive integers between 1 and 16
priority: Optional[int]
comments: Optional[str]
directory: Optional[str]
proteinname: Optional[str] = "" # Alphanumeric validation
oscillation: Optional[float] = None # Only accept positive float
exposure: Optional[float] = None # Only accept positive floats between 0 and 1
totalrange: Optional[int] = None # Only accept positive integers between 0 and 360
transmission: Optional[
int
] = None # Only accept positive integers between 0 and 100
targetresolution: Optional[float] = None # Only accept positive float
aperture: Optional[str] = None # Optional string field
datacollectiontype: Optional[
str
] = None # Only accept "standard", other types might be added later
processingpipeline: Optional[
str
] = "" # Only accept "gopy", "autoproc", "xia2dials"
spacegroupnumber: Optional[
int
] = None # Only accept positive integers between 1 and 230
cellparameters: Optional[
str
] = None # Must be a set of six positive floats or integers
rescutkey: Optional[str] = None # Only accept "is" or "cchalf"
rescutvalue: Optional[
float
] = None # Must be a positive float if rescutkey is provided
userresolution: Optional[float] = None
pdbid: Optional[
str
] = "" # Accepts either the format of the protein data bank code or {provided}
autoprocfull: Optional[bool] = None
procfull: Optional[bool] = None
adpenabled: Optional[bool] = None
noano: Optional[bool] = None
ffcscampaign: Optional[bool] = None
trustedhigh: Optional[float] = None # Should be a float between 0 and 2.0
autoprocextraparams: Optional[str] = None # Optional string field
chiphiangles: Optional[float] = None # Optional float field between 0 and 30
dose: Optional[float] = None # Optional float field
# Add pucktype validation
@field_validator("pucktype", mode="before")
@classmethod
def validate_pucktype(cls, v):
if v != "unipuck":
raise ValueError(f"'{v}' is not valid. Pucktype must be 'unipuck'.")
return v
# Validators
@field_validator("dewarname", "puckname", mode="before")
@classmethod
def dewarname_puckname_characters(cls, v):
if v:
v = str(v).strip().replace(" ", "_").upper()
if re.search("\n", v):
assert v.isalnum(), "is not valid. newline character detected."
v = re.sub(r"\.0$", "", v)
return v
raise ValueError("Value must be provided for dewarname and puckname.")
@field_validator("crystalname", mode="before")
@classmethod
def parameter_characters(cls, v):
v = str(v).replace(" ", "_")
if re.search("\n", v):
assert v.isalnum(), "is not valid. newline character detected."
characters = re.sub("[._+-]", "", v)
assert (
characters.isalnum()
), f" '{v}' is not valid. Only alphanumeric and . _ + - characters allowed."
return v
@field_validator("directory", mode="before")
@classmethod
def directory_characters(cls, v):
if v:
v = str(v).strip("/").replace(" ", "_")
if re.search("\n", v):
raise ValueError(f" '{v}' is not valid. newline character detected.")
valid_macros = [
"{date}",
"{prefix}",
"{sgpuck}",
"{puck}",
"{beamline}",
"{sgprefix}",
"{sgpriority}",
"{sgposition}",
"{protein}",
"{method}",
]
pattern = re.compile("|".join(re.escape(macro) for macro in valid_macros))
v = pattern.sub("macro", v)
allowed_chars = "[a-z0-9_.+-]"
directory_re = re.compile(
f"^(({allowed_chars}*|{allowed_chars}+)*/*)*$", re.IGNORECASE
)
if not directory_re.match(v):
raise ValueError(
f" '{v}' is not valid. Value must be a valid path or macro."
)
return v
@field_validator("positioninpuck", mode="before")
@classmethod
def positioninpuck_possible(cls, v):
if not isinstance(v, int) or v < 1 or v > 16:
raise ValueError(
f" '{v}' is not valid. Value must be an integer between 1 and 16."
)
return v
@field_validator("priority", mode="before")
@classmethod
def priority_positive(cls, v):
if v is not None:
try:
v = int(v)
if v <= 0:
raise ValueError(
f" '{v}' is not valid. Value must be a positive integer."
)
except (ValueError, TypeError) as e:
raise ValueError(
f" '{v}' is not valid. Value must be a positive integer."
) from e
return v
@field_validator("aperture", mode="before")
@classmethod
def aperture_selection(cls, v):
if v is not None:
try:
v = int(float(v))
if v not in {1, 2, 3}:
raise ValueError(f" '{v}' is not valid. Value must be 1, 2, or 3.")
except (ValueError, TypeError) as e:
raise ValueError(
f" '{v}' is not valid. Value must be 1, 2, or 3."
) from e
return v
@field_validator("oscillation", "targetresolution", mode="before")
@classmethod
def positive_float_validator(cls, v):
if v is not None:
try:
v = float(v)
if v <= 0:
raise ValueError(
f" '{v}' is not valid. Value must be a positive float."
)
except (ValueError, TypeError) as e:
raise ValueError(
f" '{v}' is not valid. Value must be a positive float."
) from e
return v
@field_validator("exposure", mode="before")
@classmethod
def exposure_in_range(cls, v):
if v is not None:
try:
v = float(v)
if not (0 <= v <= 1):
raise ValueError(
f" '{v}' is not valid. Value must be a float between 0 and 1."
)
except (ValueError, TypeError) as e:
raise ValueError(
f" '{v}' is not valid. Value must be a float between 0 and 1."
) from e
return v
@field_validator("totalrange", mode="before")
@classmethod
def totalrange_in_range(cls, v):
if v is not None:
try:
v = int(v)
if not (0 <= v <= 360):
raise ValueError(
f" '{v}' is not valid."
f"Value must be an integer between 0 and 360."
)
except (ValueError, TypeError) as e:
raise ValueError(
f" '{v}' is not valid."
f"Value must be an integer between 0 and 360."
) from e
return v
@field_validator("transmission", mode="before")
@classmethod
def transmission_fraction(cls, v):
if v is not None:
try:
v = int(v)
if not (0 <= v <= 100):
raise ValueError(
f" '{v}' is not valid."
f"Value must be an integer between 0 and 100."
)
except (ValueError, TypeError) as e:
raise ValueError(
f" '{v}' is not valid."
f"Value must be an integer between 0 and 100."
) from e
return v
@field_validator("datacollectiontype", mode="before")
@classmethod
def datacollectiontype_allowed(cls, v):
allowed = {"standard"} # Other types of data collection might be added later
if v and v.lower() not in allowed:
raise ValueError(f" '{v}' is not valid." f"Value must be one of {allowed}.")
return v
@field_validator("processingpipeline", mode="before")
@classmethod
def processingpipeline_allowed(cls, v):
allowed = {"gopy", "autoproc", "xia2dials"}
if v and v.lower() not in allowed:
raise ValueError(f" '{v}' is not valid." f"Value must be one of {allowed}.")
return v
@field_validator("spacegroupnumber", mode="before")
@classmethod
def spacegroupnumber_allowed(cls, v):
if v is not None:
try:
v = int(v)
if not (1 <= v <= 230):
raise ValueError(
f" '{v}' is not valid."
f"Value must be an integer between 1 and 230."
)
except (ValueError, TypeError) as e:
raise ValueError(
f" '{v}' is not valid."
f"Value must be an integer between 1 and 230."
) from e
return v
@field_validator("cellparameters", mode="before")
@classmethod
def cellparameters_format(cls, v):
if v:
values = [float(i) for i in v.split(",")]
if len(values) != 6 or any(val <= 0 for val in values):
raise ValueError(
f" '{v}' is not valid."
f"Value must be a set of six positive floats or integers."
)
return v
@field_validator("rescutkey", "rescutvalue", mode="before")
@classmethod
def rescutkey_value_pair(cls, values):
rescutkey = values.get("rescutkey")
rescutvalue = values.get("rescutvalue")
if rescutkey and rescutvalue:
if rescutkey not in {"is", "cchalf"}:
raise ValueError("Rescutkey must be either 'is' or 'cchalf'")
if not isinstance(rescutvalue, float) or rescutvalue <= 0:
raise ValueError(
"Rescutvalue must be a positive float if rescutkey is provided"
)
return values
@field_validator("trustedhigh", mode="before")
@classmethod
def trustedhigh_allowed(cls, v):
if v is not None:
try:
v = float(v)
if not (0 <= v <= 2.0):
raise ValueError(
f" '{v}' is not valid."
f"Value must be a float between 0 and 2.0."
)
except (ValueError, TypeError) as e:
raise ValueError(
f" '{v}' is not valid."
f"Value must be a float between 0 and 2.0."
) from e
return v
@field_validator("chiphiangles", mode="before")
@classmethod
def chiphiangles_allowed(cls, v):
if v is not None:
try:
v = float(v)
if not (0 <= v <= 30):
raise ValueError(
f" '{v}' is not valid."
f"Value must be a float between 0 and 30."
)
except (ValueError, TypeError) as e:
raise ValueError(
f" '{v}' is not valid. Value must be a float between 0 and 30."
) from e
return v
@field_validator("dose", mode="before")
@classmethod
def dose_positive(cls, v):
if v is not None:
try:
v = float(v)
if v <= 0:
raise ValueError(
f" '{v}' is not valid. Value must be a positive float."
)
except (ValueError, TypeError) as e:
raise ValueError(
f" '{v}' is not valid. Value must be a positive float."
) from e
return v
class TELLModel(SpreadsheetModel):
input_order: int
samplemountcount: int = 0
samplestatus: str = "not present"
puckaddress: str = "---"
username: str
puck_number: int
prefix: Optional[str]
folder: Optional[str]
class SpreadsheetResponse(BaseModel):
data: List[SpreadsheetModel] # Validated data rows as SpreadsheetModel instances
errors: List[Dict[str, Any]] # Errors encountered during validation
raw_data: List[Dict[str, Any]] # Raw data extracted from the spreadsheet
dewars_count: int
dewars: List[str]
pucks_count: int
pucks: List[str]
samples_count: int
samples: List[str]
headers: Optional[List[str]] = None # Add headers if needed
__all__ = ["SpreadsheetModel", "SpreadsheetResponse"]