aaredb/backend/app/sample_models.py
2024-11-05 23:13:36 +01:00

422 lines
14 KiB
Python

import re
from typing import Any, Optional, Union
from pydantic import BaseModel, Field, field_validator, AliasChoices
from typing_extensions import Annotated
class SpreadsheetModel(BaseModel):
    """Validation model for one sample row of the spreadsheet.

    Every validator below runs in ``mode="before"``, i.e. on the raw cell
    value before pydantic coerces it to the annotated type.  Validators
    signal bad input by raising ``ValueError`` (not ``assert``, which is
    stripped when Python runs with ``-O``).

    NOTE(review): under pydantic v2 an ``Optional[...]`` annotation without a
    default is nullable but still *required*; confirm that callers always
    supply ``pucklocationindewar``, ``priority``, ``comments``,
    ``pinbarcode`` and ``directory``.
    """

    dewarname: str = Field(..., alias='dewarname')
    puckname: str = Field(..., alias='puckname')
    pucktype: Optional[str] = "unipuck"
    pucklocationindewar: Optional[Union[int, str]]
    crystalname: Annotated[
        str,
        Field(
            ...,
            max_length=64,
            title="Crystal Name",
            description=(
                "max_length imposed by MTZ file header format\n"
                "https://www.ccp4.ac.uk/html/mtzformat.html"
            ),
            alias='crystalname',
        ),
    ]
    positioninpuck: int
    priority: Optional[int]
    comments: Optional[str]
    pinbarcode: Optional[str]
    directory: Optional[str]
    proteinname: Any = ""
    oscillation: Any = ""
    exposure: Any = ""
    totalrange: Any = ""
    transmission: Any = ""
    targetresolution: Any = ""
    aperture: Any = ""
    datacollectiontype: Any = ""
    processingpipeline: Any = ""
    spacegroupnumber: Any = ""
    cellparameters: Any = ""
    rescutkey: Any = ""
    rescutvalue: Any = ""
    userresolution: Any = ""
    pdbmodel: Any = ""
    autoprocfull: Any = ""
    procfull: Any = ""
    adpenabled: Any = ""
    noano: Any = ""
    trustedhigh: Any = ""
    ffcscampaign: Any = ""
    autoprocextraparams: Any = ""
    chiphiangles: Any = ""

    @field_validator('dewarname', 'puckname', mode="before")
    @classmethod
    def dewarname_puckname_characters(cls, v):
        """Normalise a dewar/puck name: spaces to ``_``, drop ``.0``, upper-case.

        Raises:
            ValueError: if the value is missing/empty or contains a newline.
        """
        # The original guarded the "must be provided" check with ``if v:``,
        # which made it unreachable; reject missing values explicitly.
        if not v:
            raise ValueError(
                f'" {v} " is not valid. Value must be provided for all '
                "samples in the spreadsheet."
            )
        v = str(v).replace(" ", "_")
        if "\n" in v:
            raise ValueError("is not valid. newline character detected.")
        # Numeric names read as floats arrive as e.g. "12.0".
        v = re.sub(r"\.0$", "", v)
        return v.upper()

    @field_validator('crystalname', mode="before")
    @classmethod
    def parameter_characters(cls, v):
        """Normalise a crystal name and restrict it to ``[A-Za-z0-9._+-]``.

        Raises:
            ValueError: if the value is ``None``, contains a newline, or
                contains characters other than alphanumerics and ``. _ + -``.
        """
        # str(None) would otherwise be accepted as the literal name "None".
        if v is None:
            raise ValueError(
                f'" {v} " is not valid. Value must be provided for all '
                "samples in the spreadsheet."
            )
        v = str(v).replace(" ", "_")
        if "\n" in v:
            raise ValueError("is not valid. newline character detected.")
        characters = re.sub("[._+-]", "", v)
        if not characters.isalnum():
            raise ValueError(
                f'" {v} " is not valid.\n'
                "must contain only alphanumeric and . _ + - characters"
            )
        return re.sub(r"\.0$", "", v)

    @field_validator('directory', mode="before")
    @classmethod
    def directory_characters(cls, v):
        """Validate a directory template made of path segments and ``{macro}``s.

        Leading/trailing ``/`` are stripped and spaces become ``_``.  Falsy
        input is returned unchanged.

        Raises:
            ValueError: on a newline, on characters outside
                ``[a-z0-9_.+-]``/``/``/``{macro}``, or on an unknown macro.
        """
        if not v:
            return v
        v = str(v).strip("/").replace(" ", "_")
        if "\n" in v:
            raise ValueError(
                f'" {v} " is not valid.\n'
                "newline character detected."
            )
        ok = "[a-z0-9_.+-]"
        # Flat alternation accepting the same strings as the former
        # "^((ok*|{ok+})*/?)*$", whose nested quantifiers backtrack
        # exponentially on long near-miss input.
        directory_re = re.compile(
            r"^(?:%s|\{%s+\}|/)*$" % (ok, ok), re.IGNORECASE
        )
        if not directory_re.match(v):
            raise ValueError(
                f"' {v} ' is not valid. value must be a path or macro."
            )
        valid_macros = {
            "{date}",
            "{prefix}",
            "{sgpuck}",
            "{puck}",
            "{beamline}",
            "{sgprefix}",
            "{sgpriority}",
            "{sgposition}",
            "{protein}",
            "{method}",
        }
        for m in re.findall(r"(\{[^}]+\})", v):
            if m.lower() not in valid_macros:
                raise ValueError(
                    f'" {m} " is not a valid macro, please re-check '
                    "documentation;\n"
                    "allowed macros: date, prefix, sgpuck, puck, beamline, "
                    "sgprefix,\n"
                    "sgpriority, sgposition, protein, method"
                )
        return v

    @field_validator('positioninpuck', mode="before")
    @classmethod
    def positioninpuck_possible(cls, v):
        """Convert the puck position to ``int`` and require ``1 <= v <= 16``.

        Raises:
            ValueError: if the value is missing, non-numeric or out of range.
        """
        if not v:
            raise ValueError(
                "Value must be provided. Value must be from 1 to 16."
            )
        try:
            position = int(float(v))
        except (ValueError, TypeError, OverflowError) as e:
            raise ValueError(
                f'" {v} " is not valid.\n'
                "Value must be a numeric type and from 1 to 16."
            ) from e
        # Checked outside the try so the range message is not swallowed and
        # replaced by the conversion message.
        if not 1 <= position <= 16:
            raise ValueError(
                f'" {position} " is not valid. value must be from 1 to 16.'
            )
        return position

    @field_validator('pucklocationindewar', mode="before")
    @classmethod
    def pucklocationindewar_convert_to_str(cls, v):
        """Return ``"Unipuck"`` unchanged, otherwise the integer value as ``str``.

        ``None`` is passed through because the field is ``Optional``.

        Raises:
            ValueError: if the value cannot be converted to a number.
        """
        if v is None or v == "Unipuck":
            return v
        try:
            return str(int(float(v)))
        except (ValueError, TypeError, OverflowError) as e:
            raise ValueError(
                f"Value error, could not convert string to float: '{v}'"
            ) from e

    @field_validator('priority', mode="before")
    @classmethod
    def priority_positive(cls, v):
        """Convert the priority to a positive ``int``; empty input becomes ``None``.

        Raises:
            ValueError: if the value is not a positive integer.
        """
        if v is None:
            return None
        text = re.sub(r"\.0$", "", str(v).strip())
        if text == "":
            # An empty cell means "no priority"; "" itself cannot be
            # validated against Optional[int].
            return None
        try:
            number = int(text)
        except (ValueError, TypeError) as e:
            raise ValueError(
                f" '{text}' is not valid. Value must be a positive integer."
            ) from e
        if number <= 0:
            raise ValueError(
                f" '{text}' is not valid. Value must be a positive integer."
            )
        return number

    @field_validator('aperture', mode="before")
    @classmethod
    def aperture_selection(cls, v):
        """Convert the aperture to ``int`` and require 1, 2 or 3.

        Falsy input is returned unchanged.

        Raises:
            ValueError: if the value is non-numeric or not in ``{1, 2, 3}``.
        """
        if not v:
            return v
        try:
            v = int(float(v))
        except (ValueError, TypeError, OverflowError) as e:
            raise ValueError(
                f'" {v} " is not valid.\n'
                "value must be integer 1, 2 or 3."
            ) from e
        if v not in (1, 2, 3):
            raise ValueError(
                f'" {v} " is not valid.\n'
                "value must be integer 1, 2 or 3."
            )
        return v

    @field_validator(
        "oscillation",
        "exposure",
        "totalrange",
        "targetresolution",
        "rescutvalue",
        "userresolution",
        mode="before"
    )
    @classmethod
    def parameter_positive_float(cls, v):
        """Convert the value to ``float`` and require it to be ``> 0``.

        Falsy input is returned unchanged.

        Raises:
            ValueError: if the value is non-numeric or not positive.
        """
        if not v:
            return v
        try:
            v = float(v)
        except (ValueError, TypeError) as e:
            raise ValueError(
                f'" {v} " is not valid.\n'
                "value must be a positive float."
            ) from e
        # "not v > 0" (rather than "v <= 0") also rejects NaN.
        if not v > 0:
            raise ValueError(
                f'" {v} " is not valid.\n'
                "value must be a positive float."
            )
        return v

    @field_validator('transmission', mode="before")
    @classmethod
    def tranmission_fraction(cls, v):
        """Convert the transmission to ``float`` and require ``0 < v <= 100``.

        Falsy input is returned unchanged.

        Raises:
            ValueError: if the value is non-numeric or out of range.
        """
        if not v:
            return v
        try:
            v = float(v)
        except (ValueError, TypeError) as e:
            raise ValueError(
                f'" {v} " is not valid.\n'
                "value must be a float between 0 and 100."
            ) from e
        if not 100 >= v > 0:
            raise ValueError(
                f'" {v} " is not valid.\n'
                "value must be a float between 0 and 100."
            )
        return v

    @field_validator('datacollectiontype', mode="before")
    @classmethod
    def datacollectiontype_allowed(cls, v):
        """Lower-case the value and require one of the allowed collection types.

        Falsy input is returned unchanged.

        Raises:
            ValueError: if the value is not in the allowed list.
        """
        if not v:
            return v
        # str() first so non-string cells fail validation instead of raising
        # AttributeError on .lower().
        v = str(v).lower()
        allowed = ["standard", "serial-xtal", "multi-orientation"]
        if v not in allowed:
            raise ValueError(
                f'" {v} " is not valid.\n'
                f'value must be one of" {allowed} ".'
            )
        return v

    @field_validator('processingpipeline', mode="before")
    @classmethod
    def processingpipeline_allowed(cls, v):
        """Lower-case the value and require one of the allowed pipelines.

        Falsy input is returned unchanged.

        Raises:
            ValueError: if the value is not in the allowed list.
        """
        if not v:
            return v
        v = str(v).lower()
        allowed = ["gopy", "autoproc", "xia2dials"]
        if v not in allowed:
            raise ValueError(
                f'" {v} " is not valid.\n'
                f'value must be one of " {allowed} ".'
            )
        return v

    @field_validator('spacegroupnumber', mode="before")
    @classmethod
    def spacegroupnumber_integer(cls, v):
        """Convert the space group number to ``int`` and require 1..230.

        Falsy input is returned unchanged.

        Raises:
            ValueError: if the value is non-numeric or out of range.
        """
        if not v:
            return v
        try:
            v = int(float(v))
        except (ValueError, TypeError, OverflowError) as e:
            raise ValueError(
                f'" {v} " is not valid.\n'
                "value must be a positive integer between 1 and 230."
            ) from e
        if not 0 < v < 231:
            raise ValueError(
                f'" {v} " is not valid.\n'
                "value must be a positive integer between 1 and 230."
            )
        return v

    @field_validator('cellparameters', mode="before")
    @classmethod
    def cellparameters_positive_floats(cls, v):
        """Require six space-separated positive numbers; return input unchanged.

        Falsy input is returned unchanged.

        Raises:
            ValueError: if there are not exactly six items or any item is
                not a positive float.
        """
        if not v:
            return v
        parts = str(v).split(" ")
        if len(parts) != 6:
            raise ValueError(
                f"' {v} ' is not valid. value must be a set of six numbers."
            )
        for part in parts:
            try:
                number = float(part)
            except (ValueError, TypeError) as e:
                raise ValueError(
                    f"' {part} ' is not valid. value must be a positive float."
                ) from e
            if not number > 0:
                raise ValueError(
                    f"' {number} ' is not valid. value must be a positive float."
                )
        return v

    @field_validator('rescutkey', mode="before")
    @classmethod
    def rescutkey_allowed(cls, v):
        """Lower-case the value and require ``"is"`` or ``"cchalf"``.

        Falsy input is returned unchanged.

        Raises:
            ValueError: if the value is not in the allowed list.
        """
        if not v:
            return v
        v = str(v).lower()
        allowed = ["is", "cchalf"]
        if v not in allowed:
            raise ValueError(
                f"' {v} ' is not valid. value must be ' {allowed} '."
            )
        return v

    @field_validator(
        'autoprocfull', 'procfull', 'adpenabled', 'noano', 'ffcscampaign',
        mode="before"
    )
    @classmethod
    def boolean_allowed(cls, v):
        """Title-case the value and require ``"True"`` or ``"False"``.

        Falsy input is returned unchanged.

        Raises:
            ValueError: if the value is neither ``"True"`` nor ``"False"``.
        """
        if not v:
            return v
        # str() first: a genuine bool cell has no .title() method.
        v = str(v).title()
        allowed = ["False", "True"]
        if v not in allowed:
            raise ValueError(
                f'" {v} " is not valid.\n'
                f"value must be ' {allowed} '."
            )
        return v

    @field_validator('trustedhigh', mode="before")
    @classmethod
    def trusted_float(cls, v):
        """Convert the value to ``float`` and require ``0 < v <= 2.0``.

        Falsy input is returned unchanged.

        Raises:
            ValueError: if the value is non-numeric or out of range.
        """
        if not v:
            return v
        try:
            v = float(v)
        except (ValueError, TypeError) as e:
            raise ValueError(
                f'" {v} " is not valid.\n'
                "value must be a float between 0 and 2.0."
            ) from e
        if not 2.0 >= v > 0:
            raise ValueError(
                f'" {v} " is not valid.\n'
                "value must be a float between 0 and 2.0."
            )
        return v

    @field_validator('proteinname', mode="before")
    @classmethod
    def proteinname_characters(cls, v):
        """Normalise a protein name and restrict it to ``[A-Za-z0-9._+-]``.

        Falsy input is returned unchanged.

        Raises:
            ValueError: if the value contains a newline or characters other
                than alphanumerics and ``. _ + -``.
        """
        if not v:
            return v
        v = str(v).replace(" ", "_")
        if "\n" in v:
            raise ValueError("is not valid. newline character detected.")
        characters = re.sub("[._+-]", "", v)
        if not characters.isalnum():
            raise ValueError(
                f'" {v} " is not valid.\n'
                "must contain only alphanumeric and . _ + - characters"
            )
        return re.sub(r"\.0$", "", v)

    @field_validator('chiphiangles', mode="before")
    @classmethod
    def chiphiangles_value(cls, v):
        """Parse ``"(a, b), (c, d), ..."`` into a list of ``(float, float)``.

        An optional surrounding ``[...]`` is removed first.  Falsy input is
        returned unchanged.

        Raises:
            ValueError: if a parenthesised group is not a pair of numbers.
        """
        if not v:
            return v
        text = re.sub(r"(^\s*\[\s*|\s*\]\s*$)", "", str(v).strip())
        try:
            angles = []
            for group in re.findall(r"\(.*?\)", text):
                # IndexError here means the group has no comma, e.g. "(5)".
                first = re.findall(r"\(.*?\,", group)[0].replace(" ", "")[1:-1]
                second = re.findall(r"\,.*?\)", group)[0].replace(" ", "")[1:-1]
                angles.append((float(first), float(second)))
        except (ValueError, TypeError, IndexError) as e:
            raise ValueError(
                f'" {text} " is not valid. Example format is\n'
                "(0.0, 0.0), (20.0, 0.0), (30, 0.0)"
            ) from e
        return angles

    # "priority" is deliberately not listed: it is Optional[int], and an
    # empty string cannot be validated as an integer.
    @field_validator(
        "comments",
        "pinbarcode",
        "directory",
        "proteinname",
        "oscillation",
        "exposure",
        "totalrange",
        "transmission",
        "targetresolution",
        "aperture",
        "datacollectiontype",
        "processingpipeline",
        "spacegroupnumber",
        "cellparameters",
        "rescutkey",
        "rescutvalue",
        "userresolution",
        "pdbmodel",
        "autoprocfull",
        "procfull",
        "adpenabled",
        "noano",
        "trustedhigh",
        "ffcscampaign",
        "autoprocextraparams",
        "chiphiangles",
        mode="before"
    )
    @classmethod
    def set_default_emptystring(cls, v):
        """Replace any falsy value (``None``, ``0``, ``[]``, ...) with ``""``."""
        return v or ""

    class Config:
        # NOTE(review): class-based Config is the pydantic v1 style and
        # `aliases` does not appear to be a v2 config option - confirm
        # whether anything reads it before migrating to `model_config`.
        str_strip_whitespace = True
        aliases = {
            'dewarname': 'dewarname',
            'puckname': 'puckname',
            'crystalname': 'crystalname',
        }
class TELLModel(SpreadsheetModel):
    """Spreadsheet sample row extended with additional bookkeeping fields.

    Inherits every field and validator of ``SpreadsheetModel`` and adds the
    fields below.  ``samplemountcount``, ``samplestatus`` and ``puckaddress``
    have defaults; the remaining fields carry no default.

    NOTE(review): under pydantic v2 ``Optional[str]`` without a default is
    nullable but still required - confirm callers always pass ``prefix`` and
    ``folder``.
    """

    input_order: int
    samplemountcount: int = 0
    samplestatus: str = "not present"
    puckaddress: str = "---"
    username: str
    puck_number: int
    prefix: Optional[str]
    folder: Optional[str]