422 lines
14 KiB
Python
422 lines
14 KiB
Python
import re
from typing import Any, Optional, Union

from pydantic import AliasChoices, BaseModel, ConfigDict, Field, field_validator
from typing_extensions import Annotated
|
|
|
|
|
|
class SpreadsheetModel(BaseModel):
    """Validate and normalise one sample row of a spreadsheet.

    ``dewarname``, ``puckname``, ``crystalname`` and ``positioninpuck`` must
    carry a value. The remaining parameters are optional; when they are
    empty they are stored as ``""`` (``priority`` is stored as ``None``).

    Every validator runs in ``mode="before"``, i.e. on the raw cell value, so
    each one has to cope with ``None``, numbers and strings alike. All input
    problems are reported as ``ValueError`` so that pydantic turns them into a
    ``ValidationError`` instead of letting the validation crash.

    NOTE(review): fields annotated ``Optional[...]`` without a default are
    still *required* keys in pydantic v2 (only their value may be ``None``).
    Confirm that this is intended before adding defaults.
    """

    # Replaces the deprecated class-based ``Config``.
    # NOTE(review): the former ``Config.aliases`` mapping (identity aliases for
    # dewarname/puckname/crystalname) is believed to be ignored by pydantic v2
    # and was dropped; confirm nothing reads ``SpreadsheetModel.Config``.
    model_config = ConfigDict(str_strip_whitespace=True)

    dewarname: str = Field(..., alias='dewarname')
    puckname: str = Field(..., alias='puckname')
    pucktype: Optional[str] = "unipuck"
    pucklocationindewar: Optional[Union[int, str]]
    crystalname: Annotated[
        str,
        Field(
            ...,
            max_length=64,
            title="Crystal Name",
            description=(
                "max_length imposed by MTZ file header format "
                "https://www.ccp4.ac.uk/html/mtzformat.html"
            ),
            alias='crystalname',
        ),
    ]
    positioninpuck: int
    priority: Optional[int]
    comments: Optional[str]
    pinbarcode: Optional[str]
    directory: Optional[str]
    # The parameters below accept any raw value; their validators either
    # convert it or leave an empty value untouched.
    proteinname: Any = ""
    oscillation: Any = ""
    exposure: Any = ""
    totalrange: Any = ""
    transmission: Any = ""
    targetresolution: Any = ""
    aperture: Any = ""
    datacollectiontype: Any = ""
    processingpipeline: Any = ""
    spacegroupnumber: Any = ""
    cellparameters: Any = ""
    rescutkey: Any = ""
    rescutvalue: Any = ""
    userresolution: Any = ""
    pdbmodel: Any = ""
    autoprocfull: Any = ""
    procfull: Any = ""
    adpenabled: Any = ""
    noano: Any = ""
    trustedhigh: Any = ""
    ffcscampaign: Any = ""
    autoprocextraparams: Any = ""
    chiphiangles: Any = ""

    @field_validator('dewarname', 'puckname', mode="before")
    @classmethod
    def dewarname_puckname_characters(cls, v):
        """Return the dewar/puck name upper-cased with spaces as underscores.

        Surrounding whitespace is removed first so it is not turned into
        leading/trailing underscores, and a trailing ``.0`` (a number read as
        a float) is dropped.

        Raises:
            ValueError: if the value is missing/empty or contains a newline.
        """
        raw = "" if v is None else str(v)
        if "\n" in raw:
            raise ValueError("is not valid. newline character detected.")
        name = raw.strip().replace(" ", "_")
        if not name:
            raise ValueError(
                f'" {v} " is not valid. '
                "Value must be provided for all samples in the spreadsheet."
            )
        return re.sub(r"\.0$", "", name).upper()

    @field_validator('crystalname', mode="before")
    @classmethod
    def parameter_characters(cls, v):
        """Return the crystal name with spaces replaced by underscores.

        Raises:
            ValueError: if the value is ``None``, contains a newline, or
                contains anything besides alphanumerics and ``. _ + -``.
        """
        if v is None:
            # str(None) would otherwise be accepted as the name "None".
            raise ValueError(
                "Value must be provided for all samples in the spreadsheet."
            )
        raw = str(v)
        if "\n" in raw:
            raise ValueError("is not valid. newline character detected.")
        name = raw.strip().replace(" ", "_")
        if not re.sub("[._+-]", "", name).isalnum():
            raise ValueError(
                f'" {name} " is not valid. '
                "must contain only alphanumeric and . _ + - characters"
            )
        return re.sub(r"\.0$", "", name)

    @field_validator('directory', mode="before")
    @classmethod
    def directory_characters(cls, v):
        """Validate a relative directory that may contain ``{macro}`` parts.

        Empty values are returned unchanged. Otherwise surrounding whitespace
        and slashes are removed, spaces become underscores, and the result
        must consist only of ``a-z 0-9 _ . + -`` characters, ``/`` separators
        and ``{macro}`` groups naming one of the allowed macros.

        Raises:
            ValueError: on a newline, an illegal character or unknown macro.
        """
        if not v:
            return v
        raw = str(v)
        path = raw.strip().strip("/").replace(" ", "_")
        if "\n" in raw:
            raise ValueError(
                f'" {path} " is not valid. newline character detected.'
            )

        # One token per iteration (single character, {macro} or "/") accepts
        # the same strings as the former nested-quantifier pattern but cannot
        # backtrack exponentially on non-matching input.
        ok = "[a-z0-9_.+-]"
        directory_re = re.compile(
            r"(?:%s|\{%s+\}|/)*" % (ok, ok), re.IGNORECASE
        )
        if not directory_re.fullmatch(path):
            raise ValueError(
                f"' {path} ' is not valid. value must be a path or macro."
            )

        valid_macros = {
            "{date}",
            "{prefix}",
            "{sgpuck}",
            "{puck}",
            "{beamline}",
            "{sgprefix}",
            "{sgpriority}",
            "{sgposition}",
            "{protein}",
            "{method}",
        }
        for macro in re.findall(r"(\{[^}]+\})", path):
            if macro.lower() not in valid_macros:
                raise ValueError(
                    f'" {macro} " is not a valid macro, '
                    "please re-check documentation; "
                    "allowed macros: date, prefix, sgpuck, puck, beamline, "
                    "sgprefix, sgpriority, sgposition, protein, method"
                )
        return path

    @field_validator('positioninpuck', mode="before")
    @classmethod
    def positioninpuck_possible(cls, v):
        """Return the puck position as an ``int`` in the range 1..16.

        Raises:
            ValueError: if the value is empty, not numeric or out of range.
        """
        if not v:
            raise ValueError(
                "Value must be provided. Value must be from 1 to 16."
            )
        try:
            position = int(float(v))
        except (ValueError, TypeError, OverflowError) as e:
            # OverflowError: int(float("inf")) must not escape validation.
            raise ValueError(
                f'" {v} " is not valid. '
                "Value must be a numeric type and from 1 to 16."
            ) from e
        if not 1 <= position <= 16:
            raise ValueError(
                f'" {position} " is not valid. value must be from 1 to 16.'
            )
        return position

    @field_validator('pucklocationindewar', mode="before")
    @classmethod
    def pucklocationindewar_convert_to_str(cls, v):
        """Return the puck location as the string form of an integer.

        ``None`` and the literal ``"Unipuck"`` are passed through unchanged.

        Raises:
            ValueError: if the value cannot be converted to a number.
        """
        if v is None or v == "Unipuck":
            return v
        try:
            return str(int(float(v)))
        except (ValueError, TypeError, OverflowError) as e:
            raise ValueError(
                f"Value error, could not convert string to float: '{v}'"
            ) from e

    @field_validator('priority', mode="before")
    @classmethod
    def priority_positive(cls, v):
        """Return the priority as a positive ``int``, or ``None`` when empty.

        Raises:
            ValueError: if a value is given that is not a positive integer.
        """
        if v is None:
            return None
        text = re.sub(r"\.0$", "", str(v).strip())
        if not text:
            # An empty cell means "no priority"; "" cannot be stored in an
            # Optional[int] field.
            return None
        message = f" '{text}' is not valid. Value must be a positive integer."
        try:
            priority = int(text)
        except (ValueError, TypeError) as e:
            raise ValueError(message) from e
        if priority <= 0:
            raise ValueError(message)
        return priority

    @field_validator('aperture', mode="before")
    @classmethod
    def aperture_selection(cls, v):
        """Return the aperture as ``1``, ``2`` or ``3``; keep empty values.

        Raises:
            ValueError: if the value is not numeric or not one of 1, 2, 3.
        """
        if not v:
            return v
        try:
            aperture = int(float(v))
        except (ValueError, TypeError, OverflowError) as e:
            raise ValueError(
                f'" {v} " is not valid. value must be integer 1, 2 or 3.'
            ) from e
        if aperture not in (1, 2, 3):
            raise ValueError(
                f'" {aperture} " is not valid. '
                "value must be integer 1, 2 or 3."
            )
        return aperture

    @field_validator(
        "oscillation",
        "exposure",
        "totalrange",
        "targetresolution",
        "rescutvalue",
        "userresolution",
        mode="before"
    )
    @classmethod
    def parameter_positive_float(cls, v):
        """Return the value as a ``float`` greater than 0; keep empty values.

        Raises:
            ValueError: if the value is not numeric or not greater than 0.
        """
        if not v:
            return v
        try:
            number = float(v)
        except (ValueError, TypeError, OverflowError) as e:
            raise ValueError(
                f'" {v} " is not valid. value must be a positive float.'
            ) from e
        # "not > 0" (rather than "<= 0") also rejects NaN.
        if not number > 0:
            raise ValueError(
                f'" {number} " is not valid. value must be a positive float.'
            )
        return number

    @field_validator('transmission', mode="before")
    @classmethod
    def tranmission_fraction(cls, v):
        """Return the transmission as a ``float`` with 0 < value <= 100.

        Empty values are returned unchanged.

        Raises:
            ValueError: if the value is not numeric or outside that range.
        """
        if not v:
            return v
        try:
            transmission = float(v)
        except (ValueError, TypeError, OverflowError) as e:
            raise ValueError(
                f'" {v} " is not valid. '
                "value must be a float between 0 and 100."
            ) from e
        if not 100 >= transmission > 0:
            raise ValueError(
                f'" {transmission} " is not valid. '
                "value must be a float between 0 and 100."
            )
        return transmission

    @field_validator('datacollectiontype', mode="before")
    @classmethod
    def datacollectiontype_allowed(cls, v):
        """Return the lower-cased collection type; keep empty values.

        Raises:
            ValueError: if the value is not one of the allowed types.
        """
        if not v:
            return v
        # str() first: non-string cell values have no .lower().
        collection_type = str(v).strip().lower()
        allowed = ["standard", "serial-xtal", "multi-orientation"]
        if collection_type not in allowed:
            raise ValueError(
                f'" {collection_type} " is not valid. '
                f'value must be one of " {allowed} ".'
            )
        return collection_type

    @field_validator('processingpipeline', mode="before")
    @classmethod
    def processingpipeline_allowed(cls, v):
        """Return the lower-cased pipeline name; keep empty values.

        Raises:
            ValueError: if the value is not one of the allowed pipelines.
        """
        if not v:
            return v
        pipeline = str(v).strip().lower()
        allowed = ["gopy", "autoproc", "xia2dials"]
        if pipeline not in allowed:
            raise ValueError(
                f'" {pipeline} " is not valid. '
                f'value must be one of " {allowed} ".'
            )
        return pipeline

    @field_validator('spacegroupnumber', mode="before")
    @classmethod
    def spacegroupnumber_integer(cls, v):
        """Return the space group number as an ``int`` in the range 1..230.

        Empty values are returned unchanged.

        Raises:
            ValueError: if the value is not numeric or outside 1..230.
        """
        if not v:
            return v
        try:
            number = int(float(v))
        except (ValueError, TypeError, OverflowError) as e:
            raise ValueError(
                f'" {v} " is not valid. '
                "value must be a positive integer between 1 and 230."
            ) from e
        if not 0 < number < 231:
            raise ValueError(
                f'" {number} " is not valid. '
                "value must be a positive integer between 1 and 230."
            )
        return number

    @field_validator('cellparameters', mode="before")
    @classmethod
    def cellparameters_positive_floats(cls, v):
        """Check that the value holds six whitespace-separated positive floats.

        Returns the six items joined by single spaces (identical to the input
        when it was already single-space separated); empty values are
        returned unchanged.

        Raises:
            ValueError: if there are not exactly six items or an item is not
                a float greater than 0.
        """
        if not v:
            return v
        # split() tolerates repeated or surrounding whitespace.
        items = str(v).split()
        if len(items) != 6:
            raise ValueError(
                f"' {v} ' is not valid. value must be a set of six numbers."
            )
        for item in items:
            try:
                number = float(item)
            except (ValueError, TypeError) as e:
                raise ValueError(
                    f"' {item} ' is not valid. value must be a positive float."
                ) from e
            if not number > 0:
                raise ValueError(
                    f"' {number} ' is not valid. "
                    "value must be a positive float."
                )
        return " ".join(items)

    @field_validator('rescutkey', mode="before")
    @classmethod
    def rescutkey_allowed(cls, v):
        """Return the lower-cased resolution cut-off key; keep empty values.

        Raises:
            ValueError: if the value is neither ``is`` nor ``cchalf``.
        """
        if not v:
            return v
        key = str(v).strip().lower()
        allowed = ["is", "cchalf"]
        if key not in allowed:
            raise ValueError(
                f"' {key} ' is not valid. value must be ' {allowed} '."
            )
        return key

    @field_validator(
        'autoprocfull', 'procfull', 'adpenabled', 'noano', 'ffcscampaign',
        mode="before"
    )
    @classmethod
    def boolean_allowed(cls, v):
        """Return ``"True"`` or ``"False"`` for a truthy flag value.

        Falsy values (including the boolean ``False``) are returned unchanged.

        Raises:
            ValueError: if the value does not spell ``True`` or ``False``.
        """
        if not v:
            return v
        # str() first: a real boolean cell value has no .title().
        flag = str(v).strip().title()
        allowed = ["False", "True"]
        if flag not in allowed:
            raise ValueError(
                f'" {flag} " is not valid. value must be \' {allowed} \'.'
            )
        return flag

    @field_validator('trustedhigh', mode="before")
    @classmethod
    def trusted_float(cls, v):
        """Return the value as a ``float`` with 0 < value <= 2.0.

        Empty values are returned unchanged.

        Raises:
            ValueError: if the value is not numeric or outside that range.
        """
        if not v:
            return v
        try:
            trusted = float(v)
        except (ValueError, TypeError, OverflowError) as e:
            raise ValueError(
                f'" {v} " is not valid. '
                "value must be a float between 0 and 2.0."
            ) from e
        if not 2.0 >= trusted > 0:
            raise ValueError(
                f'" {trusted} " is not valid. '
                "value must be a float between 0 and 2.0."
            )
        return trusted

    @field_validator('proteinname', mode="before")
    @classmethod
    def proteinname_characters(cls, v):
        """Return the protein name with spaces replaced by underscores.

        Empty values are returned unchanged.

        Raises:
            ValueError: if the value contains a newline or anything besides
                alphanumerics and ``. _ + -``.
        """
        if not v:
            return v
        raw = str(v)
        if "\n" in raw:
            raise ValueError("is not valid. newline character detected.")
        name = raw.strip().replace(" ", "_")
        if not re.sub("[._+-]", "", name).isalnum():
            raise ValueError(
                f'" {name} " is not valid. '
                "must contain only alphanumeric and . _ + - characters"
            )
        return re.sub(r"\.0$", "", name)

    @field_validator('chiphiangles', mode="before")
    @classmethod
    def chiphiangles_value(cls, v):
        """Parse ``"(a, b), (c, d), ..."`` into a list of float pairs.

        An optional enclosing ``[...]`` is ignored; empty values are returned
        unchanged.

        Raises:
            ValueError: if a parenthesised group is not exactly two numbers,
                or if non-empty text contains no parenthesised group at all.
        """
        if not v:
            return v
        text = re.sub(r"(^\s*\[\s*|\s*\]\s*$)", "", str(v).strip())
        message = (
            f'" {text} " is not valid. Example format is '
            "(0.0, 0.0), (20.0, 0.0), (30, 0.0)"
        )
        pairs = []
        try:
            for group in re.findall(r"\(.*?\)", text):
                # Unpacking raises ValueError unless there are exactly two
                # comma-separated items inside the parentheses.
                first, second = group[1:-1].replace(" ", "").split(",")
                pairs.append((float(first), float(second)))
        except (ValueError, TypeError) as e:
            raise ValueError(message) from e
        if text and not pairs:
            # Text without any "(a, b)" group must not pass as "no angles".
            raise ValueError(message)
        return pairs

    # NOTE(review): keep this validator last. pydantic is understood to run
    # "before" validators in reverse definition order, which would make this
    # one run first; confirm against the pydantic docs before reordering.
    # "priority" is intentionally not listed: it is Optional[int] and cannot
    # hold "".
    @field_validator(
        "comments",
        "pinbarcode",
        "directory",
        "proteinname",
        "oscillation",
        "exposure",
        "totalrange",
        "transmission",
        "targetresolution",
        "aperture",
        "datacollectiontype",
        "processingpipeline",
        "spacegroupnumber",
        "cellparameters",
        "rescutkey",
        "rescutvalue",
        "userresolution",
        "pdbmodel",
        "autoprocfull",
        "procfull",
        "adpenabled",
        "noano",
        "trustedhigh",
        "ffcscampaign",
        "autoprocextraparams",
        "chiphiangles",
        mode="before"
    )
    @classmethod
    def set_default_emptystring(cls, v):
        """Replace any falsy value (``None``, ``0``, ``[]`` ...) with ``""``."""
        return v or ""
|
|
|
|
|
|
class TELLModel(SpreadsheetModel):
    """Spreadsheet sample row extended with additional bookkeeping fields.

    Inherits every field and validator of ``SpreadsheetModel`` and adds the
    fields declared below; no extra validators are defined here.

    NOTE(review): ``prefix`` and ``folder`` are ``Optional[str]`` without a
    default, so under pydantic v2 the keys are presumably still required and
    only their value may be ``None`` -- confirm this is intended.
    """

    # No default declared: must be supplied by the caller.
    input_order: int

    # Values used when the caller supplies nothing.
    samplemountcount: int = 0
    samplestatus: str = "not present"
    puckaddress: str = "---"

    # No default declared: must be supplied by the caller.
    username: str
    puck_number: int

    # May be None, but declare no default.
    prefix: Optional[str]
    folder: Optional[str]
|