# Python source file, 287 lines, 12 KiB
import re
|
|
from typing import Any, Optional, List, Dict
|
|
|
|
from pydantic import BaseModel, Field, field_validator
|
|
from typing_extensions import Annotated
|
|
|
|
|
|
def _checked_number(v, cast, is_valid, requirement):
    """Convert *v* with *cast* and verify it with *is_valid*.

    Args:
        v: Raw input value (not None).
        cast: Callable converting the raw value to a number (e.g. ``int``).
        is_valid: Predicate applied to the converted number.
        requirement: Text inserted after "Value must be " in the error message.

    Returns:
        The converted number.

    Raises:
        ValueError: If the conversion fails or the predicate rejects the number.
    """
    try:
        number = cast(v)
    except (ValueError, TypeError, OverflowError) as e:
        # OverflowError covers int(float("inf")), which would otherwise escape
        # pydantic's error collection.
        raise ValueError(f" '{v}' is not valid. Value must be {requirement}.") from e
    if not is_valid(number):
        raise ValueError(f" '{number}' is not valid. Value must be {requirement}.")
    return number


class SpreadsheetModel(BaseModel):
    """Schema and validation rules for one sample row of a spreadsheet.

    All validators run in ``mode="before"``, i.e. on the raw input value prior
    to pydantic's own type coercion.

    NOTE(review): ``priority``, ``comments`` and ``directory`` are ``Optional``
    without a default; under pydantic v2 they may be ``None`` but must still be
    supplied. Confirm this is intended.
    """

    dewarname: str = Field(..., alias='dewarname')
    puckname: str = Field(..., alias='puckname')
    pucktype: Optional[str] = Field(None, alias="pucktype")
    crystalname: Annotated[
        str,
        Field(...,
              max_length=64,
              title="Crystal Name",
              description="max_length imposed by MTZ file header format https://www.ccp4.ac.uk/html/mtzformat.html",
              alias='crystalname'
              ),
    ]
    positioninpuck: int  # Only accept positive integers between 1 and 16
    priority: Optional[int]
    comments: Optional[str]
    directory: Optional[str]
    proteinname: Optional[str] = ""  # Alphanumeric validation
    oscillation: Optional[float] = None  # Only accept positive float
    exposure: Optional[float] = None  # Only accept floats between 0 and 1
    totalrange: Optional[int] = None  # Only accept integers between 0 and 360
    transmission: Optional[int] = None  # Only accept integers between 0 and 100
    targetresolution: Optional[float] = None  # Only accept positive float
    aperture: Optional[str] = None  # Optional string field; validated as "1", "2" or "3"
    datacollectiontype: Optional[str] = None  # Only accept "standard", other types might be added later
    processingpipeline: Optional[str] = ""  # Only accept "gopy", "autoproc", "xia2dials"
    spacegroupnumber: Optional[int] = None  # Only accept positive integers between 1 and 230
    cellparameters: Optional[str] = None  # Must be a set of six positive floats or integers
    rescutkey: Optional[str] = None  # Only accept "is" or "cchalf"
    rescutvalue: Optional[float] = None  # Must be a positive float if rescutkey is provided
    userresolution: Optional[float] = None
    pdbid: Optional[str] = ""  # Accepts either the format of the protein data bank code or {provided}
    autoprocfull: Optional[bool] = None
    procfull: Optional[bool] = None
    adpenabled: Optional[bool] = None
    noano: Optional[bool] = None
    ffcscampaign: Optional[bool] = None
    trustedhigh: Optional[float] = None  # Should be a float between 0 and 2.0
    autoprocextraparams: Optional[str] = None  # Optional string field
    chiphiangles: Optional[float] = None  # Optional float field between 0 and 30
    dose: Optional[float] = None  # Optional float field

    # Add pucktype validation
    @field_validator('pucktype', mode="before")
    @classmethod
    def validate_pucktype(cls, v):
        """Accept only the literal ``"unipuck"``.

        NOTE(review): an explicitly passed ``None`` is rejected as well; only
        an omitted field keeps the ``None`` default. Confirm this is intended.
        """
        if v != "unipuck":
            raise ValueError(f"'{v}' is not valid. Pucktype must be 'unipuck'.")
        return v

    # Validators
    @field_validator('dewarname', 'puckname', mode="before")
    @classmethod
    def dewarname_puckname_characters(cls, v):
        """Normalise dewar/puck names: strip, spaces to ``_``, upper-case.

        A trailing ``.0`` is removed (presumably left over from numeric cells
        -- verify against the caller).

        Raises:
            ValueError: If the value is empty or contains a newline.
        """
        if not v:
            raise ValueError("Value must be provided for dewarname and puckname.")
        v = str(v).strip().replace(" ", "_").upper()
        if "\n" in v:
            # Raised explicitly: an ``assert`` would vanish under ``python -O``.
            raise ValueError(f" '{v}' is not valid. newline character detected.")
        return re.sub(r"\.0$", "", v)

    @field_validator('crystalname', mode="before")
    @classmethod
    def parameter_characters(cls, v):
        """Replace spaces with ``_`` and allow only alphanumerics and ``. _ + -``.

        Raises:
            ValueError: On a newline, an empty value, or any other character.
        """
        v = str(v).replace(" ", "_")
        if "\n" in v:
            raise ValueError(f" '{v}' is not valid. newline character detected.")
        remainder = re.sub("[._+-]", "", v)
        if not remainder.isalnum():
            raise ValueError(f" '{v}' is not valid. Only alphanumeric and . _ + - characters allowed.")
        return v

    @field_validator('directory', mode="before")
    @classmethod
    def directory_characters(cls, v):
        """Validate a directory path that may contain ``{macro}`` placeholders.

        Leading/trailing slashes are stripped and spaces become ``_``. The
        returned value keeps its macros; they are substituted only on a
        scratch copy used for the character check.

        Raises:
            ValueError: On a newline or any character outside
                ``a-z 0-9 _ . + - /`` (case-insensitive) once the known
                macros have been accounted for.
        """
        if not v:
            return v
        v = str(v).strip("/").replace(" ", "_")
        if "\n" in v:
            raise ValueError(f" '{v}' is not valid. newline character detected.")

        valid_macros = ["{date}", "{prefix}", "{sgpuck}", "{puck}", "{beamline}", "{sgprefix}",
                        "{sgpriority}", "{sgposition}", "{protein}", "{method}"]
        macro_re = re.compile("|".join(re.escape(macro) for macro in valid_macros))
        candidate = macro_re.sub('macro', v)

        # Linear-time equivalent of the former nested-quantifier pattern,
        # which could backtrack exponentially on invalid input.
        if not re.fullmatch(r"[a-z0-9_.+/-]*", candidate, re.IGNORECASE):
            raise ValueError(f" '{v}' is not valid. Value must be a valid path or macro.")
        return v

    @field_validator('positioninpuck', mode="before")
    @classmethod
    def positioninpuck_possible(cls, v):
        """Require an ``int`` instance in the range 1..16 (no coercion)."""
        if not isinstance(v, int) or v < 1 or v > 16:
            raise ValueError(f" '{v}' is not valid. Value must be an integer between 1 and 16.")
        return v

    @field_validator('priority', mode="before")
    @classmethod
    def priority_positive(cls, v):
        """Convert a provided priority to ``int`` and require it to be > 0."""
        if v is None:
            return v
        return _checked_number(v, int, lambda n: n > 0, "a positive integer")

    @field_validator('aperture', mode="before")
    @classmethod
    def aperture_selection(cls, v):
        """Require a provided aperture to be 1, 2 or 3 (``2.0`` and ``"2"`` are accepted).

        The result is returned as a string because the field is declared
        ``str`` and pydantic v2 does not coerce numbers to ``str`` by default.
        """
        if v is None:
            return v
        choice = _checked_number(
            v, lambda raw: int(float(raw)), lambda n: n in {1, 2, 3}, "1, 2, or 3"
        )
        return str(choice)

    @field_validator('oscillation', 'targetresolution', mode="before")
    @classmethod
    def positive_float_validator(cls, v):
        """Convert a provided value to ``float`` and require it to be > 0."""
        if v is None:
            return v
        return _checked_number(v, float, lambda x: x > 0, "a positive float")

    @field_validator('exposure', mode="before")
    @classmethod
    def exposure_in_range(cls, v):
        """Convert a provided exposure to ``float`` and require 0 <= v <= 1."""
        if v is None:
            return v
        return _checked_number(v, float, lambda x: 0 <= x <= 1, "a float between 0 and 1")

    @field_validator('totalrange', mode="before")
    @classmethod
    def totalrange_in_range(cls, v):
        """Convert a provided total range to ``int`` and require 0 <= v <= 360."""
        if v is None:
            return v
        return _checked_number(v, int, lambda n: 0 <= n <= 360, "an integer between 0 and 360")

    @field_validator('transmission', mode="before")
    @classmethod
    def transmission_fraction(cls, v):
        """Convert a provided transmission to ``int`` and require 0 <= v <= 100."""
        if v is None:
            return v
        return _checked_number(v, int, lambda n: 0 <= n <= 100, "an integer between 0 and 100")

    @field_validator('datacollectiontype', mode="before")
    @classmethod
    def datacollectiontype_allowed(cls, v):
        """Require a provided data collection type to be ``"standard"`` (case-insensitive)."""
        allowed = {"standard"}  # Other types of data collection might be added later
        # str() keeps non-string input from raising AttributeError on .lower().
        if v and str(v).lower() not in allowed:
            raise ValueError(f" '{v}' is not valid. Value must be one of {allowed}.")
        return v

    @field_validator('processingpipeline', mode="before")
    @classmethod
    def processingpipeline_allowed(cls, v):
        """Require a provided pipeline to be gopy, autoproc or xia2dials (case-insensitive)."""
        allowed = {"gopy", "autoproc", "xia2dials"}
        if v and str(v).lower() not in allowed:
            # Sorted so the message does not depend on set iteration order.
            raise ValueError(f" '{v}' is not valid. Value must be one of {sorted(allowed)}.")
        return v

    @field_validator('spacegroupnumber', mode="before")
    @classmethod
    def spacegroupnumber_allowed(cls, v):
        """Convert a provided space group number to ``int`` and require 1 <= v <= 230."""
        if v is None:
            return v
        return _checked_number(v, int, lambda n: 1 <= n <= 230, "an integer between 1 and 230")

    @field_validator('cellparameters', mode="before")
    @classmethod
    def cellparameters_format(cls, v):
        """Require a provided value to be six comma-separated positive numbers.

        The value itself is returned unchanged.
        """
        if not v:
            return v
        message = f" '{v}' is not valid. Value must be a set of six positive floats or integers."
        try:
            numbers = [float(part) for part in str(v).split(",")]
        except ValueError as e:
            raise ValueError(message) from e
        if len(numbers) != 6 or not all(number > 0 for number in numbers):
            raise ValueError(message)
        return v

    @field_validator('rescutkey', 'rescutvalue', mode="before")
    @classmethod
    def rescutkey_value_pair(cls, v, info):
        """Validate ``rescutkey`` and the ``rescutvalue`` that depends on it.

        A field validator receives the single field value, not the whole row,
        so the sibling field is read from ``info.data``. ``rescutkey`` is
        declared before ``rescutvalue`` and is therefore already validated
        when ``rescutvalue`` is checked.

        Raises:
            ValueError: If ``rescutkey`` is given and is not ``"is"`` or
                ``"cchalf"``, or if ``rescutvalue`` accompanies a rescutkey
                and is not a positive number.
        """
        if info.field_name == 'rescutkey':
            if v and v not in {"is", "cchalf"}:
                raise ValueError("Rescutkey must be either 'is' or 'cchalf'")
            return v

        # rescutvalue: only constrained when a rescutkey is present.
        if v is None or not info.data.get('rescutkey'):
            return v
        message = "Rescutvalue must be a positive float if rescutkey is provided"
        try:
            number = float(v)
        except (ValueError, TypeError) as e:
            raise ValueError(message) from e
        if not number > 0:
            raise ValueError(message)
        return number

    @field_validator('trustedhigh', mode="before")
    @classmethod
    def trustedhigh_allowed(cls, v):
        """Convert a provided value to ``float`` and require 0 <= v <= 2.0."""
        if v is None:
            return v
        return _checked_number(v, float, lambda x: 0 <= x <= 2.0, "a float between 0 and 2.0")

    @field_validator('chiphiangles', mode="before")
    @classmethod
    def chiphiangles_allowed(cls, v):
        """Convert a provided value to ``float`` and require 0 <= v <= 30."""
        if v is None:
            return v
        return _checked_number(v, float, lambda x: 0 <= x <= 30, "a float between 0 and 30")

    @field_validator('dose', mode="before")
    @classmethod
    def dose_positive(cls, v):
        """Convert a provided dose to ``float`` and require it to be > 0."""
        if v is None:
            return v
        return _checked_number(v, float, lambda x: x > 0, "a positive float")
|
|
|
|
class TELLModel(SpreadsheetModel):
    """SpreadsheetModel row extended with additional per-sample fields.

    Inherits every field and validator of ``SpreadsheetModel``.
    NOTE(review): ``prefix`` and ``folder`` have no default, so under
    pydantic v2 they must be supplied even though ``None`` is allowed.
    """

    # Required integer; no default.
    input_order: int
    # Defaults to 0.
    samplemountcount: int = 0
    # Defaults to "not present".
    samplestatus: str = "not present"
    # Defaults to the placeholder "---".
    puckaddress: str = "---"
    # Required string; no default.
    username: str
    # Required integer; no default.
    puck_number: int
    # Nullable strings without defaults.
    prefix: Optional[str]
    folder: Optional[str]
|
|
|
|
class SpreadsheetResponse(BaseModel):
    """Result of processing a spreadsheet: validated rows, errors and summaries."""

    # Rows that passed validation, as SpreadsheetModel instances.
    data: List[SpreadsheetModel]
    # Errors encountered during validation.
    errors: List[Dict[str, Any]]
    # Raw data extracted from the spreadsheet.
    raw_data: List[Dict[str, Any]]

    # Count and list of strings for dewars, pucks and samples.
    dewars_count: int
    dewars: List[str]
    pucks_count: int
    pucks: List[str]
    samples_count: int
    samples: List[str]

    # Optional list of header strings; None when not supplied.
    headers: Optional[List[str]] = None
|
|
|
|
|
|
# Names exported by a star-import of this module; TELLModel is defined in this
# file but is not listed here.
__all__ = ['SpreadsheetModel', 'SpreadsheetResponse']
|