now with the heidi v1 spreadsheet validator

This commit is contained in:
GotthardG
2024-11-05 23:13:36 +01:00
parent 376352672f
commit 91468da9ed
7 changed files with 195 additions and 229 deletions

View File

@ -1,13 +1,6 @@
"""
Data model and validation for mandatory and single sample rows from
spreadsheet. Can be imported by sample_importer.py and database services.
"""
import re
from typing import Any, Optional, Union
from pydantic import BaseModel, Field, validator
from pydantic import BaseModel, Field, field_validator, AliasChoices
from typing_extensions import Annotated
@ -19,15 +12,15 @@ class SpreadsheetModel(BaseModel):
crystalname: Annotated[
str,
Field(...,
max_length=64,
title="Crystal Name",
description="""max_length imposed by MTZ file header format
max_length=64,
title="Crystal Name",
description="""max_length imposed by MTZ file header format
https://www.ccp4.ac.uk/html/mtzformat.html""",
alias='crystalname'
),
alias='crystalname'
),
]
positioninpuck: int
priority: Optional[str]
priority: Optional[int]
comments: Optional[str]
pinbarcode: Optional[str]
directory: Optional[str]
@ -55,20 +48,22 @@ class SpreadsheetModel(BaseModel):
autoprocextraparams: Any = ""
chiphiangles: Any = ""
@validator("dewarname", "puckname")
def dewarname_puckname_characters(cls, v, **kwargs):
assert (
len(str(v)) > 0
), f"""" {v} " is not valid.
value must be provided for all samples in spreadsheet."""
v = str(v).replace(" ", "_")
if re.search("\n", v):
assert v.isalnum(), "is not valid. newline character detected."
v = re.sub(r"\.0$", "", v)
return v.upper()
@field_validator('dewarname', 'puckname', mode="before")
@classmethod
def dewarname_puckname_characters(cls, v):
    """Normalise a dewar or puck name before pydantic's own type validation.

    A non-empty value is converted to ``str``, its spaces are replaced by
    underscores, a trailing ``.0`` is removed (presumably left over from a
    numeric spreadsheet cell read as a float) and the result is upper-cased.

    An empty value (``None``, ``""``, ``0``) is returned unchanged, so the
    field's declared type decides whether a missing name is acceptable.

    Args:
        v: Raw cell value as handed over by pydantic (``mode="before"``).

    Returns:
        The normalised upper-case name, or ``v`` itself when it is empty.

    Raises:
        ValueError: If the value contains a newline character.
    """
    if not v:
        return v

    name = str(v).replace(" ", "_")
    # Raise explicitly instead of asserting: assert statements disappear
    # under ``python -O``, which would silently disable this check.
    if "\n" in name:
        raise ValueError("is not valid. newline character detected.")
    return re.sub(r"\.0$", "", name).upper()
@validator("crystalname")
def parameter_characters(cls, v, **kwargs):
@field_validator('crystalname', mode="before")
@classmethod
def parameter_characters(cls, v):
v = str(v).replace(" ", "_")
if re.search("\n", v):
assert v.isalnum(), "is not valid. newline character detected."
@ -78,8 +73,9 @@ class SpreadsheetModel(BaseModel):
v = re.sub(r"\.0$", "", v)
return v
@validator("directory")
def directory_characters(cls, v, **kwargs):
@field_validator('directory', mode="before")
@classmethod
def directory_characters(cls, v):
if v:
v = str(v).strip("/").replace(" ", "_")
if re.search("\n", v):
@ -116,8 +112,9 @@ class SpreadsheetModel(BaseModel):
)
return v
@validator("positioninpuck", pre=True)
def positioninpuck_possible(cls, v, **kwargs):
@field_validator('positioninpuck', mode="before")
@classmethod
def positioninpuck_possible(cls, v):
if v:
try:
v = int(float(v))
@ -134,31 +131,37 @@ class SpreadsheetModel(BaseModel):
raise ValueError("Value must be provided. Value must be from 1 to 16.")
return v
@validator("pucklocationindewar")
def pucklocationindewar_convert_to_int(cls, v, **kwargs):
return int(float(v)) if v else v
@field_validator('pucklocationindewar', mode="before")
@classmethod
def pucklocationindewar_convert_to_str(cls, v):
    """Coerce the puck location to the string form of an integer.

    The literal ``"Unipuck"`` is passed through untouched. Any other value
    must be readable as a number; it is truncated to an integer and returned
    as a string (``2.0`` -> ``"2"``).

    Args:
        v: Raw cell value as handed over by pydantic (``mode="before"``).

    Returns:
        ``"Unipuck"`` or the decimal string of the integer location.

    Raises:
        ValueError: If the value cannot be converted to an integer. This
            includes ``None`` and other non-numeric types as well as
            infinite or NaN floats.
    """
    if v == "Unipuck":
        return v
    try:
        return str(int(float(v)))
    # float(None) raises TypeError and int(float("inf")) raises OverflowError;
    # both are turned into ValueError so that the failure is reported as a
    # validation error rather than escaping as an unrelated exception.
    except (ValueError, TypeError, OverflowError) as e:
        raise ValueError(
            f"Value error, could not convert string to float: '{v}'"
        ) from e
@field_validator('priority', mode="before")
@classmethod
def priority_positive(cls, v):
    """Validate the optional sample priority as a positive integer.

    The value is stringified, surrounding whitespace is stripped and a
    trailing ``.0`` is removed before it is parsed with ``int``.

    Args:
        v: Raw cell value as handed over by pydantic (``mode="before"``).

    Returns:
        ``None`` when no priority was given (``None`` or a blank string),
        otherwise the priority as an ``int`` greater than zero.

    Raises:
        ValueError: If the value is not an integer or is not positive.
    """
    if v is None:
        return v

    text = re.sub(r"\.0$", "", str(v).strip())
    # A blank cell means "no priority", not an invalid one.
    # NOTE(review): another validator in this model replaces empty values
    # with "" for this field; confirm which of the two runs first.
    if not text:
        return None

    try:
        number = int(text)
    except ValueError as e:
        raise ValueError(
            f" '{text}' is not valid. Value must be a positive integer."
        ) from e

    # Range check kept outside the try block so its error is not caught and
    # re-wrapped by the handler above.
    if number <= 0:
        raise ValueError(
            f" '{text}' is not valid. Value must be a positive integer."
        )
    return number
@validator("aperture")
def aperture_selection(cls, v, **kwargs):
@field_validator('aperture', mode="before")
@classmethod
def aperture_selection(cls, v):
if v:
try:
v = int(float(v))
@ -174,15 +177,17 @@ class SpreadsheetModel(BaseModel):
) from e
return v
@validator(
@field_validator(
"oscillation",
"exposure",
"totalrange",
"targetresolution",
"rescutvalue",
"userresolution",
mode="before"
)
def parameter_positive_float(cls, v, **kwargs):
@classmethod
def parameter_positive_float(cls, v):
if v:
try:
v = float(v)
@ -198,8 +203,9 @@ class SpreadsheetModel(BaseModel):
) from e
return v
@validator("transmission")
def tranmission_fraction(cls, v, **kwargs):
@field_validator('transmission', mode="before")
@classmethod
def tranmission_fraction(cls, v):
if v:
try:
v = float(v)
@ -217,8 +223,9 @@ class SpreadsheetModel(BaseModel):
) from e
return v
@validator("datacollectiontype")
def datacollectiontype_allowed(cls, v, **kwargs):
@field_validator('datacollectiontype', mode="before")
@classmethod
def datacollectiontype_allowed(cls, v):
if v:
v = v.lower()
allowed = ["standard", "serial-xtal", "multi-orientation"]
@ -229,8 +236,9 @@ class SpreadsheetModel(BaseModel):
)
return v
@validator("processingpipeline")
def processingpipeline_allowed(cls, v, **kwargs):
@field_validator('processingpipeline', mode="before")
@classmethod
def processingpipeline_allowed(cls, v):
if v:
v = v.lower()
allowed = ["gopy", "autoproc", "xia2dials"]
@ -241,8 +249,9 @@ class SpreadsheetModel(BaseModel):
)
return v
@validator("spacegroupnumber")
def spacegroupnumber_integer(cls, v, **kwargs):
@field_validator('spacegroupnumber', mode="before")
@classmethod
def spacegroupnumber_integer(cls, v):
if v:
try:
v = int(float(v))
@ -258,8 +267,9 @@ class SpreadsheetModel(BaseModel):
) from e
return v
@validator("cellparameters")
def cellparameters_positive_floats(cls, v, **kwargs):
@field_validator('cellparameters', mode="before")
@classmethod
def cellparameters_positive_floats(cls, v):
if v:
splitted = str(v).split(" ")
if len(splitted) != 6:
@ -279,8 +289,9 @@ class SpreadsheetModel(BaseModel):
) from e
return v
@validator("rescutkey")
def rescutkey_allowed(cls, v, **kwargs):
@field_validator('rescutkey', mode="before")
@classmethod
def rescutkey_allowed(cls, v):
if v:
v = v.lower()
allowed = ["is", "cchalf"]
@ -288,8 +299,9 @@ class SpreadsheetModel(BaseModel):
raise ValueError(f"' {v} ' is not valid. value must be ' {allowed} '.")
return v
@validator("autoprocfull", "procfull", "adpenabled", "noano", "ffcscampaign")
def boolean_allowed(cls, v, **kwargs):
@field_validator('autoprocfull', 'procfull', 'adpenabled', 'noano', 'ffcscampaign', mode="before")
@classmethod
def boolean_allowed(cls, v):
if v:
v = v.title()
allowed = ["False", "True"]
@ -300,8 +312,9 @@ class SpreadsheetModel(BaseModel):
)
return v
@validator("trustedhigh")
def trusted_float(cls, v, **kwargs):
@field_validator('trustedhigh', mode="before")
@classmethod
def trusted_float(cls, v):
if v:
try:
v = float(v)
@ -319,8 +332,9 @@ class SpreadsheetModel(BaseModel):
) from e
return v
@validator("proteinname")
def proteinname_characters(cls, v, **kwargs):
@field_validator('proteinname', mode="before")
@classmethod
def proteinname_characters(cls, v):
if v:
v = str(v).replace(" ", "_")
if re.search("\n", v):
@ -331,12 +345,13 @@ class SpreadsheetModel(BaseModel):
v = re.sub(r"\.0$", "", v)
return v
@validator("chiphiangles")
def chiphiangles_value(cls, v, **kwargs):
@field_validator('chiphiangles', mode="before")
@classmethod
def chiphiangles_value(cls, v):
if v:
try:
v = str(v)
re.sub(r"(^\s*\[\s*|\s*\]\s*$)", "", v.strip())
v = re.sub(r"(^\s*\[\s*|\s*\]\s*$)", "", v.strip())
list_of_strings = re.findall(r"\(.*?\)", v)
list_of_tuples = []
for el in list_of_strings:
@ -352,7 +367,7 @@ class SpreadsheetModel(BaseModel):
) from e
return v
@validator(
@field_validator(
"priority",
"comments",
"pinbarcode",
@ -380,12 +395,19 @@ class SpreadsheetModel(BaseModel):
"ffcscampaign",
"autoprocextraparams",
"chiphiangles",
mode="before"
)
def set_default_emptystring(cls, v, **kwargs):
@classmethod
def set_default_emptystring(cls, v):
return v or ""
# Model-wide pydantic settings for the spreadsheet row model.
class Config:
# NOTE(review): this looks like the pydantic v1 name of the setting below;
# confirm whether it is still needed next to the v2 name.
anystr_strip_whitespace = True
# Strip leading/trailing whitespace from string values.
str_strip_whitespace = True
# Maps each listed field name to itself.
# NOTE(review): confirm that pydantic actually reads an `aliases` config key;
# if not, this mapping has no effect.
aliases = {
'dewarname': 'dewarname',
'puckname': 'puckname',
'crystalname': 'crystalname',
}
class TELLModel(SpreadsheetModel):
@ -397,29 +419,3 @@ class TELLModel(SpreadsheetModel):
puck_number: int
prefix: Optional[str]
folder: Optional[str]
"""
Following params appended in teller.py for updating SDU sample model
class SDUTELLModel(TELLModel):
sdudaq: str
sdudiffcenter: str
sduopticalcenter: str
sdumount: str
sdusafetycheck: str
Following params returned in the format expected by tell.set_samples_info()
{
"userName": user,
"dewarName": sample["dewarname"],
"puckName": "", # FIXME at the moment this field is useless
"puckType": "Unipuck",
"puckAddress": sample["puckaddress"],
"puckBarcode": sample["puckname"],
"sampleBarcode": sample.get("pinbarcode", ""),
"sampleMountCount": sample["samplemountcount"],
"sampleName": sample["crystalname"],
"samplePosition": sample["positioninpuck"],
"sampleStatus": sample["samplestatus"],
}
"""