"""Pydantic models describing validated sample-spreadsheet rows."""

import re
from typing import Any, Callable, Dict, List, Optional

from pydantic import BaseModel, Field, field_validator, model_validator
from typing_extensions import Annotated

# Placeholders that may appear inside a ``directory`` value.
_DIRECTORY_MACROS = (
    "{date}",
    "{prefix}",
    "{sgpuck}",
    "{puck}",
    "{beamline}",
    "{sgprefix}",
    "{sgpriority}",
    "{sgposition}",
    "{protein}",
    "{method}",
)
_DIRECTORY_MACRO_RE = re.compile(
    "|".join(re.escape(macro) for macro in _DIRECTORY_MACROS)
)
# A path is any run of the allowed characters and "/" separators.  A single
# character class accepts the same strings as the nested-quantifier
# expression "((A*|A+)*/*)*" but cannot backtrack exponentially.
_DIRECTORY_RE = re.compile(r"[a-z0-9_.+/-]*", re.IGNORECASE)


def _validated_number(
    value: Any,
    cast: Callable[[Any], Any],
    is_valid: Callable[[Any], bool],
    requirement: str,
) -> Any:
    """Convert *value* with *cast* and check it with *is_valid*.

    Args:
        value: Raw input; ``None`` is passed through untouched.
        cast: Conversion applied to the raw input (``int``, ``float``, ...).
        is_valid: Predicate the converted number has to satisfy.
        requirement: Human-readable constraint used in the error message,
            e.g. ``"a positive float"``.

    Returns:
        The converted number, or ``None`` when *value* is ``None``.

    Raises:
        ValueError: If the conversion fails or the predicate is not met.
    """
    if value is None:
        return None
    try:
        number = cast(value)
    except (ValueError, TypeError, OverflowError) as exc:
        # OverflowError covers int(float("inf")).
        raise ValueError(
            f" '{value}' is not valid. Value must be {requirement}."
        ) from exc
    if not is_valid(number):
        raise ValueError(f" '{number}' is not valid. Value must be {requirement}.")
    return number


def _validated_choice(value: Any, allowed: frozenset) -> Any:
    """Check that a non-empty *value* is, case-insensitively, in *allowed*.

    The value itself is returned unchanged (its case is not normalised).

    Raises:
        ValueError: If *value* is truthy and not one of *allowed*.
    """
    if value and str(value).lower() not in allowed:
        # Sorted so the message is stable from run to run.
        choices = "{" + ", ".join(repr(item) for item in sorted(allowed)) + "}"
        raise ValueError(
            f" '{value}' is not valid. Value must be one of {choices}."
        )
    return value


class SpreadsheetModel(BaseModel):
    """One spreadsheet row, validated field by field.

    Every validator runs in ``mode="before"``, i.e. on the raw cell value
    prior to pydantic's own type coercion, except for the rescutkey /
    rescutvalue pair check, which runs on the fully validated model.
    """

    dewarname: str = Field(..., alias="dewarname")
    puckname: str = Field(..., alias="puckname")
    pucktype: Optional[str] = Field(None, alias="pucktype")
    crystalname: Annotated[
        str,
        Field(
            ...,
            max_length=64,
            title="Crystal Name",
            description="max_length imposed by MTZ file header "
            "format https://www.ccp4.ac.uk/html/mtzformat.html",
            alias="crystalname",
        ),
    ]
    positioninpuck: int  # Only accept positive integers between 1 and 16
    priority: Optional[int]
    comments: Optional[str]
    directory: Optional[str]
    proteinname: Optional[str] = ""  # Alphanumeric validation
    oscillation: Optional[float] = None  # Only accept positive float
    exposure: Optional[float] = None  # Only accept floats between 0 and 1
    totalrange: Optional[int] = None  # Only accept integers between 0 and 360
    transmission: Optional[int] = None  # Only accept integers between 0 and 100
    targetresolution: Optional[float] = None  # Only accept positive float
    aperture: Optional[str] = None  # Must be 1, 2, or 3 (stored as a string)
    datacollectiontype: Optional[
        str
    ] = None  # Only accept "standard", other types might be added later
    processingpipeline: Optional[
        str
    ] = ""  # Only accept "gopy", "autoproc", "xia2dials"
    spacegroupnumber: Optional[
        int
    ] = None  # Only accept positive integers between 1 and 230
    cellparameters: Optional[
        str
    ] = None  # Must be a set of six positive floats or integers
    rescutkey: Optional[str] = None  # Only accept "is" or "cchalf"
    rescutvalue: Optional[
        float
    ] = None  # Must be a positive float if rescutkey is provided
    userresolution: Optional[float] = None
    pdbid: Optional[
        str
    ] = ""  # Accepts either the format of the protein data bank code or {provided}
    autoprocfull: Optional[bool] = None
    procfull: Optional[bool] = None
    adpenabled: Optional[bool] = None
    noano: Optional[bool] = None
    ffcscampaign: Optional[bool] = None
    trustedhigh: Optional[float] = None  # Should be a float between 0 and 2.0
    autoprocextraparams: Optional[str] = None  # Optional string field
    chiphiangles: Optional[float] = None  # Optional float field between 0 and 30
    dose: Optional[float] = None  # Optional float field

    @field_validator("pucktype", mode="before")
    @classmethod
    def validate_pucktype(cls, v):
        """Accept only the literal ``"unipuck"``.

        Any other supplied value, an explicit ``None`` included, is rejected.
        """
        if v != "unipuck":
            raise ValueError(f"'{v}' is not valid. Pucktype must be 'unipuck'.")
        return v

    @field_validator("dewarname", "puckname", mode="before")
    @classmethod
    def dewarname_puckname_characters(cls, v):
        """Normalise a dewar or puck name.

        The value is stringified, stripped, upper-cased, has spaces replaced
        by underscores and a trailing ``.0`` removed.

        Raises:
            ValueError: If the value is empty or contains a newline.
        """
        if not v:
            raise ValueError("Value must be provided for dewarname and puckname.")
        v = str(v).strip().replace(" ", "_").upper()
        if "\n" in v:
            # Raised explicitly: an ``assert`` would vanish under ``python -O``.
            raise ValueError("is not valid. newline character detected.")
        return re.sub(r"\.0$", "", v)

    @field_validator("crystalname", mode="before")
    @classmethod
    def parameter_characters(cls, v):
        """Replace spaces by underscores and restrict the character set.

        Raises:
            ValueError: If the value contains a newline, or anything other
                than alphanumeric characters and ``. _ + -``.
        """
        v = str(v).replace(" ", "_")
        if "\n" in v:
            raise ValueError("is not valid. newline character detected.")
        remainder = re.sub("[._+-]", "", v)
        if not remainder.isalnum():
            raise ValueError(
                f" '{v}' is not valid. "
                "Only alphanumeric and . _ + - characters allowed."
            )
        return v

    @field_validator("directory", mode="before")
    @classmethod
    def directory_characters(cls, v):
        """Normalise a directory value and check it is a path of allowed parts.

        Surrounding ``/`` are stripped and spaces become underscores.  Known
        macros are accepted and kept verbatim in the returned value.

        Raises:
            ValueError: If the value contains a newline or characters outside
                ``[a-z0-9_.+-]``, ``/`` and the known macros.
        """
        if v:
            v = str(v).strip("/").replace(" ", "_")
            if "\n" in v:
                raise ValueError(f" '{v}' is not valid. newline character detected.")
            # Substitute macros on a copy only, so the braces pass the
            # character check while the returned path keeps its macros.
            candidate = _DIRECTORY_MACRO_RE.sub("macro", v)
            if not _DIRECTORY_RE.fullmatch(candidate):
                raise ValueError(
                    f" '{v}' is not valid. Value must be a valid path or macro."
                )
        return v

    @field_validator("positioninpuck", mode="before")
    @classmethod
    def positioninpuck_possible(cls, v):
        """Require an ``int`` between 1 and 16; no conversion is attempted."""
        if not isinstance(v, int) or v < 1 or v > 16:
            raise ValueError(
                f" '{v}' is not valid. Value must be an integer between 1 and 16."
            )
        return v

    @field_validator("priority", mode="before")
    @classmethod
    def priority_positive(cls, v):
        """Convert to ``int`` and require a value greater than zero."""
        return _validated_number(v, int, lambda n: n > 0, "a positive integer")

    @field_validator("aperture", mode="before")
    @classmethod
    def aperture_selection(cls, v):
        """Require 1, 2 or 3 and return it as a string.

        The field is declared ``Optional[str]``; an ``int`` returned from a
        "before" validator would be rejected by the string validation that
        follows, so the checked number is converted back to ``str``.
        """
        number = _validated_number(
            v, lambda raw: int(float(raw)), lambda n: n in {1, 2, 3}, "1, 2, or 3"
        )
        return None if number is None else str(number)

    @field_validator("oscillation", "targetresolution", mode="before")
    @classmethod
    def positive_float_validator(cls, v):
        """Convert to ``float`` and require a value greater than zero."""
        return _validated_number(v, float, lambda n: n > 0, "a positive float")

    @field_validator("exposure", mode="before")
    @classmethod
    def exposure_in_range(cls, v):
        """Convert to ``float`` and require 0 <= value <= 1."""
        return _validated_number(
            v, float, lambda n: 0 <= n <= 1, "a float between 0 and 1"
        )

    @field_validator("totalrange", mode="before")
    @classmethod
    def totalrange_in_range(cls, v):
        """Convert to ``int`` and require 0 <= value <= 360."""
        return _validated_number(
            v, int, lambda n: 0 <= n <= 360, "an integer between 0 and 360"
        )

    @field_validator("transmission", mode="before")
    @classmethod
    def transmission_fraction(cls, v):
        """Convert to ``int`` and require 0 <= value <= 100."""
        return _validated_number(
            v, int, lambda n: 0 <= n <= 100, "an integer between 0 and 100"
        )

    @field_validator("datacollectiontype", mode="before")
    @classmethod
    def datacollectiontype_allowed(cls, v):
        """Accept only known data collection types (case-insensitive)."""
        # Other types of data collection might be added later
        return _validated_choice(v, frozenset({"standard"}))

    @field_validator("processingpipeline", mode="before")
    @classmethod
    def processingpipeline_allowed(cls, v):
        """Accept only known processing pipelines (case-insensitive)."""
        return _validated_choice(v, frozenset({"gopy", "autoproc", "xia2dials"}))

    @field_validator("spacegroupnumber", mode="before")
    @classmethod
    def spacegroupnumber_allowed(cls, v):
        """Convert to ``int`` and require 1 <= value <= 230."""
        return _validated_number(
            v, int, lambda n: 1 <= n <= 230, "an integer between 1 and 230"
        )

    @field_validator("cellparameters", mode="before")
    @classmethod
    def cellparameters_format(cls, v):
        """Require six comma-separated positive numbers.

        The value is returned unchanged; only its format is checked.

        Raises:
            ValueError: If a part is not a number, the count is not six, or
                any number is not greater than zero.
        """
        if v:
            message = (
                f" '{v}' is not valid. "
                "Value must be a set of six positive floats or integers."
            )
            try:
                numbers = [float(part) for part in str(v).split(",")]
            except ValueError as exc:
                raise ValueError(message) from exc
            if len(numbers) != 6 or not all(number > 0 for number in numbers):
                raise ValueError(message)
        return v

    @model_validator(mode="after")
    def rescutkey_value_pair(self) -> "SpreadsheetModel":
        """Cross-check ``rescutkey`` and ``rescutvalue`` when both are given.

        Runs on the validated model because a field validator only ever sees
        one field's value and cannot compare the two.

        Raises:
            ValueError: If the key is not ``"is"`` or ``"cchalf"``, or the
                value is not greater than zero.
        """
        if self.rescutkey and self.rescutvalue is not None:
            if self.rescutkey not in {"is", "cchalf"}:
                raise ValueError("Rescutkey must be either 'is' or 'cchalf'")
            if not self.rescutvalue > 0:
                raise ValueError(
                    "Rescutvalue must be a positive float if rescutkey is provided"
                )
        return self

    @field_validator("trustedhigh", mode="before")
    @classmethod
    def trustedhigh_allowed(cls, v):
        """Convert to ``float`` and require 0 <= value <= 2.0."""
        return _validated_number(
            v, float, lambda n: 0 <= n <= 2.0, "a float between 0 and 2.0"
        )

    @field_validator("chiphiangles", mode="before")
    @classmethod
    def chiphiangles_allowed(cls, v):
        """Convert to ``float`` and require 0 <= value <= 30."""
        return _validated_number(
            v, float, lambda n: 0 <= n <= 30, "a float between 0 and 30"
        )

    @field_validator("dose", mode="before")
    @classmethod
    def dose_positive(cls, v):
        """Convert to ``float`` and require a value greater than zero."""
        return _validated_number(v, float, lambda n: n > 0, "a positive float")


class TELLModel(SpreadsheetModel):
    """Spreadsheet row extended with ordering, status and ownership fields."""

    input_order: int
    samplemountcount: int = 0
    samplestatus: str = "not present"
    puckaddress: str = "---"
    username: str
    puck_number: int
    prefix: Optional[str]
    folder: Optional[str]


class SpreadsheetResponse(BaseModel):
    """Result of processing a spreadsheet: rows, errors and summary lists."""

    data: List[SpreadsheetModel]  # Validated data rows as SpreadsheetModel instances
    errors: List[Dict[str, Any]]  # Errors encountered during validation
    raw_data: List[Dict[str, Any]]  # Raw data extracted from the spreadsheet
    dewars_count: int
    dewars: List[str]
    pucks_count: int
    pucks: List[str]
    samples_count: int
    samples: List[str]
    headers: Optional[List[str]] = None  # Add headers if needed


__all__ = ["SpreadsheetModel", "SpreadsheetResponse"]