Add column type mapping and enhance validation

Introduced a backend mapping of columns to their expected types, improving validation and error handling. Updated the UI to highlight default and corrected values, and added more detailed validation for data collection parameters.
This commit is contained in:
GotthardG
2025-01-07 15:45:08 +01:00
parent 54975b5919
commit 92306fcfa6
5 changed files with 503 additions and 401 deletions

View File

@ -4,6 +4,7 @@ from typing import Any, Optional, List, Dict
from pydantic import BaseModel, Field, field_validator
from typing_extensions import Annotated
import logging
from app.schemas import DataCollectionParameters
logging.basicConfig(level=logging.DEBUG)
logger = logging.getLogger(__name__)
@ -27,45 +28,8 @@ class SpreadsheetModel(BaseModel):
positioninpuck: int # Only accept positive integers between 1 and 16
priority: Optional[int]
comments: Optional[str]
directory: Optional[str]
proteinname: Optional[str] = "" # Alphanumeric validation
oscillation: Optional[float] = None # Only accept positive float
exposure: Optional[float] = None # Only accept positive floats between 0 and 1
totalrange: Optional[int] = None # Only accept positive integers between 0 and 360
transmission: Optional[
int
] = None # Only accept positive integers between 0 and 100
targetresolution: Optional[float] = None # Only accept positive float
aperture: Optional[str] = None # Optional string field
datacollectiontype: Optional[
str
] = None # Only accept "standard", other types might be added later
processingpipeline: Optional[
str
] = "" # Only accept "gopy", "autoproc", "xia2dials"
spacegroupnumber: Optional[
int
] = None # Only accept positive integers between 1 and 230
cellparameters: Optional[
str
] = None # Must be a set of six positive floats or integers
rescutkey: Optional[str] = None # Only accept "is" or "cchalf"
rescutvalue: Optional[
float
] = None # Must be a positive float if rescutkey is provided
userresolution: Optional[float] = None
pdbid: Optional[
str
] = "" # Accepts either the format of the protein data bank code or {provided}
autoprocfull: Optional[bool] = None
procfull: Optional[bool] = None
adpenabled: Optional[bool] = None
noano: Optional[bool] = None
ffcscampaign: Optional[bool] = None
trustedhigh: Optional[float] = None # Should be a float between 0 and 2.0
autoprocextraparams: Optional[str] = None # Optional string field
chiphiangles: Optional[float] = None # Optional float field between 0 and 30
dose: Optional[float] = None # Optional float field
data_collection_parameters: Optional[DataCollectionParameters] = None
# Add pucktype validation
@field_validator("pucktype", mode="before")
@ -99,50 +63,6 @@ class SpreadsheetModel(BaseModel):
), f" '{v}' is not valid. Only alphanumeric and . _ + - characters allowed."
return v
@field_validator("directory", mode="before")
@classmethod
def directory_characters(cls, v):
    """Normalise and validate the ``directory`` value before type coercion.

    Empty or missing values fall back to ``{sgPuck}/{sgPosition}``. Any other
    value is stringified, stripped of leading/trailing slashes and has its
    spaces replaced by underscores. Once the known macros are masked out,
    only ``a-z``, ``0-9``, ``_ . + -`` (case-insensitive) and ``/`` may
    remain.

    Returns:
        The default path for empty input, otherwise the cleaned value with
        its macros left intact.

    Raises:
        ValueError: if the value contains a newline, an unknown macro or any
            character outside the allowed set.
    """
    logger.debug(f"Validating 'directory' field with value: {repr(v)}")

    # Assign default value if v is None or empty
    if not v:
        default_value = "{sgPuck}/{sgPosition}"
        logger.warning(
            f"'directory' field is empty or None. Assigning default value: "
            f"{default_value}"
        )
        return default_value

    v = str(v).strip("/").replace(" ", "_")
    if "\n" in v:
        raise ValueError(f"'{v}' is not valid. Newline character detected.")

    valid_macros = [
        "{date}",
        "{prefix}",
        "{sgPuck}",
        "{sgPosition}",
        "{beamline}",
        "{sgPrefix}",
        "{sgPriority}",
        "{protein}",
        "{method}",
    ]
    macro_re = re.compile("|".join(re.escape(macro) for macro in valid_macros))
    # Mask the macros on a copy only: the braces would otherwise fail the
    # character check, but the returned value must keep the real macros.
    masked = macro_re.sub("macro", v)

    # A single character class accepts exactly the strings the former nested
    # pattern did, without the risk of catastrophic backtracking.
    if not re.fullmatch(r"[a-z0-9_.+/-]*", masked, re.IGNORECASE):
        raise ValueError(
            f"'{v}' is not valid. Value must be a valid path or macro."
        )
    return v
@field_validator("positioninpuck", mode="before")
@classmethod
def positioninpuck_possible(cls, v):
@ -168,198 +88,6 @@ class SpreadsheetModel(BaseModel):
) from e
return v
@field_validator("aperture", mode="before")
@classmethod
def aperture_selection(cls, v):
    """Coerce ``aperture`` to an integer and require it to be 1, 2 or 3.

    ``None`` is passed through unchanged. Other values go through
    ``int(float(v))``, so numeric strings are accepted and fractional parts
    are truncated.

    Raises:
        ValueError: if the value cannot be converted or is not 1, 2 or 3.
    """
    if v is None:
        return v
    try:
        number = int(float(v))
    except (ValueError, TypeError, OverflowError) as e:
        # OverflowError covers infinite input such as "inf".
        raise ValueError(
            f" '{v}' is not valid. Value must be 1, 2, or 3."
        ) from e
    if number not in {1, 2, 3}:
        raise ValueError(f" '{number}' is not valid. Value must be 1, 2, or 3.")
    # NOTE(review): the visible model declares this field as Optional[str]
    # while an int is returned here; confirm the field type accepts it.
    return number
@field_validator("oscillation", "targetresolution", mode="before")
@classmethod
def positive_float_validator(cls, v):
    """Coerce the value to a float and require it to be strictly positive.

    ``None`` is passed through unchanged.

    Raises:
        ValueError: if the value cannot be converted to a float, is NaN, or
            is not greater than zero.
    """
    if v is None:
        return v
    try:
        number = float(v)
    except (ValueError, TypeError) as e:
        raise ValueError(
            f" '{v}' is not valid. Value must be a positive float."
        ) from e
    # `not number > 0` (rather than `number <= 0`) also rejects NaN, which
    # compares False against everything and would otherwise slip through.
    if not number > 0:
        raise ValueError(
            f" '{number}' is not valid. Value must be a positive float."
        )
    return number
@field_validator("exposure", mode="before")
@classmethod
def exposure_in_range(cls, v):
    """Coerce ``exposure`` to a float and require ``0 <= value <= 1``.

    ``None`` is passed through unchanged.

    Raises:
        ValueError: if the value cannot be converted to a float or lies
            outside the closed interval [0, 1].
    """
    if v is None:
        return v
    try:
        v = float(v)
        within_bounds = 0 <= v <= 1
        if not within_bounds:
            raise ValueError(
                f" '{v}' is not valid. Value must be a float between 0 and 1."
            )
    except (ValueError, TypeError) as exc:
        # Conversion failures and the range failure raised just above both
        # end up here and are re-raised with the same wording.
        raise ValueError(
            f" '{v}' is not valid. Value must be a float between 0 and 1."
        ) from exc
    return v
@field_validator("totalrange", mode="before")
@classmethod
def totalrange_in_range(cls, v):
    """Coerce ``totalrange`` to an int and require ``0 <= value <= 360``.

    ``None`` is passed through unchanged. Conversion uses ``int(v)``, so
    float input is truncated towards zero.

    Raises:
        ValueError: if the value cannot be converted to an integer or lies
            outside 0..360.
    """
    if v is None:
        return v
    try:
        number = int(v)
    except (ValueError, TypeError, OverflowError) as e:
        # OverflowError covers an infinite float, which int() cannot convert.
        raise ValueError(
            f" '{v}' is not valid. Value must be an integer between 0 and 360."
        ) from e
    if not (0 <= number <= 360):
        raise ValueError(
            f" '{number}' is not valid. "
            f"Value must be an integer between 0 and 360."
        )
    return number
@field_validator("transmission", mode="before")
@classmethod
def transmission_fraction(cls, v):
    """Coerce ``transmission`` to an int and require ``0 <= value <= 100``.

    ``None`` is passed through unchanged. Conversion uses ``int(v)``, so
    float input is truncated towards zero.

    Raises:
        ValueError: if the value cannot be converted to an integer or lies
            outside 0..100.
    """
    if v is None:
        return v
    try:
        number = int(v)
    except (ValueError, TypeError, OverflowError) as e:
        # OverflowError covers an infinite float, which int() cannot convert.
        raise ValueError(
            f" '{v}' is not valid. Value must be an integer between 0 and 100."
        ) from e
    if not (0 <= number <= 100):
        raise ValueError(
            f" '{number}' is not valid. "
            f"Value must be an integer between 0 and 100."
        )
    return number
@field_validator("datacollectiontype", mode="before")
@classmethod
def datacollectiontype_allowed(cls, v):
    """Require ``datacollectiontype`` to be an allowed value.

    Falsy values (``None``, empty string) are passed through unchanged. The
    comparison is case-insensitive and the value is returned as given.

    Raises:
        ValueError: if a non-empty value is not in the allowed set.
    """
    allowed = {"standard"}  # Other types of data collection might be added later
    # str() keeps non-string input (e.g. a number) from failing on .lower()
    # with an AttributeError instead of a validation error.
    if v and str(v).lower() not in allowed:
        raise ValueError(f" '{v}' is not valid. Value must be one of {allowed}.")
    return v
@field_validator("processingpipeline", mode="before")
@classmethod
def processingpipeline_allowed(cls, v):
    """Require ``processingpipeline`` to be one of the known pipelines.

    Falsy values (``None``, empty string) are passed through unchanged. The
    comparison is case-insensitive and the value is returned as given.

    Raises:
        ValueError: if a non-empty value is not an allowed pipeline name.
    """
    # A sorted tuple keeps the order in the error message deterministic.
    allowed = ("autoproc", "gopy", "xia2dials")
    # str() keeps non-string input (e.g. a number) from failing on .lower()
    # with an AttributeError instead of a validation error.
    if v and str(v).lower() not in allowed:
        choices = ", ".join(repr(name) for name in allowed)
        raise ValueError(
            f" '{v}' is not valid. Value must be one of {{{choices}}}."
        )
    return v
@field_validator("spacegroupnumber", mode="before")
@classmethod
def spacegroupnumber_allowed(cls, v):
    """Coerce ``spacegroupnumber`` to an int and require ``1 <= value <= 230``.

    ``None`` is passed through unchanged. Conversion uses ``int(v)``, so
    float input is truncated towards zero.

    Raises:
        ValueError: if the value cannot be converted to an integer or lies
            outside 1..230.
    """
    if v is None:
        return v
    try:
        number = int(v)
    except (ValueError, TypeError, OverflowError) as e:
        # OverflowError covers an infinite float, which int() cannot convert.
        raise ValueError(
            f" '{v}' is not valid. Value must be an integer between 1 and 230."
        ) from e
    if not (1 <= number <= 230):
        raise ValueError(
            f" '{number}' is not valid. "
            f"Value must be an integer between 1 and 230."
        )
    return number
@field_validator("cellparameters", mode="before")
@classmethod
def cellparameters_format(cls, v):
    """Require ``cellparameters`` to be six comma-separated positive numbers.

    Falsy values (``None``, empty string) are passed through unchanged. A
    valid value is returned exactly as given; it is not reformatted.

    Raises:
        ValueError: if any part is not numeric, if there are not exactly six
            parts, or if any part is NaN or not greater than zero.
    """
    if not v:
        return v
    message = (
        f" '{v}' is not valid. "
        f"Value must be a set of six positive floats or integers."
    )
    try:
        # str() keeps non-string input from failing on .split() with an
        # AttributeError instead of a validation error.
        values = [float(part) for part in str(v).split(",")]
    except ValueError as e:
        raise ValueError(message) from e
    # `val > 0` is False for NaN, so NaN entries are rejected as well.
    if len(values) != 6 or not all(val > 0 for val in values):
        raise ValueError(message)
    return v
# @field_validator("rescutkey", "rescutvalue", mode="before")
# @classmethod
# def rescutkey_value_pair(cls, values):
# rescutkey = values.get("rescutkey")
# rescutvalue = values.get("rescutvalue")
# if rescutkey and rescutvalue:
# if rescutkey not in {"is", "cchalf"}:
# raise ValueError("Rescutkey must be either 'is' or 'cchalf'")
# if not isinstance(rescutvalue, float) or rescutvalue <= 0:
# raise ValueError(
# "Rescutvalue must be a positive float if rescutkey is provided"
# )
# return values
@field_validator("trustedhigh", mode="before")
@classmethod
def trustedhigh_allowed(cls, v):
    """Coerce ``trustedhigh`` to a float and require ``0 <= value <= 2.0``.

    ``None`` is passed through unchanged.

    Raises:
        ValueError: if the value cannot be converted to a float or lies
            outside the closed interval [0, 2.0].
    """
    if v is None:
        return v
    try:
        number = float(v)
    except (ValueError, TypeError) as e:
        raise ValueError(
            f" '{v}' is not valid. Value must be a float between 0 and 2.0."
        ) from e
    if not (0 <= number <= 2.0):
        raise ValueError(
            f" '{number}' is not valid. Value must be a float between 0 and 2.0."
        )
    return number
@field_validator("chiphiangles", mode="before")
@classmethod
def chiphiangles_allowed(cls, v):
    """Coerce ``chiphiangles`` to a float and require ``0 <= value <= 30``.

    ``None`` is passed through unchanged.

    Raises:
        ValueError: if the value cannot be converted to a float or lies
            outside the closed interval [0, 30].
    """
    if v is None:
        return v
    try:
        number = float(v)
    except (ValueError, TypeError) as e:
        raise ValueError(
            f" '{v}' is not valid. Value must be a float between 0 and 30."
        ) from e
    if not (0 <= number <= 30):
        raise ValueError(
            f" '{number}' is not valid. Value must be a float between 0 and 30."
        )
    return number
@field_validator("dose", mode="before")
@classmethod
def dose_positive(cls, v):
    """Coerce ``dose`` to a float and require it to be strictly positive.

    ``None`` is passed through unchanged.

    Raises:
        ValueError: if the value cannot be converted to a float, is NaN, or
            is not greater than zero.
    """
    if v is None:
        return v
    try:
        number = float(v)
    except (ValueError, TypeError) as e:
        raise ValueError(
            f" '{v}' is not valid. Value must be a positive float."
        ) from e
    # `not number > 0` (rather than `number <= 0`) also rejects NaN, which
    # compares False against everything and would otherwise slip through.
    if not number > 0:
        raise ValueError(
            f" '{number}' is not valid. Value must be a positive float."
        )
    return number
# class TELLModel(SpreadsheetModel):
# input_order: int
# samplemountcount: int = 0