Add column type mapping and enhance validation

Introduced a backend mapping of expected column types, improving validation and error handling. Updated the UI to highlight default and corrected values, and added more detailed validation for data collection parameters.
2025-01-07 15:45:08 +01:00
parent 54975b5919
commit 92306fcfa6
5 changed files with 503 additions and 401 deletions
--- a/backend/app/schemas.py
+++ b/backend/app/schemas.py
@ -1,7 +1,12 @@
 from typing import List, Optional
 from datetime import datetime
-from pydantic import BaseModel, EmailStr, constr, Field
+from pydantic import BaseModel, EmailStr, constr, Field, field_validator
 from datetime import date
+import logging
+import re
+
+logging.basicConfig(level=logging.DEBUG)
+logger = logging.getLogger(__name__)


 class loginToken(BaseModel):
@ -47,37 +52,299 @@ class DewarSerialNumber(DewarSerialNumberBase):


 class DataCollectionParameters(BaseModel):
-    priority: Optional[int] = None
-    comments: Optional[str] = None
    directory: Optional[str] = None
-    proteinname: Optional[str] = None
-    oscillation: Optional[float] = None
-    aperture: Optional[str] = None
-    exposure: Optional[float] = None
-    totalrange: Optional[int] = None
-    transmission: Optional[int] = None
-    dose: Optional[float] = None
-    targetresolution: Optional[float] = None
-    datacollectiontype: Optional[str] = None
-    processingpipeline: Optional[str] = None
-    spacegroupnumber: Optional[int] = None
-    cellparameters: Optional[str] = None
-    rescutkey: Optional[str] = None
-    rescutvalue: Optional[float] = None
+    oscillation: Optional[float] = None  # Only accept positive float
+    exposure: Optional[float] = None  # Only accept floats between 0 and 1 (inclusive)
+    totalrange: Optional[int] = None  # Only accept integers between 0 and 360 (inclusive)
+    transmission: Optional[
+        int
+    ] = None  # Only accept positive integers between 0 and 100
+    targetresolution: Optional[float] = None  # Only accept positive float
+    aperture: Optional[str] = None  # Optional string field
+    datacollectiontype: Optional[
+        str
+    ] = None  # Only accept "standard", other types might be added later
+    processingpipeline: Optional[
+        str
+    ] = ""  # Only accept "gopy", "autoproc", "xia2dials"
+    spacegroupnumber: Optional[
+        int
+    ] = None  # Only accept positive integers between 1 and 230
+    cellparameters: Optional[
+        str
+    ] = None  # Must be a set of six positive floats or integers
+    rescutkey: Optional[str] = None  # Intended values: "is" or "cchalf" (not enforced: the validator is commented out)
+    rescutvalue: Optional[
+        float
+    ] = None  # Must be a positive float if rescutkey is provided
    userresolution: Optional[float] = None
-    pdbid: Optional[str] = None
+    pdbid: Optional[
+        str
+    ] = ""  # Accepts either the format of the protein data bank code or {provided}
    autoprocfull: Optional[bool] = None
    procfull: Optional[bool] = None
    adpenabled: Optional[bool] = None
    noano: Optional[bool] = None
    ffcscampaign: Optional[bool] = None
-    trustedhigh: Optional[float] = None
-    autoprocextraparams: Optional[str] = None
-    chiphiangles: Optional[float] = None
+    trustedhigh: Optional[float] = None  # Should be a float between 0 and 2.0
+    autoprocextraparams: Optional[str] = None  # Optional string field
+    chiphiangles: Optional[float] = None  # Optional float field between 0 and 30
+    dose: Optional[float] = None  # Optional float field

    class Config:
        from_attributes = True

+    @field_validator("directory", mode="after")
+    @classmethod
+    def directory_characters(cls, v):
+        logger.debug(f"Validating 'directory' field with initial value: {repr(v)}")
+
+        # Default directory value if empty
+        if not v:  # Handles None or empty cases
+            default_value = "{sgPuck}/{sgPosition}"
+            logger.warning(
+                f"'directory' field is empty or None. Assigning default value: "
+                f"{default_value}"
+            )
+            return default_value
+
+        # Strip leading and trailing slashes; keep the stripped value for comparison
+        v = str(v).strip("/")  # Ensure it's a string with no leading/trailing slashes
+        original_value = v
+
+        # Replace spaces with underscores
+        v = v.replace(" ", "_")
+        logger.debug(f"Corrected 'directory', spaces replaced: {repr(v)}")
+
+        # Validate directory pattern with macros and allowed characters
+        valid_macros = [
+            "{date}",
+            "{prefix}",
+            "{sgPuck}",
+            "{sgPosition}",
+            "{beamline}",
+            "{sgPrefix}",
+            "{sgPriority}",
+            "{protein}",
+            "{method}",
+        ]
+        valid_macro_pattern = re.compile(
+            "|".join(re.escape(macro) for macro in valid_macros)
+        )
+
+        # Replace valid macros with a placeholder, then check the remaining characters
+        allowed_chars_pattern = "[a-z0-9_.+-/]"  # NOTE(review): "+-/" is a range, so "," also matches; move "-" last if unintended
+        v_without_macros = valid_macro_pattern.sub("macro", v)
+
+        allowed_path_pattern = re.compile(
+            f"^(({allowed_chars_pattern}+|macro)*/*)*$", re.IGNORECASE
+        )
+        if not allowed_path_pattern.match(v_without_macros):
+            raise ValueError(
+                f"'{v}' is not valid. Value must be a valid path or macro."
+            )
+
+        # Log and return corrected value
+        if v != original_value:
+            logger.info(f"Directory was corrected from '{original_value}' to '{v}'")
+        return v
+
+    @field_validator("aperture", mode="before")
+    @classmethod
+    def aperture_selection(cls, v):
+        if v is not None:
+            try:
+                v = int(float(v))
+                if v not in {1, 2, 3}:
+                    raise ValueError(f" '{v}' is not valid. Value must be 1, 2, or 3.")
+            except (ValueError, TypeError) as e:
+                raise ValueError(
+                    f" '{v}' is not valid. Value must be 1, 2, or 3."
+                ) from e
+        return v
+
+    @field_validator("oscillation", "targetresolution", mode="before")
+    @classmethod
+    def positive_float_validator(cls, v):
+        logger.debug(f"Running positive_float_validator for value: {v}")
+        if v is not None:
+            try:
+                v = float(v)
+                if v <= 0:
+                    logger.error(f"Validation failed: '{v}' is not greater than 0.")
+                    raise ValueError(
+                        f"'{v}' is not valid. Value must be a positive float."
+                    )
+            except (ValueError, TypeError) as e:
+                logger.error(f"Validation failed: '{v}' caused error {str(e)}")
+                raise ValueError(
+                    f"'{v}' is not valid. Value must be a positive float."
+                ) from e
+        logger.debug(f"Validation succeeded for value: {v}")
+        return v
+
+    @field_validator("exposure", mode="before")
+    @classmethod
+    def exposure_in_range(cls, v):
+        if v is not None:
+            try:
+                v = float(v)
+                if not (0 <= v <= 1):
+                    raise ValueError(
+                        f" '{v}' is not valid. Value must be a float between 0 and 1."
+                    )
+            except (ValueError, TypeError) as e:
+                raise ValueError(
+                    f" '{v}' is not valid. Value must be a float between 0 and 1."
+                ) from e
+        return v
+
+    @field_validator("totalrange", mode="before")
+    @classmethod
+    def totalrange_in_range(cls, v):
+        if v is not None:
+            try:
+                v = int(v)
+                if not (0 <= v <= 360):
+                    raise ValueError(
+                        f" '{v}' is not valid."
+                        f"Value must be an integer between 0 and 360."
+                    )
+            except (ValueError, TypeError) as e:
+                raise ValueError(
+                    f" '{v}' is not valid."
+                    f"Value must be an integer between 0 and 360."
+                ) from e
+        return v
+
+    @field_validator("transmission", mode="before")
+    @classmethod
+    def transmission_fraction(cls, v):
+        if v is not None:
+            try:
+                v = int(v)
+                if not (0 <= v <= 100):
+                    raise ValueError(
+                        f" '{v}' is not valid."
+                        f"Value must be an integer between 0 and 100."
+                    )
+            except (ValueError, TypeError) as e:
+                raise ValueError(
+                    f" '{v}' is not valid."
+                    f"Value must be an integer between 0 and 100."
+                ) from e
+        return v
+
+    @field_validator("datacollectiontype", mode="before")
+    @classmethod
+    def datacollectiontype_allowed(cls, v):
+        allowed = {"standard"}  # Other types of data collection might be added later
+        if v and v.lower() not in allowed:
+            raise ValueError(f" '{v}' is not valid." f"Value must be one of {allowed}.")
+        return v
+
+    @field_validator("processingpipeline", mode="before")
+    @classmethod
+    def processingpipeline_allowed(cls, v):
+        allowed = {"gopy", "autoproc", "xia2dials"}
+        if v and v.lower() not in allowed:
+            raise ValueError(f" '{v}' is not valid." f"Value must be one of {allowed}.")
+        return v
+
+    @field_validator("spacegroupnumber", mode="before")
+    @classmethod
+    def spacegroupnumber_allowed(cls, v):
+        if v is not None:
+            try:
+                v = int(v)
+                if not (1 <= v <= 230):
+                    raise ValueError(
+                        f" '{v}' is not valid."
+                        f"Value must be an integer between 1 and 230."
+                    )
+            except (ValueError, TypeError) as e:
+                raise ValueError(
+                    f" '{v}' is not valid."
+                    f"Value must be an integer between 1 and 230."
+                ) from e
+        return v
+
+    @field_validator("cellparameters", mode="before")
+    @classmethod
+    def cellparameters_format(cls, v):
+        if v:
+            values = [float(i) for i in v.split(",")]
+            if len(values) != 6 or any(val <= 0 for val in values):
+                raise ValueError(
+                    f" '{v}' is not valid."
+                    f"Value must be a set of six positive floats or integers."
+                )
+        return v
+
+    # @field_validator("rescutkey", "rescutvalue", mode="before")
+    # @classmethod
+    # def rescutkey_value_pair(cls, values):
+    #    rescutkey = values.get("rescutkey")
+    #    rescutvalue = values.get("rescutvalue")
+    #    if rescutkey and rescutvalue:
+    #        if rescutkey not in {"is", "cchalf"}:
+    #            raise ValueError("Rescutkey must be either 'is' or 'cchalf'")
+    #        if not isinstance(rescutvalue, float) or rescutvalue <= 0:
+    #            raise ValueError(
+    #                "Rescutvalue must be a positive float if rescutkey is provided"
+    #            )
+    #    return values
+
+    @field_validator("trustedhigh", mode="before")
+    @classmethod
+    def trustedhigh_allowed(cls, v):
+        if v is not None:
+            try:
+                v = float(v)
+                if not (0 <= v <= 2.0):
+                    raise ValueError(
+                        f" '{v}' is not valid."
+                        f"Value must be a float between 0 and 2.0."
+                    )
+            except (ValueError, TypeError) as e:
+                raise ValueError(
+                    f" '{v}' is not valid." f"Value must be a float between 0 and 2.0."
+                ) from e
+        return v
+
+    @field_validator("chiphiangles", mode="before")
+    @classmethod
+    def chiphiangles_allowed(cls, v):
+        if v is not None:
+            try:
+                v = float(v)
+                if not (0 <= v <= 30):
+                    raise ValueError(
+                        f" '{v}' is not valid."
+                        f"Value must be a float between 0 and 30."
+                    )
+            except (ValueError, TypeError) as e:
+                raise ValueError(
+                    f" '{v}' is not valid. Value must be a float between 0 and 30."
+                ) from e
+        return v
+
+    @field_validator("dose", mode="before")
+    @classmethod
+    def dose_positive(cls, v):
+        if v is not None:
+            try:
+                v = float(v)
+                if v <= 0:
+                    raise ValueError(
+                        f" '{v}' is not valid. Value must be a positive float."
+                    )
+            except (ValueError, TypeError) as e:
+                raise ValueError(
+                    f" '{v}' is not valid. Value must be a positive float."
+                ) from e
+        return v
+

 class SampleEventCreate(BaseModel):
    event_type: str