aaredb/backend/app/sample_models.py
GotthardG 92306fcfa6 Add column type mapping and enhance validation
Introduced a backend mapping for column expected types, improving validation and error handling. Updated UI to highlight default and corrected values, with additional detailed validation for data collection parameters.
2025-01-07 15:45:08 +01:00

116 lines
3.8 KiB
Python

import re
from typing import Any, Optional, List, Dict
from pydantic import BaseModel, Field, field_validator
from typing_extensions import Annotated
import logging
from app.schemas import DataCollectionParameters
# NOTE(review): basicConfig() runs at import time and configures the root
# logger (it is a no-op if the root logger already has handlers), so merely
# importing this module can switch the whole process to DEBUG output.
# Consider moving this call to the application entry point.
logging.basicConfig(level=logging.DEBUG)
# Module-level logger named after this module.
logger = logging.getLogger(__name__)
class SpreadsheetModel(BaseModel):
    """One sample row of a spreadsheet, normalised and validated.

    All validators run in ``mode="before"``, i.e. on the raw input value
    before pydantic applies its own type coercion.  Validation failures are
    raised as ``ValueError`` (never ``assert``) so that the checks stay
    active when Python runs with ``-O``.
    """

    dewarname: str = Field(..., alias="dewarname")
    puckname: str = Field(..., alias="puckname")
    pucktype: Optional[str] = Field(None, alias="pucktype")
    crystalname: Annotated[
        str,
        Field(
            ...,
            max_length=64,
            title="Crystal Name",
            description="max_length imposed by MTZ file header "
            "format https://www.ccp4.ac.uk/html/mtzformat.html",
            alias="crystalname",
        ),
    ]
    positioninpuck: int  # Only accept positive integers between 1 and 16
    # No default is declared for the next two fields, so pydantic v2 requires
    # the keys to be present in the input; their value may be None.
    priority: Optional[int]
    comments: Optional[str]
    # NOTE(review): the original comment says "Alphanumeric validation", but
    # no validator in this class checks proteinname.
    proteinname: Optional[str] = ""
    data_collection_parameters: Optional[DataCollectionParameters] = None

    @field_validator("pucktype", mode="before")
    @classmethod
    def validate_pucktype(cls, v):
        """Accept only the exact string ``"unipuck"``.

        Any other supplied value (including an explicit ``None``) is
        rejected.  The field default is not routed through this validator
        unless ``validate_default`` is enabled.

        Raises:
            ValueError: if ``v`` is not ``"unipuck"``.
        """
        if v != "unipuck":
            raise ValueError(f"'{v}' is not valid. Pucktype must be 'unipuck'.")
        return v

    @field_validator("dewarname", "puckname", mode="before")
    @classmethod
    def dewarname_puckname_characters(cls, v):
        """Normalise a dewar or puck name.

        The value is stringified, stripped, has spaces replaced by
        underscores, is upper-cased, and loses a trailing ``.0``.

        Raises:
            ValueError: if the value is missing/empty, is blank after
                normalisation, or contains a newline.
        """
        if not v:
            raise ValueError("Value must be provided for dewarname and puckname.")

        name = str(v).strip().replace(" ", "_").upper()
        # strip() already removed leading/trailing newlines, so any newline
        # still present is embedded in the name.
        if "\n" in name:
            raise ValueError("is not valid. newline character detected.")

        # Drop the ".0" suffix left behind when a numeric value such as 1.0
        # is stringified.
        name = re.sub(r"\.0$", "", name)
        if not name:
            # Whitespace-only input is truthy but normalises to nothing.
            raise ValueError("Value must be provided for dewarname and puckname.")
        return name

    @field_validator("crystalname", mode="before")
    @classmethod
    def parameter_characters(cls, v):
        """Normalise a crystal name and check its character set.

        Spaces are replaced by underscores; the result may contain only
        alphanumeric characters plus ``. _ + -``.

        Raises:
            ValueError: if the value is ``None``, contains a newline, or
                contains a disallowed character (an empty name is also
                rejected by the character check).
        """
        if v is None:
            # str(None) would become the literal name "None" and pass the
            # character check below.
            raise ValueError("Value must be provided for crystalname.")

        name = str(v).replace(" ", "_")
        if "\n" in name:
            raise ValueError("is not valid. newline character detected.")

        # Remove the permitted punctuation; what remains must be alphanumeric.
        if not re.sub(r"[._+-]", "", name).isalnum():
            raise ValueError(
                f" '{name}' is not valid. "
                "Only alphanumeric and . _ + - characters allowed."
            )
        return name

    @field_validator("positioninpuck", mode="before")
    @classmethod
    def positioninpuck_possible(cls, v):
        """Require an ``int`` in the inclusive range 1..16.

        No coercion is attempted: strings and floats are rejected.

        Raises:
            ValueError: if ``v`` is not an ``int`` or is out of range.
        """
        if not isinstance(v, int) or not 1 <= v <= 16:
            raise ValueError(
                f" '{v}' is not valid. Value must be an integer between 1 and 16."
            )
        return v

    @field_validator("priority", mode="before")
    @classmethod
    def priority_positive(cls, v):
        """Convert ``priority`` with ``int()`` and require it to be > 0.

        ``None`` is passed through unchanged.

        Raises:
            ValueError: if the value cannot be converted by ``int()`` or the
                converted value is not positive.
        """
        if v is None:
            return v

        try:
            number = int(v)
        except (ValueError, TypeError, OverflowError) as e:
            # OverflowError covers int(float("inf")), which would otherwise
            # escape as a non-validation error.
            raise ValueError(
                f" '{v}' is not valid. Value must be a positive integer."
            ) from e

        if number <= 0:
            raise ValueError(
                f" '{number}' is not valid. Value must be a positive integer."
            )
        return number
# class TELLModel(SpreadsheetModel):
# input_order: int
# samplemountcount: int = 0
# samplestatus: str = "not present"
# puckaddress: str = "---"
# username: str
# puck_number: int
# prefix: Optional[str]
# folder: Optional[str]
class SpreadsheetResponse(BaseModel):
    """Bundle of spreadsheet results: rows, errors, raw data and summaries."""

    # Validated data rows as SpreadsheetModel instances
    data: List[SpreadsheetModel]
    # Errors encountered during validation
    errors: List[Dict[str, Any]]
    # Raw data extracted from the spreadsheet
    raw_data: List[Dict[str, Any]]

    # Summary fields: an integer count paired with a list of strings for
    # dewars, pucks and samples (presumably the distinct names found in the
    # sheet; confirm against the code that builds this response).
    dewars_count: int
    dewars: List[str]
    pucks_count: int
    pucks: List[str]
    samples_count: int
    samples: List[str]

    # Add headers if needed; None when not supplied
    headers: Optional[List[str]] = None
# Names exported by a star-import of this module.
__all__ = ["SpreadsheetModel", "SpreadsheetResponse"]