
Enhanced value cleaning and validation for spreadsheet data with dynamic handling of columns and corrections. Improved feedback for users with detailed error messages and visual indicators for corrected or defaulted values. Simplified backend and frontend logic for better maintainability and usability.
124 lines
4.1 KiB
Python
124 lines
4.1 KiB
Python
import re
|
|
from typing import Any, Optional, List, Dict
|
|
|
|
from pydantic import BaseModel, Field, field_validator
|
|
from typing_extensions import Annotated
|
|
import logging
|
|
from app.schemas import DataCollectionParameters
|
|
|
|
# Configure the root logger at DEBUG level as soon as this module is imported.
# NOTE(review): basicConfig() acts on the process-wide root logger, so importing
# this schema module changes logging for the whole application - confirm this is
# intended here rather than in the application entry point.
logging.basicConfig(level=logging.DEBUG)
|
|
# Logger named after this module (``__name__``).
logger = logging.getLogger(__name__)
|
|
|
|
|
|
class SpreadsheetModel(BaseModel):
    """Schema for a single spreadsheet row describing one sample in a puck.

    All field validators run in ``mode="before"``, i.e. on the raw input value
    before pydantic applies its own type coercion. Every validation failure is
    reported by raising ``ValueError`` (never ``assert``, which is stripped
    when Python runs with ``-O``).
    """

    dewarname: str = Field(..., alias="dewarname")
    puckname: str = Field(..., alias="puckname")
    pucktype: Optional[str] = Field(None, alias="pucktype")
    crystalname: Annotated[
        str,
        Field(
            ...,
            max_length=64,
            title="Crystal Name",
            # Trailing space keeps the two concatenated literals from fusing
            # into "headerformat".
            description="max_length imposed by MTZ file header "
            "format https://www.ccp4.ac.uk/html/mtzformat.html",
            alias="crystalname",
        ),
    ]
    positioninpuck: int  # Integer between 1 and 16, see positioninpuck_possible
    # No default is declared for the next two fields, so with pydantic v2 (the
    # API this file uses) the keys must be present in the input even though
    # None is an accepted value.
    priority: Optional[int]
    comments: Optional[str]
    # NOTE(review): no validator for proteinname is defined in this class; add
    # one if alphanumeric validation is actually required.
    proteinname: Optional[str] = ""
    data_collection_parameters: Optional[DataCollectionParameters] = None

    @field_validator("pucktype", mode="before")
    @classmethod
    def validate_pucktype(cls, v):
        """Require the puck type to be exactly ``"unipuck"``.

        Any other value, including an explicitly supplied ``None``, is
        rejected.

        NOTE(review): the field is Optional with a ``None`` default, and
        pydantic does not run validators on defaults unless
        ``validate_default`` is enabled, so omitting the key passes while an
        explicit ``None`` fails - confirm this asymmetry is intended.

        Raises:
            ValueError: if ``v`` is not ``"unipuck"``.
        """
        if v != "unipuck":
            raise ValueError(f"'{v}' is not valid. Pucktype must be 'unipuck'.")
        return v

    @field_validator("dewarname", "puckname", mode="before")
    @classmethod
    def dewarname_puckname_characters(cls, v):
        """Normalise a dewar or puck name.

        The value is converted to ``str``, stripped of surrounding whitespace,
        spaces are replaced by underscores, the result is upper-cased and a
        trailing ``.0`` is removed.

        Returns:
            The normalised, non-empty name.

        Raises:
            ValueError: if the value is falsy, normalises to an empty string,
                or contains a newline character.
        """
        missing = "Value must be provided for dewarname and puckname."
        if not v:
            raise ValueError(missing)

        v = str(v).strip().replace(" ", "_").upper()
        if "\n" in v:
            raise ValueError("is not valid. newline character detected.")

        # Presumably undoes numeric cells read as floats ("1.0" -> "1");
        # TODO confirm against the spreadsheet reader.
        v = re.sub(r"\.0$", "", v)

        # Whitespace-only input (or a bare ".0") is truthy before normalisation
        # but empty afterwards, so the presence check has to be repeated.
        if not v:
            raise ValueError(missing)
        return v

    @field_validator("crystalname", mode="before")
    @classmethod
    def parameter_characters(cls, v):
        """Normalise and validate the crystal name.

        The value is converted to ``str`` and spaces are replaced by
        underscores. Apart from ``.``, ``_``, ``+`` and ``-`` only
        alphanumeric characters are accepted.

        Returns:
            The crystal name with spaces replaced by underscores.

        Raises:
            ValueError: if the value is ``None``, contains a newline, or
                contains a character outside the allowed set.
        """
        # str(None) would yield the perfectly valid-looking name "None".
        if v is None:
            raise ValueError("Value must be provided for crystalname.")

        v = str(v).replace(" ", "_")
        if "\n" in v:
            raise ValueError("is not valid. newline character detected.")

        characters = re.sub("[._+-]", "", v)
        if not characters.isalnum():
            raise ValueError(
                f" '{v}' is not valid. "
                "Only alphanumeric and . _ + - characters allowed."
            )
        return v

    @field_validator("positioninpuck", mode="before")
    @classmethod
    def positioninpuck_possible(cls, v):
        """Accept only an ``int`` position between 1 and 16 inclusive.

        Raises:
            ValueError: if ``v`` is not an ``int`` (booleans are rejected too)
                or lies outside 1..16.
        """
        # bool is a subclass of int, so True would otherwise pass as position 1.
        if isinstance(v, bool) or not isinstance(v, int) or not 1 <= v <= 16:
            raise ValueError(
                f" '{v}' is not valid. Value must be an integer between 1 and 16."
            )
        return v

    @field_validator("priority", mode="before")
    @classmethod
    def priority_positive(cls, v):
        """Convert the priority with ``int()`` and require it to be positive.

        ``None`` is passed through unchanged.

        Returns:
            ``None`` or the priority as a positive ``int``.

        Raises:
            ValueError: if the value cannot be converted by ``int()`` or the
                converted value is zero or negative.
        """
        if v is None:
            return v

        # Only the conversion itself is guarded, so the range error below is
        # raised directly instead of being caught and re-raised.
        try:
            number = int(v)
        except (ValueError, TypeError) as e:
            raise ValueError(
                f" '{v}' is not valid. Value must be a positive integer."
            ) from e

        if number <= 0:
            raise ValueError(
                f" '{number}' is not valid. Value must be a positive integer."
            )
        return number
|
|
|
|
# if not v: # Handles None or empty cases
|
|
# default_value = "{sgPuck}/{sgPosition}"
|
|
# logger.warning(
|
|
# f"'directory' field is empty or None. Assigning default value: "
|
|
# f"{default_value}"
|
|
# )
|
|
# return default_value
|
|
|
|
# class TELLModel(SpreadsheetModel):
|
|
# input_order: int
|
|
# samplemountcount: int = 0
|
|
# samplestatus: str = "not present"
|
|
# puckaddress: str = "---"
|
|
# username: str
|
|
# puck_number: int
|
|
# prefix: Optional[str]
|
|
# folder: Optional[str]
|
|
|
|
|
|
class SpreadsheetResponse(BaseModel):
    # Response schema bundling the outcome of processing a spreadsheet.
    # Documented with comments rather than a docstring so the JSON schema
    # pydantic generates for this model stays exactly as it was.

    # --- Row-level results ---
    # Rows that passed validation, held as SpreadsheetModel instances.
    data: List[SpreadsheetModel]
    # Problems encountered while validating.
    errors: List[Dict[str, Any]]
    # The data as extracted from the spreadsheet.
    raw_data: List[Dict[str, Any]]

    # --- Dewar / puck / sample listings ---
    # Each *_count presumably equals the length of the matching list; verify
    # against the code that builds this response.
    dewars_count: int
    dewars: List[str]
    pucks_count: int
    pucks: List[str]
    samples_count: int
    samples: List[str]

    # Optional list of headers; None when not supplied.
    headers: Optional[List[str]] = None
|
|
|
|
|
|
# Names exported when this module is star-imported.
__all__ = ["SpreadsheetModel", "SpreadsheetResponse"]
|