Add column type mapping and enhance validation

Introduced a backend mapping for column expected types, improving validation and error handling. Updated UI to highlight default and corrected values, with additional detailed validation for data collection parameters.
This commit is contained in:
GotthardG 2025-01-07 15:45:08 +01:00
parent 54975b5919
commit 92306fcfa6
5 changed files with 503 additions and 401 deletions

View File

@ -1,4 +1,5 @@
from app.sample_models import SpreadsheetModel, SpreadsheetResponse from app.sample_models import SpreadsheetResponse
from app.schemas import DataCollectionParameters
from fastapi import APIRouter, UploadFile, File, HTTPException from fastapi import APIRouter, UploadFile, File, HTTPException
import logging import logging
from app.services.spreadsheet_service import ( from app.services.spreadsheet_service import (
@ -39,6 +40,8 @@ async def download_template():
@router.post("/upload", response_model=SpreadsheetResponse) @router.post("/upload", response_model=SpreadsheetResponse)
async def upload_file(file: UploadFile = File(...)): async def upload_file(file: UploadFile = File(...)):
"""Process the uploaded spreadsheet and return validation results.""" """Process the uploaded spreadsheet and return validation results."""
from app.schemas import DataCollectionParameters
try: try:
logger.info(f"Received file: {file.filename}") logger.info(f"Received file: {file.filename}")
@ -66,19 +69,41 @@ async def upload_file(file: UploadFile = File(...)):
} }
# Construct the response model with the processed data # Construct the response model with the processed data
# Update raw_data with corrected directory values
updated_raw_data = []
for row in raw_data:
directory_value = row.get("directory") or row["data"][7]
try:
corrected_directory = DataCollectionParameters(
directory=directory_value
).directory
corrected = (
directory_value != corrected_directory
) # Check if a correction was made
row["data"][7] = corrected_directory
row["default_set"] = corrected_directory == "{sgPuck}/{sgPosition}"
row["corrected"] = corrected # Mark the row as corrected or not
updated_raw_data.append(row)
except ValidationError as e:
logger.error(
f"[Row {row['row_num']}] Error validating directory: {e.errors()}"
)
response_data = SpreadsheetResponse( response_data = SpreadsheetResponse(
data=validated_model, data=validated_model,
errors=errors, errors=errors,
raw_data=raw_data, raw_data=updated_raw_data,
dewars_count=len(dewars), dewars_count=len(dewars),
dewars=list(dewars), dewars=list(dewars),
pucks_count=len(pucks), pucks_count=len(pucks),
pucks=list(pucks), pucks=list(pucks),
samples_count=len(samples), samples_count=len(samples),
samples=list(samples), samples=list(samples),
headers=headers, # Include headers in the response headers=headers,
) )
logger.debug(f"Final updated_raw_data sent in response: {updated_raw_data}")
# Store row data for future use # Store row data for future use
for idx, row in enumerate(validated_model): for idx, row in enumerate(validated_model):
row_num = idx + 4 # Adjust row numbering if necessary row_num = idx + 4 # Adjust row numbering if necessary
@ -110,30 +135,60 @@ async def validate_cell(data: dict):
col_name = data.get("column") col_name = data.get("column")
value = data.get("value") value = data.get("value")
logger.info(f"Validating cell row {row_num}, column {col_name}, value {value}")
# Get the full data for the row # Get the full data for the row
current_row_data = row_storage.get_row(row_num) current_row_data = row_storage.get_row(row_num)
# Update the cell value if not current_row_data:
current_row_data[col_name] = importer._clean_value( logger.error(f"No data found for row {row_num}")
value, importer.get_expected_type(col_name) raise HTTPException(status_code=404, detail=f"No data found for row {row_num}")
)
# Temporarily store the updated row data
row_storage.set_row(row_num, current_row_data)
logger.info(f"Validating cell: row {row_num}, column {col_name}, value {value}")
try: try:
# Ensure we're using the full row data context for validation # Determine the expected type for the column
SpreadsheetModel( expected_type = importer.get_expected_type(col_name)
**current_row_data
) # Instantiates the Pydantic model, performing validation # Clean and validate the specific field
logger.info(f"Validation succeeded for row {row_num}, column {col_name}") cleaned_value = importer._clean_value(value, expected_type)
return {"is_valid": True, "message": ""} current_row_data[col_name] = cleaned_value # Update raw data
# If the column belongs to the nested `data_collection_parameters`
if col_name in DataCollectionParameters.model_fields:
# Ensure current_nested is a Pydantic model
nested_data = current_row_data.get("data_collection_parameters")
if isinstance(
nested_data, dict
): # If it's a dict, convert it to a Pydantic model
current_nested = DataCollectionParameters(**nested_data)
elif isinstance(
nested_data, DataCollectionParameters
): # Already a valid model
current_nested = nested_data
else: # If it's None or anything else, create a new instance
current_nested = DataCollectionParameters()
# Convert the model to a dictionary, update the specific field, and
# re-create the Pydantic model
nested_params = current_nested.model_dump()
nested_params[col_name] = cleaned_value # Update the nested field
current_row_data["data_collection_parameters"] = DataCollectionParameters(
**nested_params
)
return {"is_valid": True, "message": "", "corrected_value": cleaned_value}
except ValidationError as e: except ValidationError as e:
# Extract the first error message # Handle and log errors
message = e.errors()[0]["msg"] logger.error(f"Validation error details: {e.errors()}")
column_error = next(
(err for err in e.errors() if err.get("loc")[0] == col_name), None
)
message = column_error["msg"] if column_error else "Validation failed."
logger.error( logger.error(
f"Validation failed for row {row_num}, column {col_name}: {message}" f"Validation failed for row {row_num}, column {col_name}. Error: {message}"
) )
return {"is_valid": False, "message": message} return {"is_valid": False, "message": message}
except Exception as e:
# Log unexpected issues
logger.error(f"Unexpected error during validation: {str(e)}")
raise HTTPException(status_code=500, detail=f"Error validating cell: {str(e)}")

View File

@ -4,6 +4,7 @@ from typing import Any, Optional, List, Dict
from pydantic import BaseModel, Field, field_validator from pydantic import BaseModel, Field, field_validator
from typing_extensions import Annotated from typing_extensions import Annotated
import logging import logging
from app.schemas import DataCollectionParameters
logging.basicConfig(level=logging.DEBUG) logging.basicConfig(level=logging.DEBUG)
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
@ -27,45 +28,8 @@ class SpreadsheetModel(BaseModel):
positioninpuck: int # Only accept positive integers between 1 and 16 positioninpuck: int # Only accept positive integers between 1 and 16
priority: Optional[int] priority: Optional[int]
comments: Optional[str] comments: Optional[str]
directory: Optional[str]
proteinname: Optional[str] = "" # Alphanumeric validation proteinname: Optional[str] = "" # Alphanumeric validation
oscillation: Optional[float] = None # Only accept positive float data_collection_parameters: Optional[DataCollectionParameters] = None
exposure: Optional[float] = None # Only accept positive floats between 0 and 1
totalrange: Optional[int] = None # Only accept positive integers between 0 and 360
transmission: Optional[
int
] = None # Only accept positive integers between 0 and 100
targetresolution: Optional[float] = None # Only accept positive float
aperture: Optional[str] = None # Optional string field
datacollectiontype: Optional[
str
] = None # Only accept "standard", other types might be added later
processingpipeline: Optional[
str
] = "" # Only accept "gopy", "autoproc", "xia2dials"
spacegroupnumber: Optional[
int
] = None # Only accept positive integers between 1 and 230
cellparameters: Optional[
str
] = None # Must be a set of six positive floats or integers
rescutkey: Optional[str] = None # Only accept "is" or "cchalf"
rescutvalue: Optional[
float
] = None # Must be a positive float if rescutkey is provided
userresolution: Optional[float] = None
pdbid: Optional[
str
] = "" # Accepts either the format of the protein data bank code or {provided}
autoprocfull: Optional[bool] = None
procfull: Optional[bool] = None
adpenabled: Optional[bool] = None
noano: Optional[bool] = None
ffcscampaign: Optional[bool] = None
trustedhigh: Optional[float] = None # Should be a float between 0 and 2.0
autoprocextraparams: Optional[str] = None # Optional string field
chiphiangles: Optional[float] = None # Optional float field between 0 and 30
dose: Optional[float] = None # Optional float field
# Add pucktype validation # Add pucktype validation
@field_validator("pucktype", mode="before") @field_validator("pucktype", mode="before")
@ -99,50 +63,6 @@ class SpreadsheetModel(BaseModel):
), f" '{v}' is not valid. Only alphanumeric and . _ + - characters allowed." ), f" '{v}' is not valid. Only alphanumeric and . _ + - characters allowed."
return v return v
@field_validator("directory", mode="before")
@classmethod
def directory_characters(cls, raw):
    """Normalize and validate the 'directory' path template.

    Empty/None input is replaced by the default "{sgPuck}/{sgPosition}".
    Otherwise slashes are stripped from the ends, spaces become underscores,
    newlines are rejected, and the remainder must consist of allowed path
    characters and known macros.

    NOTE(review): the value returned on success has every recognized macro
    replaced by the literal text "macro" (see the .sub() below), so callers
    receive the substituted string rather than the original template — this
    looks unintended; confirm against the newer schemas-side validator,
    which validates on a copy instead.
    """
    logger.debug(f"Validating 'directory' field with value: {repr(raw)}")

    # Assign the default template when the field is missing or empty.
    if not raw:
        default_value = "{sgPuck}/{sgPosition}"
        logger.warning(
            f"'directory' field is empty or None. Assigning default value: "
            f"{default_value}"
        )
        return default_value

    cleaned = str(raw).strip("/").replace(" ", "_")
    if "\n" in cleaned:
        raise ValueError(f"'{cleaned}' is not valid. Newline character detected.")

    # Collapse every known macro to the token "macro" before the path check.
    macros = (
        "{date}", "{prefix}", "{sgPuck}", "{sgPosition}", "{beamline}",
        "{sgPrefix}", "{sgPriority}", "{protein}", "{method}",
    )
    cleaned = re.compile("|".join(re.escape(m) for m in macros)).sub("macro", cleaned)

    # Only lowercase alphanumerics, '_', '.', '+', '-' and '/' separators.
    allowed = "[a-z0-9_.+-]"
    path_re = re.compile(f"^(({allowed}*|{allowed}+)*/*)*$", re.IGNORECASE)
    if not path_re.match(cleaned):
        raise ValueError(
            f"'{cleaned}' is not valid. Value must be a valid path or macro."
        )
    return cleaned
@field_validator("positioninpuck", mode="before") @field_validator("positioninpuck", mode="before")
@classmethod @classmethod
def positioninpuck_possible(cls, v): def positioninpuck_possible(cls, v):
@ -168,198 +88,6 @@ class SpreadsheetModel(BaseModel):
) from e ) from e
return v return v
@field_validator("aperture", mode="before")
@classmethod
def aperture_selection(cls, value):
    """Coerce aperture to an integer and require it to be 1, 2, or 3 (None passes)."""
    if value is None:
        return value
    try:
        # Accept numeric strings like "2.0" by going through float first.
        value = int(float(value))
        if value not in {1, 2, 3}:
            raise ValueError(f" '{value}' is not valid. Value must be 1, 2, or 3.")
    except (ValueError, TypeError) as exc:
        raise ValueError(
            f" '{value}' is not valid. Value must be 1, 2, or 3."
        ) from exc
    return value
@field_validator("oscillation", "targetresolution", mode="before")
@classmethod
def positive_float_validator(cls, value):
    """Coerce to float and require a strictly positive value (None passes)."""
    if value is None:
        return value
    try:
        value = float(value)
        if value <= 0:
            raise ValueError(
                f" '{value}' is not valid. Value must be a positive float."
            )
    except (ValueError, TypeError) as exc:
        raise ValueError(
            f" '{value}' is not valid. Value must be a positive float."
        ) from exc
    return value

@field_validator("exposure", mode="before")
@classmethod
def exposure_in_range(cls, value):
    """Coerce to float and require 0 <= value <= 1 (None passes)."""
    if value is None:
        return value
    try:
        value = float(value)
        if not (0 <= value <= 1):
            raise ValueError(
                f" '{value}' is not valid. Value must be a float between 0 and 1."
            )
    except (ValueError, TypeError) as exc:
        raise ValueError(
            f" '{value}' is not valid. Value must be a float between 0 and 1."
        ) from exc
    return value

@field_validator("totalrange", mode="before")
@classmethod
def totalrange_in_range(cls, value):
    """Coerce to int and require 0 <= value <= 360 (None passes)."""
    if value is None:
        return value
    try:
        value = int(value)
        if not (0 <= value <= 360):
            # Message kept byte-identical to the historical wording
            # (no space after the first period).
            raise ValueError(
                f" '{value}' is not valid.Value must be an integer between 0 and 360."
            )
    except (ValueError, TypeError) as exc:
        raise ValueError(
            f" '{value}' is not valid.Value must be an integer between 0 and 360."
        ) from exc
    return value

@field_validator("transmission", mode="before")
@classmethod
def transmission_fraction(cls, value):
    """Coerce to int and require 0 <= value <= 100 (None passes)."""
    if value is None:
        return value
    try:
        value = int(value)
        if not (0 <= value <= 100):
            raise ValueError(
                f" '{value}' is not valid.Value must be an integer between 0 and 100."
            )
    except (ValueError, TypeError) as exc:
        raise ValueError(
            f" '{value}' is not valid.Value must be an integer between 0 and 100."
        ) from exc
    return value
@field_validator("datacollectiontype", mode="before")
@classmethod
def datacollectiontype_allowed(cls, value):
    """Accept only 'standard' (case-insensitive); falsy values pass through."""
    allowed = {"standard"}  # Other types of data collection might be added later
    if value and value.lower() not in allowed:
        raise ValueError(f" '{value}' is not valid.Value must be one of {allowed}.")
    return value

@field_validator("processingpipeline", mode="before")
@classmethod
def processingpipeline_allowed(cls, value):
    """Accept only 'gopy', 'autoproc' or 'xia2dials' (case-insensitive)."""
    allowed = {"gopy", "autoproc", "xia2dials"}
    if value and value.lower() not in allowed:
        raise ValueError(f" '{value}' is not valid.Value must be one of {allowed}.")
    return value

@field_validator("spacegroupnumber", mode="before")
@classmethod
def spacegroupnumber_allowed(cls, value):
    """Coerce to int and require 1 <= value <= 230 (None passes)."""
    if value is None:
        return value
    try:
        value = int(value)
        if not (1 <= value <= 230):
            raise ValueError(
                f" '{value}' is not valid.Value must be an integer between 1 and 230."
            )
    except (ValueError, TypeError) as exc:
        raise ValueError(
            f" '{value}' is not valid.Value must be an integer between 1 and 230."
        ) from exc
    return value
@field_validator("cellparameters", mode="before")
@classmethod
def cellparameters_format(cls, v):
    """Require six comma-separated positive numbers (unit-cell parameters).

    Falsy input passes through unchanged. Raises ValueError with the field's
    own message for malformed input instead of leaking float()'s raw parse
    error (e.g. "could not convert string to float") to the caller, and
    tolerates non-string input via str() instead of raising AttributeError.
    """
    if v:
        try:
            values = [float(part) for part in str(v).split(",")]
        except (ValueError, TypeError) as e:
            raise ValueError(
                f" '{v}' is not valid."
                f"Value must be a set of six positive floats or integers."
            ) from e
        if len(values) != 6 or any(val <= 0 for val in values):
            raise ValueError(
                f" '{v}' is not valid."
                f"Value must be a set of six positive floats or integers."
            )
    return v
# @field_validator("rescutkey", "rescutvalue", mode="before")
# @classmethod
# def rescutkey_value_pair(cls, values):
# rescutkey = values.get("rescutkey")
# rescutvalue = values.get("rescutvalue")
# if rescutkey and rescutvalue:
# if rescutkey not in {"is", "cchalf"}:
# raise ValueError("Rescutkey must be either 'is' or 'cchalf'")
# if not isinstance(rescutvalue, float) or rescutvalue <= 0:
# raise ValueError(
# "Rescutvalue must be a positive float if rescutkey is provided"
# )
# return values
@field_validator("trustedhigh", mode="before")
@classmethod
def trustedhigh_allowed(cls, value):
    """Coerce to float and require 0 <= value <= 2.0 (None passes)."""
    if value is None:
        return value
    try:
        value = float(value)
        if not (0 <= value <= 2.0):
            raise ValueError(
                f" '{value}' is not valid.Value must be a float between 0 and 2.0."
            )
    except (ValueError, TypeError) as exc:
        raise ValueError(
            f" '{value}' is not valid.Value must be a float between 0 and 2.0."
        ) from exc
    return value

@field_validator("chiphiangles", mode="before")
@classmethod
def chiphiangles_allowed(cls, value):
    """Coerce to float and require 0 <= value <= 30 (None passes)."""
    if value is None:
        return value
    try:
        value = float(value)
        if not (0 <= value <= 30):
            # Historical wording: no space after the first period here...
            raise ValueError(
                f" '{value}' is not valid.Value must be a float between 0 and 30."
            )
    except (ValueError, TypeError) as exc:
        # ...but a space in the coercion-failure message. Kept byte-identical.
        raise ValueError(
            f" '{value}' is not valid. Value must be a float between 0 and 30."
        ) from exc
    return value

@field_validator("dose", mode="before")
@classmethod
def dose_positive(cls, value):
    """Coerce to float and require a strictly positive value (None passes)."""
    if value is None:
        return value
    try:
        value = float(value)
        if value <= 0:
            raise ValueError(
                f" '{value}' is not valid. Value must be a positive float."
            )
    except (ValueError, TypeError) as exc:
        raise ValueError(
            f" '{value}' is not valid. Value must be a positive float."
        ) from exc
    return value
# class TELLModel(SpreadsheetModel): # class TELLModel(SpreadsheetModel):
# input_order: int # input_order: int
# samplemountcount: int = 0 # samplemountcount: int = 0

View File

@ -1,7 +1,12 @@
from typing import List, Optional from typing import List, Optional
from datetime import datetime from datetime import datetime
from pydantic import BaseModel, EmailStr, constr, Field from pydantic import BaseModel, EmailStr, constr, Field, field_validator
from datetime import date from datetime import date
import logging
import re
logging.basicConfig(level=logging.DEBUG)
logger = logging.getLogger(__name__)
class loginToken(BaseModel): class loginToken(BaseModel):
@ -47,37 +52,299 @@ class DewarSerialNumber(DewarSerialNumberBase):
class DataCollectionParameters(BaseModel): class DataCollectionParameters(BaseModel):
priority: Optional[int] = None
comments: Optional[str] = None
directory: Optional[str] = None directory: Optional[str] = None
proteinname: Optional[str] = None oscillation: Optional[float] = None # Only accept positive float
oscillation: Optional[float] = None exposure: Optional[float] = None # Only accept positive floats between 0 and 1
aperture: Optional[str] = None totalrange: Optional[int] = None # Only accept positive integers between 0 and 360
exposure: Optional[float] = None transmission: Optional[
totalrange: Optional[int] = None int
transmission: Optional[int] = None ] = None # Only accept positive integers between 0 and 100
dose: Optional[float] = None targetresolution: Optional[float] = None # Only accept positive float
targetresolution: Optional[float] = None aperture: Optional[str] = None # Optional string field
datacollectiontype: Optional[str] = None datacollectiontype: Optional[
processingpipeline: Optional[str] = None str
spacegroupnumber: Optional[int] = None ] = None # Only accept "standard", other types might be added later
cellparameters: Optional[str] = None processingpipeline: Optional[
rescutkey: Optional[str] = None str
rescutvalue: Optional[float] = None ] = "" # Only accept "gopy", "autoproc", "xia2dials"
spacegroupnumber: Optional[
int
] = None # Only accept positive integers between 1 and 230
cellparameters: Optional[
str
] = None # Must be a set of six positive floats or integers
rescutkey: Optional[str] = None # Only accept "is" or "cchalf"
rescutvalue: Optional[
float
] = None # Must be a positive float if rescutkey is provided
userresolution: Optional[float] = None userresolution: Optional[float] = None
pdbid: Optional[str] = None pdbid: Optional[
str
] = "" # Accepts either the format of the protein data bank code or {provided}
autoprocfull: Optional[bool] = None autoprocfull: Optional[bool] = None
procfull: Optional[bool] = None procfull: Optional[bool] = None
adpenabled: Optional[bool] = None adpenabled: Optional[bool] = None
noano: Optional[bool] = None noano: Optional[bool] = None
ffcscampaign: Optional[bool] = None ffcscampaign: Optional[bool] = None
trustedhigh: Optional[float] = None trustedhigh: Optional[float] = None # Should be a float between 0 and 2.0
autoprocextraparams: Optional[str] = None autoprocextraparams: Optional[str] = None # Optional string field
chiphiangles: Optional[float] = None chiphiangles: Optional[float] = None # Optional float field between 0 and 30
dose: Optional[float] = None # Optional float field
class Config: class Config:
from_attributes = True from_attributes = True
@field_validator("directory", mode="after")
@classmethod
def directory_characters(cls, v):
    """Normalize and validate the 'directory' path template.

    Empty/None input is replaced by the default "{sgPuck}/{sgPosition}".
    Otherwise trailing/leading slashes are stripped, spaces become
    underscores, and the value (with known macros collapsed out) must be a
    path built from allowed characters.

    Returns the corrected value; logs when a correction was applied.
    """
    logger.debug(f"Validating 'directory' field with initial value: {repr(v)}")

    # Default directory value if empty
    if not v:  # Handles None or empty cases
        default_value = "{sgPuck}/{sgPosition}"
        logger.warning(
            f"'directory' field is empty or None. Assigning default value: "
            f"{default_value}"
        )
        return default_value

    # Strip slashes and keep the pre-correction value for comparison.
    v = str(v).strip("/")
    original_value = v
    v = v.replace(" ", "_")
    logger.debug(f"Corrected 'directory', spaces replaced: {repr(v)}")

    valid_macros = [
        "{date}",
        "{prefix}",
        "{sgPuck}",
        "{sgPosition}",
        "{beamline}",
        "{sgPrefix}",
        "{sgPriority}",
        "{protein}",
        "{method}",
    ]
    valid_macro_pattern = re.compile(
        "|".join(re.escape(macro) for macro in valid_macros)
    )

    # BUGFIX: the previous class "[a-z0-9_.+-/]" made '+-/' a character
    # RANGE (ASCII 43-47), silently admitting ',' as well. '-' must sit at
    # the end of the class to be a literal hyphen.
    allowed_chars_pattern = "[a-z0-9_.+/-]"
    v_without_macros = valid_macro_pattern.sub("macro", v)

    allowed_path_pattern = re.compile(
        f"^(({allowed_chars_pattern}+|macro)*/*)*$", re.IGNORECASE
    )
    if not allowed_path_pattern.match(v_without_macros):
        raise ValueError(
            f"'{v}' is not valid. Value must be a valid path or macro."
        )

    # Log and return corrected value
    if v != original_value:
        logger.info(f"Directory was corrected from '{original_value}' to '{v}'")
    return v
@field_validator("aperture", mode="before")
@classmethod
def aperture_selection(cls, value):
    """Coerce aperture to an integer and require it to be 1, 2, or 3 (None passes)."""
    if value is None:
        return value
    try:
        # Accept numeric strings like "2.0" by going through float first.
        value = int(float(value))
        if value not in {1, 2, 3}:
            raise ValueError(f" '{value}' is not valid. Value must be 1, 2, or 3.")
    except (ValueError, TypeError) as exc:
        raise ValueError(
            f" '{value}' is not valid. Value must be 1, 2, or 3."
        ) from exc
    return value
@field_validator("oscillation", "targetresolution", mode="before")
@classmethod
def positive_float_validator(cls, value):
    """Coerce to float and require a strictly positive value (None passes)."""
    logger.debug(f"Running positive_float_validator for value: {value}")
    if value is None:
        logger.debug(f"Validation succeeded for value: {value}")
        return value
    try:
        value = float(value)
        if value <= 0:
            logger.error(f"Validation failed: '{value}' is not greater than 0.")
            raise ValueError(
                f"'{value}' is not valid. Value must be a positive float."
            )
    except (ValueError, TypeError) as exc:
        logger.error(f"Validation failed: '{value}' caused error {str(exc)}")
        raise ValueError(
            f"'{value}' is not valid. Value must be a positive float."
        ) from exc
    logger.debug(f"Validation succeeded for value: {value}")
    return value

@field_validator("exposure", mode="before")
@classmethod
def exposure_in_range(cls, value):
    """Coerce to float and require 0 <= value <= 1 (None passes)."""
    if value is None:
        return value
    try:
        value = float(value)
        if not (0 <= value <= 1):
            raise ValueError(
                f" '{value}' is not valid. Value must be a float between 0 and 1."
            )
    except (ValueError, TypeError) as exc:
        raise ValueError(
            f" '{value}' is not valid. Value must be a float between 0 and 1."
        ) from exc
    return value

@field_validator("totalrange", mode="before")
@classmethod
def totalrange_in_range(cls, value):
    """Coerce to int and require 0 <= value <= 360 (None passes)."""
    if value is None:
        return value
    try:
        value = int(value)
        if not (0 <= value <= 360):
            # Message kept byte-identical to the historical wording
            # (no space after the first period).
            raise ValueError(
                f" '{value}' is not valid.Value must be an integer between 0 and 360."
            )
    except (ValueError, TypeError) as exc:
        raise ValueError(
            f" '{value}' is not valid.Value must be an integer between 0 and 360."
        ) from exc
    return value

@field_validator("transmission", mode="before")
@classmethod
def transmission_fraction(cls, value):
    """Coerce to int and require 0 <= value <= 100 (None passes)."""
    if value is None:
        return value
    try:
        value = int(value)
        if not (0 <= value <= 100):
            raise ValueError(
                f" '{value}' is not valid.Value must be an integer between 0 and 100."
            )
    except (ValueError, TypeError) as exc:
        raise ValueError(
            f" '{value}' is not valid.Value must be an integer between 0 and 100."
        ) from exc
    return value
@field_validator("datacollectiontype", mode="before")
@classmethod
def datacollectiontype_allowed(cls, value):
    """Accept only 'standard' (case-insensitive); falsy values pass through."""
    allowed = {"standard"}  # Other types of data collection might be added later
    if value and value.lower() not in allowed:
        raise ValueError(f" '{value}' is not valid.Value must be one of {allowed}.")
    return value

@field_validator("processingpipeline", mode="before")
@classmethod
def processingpipeline_allowed(cls, value):
    """Accept only 'gopy', 'autoproc' or 'xia2dials' (case-insensitive)."""
    allowed = {"gopy", "autoproc", "xia2dials"}
    if value and value.lower() not in allowed:
        raise ValueError(f" '{value}' is not valid.Value must be one of {allowed}.")
    return value

@field_validator("spacegroupnumber", mode="before")
@classmethod
def spacegroupnumber_allowed(cls, value):
    """Coerce to int and require 1 <= value <= 230 (None passes)."""
    if value is None:
        return value
    try:
        value = int(value)
        if not (1 <= value <= 230):
            raise ValueError(
                f" '{value}' is not valid.Value must be an integer between 1 and 230."
            )
    except (ValueError, TypeError) as exc:
        raise ValueError(
            f" '{value}' is not valid.Value must be an integer between 1 and 230."
        ) from exc
    return value
@field_validator("cellparameters", mode="before")
@classmethod
def cellparameters_format(cls, v):
    """Require six comma-separated positive numbers (unit-cell parameters).

    Falsy input passes through unchanged. Raises ValueError with the field's
    own message for malformed input instead of leaking float()'s raw parse
    error (e.g. "could not convert string to float") to the caller, and
    tolerates non-string input via str() instead of raising AttributeError.
    """
    if v:
        try:
            values = [float(part) for part in str(v).split(",")]
        except (ValueError, TypeError) as e:
            raise ValueError(
                f" '{v}' is not valid."
                f"Value must be a set of six positive floats or integers."
            ) from e
        if len(values) != 6 or any(val <= 0 for val in values):
            raise ValueError(
                f" '{v}' is not valid."
                f"Value must be a set of six positive floats or integers."
            )
    return v
# @field_validator("rescutkey", "rescutvalue", mode="before")
# @classmethod
# def rescutkey_value_pair(cls, values):
# rescutkey = values.get("rescutkey")
# rescutvalue = values.get("rescutvalue")
# if rescutkey and rescutvalue:
# if rescutkey not in {"is", "cchalf"}:
# raise ValueError("Rescutkey must be either 'is' or 'cchalf'")
# if not isinstance(rescutvalue, float) or rescutvalue <= 0:
# raise ValueError(
# "Rescutvalue must be a positive float if rescutkey is provided"
# )
# return values
@field_validator("trustedhigh", mode="before")
@classmethod
def trustedhigh_allowed(cls, value):
    """Coerce to float and require 0 <= value <= 2.0 (None passes)."""
    if value is None:
        return value
    try:
        value = float(value)
        if not (0 <= value <= 2.0):
            raise ValueError(
                f" '{value}' is not valid.Value must be a float between 0 and 2.0."
            )
    except (ValueError, TypeError) as exc:
        raise ValueError(
            f" '{value}' is not valid.Value must be a float between 0 and 2.0."
        ) from exc
    return value

@field_validator("chiphiangles", mode="before")
@classmethod
def chiphiangles_allowed(cls, value):
    """Coerce to float and require 0 <= value <= 30 (None passes)."""
    if value is None:
        return value
    try:
        value = float(value)
        if not (0 <= value <= 30):
            # Historical wording: no space after the first period here...
            raise ValueError(
                f" '{value}' is not valid.Value must be a float between 0 and 30."
            )
    except (ValueError, TypeError) as exc:
        # ...but a space in the coercion-failure message. Kept byte-identical.
        raise ValueError(
            f" '{value}' is not valid. Value must be a float between 0 and 30."
        ) from exc
    return value

@field_validator("dose", mode="before")
@classmethod
def dose_positive(cls, value):
    """Coerce to float and require a strictly positive value (None passes)."""
    if value is None:
        return value
    try:
        value = float(value)
        if value <= 0:
            raise ValueError(
                f" '{value}' is not valid. Value must be a positive float."
            )
    except (ValueError, TypeError) as exc:
        raise ValueError(
            f" '{value}' is not valid. Value must be a positive float."
        ) from exc
    return value
class SampleEventCreate(BaseModel): class SampleEventCreate(BaseModel):
event_type: str event_type: str

View File

@ -18,6 +18,49 @@ class SampleSpreadsheetImporter:
self.filename = None self.filename = None
self.model = None self.model = None
def get_expected_type(self, column_name: str) -> type:
    """Return the expected Python type for a spreadsheet column.

    Unknown column names fall back to str, matching how free-text cells
    are treated elsewhere in the importer.
    """
    # Columns grouped by target type instead of one flat dict.
    str_columns = (
        "dewarname", "puckname", "pucktype", "crystalname", "comments",
        "proteinname", "directory", "aperture", "datacollectiontype",
        "processingpipeline", "cellparameters", "rescutkey", "pdbid",
        "autoprocextraparams",
    )
    int_columns = (
        "positioninpuck", "priority", "totalrange", "transmission",
        "spacegroupnumber",
    )
    float_columns = (
        "oscillation", "exposure", "targetresolution", "rescutvalue",
        "userresolution", "trustedhigh", "chiphiangles", "dose",
    )
    bool_columns = ("autoprocfull", "procfull", "adpenabled", "noano", "ffcscampaign")

    for expected, names in (
        (str, str_columns),
        (int, int_columns),
        (float, float_columns),
        (bool, bool_columns),
    ):
        if column_name in names:
            return expected
    # Default for unmapped columns.
    return str
def _clean_value(self, value, expected_type=None): def _clean_value(self, value, expected_type=None):
"""Clean value by converting it to the expected type and handle edge cases.""" """Clean value by converting it to the expected type and handle edge cases."""
if value is None: if value is None:
@ -139,13 +182,13 @@ class SampleSpreadsheetImporter:
continue continue
# Record raw data for later use # Record raw data for later use
raw_data.append({"row_num": index + 4, "data": row}) raw_data.append({"row_num": index + 4, "data": list(row)})
# Pad the row to ensure it has the expected number of columns # Ensure row has the expected number of columns
if len(row) < expected_columns: if len(row) < expected_columns:
row = list(row) + [None] * (expected_columns - len(row)) row = list(row) + [None] * (expected_columns - len(row))
# Prepare the record with the cleaned values # Prepare the record with cleaned values
record = { record = {
"dewarname": self._clean_value(row[0], str), "dewarname": self._clean_value(row[0], str),
"puckname": self._clean_value(row[1], str), "puckname": self._clean_value(row[1], str),
@ -154,8 +197,10 @@ class SampleSpreadsheetImporter:
"positioninpuck": self._clean_value(row[4], int), "positioninpuck": self._clean_value(row[4], int),
"priority": self._clean_value(row[5], int), "priority": self._clean_value(row[5], int),
"comments": self._clean_value(row[6], str), "comments": self._clean_value(row[6], str),
"directory": self._clean_value(row[7], str),
"proteinname": self._clean_value(row[8], str), "proteinname": self._clean_value(row[8], str),
}
record["data_collection_parameters"] = {
"directory": self._clean_value(row[7], str),
"oscillation": self._clean_value(row[9], float), "oscillation": self._clean_value(row[9], float),
"aperture": self._clean_value(row[10], str), "aperture": self._clean_value(row[10], str),
"exposure": self._clean_value(row[11], float), "exposure": self._clean_value(row[11], float),
@ -182,69 +227,45 @@ class SampleSpreadsheetImporter:
} }
try: try:
# Validate the record
validated_record = SpreadsheetModel(**record) validated_record = SpreadsheetModel(**record)
# Update the raw data with assigned default values
if (
validated_record.directory == "{sgPuck}/{sgPosition}"
and row[7] is None
):
row_list = list(row)
row_list[
7
] = validated_record.directory # Set the field to the default value
raw_data[-1]["data"] = row_list
raw_data[-1][
"default_set"
] = True # Mark this row as having a default value assigned
# Get the corrected `directory`
corrected_directory = (
validated_record.data_collection_parameters.directory
)
# Update `raw_data` to reflect the corrected value
raw_data[-1]["data"][
7
] = corrected_directory # Replace directory in raw data
raw_data[-1][
"directory"
] = corrected_directory # Add a top-level "directory" key
raw_data[-1]["default_set"] = (
corrected_directory == "{sgPuck}/{sgPosition}"
)
# Add validated record to the model
model.append(validated_record) model.append(validated_record)
logger.debug(f"Row {index + 4} processed and validated successfully")
except ValidationError as e: except ValidationError as e:
logger.error(f"Validation error in row {index + 4}: {e}") logger.error(f"Validation error in row {index + 4}: {e}")
for error in e.errors(): for error in e.errors():
field = error["loc"][0] field_path = error["loc"]
msg = error["msg"] msg = error["msg"]
# Map field name (which is the key in `record`) to its index in the
# row if field_path[0] == "data_collection_parameters":
field_to_col = { subfield = field_path[1]
"dewarname": 0, column_index = headers.index(subfield)
"puckname": 1, else:
"pucktype": 2, field = field_path[0]
"crystalname": 3, column_index = headers.index(field)
"positioninpuck": 4,
"priority": 5,
"comments": 6,
"directory": 7,
"proteinname": 8,
"oscillation": 9,
"aperture": 10,
"exposure": 11,
"totalrange": 12,
"transmission": 13,
"dose": 14,
"targetresolution": 15,
"datacollectiontype": 16,
"processingpipeline": 17,
"spacegroupnumber": 18,
"cellparameters": 19,
"rescutkey": 20,
"rescutvalue": 21,
"userresolution": 22,
"pdbid": 23,
"autoprocfull": 24,
"procfull": 25,
"adpenabled": 26,
"noano": 27,
"ffcscampaign": 28,
"trustedhigh": 29,
"autoprocextraparams": 30,
"chiphiangles": 31,
}
column_index = field_to_col[field]
error_info = { error_info = {
"row": index + 4, "row": index + 4,
"cell": column_index, "cell": column_index,
"value": row[column_index], # Value that caused the error "value": row[column_index],
"message": msg, "message": msg,
} }
errors.append(error_info) errors.append(error_info)

View File

@ -79,16 +79,17 @@ const SpreadsheetTable = ({
const errorMap = generateErrorMap(localErrors); const errorMap = generateErrorMap(localErrors);
useEffect(() => { useEffect(() => {
const initialNonEditableCells = new Set(); const updatedNonEditableCells = new Set();
raw_data.forEach((row, rowIndex) => { raw_data.forEach((row, rowIndex) => {
headers.forEach((_, colIndex) => { headers.forEach((_, colIndex) => {
const key = `${row.row_num}-${headers[colIndex]}`; const key = `${row.row_num}-${headers[colIndex]}`;
if (!errorMap.has(key)) { if (!errorMap.has(key)) {
initialNonEditableCells.add(`${rowIndex}-${colIndex}`); updatedNonEditableCells.add(`${rowIndex}-${colIndex}`);
} }
}); });
}); });
setNonEditableCells(initialNonEditableCells); setNonEditableCells(updatedNonEditableCells);
console.log("Recalculated nonEditableCells:", updatedNonEditableCells);
}, [raw_data, headers, errorMap]); }, [raw_data, headers, errorMap]);
const handleCellEdit = async (rowIndex, colIndex) => { const handleCellEdit = async (rowIndex, colIndex) => {
@ -97,14 +98,15 @@ const SpreadsheetTable = ({
const currentRow = updatedRawData[rowIndex]; const currentRow = updatedRawData[rowIndex];
const newValue = editingCell[`${rowIndex}-${colIndex}`]; const newValue = editingCell[`${rowIndex}-${colIndex}`];
if (newValue === undefined) return; if (newValue === undefined) return; // Ensure value is provided
// Prepare for validation request
if (!currentRow.data) { if (!currentRow.data) {
currentRow.data = []; currentRow.data = [];
} }
currentRow.data[colIndex] = newValue; currentRow.data[colIndex] = newValue;
// Reset editing state
setEditingCell((prev) => { setEditingCell((prev) => {
const updated = { ...prev }; const updated = { ...prev };
delete updated[`${rowIndex}-${colIndex}`]; delete updated[`${rowIndex}-${colIndex}`];
@ -115,27 +117,46 @@ const SpreadsheetTable = ({
const response = await SpreadsheetService.validateCellValidateCellPost({ const response = await SpreadsheetService.validateCellValidateCellPost({
row: currentRow.row_num, row: currentRow.row_num,
column: columnName, column: columnName,
value: newValue value: newValue,
}); });
if (response.is_valid !== undefined) { if (response && response.is_valid !== undefined) {
if (response.is_valid) { if (response.is_valid) {
// Handle validation success (remove error)
const correctedValue = response.corrected_value ?? newValue;
currentRow.data[colIndex] = correctedValue;
updatedRawData[rowIndex] = currentRow;
setRawData(updatedRawData); // Update table data
// Remove error associated with this cell
const updatedErrors = localErrors.filter( const updatedErrors = localErrors.filter(
(error) => !(error.row === currentRow.row_num && error.cell === colIndex) (error) => !(error.row === currentRow.row_num && error.cell === colIndex)
); );
setLocalErrors(updatedErrors); setLocalErrors(updatedErrors);
// Update non-editable state
setNonEditableCells((prev) => new Set([...prev, `${rowIndex}-${colIndex}`])); setNonEditableCells((prev) => new Set([...prev, `${rowIndex}-${colIndex}`]));
} else { } else {
// Handle validation failure (add error)
const errorMessage = response.message || "Invalid value.";
const newError = {
row: currentRow.row_num,
cell: colIndex,
message: errorMessage,
};
const updatedErrors = [ const updatedErrors = [
...localErrors, ...localErrors.filter((error) => !(error.row === newError.row && error.cell === newError.cell)), // Avoid duplicates
{ row: currentRow.row_num, cell: colIndex, message: response.message || 'Invalid value.' } newError,
]; ];
setLocalErrors(updatedErrors); setLocalErrors(updatedErrors);
} }
} else {
console.error("Unexpected response from backend:", response);
} }
setRawData(updatedRawData);
} catch (error) { } catch (error) {
console.error('Validation failed:', error); console.error("Validation request failed:", error);
} }
}; };
@ -431,46 +452,56 @@ const SpreadsheetTable = ({
{headers.map((header, colIndex) => { {headers.map((header, colIndex) => {
const key = `${row.row_num}-${header}`; const key = `${row.row_num}-${header}`;
const errorMessage = errorMap.get(key); const errorMessage = errorMap.get(key);
const isInvalid = !!errorMessage; const isInvalid = !!errorMap.get(`${row.row_num}-${headers[colIndex]}`);
const cellValue = (row.data && row.data[colIndex]) || ""; const cellValue = row.data[colIndex];
const editingValue = editingCell[`${rowIndex}-${colIndex}`]; const editingValue = editingCell[`${rowIndex}-${colIndex}`];
const isReadonly = !isInvalid; const isReadonly = !isInvalid && editingCell[`${rowIndex}-${colIndex}`] === undefined;
const isCorrected = colIndex === 7 && row.corrected; // Corrected field exists and is true
const isDefaultAssigned = colIndex === 7 && row.default_set; // Directory column (index 7) and marked as default_set const isDefaultAssigned = colIndex === 7 && row.default_set; // Default-assigned field exists and is true
return ( return (
<TableCell <TableCell
key={colIndex} key={colIndex}
align="center" align="center"
style={{ style={{
backgroundColor: isDefaultAssigned ? "#e6fbe6" : "transparent", // Light green for default backgroundColor:
color: isDefaultAssigned ? "#1b5e20" : "inherit", // Dark green text for default colIndex === 7 && isDefaultAssigned
? "#e6fbe6" // Default value for "directory"
: colIndex === 7 && isCorrected
? "#fff8e1" // Corrected directory
: "transparent", // No highlight for other columns
color: colIndex === 7 && isDefaultAssigned
? "#1b5e20" // Dark green text for default
: "inherit", // Normal text
}} }}
> >
<Tooltip title={errorMessage || ""} arrow disableHoverListener={!isInvalid}> <Tooltip
title={
colIndex === 7 && isCorrected
? "Value corrected automatically by the system."
: errorMessage || "" // Show validation errors for other columns
}
arrow
disableHoverListener={colIndex !== 7 && !errorMessage}
>
{isInvalid ? ( {isInvalid ? (
<TextField <TextField
value={editingValue !== undefined ? editingValue : cellValue} value={editingValue !== undefined ? editingValue : cellValue} // Ensure this reflects corrected value
onChange={(e) => onChange={(e) =>
setEditingCell({ setEditingCell({
...editingCell, ...editingCell,
[`${rowIndex}-${colIndex}`]: e.target.value, [`${rowIndex}-${colIndex}`]: e.target.value,
}) })
} }
onKeyDown={(e) => {
if (e.key === "Enter") {
handleCellEdit(rowIndex, colIndex);
}
}}
onBlur={() => handleCellBlur(rowIndex, colIndex)} onBlur={() => handleCellBlur(rowIndex, colIndex)}
error={isInvalid} error={isInvalid}
fullWidth fullWidth
variant="outlined" variant="outlined"
size="small" size="small"
disabled={isReadonly} disabled={!isInvalid}
/> />
) : ( ) : (
cellValue cellValue // This should reflect the updated 'raw_data'
)} )}
</Tooltip> </Tooltip>
</TableCell> </TableCell>