Add spreadsheet enhancements and default handling

Implement a toggleable spreadsheet UI component for sample data, add sample fields such as priority and comments, and improve backend validation. When the "directory" field is missing, the backend now assigns the default value "{sgPuck}/{sgPosition}", and the front end highlights this feedback in green.
This commit is contained in:
GotthardG
2025-01-06 14:40:02 +01:00
parent 9cb6ffbfb4
commit 54975b5919
12 changed files with 436 additions and 134 deletions

View File

@ -139,10 +139,14 @@ class Sample(Base):
id = Column(Integer, primary_key=True, index=True, autoincrement=True)
sample_name = Column(String(255), index=True)
position = Column(Integer) # Matches `position` in data creation script
proteinname = Column(String(255), index=True)
position = Column(Integer)
priority = Column(Integer)
comments = Column(String(255))
data_collection_parameters = Column(JSON, nullable=True)
# Foreign keys and relationships
dewar_id = Column(Integer, ForeignKey("dewars.id"))
puck_id = Column(Integer, ForeignKey("pucks.id"))
puck = relationship("Puck", back_populates="samples")
events = relationship("SampleEvent", back_populates="sample")

View File

@ -91,7 +91,10 @@ async def create_dewar(
sample = SampleModel(
puck_id=puck.id,
sample_name=sample_data.sample_name,
proteinname=sample_data.proteinname,
position=sample_data.position,
priority=sample_data.priority,
comments=sample_data.comments,
data_collection_parameters=sample_data.data_collection_parameters,
)
db.add(sample)
@ -285,6 +288,66 @@ async def download_dewar_label(dewar_id: int, db: Session = Depends(get_db)):
)
@router.get("/dewars/{dewar_id}/samples", response_model=dict)
async def get_dewar_samples(dewar_id: int, db: Session = Depends(get_db)):
    """Return one dewar with its pucks and the samples stored in each puck.

    The response has the shape
    ``{"dewar": {"id", "dewar_name"}, "pucks": [{"id", "name", "type",
    "samples": [...]}]}``; every sample entry repeats the dewar name.

    Raises:
        HTTPException: 404 when no dewar with ``dewar_id`` exists.
    """
    dewar = db.query(DewarModel).filter(DewarModel.id == dewar_id).first()
    if not dewar:
        raise HTTPException(status_code=404, detail="Dewar not found")

    def describe_sample(sample):
        # Only these columns are returned for now. The remaining spreadsheet
        # fields (directory, oscillation, aperture, exposure, totalrange,
        # transmission, dose, targetresolution, datacollectiontype,
        # processingpipeline, spacegroupnumber, cellparameters, rescutkey,
        # rescutvalue, userresolution, pdbid, autoprocfull, procfull,
        # adpenabled, noano, ffcscampaign, trustedhigh, autoprocextraparams,
        # chiphiangles) are not part of the payload yet.
        return {
            "id": sample.id,
            "position": sample.position,
            "dewar_name": dewar.dewar_name,
            "sample_name": sample.sample_name,
            "priority": sample.priority,
            "comments": sample.comments,
            "proteinname": sample.proteinname,
        }

    puck_entries = []
    for puck in db.query(PuckModel).filter(PuckModel.dewar_id == dewar.id).all():
        puck_samples = (
            db.query(SampleModel).filter(SampleModel.puck_id == puck.id).all()
        )
        puck_entries.append(
            {
                "id": puck.id,
                "name": puck.puck_name,
                "type": puck.puck_type,
                "samples": [describe_sample(entry) for entry in puck_samples],
            }
        )

    return {
        "dewar": {"id": dewar.id, "dewar_name": dewar.dewar_name},
        "pucks": puck_entries,
    }
@router.get("/", response_model=List[DewarSchema])
async def get_dewars(db: Session = Depends(get_db)):
try:

View File

@ -3,6 +3,10 @@ from typing import Any, Optional, List, Dict
from pydantic import BaseModel, Field, field_validator
from typing_extensions import Annotated
import logging
logging.basicConfig(level=logging.DEBUG)
logger = logging.getLogger(__name__)
class SpreadsheetModel(BaseModel):
@ -98,34 +102,45 @@ class SpreadsheetModel(BaseModel):
@field_validator("directory", mode="before")
@classmethod
def directory_characters(cls, v):
    """Normalise and validate the ``directory`` field.

    An empty or missing value is replaced by the default
    ``"{sgPuck}/{sgPosition}"``. Any other value is converted to ``str``,
    stripped of leading/trailing slashes, and has spaces replaced by
    underscores. It must then consist only of the recognised macros,
    the characters ``a-z 0-9 _ . + -`` (case-insensitive) and ``/``.

    Returns:
        The default value, or the normalised value with its macros intact.

    Raises:
        ValueError: If the value contains a newline or a character that is
            neither part of a recognised macro nor an allowed path character.
    """
    logger.debug(f"Validating 'directory' field with value: {repr(v)}")

    # Assign default value if v is None or empty
    if not v:
        default_value = "{sgPuck}/{sgPosition}"
        logger.warning(
            f"'directory' field is empty or None. Assigning default value: "
            f"{default_value}"
        )
        return default_value

    v = str(v).strip("/").replace(" ", "_")
    if "\n" in v:
        raise ValueError(f"'{v}' is not valid. Newline character detected.")

    valid_macros = (
        "{date}",
        "{prefix}",
        "{sgPuck}",
        "{sgPosition}",
        "{beamline}",
        "{sgPrefix}",
        "{sgPriority}",
        "{protein}",
        "{method}",
    )
    macro_re = re.compile("|".join(re.escape(macro) for macro in valid_macros))

    # Substitute macros on a scratch copy only. The substituted text used to
    # be returned, which turned e.g. "{date}/{prefix}" into "macro/macro".
    candidate = macro_re.sub("macro", v)

    # A single flat character class accepts the same strings as the former
    # nested pattern "^(([..]*|[..]+)*/*)*$" but cannot backtrack
    # exponentially on a non-matching value.
    if not re.fullmatch(r"[a-z0-9_.+/-]*", candidate, re.IGNORECASE):
        raise ValueError(
            f"'{v}' is not valid. Value must be a valid path or macro."
        )
    return v
@field_validator("positioninpuck", mode="before")
@ -251,110 +266,109 @@ class SpreadsheetModel(BaseModel):
raise ValueError(f" '{v}' is not valid." f"Value must be one of {allowed}.")
return v
@field_validator("spacegroupnumber", mode="before")
@classmethod
def spacegroupnumber_allowed(cls, v):
if v is not None:
try:
v = int(v)
if not (1 <= v <= 230):
raise ValueError(
f" '{v}' is not valid."
f"Value must be an integer between 1 and 230."
)
except (ValueError, TypeError) as e:
@field_validator("spacegroupnumber", mode="before")
@classmethod
def spacegroupnumber_allowed(cls, v):
if v is not None:
try:
v = int(v)
if not (1 <= v <= 230):
raise ValueError(
f" '{v}' is not valid."
f"Value must be an integer between 1 and 230."
) from e
return v
@field_validator("cellparameters", mode="before")
@classmethod
def cellparameters_format(cls, v):
if v:
values = [float(i) for i in v.split(",")]
if len(values) != 6 or any(val <= 0 for val in values):
raise ValueError(
f" '{v}' is not valid."
f"Value must be a set of six positive floats or integers."
)
return v
except (ValueError, TypeError) as e:
raise ValueError(
f" '{v}' is not valid."
f"Value must be an integer between 1 and 230."
) from e
return v
@field_validator("rescutkey", "rescutvalue", mode="before")
@classmethod
def rescutkey_value_pair(cls, values):
rescutkey = values.get("rescutkey")
rescutvalue = values.get("rescutvalue")
if rescutkey and rescutvalue:
if rescutkey not in {"is", "cchalf"}:
raise ValueError("Rescutkey must be either 'is' or 'cchalf'")
if not isinstance(rescutvalue, float) or rescutvalue <= 0:
raise ValueError(
"Rescutvalue must be a positive float if rescutkey is provided"
)
return values
@field_validator("cellparameters", mode="before")
@classmethod
def cellparameters_format(cls, v):
if v:
values = [float(i) for i in v.split(",")]
if len(values) != 6 or any(val <= 0 for val in values):
raise ValueError(
f" '{v}' is not valid."
f"Value must be a set of six positive floats or integers."
)
return v
@field_validator("trustedhigh", mode="before")
@classmethod
def trustedhigh_allowed(cls, v):
if v is not None:
try:
v = float(v)
if not (0 <= v <= 2.0):
raise ValueError(
f" '{v}' is not valid."
f"Value must be a float between 0 and 2.0."
)
except (ValueError, TypeError) as e:
# @field_validator("rescutkey", "rescutvalue", mode="before")
# @classmethod
# def rescutkey_value_pair(cls, values):
# rescutkey = values.get("rescutkey")
# rescutvalue = values.get("rescutvalue")
# if rescutkey and rescutvalue:
# if rescutkey not in {"is", "cchalf"}:
# raise ValueError("Rescutkey must be either 'is' or 'cchalf'")
# if not isinstance(rescutvalue, float) or rescutvalue <= 0:
# raise ValueError(
# "Rescutvalue must be a positive float if rescutkey is provided"
# )
# return values
@field_validator("trustedhigh", mode="before")
@classmethod
def trustedhigh_allowed(cls, v):
if v is not None:
try:
v = float(v)
if not (0 <= v <= 2.0):
raise ValueError(
f" '{v}' is not valid."
f"Value must be a float between 0 and 2.0."
) from e
return v
)
except (ValueError, TypeError) as e:
raise ValueError(
f" '{v}' is not valid." f"Value must be a float between 0 and 2.0."
) from e
return v
@field_validator("chiphiangles", mode="before")
@classmethod
def chiphiangles_allowed(cls, v):
if v is not None:
try:
v = float(v)
if not (0 <= v <= 30):
raise ValueError(
f" '{v}' is not valid."
f"Value must be a float between 0 and 30."
)
except (ValueError, TypeError) as e:
@field_validator("chiphiangles", mode="before")
@classmethod
def chiphiangles_allowed(cls, v):
if v is not None:
try:
v = float(v)
if not (0 <= v <= 30):
raise ValueError(
f" '{v}' is not valid. Value must be a float between 0 and 30."
) from e
return v
f" '{v}' is not valid."
f"Value must be a float between 0 and 30."
)
except (ValueError, TypeError) as e:
raise ValueError(
f" '{v}' is not valid. Value must be a float between 0 and 30."
) from e
return v
@field_validator("dose", mode="before")
@classmethod
def dose_positive(cls, v):
if v is not None:
try:
v = float(v)
if v <= 0:
raise ValueError(
f" '{v}' is not valid. Value must be a positive float."
)
except (ValueError, TypeError) as e:
@field_validator("dose", mode="before")
@classmethod
def dose_positive(cls, v):
if v is not None:
try:
v = float(v)
if v <= 0:
raise ValueError(
f" '{v}' is not valid. Value must be a positive float."
) from e
return v
)
except (ValueError, TypeError) as e:
raise ValueError(
f" '{v}' is not valid. Value must be a positive float."
) from e
return v
# Extends SpreadsheetModel with the additional fields declared below.
# input_order, username, puck_number, prefix and folder declare no default;
# samplemountcount, samplestatus and puckaddress fall back to the literals
# shown when omitted.
# NOTE(review): under pydantic v2 an Optional[...] field without a default
# must still be supplied (None is accepted) — confirm this is intended for
# prefix and folder.
class TELLModel(SpreadsheetModel):
input_order: int
samplemountcount: int = 0
samplestatus: str = "not present"
puckaddress: str = "---"
username: str
puck_number: int
prefix: Optional[str]
folder: Optional[str]
# class TELLModel(SpreadsheetModel):
# input_order: int
# samplemountcount: int = 0
# samplestatus: str = "not present"
# puckaddress: str = "---"
# username: str
# puck_number: int
# prefix: Optional[str]
# folder: Optional[str]
class SpreadsheetResponse(BaseModel):

View File

@ -150,14 +150,20 @@ class Sample(BaseModel):
position: int # Position within the puck
puck_id: int
crystalname: Optional[str] = Field(None)
proteinname: Optional[str] = None
positioninpuck: Optional[int] = Field(None)
priority: Optional[int] = None
comments: Optional[str] = None
events: List[SampleEventCreate] = []
class SampleCreate(BaseModel):
sample_name: str = Field(..., alias="crystalname")
proteinname: Optional[str] = None
position: int = Field(..., alias="positioninpuck")
data_collection_parameters: Optional[DataCollectionParameters] = None
priority: Optional[int] = None
comments: Optional[str] = None
results: Optional[Results] = None
events: Optional[List[str]] = None

View File

@ -1,5 +1,4 @@
# Adjusting the ShipmentProcessor for better error handling and alignment
from sqlalchemy.orm import Session
from app.models import Shipment, Dewar, Puck, Sample, DataCollectionParameters
from app.schemas import ShipmentCreate, ShipmentResponse
@ -47,14 +46,17 @@ class ShipmentProcessor:
self.db.refresh(puck)
for sample_data in puck_data.samples:
data_collection_params = DataCollectionParameters(
data_collection_parameters = DataCollectionParameters(
**sample_data.data_collection_parameters.dict(by_alias=True)
)
sample = Sample(
puck_id=puck.id,
sample_name=sample_data.sample_name,
proteinname=sample_data.proteinname,
position=sample_data.position,
data_collection_parameters=data_collection_params,
priority=sample_data.priority,
comments=sample_data.comments,
data_collection_parameters=data_collection_parameters,
)
self.db.add(sample)
self.db.commit()

View File

@ -48,17 +48,6 @@ class SampleSpreadsheetImporter:
def import_spreadsheet(self, file):
    """Import ``file`` by delegating to ``import_spreadsheet_with_errors``.

    The delegate's return value is passed back to the caller unchanged.
    """
    outcome = self.import_spreadsheet_with_errors(file)
    return outcome
def get_expected_type(self, col_name):
    """Return the Python type expected for the spreadsheet column ``col_name``.

    Columns that have no explicit entry are treated as ``str``.
    """
    # Add all other mappings based on model requirements
    known_types = dict(
        dewarname=str,
        puckname=str,
        positioninpuck=int,
        priority=int,
        oscillation=float,
    )
    try:
        return known_types[col_name]
    except KeyError:
        # Default to `str`
        return str
def import_spreadsheet_with_errors(
self, file
) -> Tuple[List[SpreadsheetModel], List[dict], List[dict], List[str]]:
@ -194,6 +183,20 @@ class SampleSpreadsheetImporter:
try:
validated_record = SpreadsheetModel(**record)
# Update the raw data with assigned default values
if (
validated_record.directory == "{sgPuck}/{sgPosition}"
and row[7] is None
):
row_list = list(row)
row_list[
7
] = validated_record.directory # Set the field to the default value
raw_data[-1]["data"] = row_list
raw_data[-1][
"default_set"
] = True # Mark this row as having a default value assigned
model.append(validated_record)
logger.debug(f"Row {index + 4} processed and validated successfully")
except ValidationError as e: