From 92306fcfa6aec33c7ddc01c620633e9dc106aa2a Mon Sep 17 00:00:00 2001
From: GotthardG <51994228+GotthardG@users.noreply.github.com>
Date: Tue, 7 Jan 2025 15:45:08 +0100
Subject: [PATCH] Add column type mapping and enhance validation

Introduced a backend mapping of columns to their expected types, improving
validation and error handling. Updated the UI to highlight default and
corrected values, and added more detailed validation for data collection
parameters.
---
 backend/app/routers/spreadsheet.py           |  97 ++++--
 backend/app/sample_models.py                 | 276 +----------------
 backend/app/schemas.py                       | 309 +++++++++++++++++--
 backend/app/services/spreadsheet_service.py  | 135 ++++----
 frontend/src/components/SpreadsheetTable.tsx |  87 ++++--
 5 files changed, 503 insertions(+), 401 deletions(-)

diff --git a/backend/app/routers/spreadsheet.py b/backend/app/routers/spreadsheet.py
index d93733c..b9897a9 100644
--- a/backend/app/routers/spreadsheet.py
+++ b/backend/app/routers/spreadsheet.py
@@ -1,4 +1,5 @@
-from app.sample_models import SpreadsheetModel, SpreadsheetResponse
+from app.sample_models import SpreadsheetResponse
+from app.schemas import DataCollectionParameters
 from fastapi import APIRouter, UploadFile, File, HTTPException
 import logging
 from app.services.spreadsheet_service import (
@@ -39,6 +40,8 @@ async def download_template():
 @router.post("/upload", response_model=SpreadsheetResponse)
 async def upload_file(file: UploadFile = File(...)):
     """Process the uploaded spreadsheet and return validation results."""
+    from app.schemas import DataCollectionParameters
+
     try:
         logger.info(f"Received file: {file.filename}")
@@ -66,19 +69,41 @@ async def upload_file(file: UploadFile = File(...)):
         }
 
         # Construct the response model with the processed data
+        # Update raw_data with corrected directory values
+        updated_raw_data = []
+        for row in raw_data:
+            directory_value = row.get("directory") or row["data"][7]
+            try:
+                corrected_directory = DataCollectionParameters(
+                    directory=directory_value
+                ).directory
+                corrected = (
+                    directory_value != corrected_directory
+                )  # Check if a correction was made
+
+                row["data"][7] = corrected_directory
+                row["default_set"] = corrected_directory == "{sgPuck}/{sgPosition}"
+                row["corrected"] = corrected  # Mark the row as corrected or not
+                updated_raw_data.append(row)
+            except ValidationError as e:
+                logger.error(
+                    f"[Row {row['row_num']}] Error validating directory: {e.errors()}"
+                )
+
         response_data = SpreadsheetResponse(
             data=validated_model,
             errors=errors,
-            raw_data=raw_data,
+            raw_data=updated_raw_data,
             dewars_count=len(dewars),
             dewars=list(dewars),
             pucks_count=len(pucks),
             pucks=list(pucks),
             samples_count=len(samples),
             samples=list(samples),
-            headers=headers,  # Include headers in the response
+            headers=headers,
         )
 
+        logger.debug(f"Final updated_raw_data sent in response: {updated_raw_data}")
+
         # Store row data for future use
         for idx, row in enumerate(validated_model):
             row_num = idx + 4  # Adjust row numbering if necessary
@@ -110,30 +135,60 @@ async def validate_cell(data: dict):
     col_name = data.get("column")
     value = data.get("value")
 
+    logger.info(f"Validating cell row {row_num}, column {col_name}, value {value}")
+
     # Get the full data for the row
     current_row_data = row_storage.get_row(row_num)
 
-    # Update the cell value
-    current_row_data[col_name] = importer._clean_value(
-        value, importer.get_expected_type(col_name)
-    )
-
-    # Temporarily store the updated row data
-    row_storage.set_row(row_num, current_row_data)
-
-    logger.info(f"Validating cell: row {row_num}, column {col_name}, value {value}")
+    if not current_row_data:
+        logger.error(f"No data found for row {row_num}")
+        raise HTTPException(status_code=404, detail=f"No data found for row {row_num}")
 
     try:
-        # Ensure we're using the full row data context for validation
-        SpreadsheetModel(
-            **current_row_data
-        )  # Instantiates the Pydantic model, performing validation
-        logger.info(f"Validation succeeded for row {row_num}, column {col_name}")
-        return {"is_valid": True, "message": ""}
+        # Determine the expected type for the column
+        expected_type = importer.get_expected_type(col_name)
+
+        # Clean and validate the specific field
+        cleaned_value = importer._clean_value(value, expected_type)
+        current_row_data[col_name] = cleaned_value  # Update raw data
+
+        # If the column belongs to the nested `data_collection_parameters`
+        if col_name in DataCollectionParameters.model_fields:
+            # Ensure current_nested is a Pydantic model
+            nested_data = current_row_data.get("data_collection_parameters")
+
+            if isinstance(
+                nested_data, dict
+            ):  # If it's a dict, convert it to a Pydantic model
+                current_nested = DataCollectionParameters(**nested_data)
+            elif isinstance(
+                nested_data, DataCollectionParameters
+            ):  # Already a valid model
+                current_nested = nested_data
+            else:  # If it's None or anything else, create a new instance
+                current_nested = DataCollectionParameters()
+
+            # Convert the model to a dictionary, update the specific field, and
+            # re-create the Pydantic model
+            nested_params = current_nested.model_dump()
+            nested_params[col_name] = cleaned_value  # Update the nested field
+            current_row_data["data_collection_parameters"] = DataCollectionParameters(
+                **nested_params
+            )
+
+        return {"is_valid": True, "message": "", "corrected_value": cleaned_value}
     except ValidationError as e:
-        # Extract the first error message
-        message = e.errors()[0]["msg"]
+        # Handle and log errors
+        logger.error(f"Validation error details: {e.errors()}")
+        column_error = next(
+            (err for err in e.errors() if err.get("loc")[0] == col_name), None
+        )
+        message = column_error["msg"] if column_error else "Validation failed."
         logger.error(
-            f"Validation failed for row {row_num}, column {col_name}: {message}"
+            f"Validation failed for row {row_num}, column {col_name}. Error: {message}"
        )
         return {"is_valid": False, "message": message}
+    except Exception as e:
+        # Log unexpected issues
+        logger.error(f"Unexpected error during validation: {str(e)}")
+        raise HTTPException(status_code=500, detail=f"Error validating cell: {str(e)}")
diff --git a/backend/app/sample_models.py b/backend/app/sample_models.py
index 4dc5497..9f53f33 100644
--- a/backend/app/sample_models.py
+++ b/backend/app/sample_models.py
@@ -4,6 +4,7 @@ from typing import Any, Optional, List, Dict
 from pydantic import BaseModel, Field, field_validator
 from typing_extensions import Annotated
 import logging
+from app.schemas import DataCollectionParameters
 
 logging.basicConfig(level=logging.DEBUG)
 logger = logging.getLogger(__name__)
@@ -27,45 +28,8 @@ class SpreadsheetModel(BaseModel):
     positioninpuck: int  # Only accept positive integers between 1 and 16
     priority: Optional[int]
     comments: Optional[str]
-    directory: Optional[str]
     proteinname: Optional[str] = ""  # Alphanumeric validation
-    oscillation: Optional[float] = None  # Only accept positive float
-    exposure: Optional[float] = None  # Only accept positive floats between 0 and 1
-    totalrange: Optional[int] = None  # Only accept positive integers between 0 and 360
-    transmission: Optional[
-        int
-    ] = None  # Only accept positive integers between 0 and 100
-    targetresolution: Optional[float] = None  # Only accept positive float
-    aperture: Optional[str] = None  # Optional string field
-    datacollectiontype: Optional[
-        str
-    ] = None  # Only accept "standard", other types might be added later
-    processingpipeline: Optional[
-        str
-    ] = ""  # Only accept "gopy", "autoproc", "xia2dials"
-    spacegroupnumber: Optional[
-        int
-    ] = None  # Only accept positive integers between 1 and 230
-    cellparameters: Optional[
-        str
-    ] = None  # Must be a set of six positive floats or integers
-    rescutkey: Optional[str] = None  # Only accept "is" or "cchalf"
-    rescutvalue: Optional[
-        float
-    ] = None  # Must be a positive float if rescutkey is provided
-    userresolution: Optional[float] = None
-    pdbid: Optional[
-        str
-    ] = ""  # Accepts either the format of the protein data bank code or {provided}
-    autoprocfull: Optional[bool] = None
-    procfull: Optional[bool] = None
-    adpenabled: Optional[bool] = None
-    noano: Optional[bool] = None
-    ffcscampaign: Optional[bool] = None
-    trustedhigh: Optional[float] = None  # Should be a float between 0 and 2.0
-    autoprocextraparams: Optional[str] = None  # Optional string field
-    chiphiangles: Optional[float] = None  # Optional float field between 0 and 30
-    dose: Optional[float] = None  # Optional float field
+    data_collection_parameters: Optional[DataCollectionParameters] = None
 
     # Add pucktype validation
     @field_validator("pucktype", mode="before")
@@ -99,50 +63,6 @@ class SpreadsheetModel(BaseModel):
             ), f" '{v}' is not valid. Only alphanumeric and . _ + - characters allowed."
         return v
 
-    @field_validator("directory", mode="before")
-    @classmethod
-    def directory_characters(cls, v):
-        logger.debug(f"Validating 'directory' field with value: {repr(v)}")
-
-        # Assign default value if v is None or empty
-        if not v:
-            default_value = "{sgPuck}/{sgPosition}"
-            logger.warning(
-                f"'directory' field is empty or None. Assigning default value: "
-                f"{default_value}"
-            )
-            return default_value
-
-        v = str(v).strip("/").replace(" ", "_")
-        if "\n" in v:
-            raise ValueError(f"'{v}' is not valid. Newline character detected.")
-
-        # Replace valid macros for consistency
-        valid_macros = [
-            "{date}",
-            "{prefix}",
-            "{sgPuck}",
-            "{sgPosition}",
-            "{beamline}",
-            "{sgPrefix}",
-            "{sgPriority}",
-            "{protein}",
-            "{method}",
-        ]
-        pattern = re.compile("|".join(re.escape(macro) for macro in valid_macros))
-        v = pattern.sub("macro", v)
-
-        # Ensure only allowed characters are in the directory value
-        allowed_chars = "[a-z0-9_.+-]"
-        directory_re = re.compile(
-            f"^(({allowed_chars}*|{allowed_chars}+)*/*)*$", re.IGNORECASE
-        )
-        if not directory_re.match(v):
-            raise ValueError(
-                f"'{v}' is not valid. Value must be a valid path or macro."
-            )
-        return v
-
     @field_validator("positioninpuck", mode="before")
     @classmethod
     def positioninpuck_possible(cls, v):
@@ -168,198 +88,6 @@ class SpreadsheetModel(BaseModel):
             ) from e
         return v
 
-    @field_validator("aperture", mode="before")
-    @classmethod
-    def aperture_selection(cls, v):
-        if v is not None:
-            try:
-                v = int(float(v))
-                if v not in {1, 2, 3}:
-                    raise ValueError(f" '{v}' is not valid. Value must be 1, 2, or 3.")
-            except (ValueError, TypeError) as e:
-                raise ValueError(
-                    f" '{v}' is not valid. Value must be 1, 2, or 3."
-                ) from e
-        return v
-
-    @field_validator("oscillation", "targetresolution", mode="before")
-    @classmethod
-    def positive_float_validator(cls, v):
-        if v is not None:
-            try:
-                v = float(v)
-                if v <= 0:
-                    raise ValueError(
-                        f" '{v}' is not valid. Value must be a positive float."
-                    )
-            except (ValueError, TypeError) as e:
-                raise ValueError(
-                    f" '{v}' is not valid. Value must be a positive float."
-                ) from e
-        return v
-
-    @field_validator("exposure", mode="before")
-    @classmethod
-    def exposure_in_range(cls, v):
-        if v is not None:
-            try:
-                v = float(v)
-                if not (0 <= v <= 1):
-                    raise ValueError(
-                        f" '{v}' is not valid. Value must be a float between 0 and 1."
-                    )
-            except (ValueError, TypeError) as e:
-                raise ValueError(
-                    f" '{v}' is not valid. Value must be a float between 0 and 1."
-                ) from e
-        return v
-
-    @field_validator("totalrange", mode="before")
-    @classmethod
-    def totalrange_in_range(cls, v):
-        if v is not None:
-            try:
-                v = int(v)
-                if not (0 <= v <= 360):
-                    raise ValueError(
-                        f" '{v}' is not valid."
-                        f"Value must be an integer between 0 and 360."
-                    )
-            except (ValueError, TypeError) as e:
-                raise ValueError(
-                    f" '{v}' is not valid."
-                    f"Value must be an integer between 0 and 360."
-                ) from e
-        return v
-
-    @field_validator("transmission", mode="before")
-    @classmethod
-    def transmission_fraction(cls, v):
-        if v is not None:
-            try:
-                v = int(v)
-                if not (0 <= v <= 100):
-                    raise ValueError(
-                        f" '{v}' is not valid."
-                        f"Value must be an integer between 0 and 100."
-                    )
-            except (ValueError, TypeError) as e:
-                raise ValueError(
-                    f" '{v}' is not valid."
-                    f"Value must be an integer between 0 and 100."
-                ) from e
-        return v
-
-    @field_validator("datacollectiontype", mode="before")
-    @classmethod
-    def datacollectiontype_allowed(cls, v):
-        allowed = {"standard"}  # Other types of data collection might be added later
-        if v and v.lower() not in allowed:
-            raise ValueError(f" '{v}' is not valid." f"Value must be one of {allowed}.")
-        return v
-
-    @field_validator("processingpipeline", mode="before")
-    @classmethod
-    def processingpipeline_allowed(cls, v):
-        allowed = {"gopy", "autoproc", "xia2dials"}
-        if v and v.lower() not in allowed:
-            raise ValueError(f" '{v}' is not valid." f"Value must be one of {allowed}.")
-        return v
-
-    @field_validator("spacegroupnumber", mode="before")
-    @classmethod
-    def spacegroupnumber_allowed(cls, v):
-        if v is not None:
-            try:
-                v = int(v)
-                if not (1 <= v <= 230):
-                    raise ValueError(
-                        f" '{v}' is not valid."
-                        f"Value must be an integer between 1 and 230."
-                    )
-            except (ValueError, TypeError) as e:
-                raise ValueError(
-                    f" '{v}' is not valid."
-                    f"Value must be an integer between 1 and 230."
-                ) from e
-        return v
-
-    @field_validator("cellparameters", mode="before")
-    @classmethod
-    def cellparameters_format(cls, v):
-        if v:
-            values = [float(i) for i in v.split(",")]
-            if len(values) != 6 or any(val <= 0 for val in values):
-                raise ValueError(
-                    f" '{v}' is not valid."
-                    f"Value must be a set of six positive floats or integers."
-                )
-        return v
-
-    # @field_validator("rescutkey", "rescutvalue", mode="before")
-    # @classmethod
-    # def rescutkey_value_pair(cls, values):
-    #     rescutkey = values.get("rescutkey")
-    #     rescutvalue = values.get("rescutvalue")
-    #     if rescutkey and rescutvalue:
-    #         if rescutkey not in {"is", "cchalf"}:
-    #             raise ValueError("Rescutkey must be either 'is' or 'cchalf'")
-    #         if not isinstance(rescutvalue, float) or rescutvalue <= 0:
-    #             raise ValueError(
-    #                 "Rescutvalue must be a positive float if rescutkey is provided"
-    #             )
-    #     return values
-
-    @field_validator("trustedhigh", mode="before")
-    @classmethod
-    def trustedhigh_allowed(cls, v):
-        if v is not None:
-            try:
-                v = float(v)
-                if not (0 <= v <= 2.0):
-                    raise ValueError(
-                        f" '{v}' is not valid."
-                        f"Value must be a float between 0 and 2.0."
-                    )
-            except (ValueError, TypeError) as e:
-                raise ValueError(
-                    f" '{v}' is not valid." f"Value must be a float between 0 and 2.0."
-                ) from e
-        return v
-
-    @field_validator("chiphiangles", mode="before")
-    @classmethod
-    def chiphiangles_allowed(cls, v):
-        if v is not None:
-            try:
-                v = float(v)
-                if not (0 <= v <= 30):
-                    raise ValueError(
-                        f" '{v}' is not valid."
-                        f"Value must be a float between 0 and 30."
-                    )
-            except (ValueError, TypeError) as e:
-                raise ValueError(
-                    f" '{v}' is not valid. Value must be a float between 0 and 30."
-                ) from e
-        return v
-
-    @field_validator("dose", mode="before")
-    @classmethod
-    def dose_positive(cls, v):
-        if v is not None:
-            try:
-                v = float(v)
-                if v <= 0:
-                    raise ValueError(
-                        f" '{v}' is not valid. Value must be a positive float."
-                    )
-            except (ValueError, TypeError) as e:
-                raise ValueError(
-                    f" '{v}' is not valid. Value must be a positive float."
-                ) from e
-        return v
-
 # class TELLModel(SpreadsheetModel):
 #     input_order: int
 #     samplemountcount: int = 0
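To see what the refactor means for callers, a minimal sketch of validating one record against the new nested shape — not part of the patch; the field values are illustrative:

from app.sample_models import SpreadsheetModel

record = {
    "dewarname": "DEWAR001",
    "puckname": "PUCK-1",
    "pucktype": "unipuck",
    "crystalname": "xtal-1",
    "positioninpuck": 1,
    "priority": 1,
    "comments": None,
    "proteinname": "Lysozyme",
    # Everything acquisition-related now lives in the nested model:
    "data_collection_parameters": {"exposure": "0.5", "directory": None},
}

validated = SpreadsheetModel(**record)
# The nested validator assigns the default directory when none is given:
assert validated.data_collection_parameters.directory == "{sgPuck}/{sgPosition}"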
f"Value must be one of {allowed}.") - return v - - @field_validator("spacegroupnumber", mode="before") - @classmethod - def spacegroupnumber_allowed(cls, v): - if v is not None: - try: - v = int(v) - if not (1 <= v <= 230): - raise ValueError( - f" '{v}' is not valid." - f"Value must be an integer between 1 and 230." - ) - except (ValueError, TypeError) as e: - raise ValueError( - f" '{v}' is not valid." - f"Value must be an integer between 1 and 230." - ) from e - return v - - @field_validator("cellparameters", mode="before") - @classmethod - def cellparameters_format(cls, v): - if v: - values = [float(i) for i in v.split(",")] - if len(values) != 6 or any(val <= 0 for val in values): - raise ValueError( - f" '{v}' is not valid." - f"Value must be a set of six positive floats or integers." - ) - return v - - # @field_validator("rescutkey", "rescutvalue", mode="before") - # @classmethod - # def rescutkey_value_pair(cls, values): - # rescutkey = values.get("rescutkey") - # rescutvalue = values.get("rescutvalue") - # if rescutkey and rescutvalue: - # if rescutkey not in {"is", "cchalf"}: - # raise ValueError("Rescutkey must be either 'is' or 'cchalf'") - # if not isinstance(rescutvalue, float) or rescutvalue <= 0: - # raise ValueError( - # "Rescutvalue must be a positive float if rescutkey is provided" - # ) - # return values - - @field_validator("trustedhigh", mode="before") - @classmethod - def trustedhigh_allowed(cls, v): - if v is not None: - try: - v = float(v) - if not (0 <= v <= 2.0): - raise ValueError( - f" '{v}' is not valid." - f"Value must be a float between 0 and 2.0." - ) - except (ValueError, TypeError) as e: - raise ValueError( - f" '{v}' is not valid." f"Value must be a float between 0 and 2.0." - ) from e - return v - - @field_validator("chiphiangles", mode="before") - @classmethod - def chiphiangles_allowed(cls, v): - if v is not None: - try: - v = float(v) - if not (0 <= v <= 30): - raise ValueError( - f" '{v}' is not valid." - f"Value must be a float between 0 and 30." - ) - except (ValueError, TypeError) as e: - raise ValueError( - f" '{v}' is not valid. Value must be a float between 0 and 30." - ) from e - return v - - @field_validator("dose", mode="before") - @classmethod - def dose_positive(cls, v): - if v is not None: - try: - v = float(v) - if v <= 0: - raise ValueError( - f" '{v}' is not valid. Value must be a positive float." - ) - except (ValueError, TypeError) as e: - raise ValueError( - f" '{v}' is not valid. Value must be a positive float." 
- ) from e - return v - # class TELLModel(SpreadsheetModel): # input_order: int # samplemountcount: int = 0 diff --git a/backend/app/schemas.py b/backend/app/schemas.py index c4da527..64429d7 100644 --- a/backend/app/schemas.py +++ b/backend/app/schemas.py @@ -1,7 +1,12 @@ from typing import List, Optional from datetime import datetime -from pydantic import BaseModel, EmailStr, constr, Field +from pydantic import BaseModel, EmailStr, constr, Field, field_validator from datetime import date +import logging +import re + +logging.basicConfig(level=logging.DEBUG) +logger = logging.getLogger(__name__) class loginToken(BaseModel): @@ -47,37 +52,299 @@ class DewarSerialNumber(DewarSerialNumberBase): class DataCollectionParameters(BaseModel): - priority: Optional[int] = None - comments: Optional[str] = None directory: Optional[str] = None - proteinname: Optional[str] = None - oscillation: Optional[float] = None - aperture: Optional[str] = None - exposure: Optional[float] = None - totalrange: Optional[int] = None - transmission: Optional[int] = None - dose: Optional[float] = None - targetresolution: Optional[float] = None - datacollectiontype: Optional[str] = None - processingpipeline: Optional[str] = None - spacegroupnumber: Optional[int] = None - cellparameters: Optional[str] = None - rescutkey: Optional[str] = None - rescutvalue: Optional[float] = None + oscillation: Optional[float] = None # Only accept positive float + exposure: Optional[float] = None # Only accept positive floats between 0 and 1 + totalrange: Optional[int] = None # Only accept positive integers between 0 and 360 + transmission: Optional[ + int + ] = None # Only accept positive integers between 0 and 100 + targetresolution: Optional[float] = None # Only accept positive float + aperture: Optional[str] = None # Optional string field + datacollectiontype: Optional[ + str + ] = None # Only accept "standard", other types might be added later + processingpipeline: Optional[ + str + ] = "" # Only accept "gopy", "autoproc", "xia2dials" + spacegroupnumber: Optional[ + int + ] = None # Only accept positive integers between 1 and 230 + cellparameters: Optional[ + str + ] = None # Must be a set of six positive floats or integers + rescutkey: Optional[str] = None # Only accept "is" or "cchalf" + rescutvalue: Optional[ + float + ] = None # Must be a positive float if rescutkey is provided userresolution: Optional[float] = None - pdbid: Optional[str] = None + pdbid: Optional[ + str + ] = "" # Accepts either the format of the protein data bank code or {provided} autoprocfull: Optional[bool] = None procfull: Optional[bool] = None adpenabled: Optional[bool] = None noano: Optional[bool] = None ffcscampaign: Optional[bool] = None - trustedhigh: Optional[float] = None - autoprocextraparams: Optional[str] = None - chiphiangles: Optional[float] = None + trustedhigh: Optional[float] = None # Should be a float between 0 and 2.0 + autoprocextraparams: Optional[str] = None # Optional string field + chiphiangles: Optional[float] = None # Optional float field between 0 and 30 + dose: Optional[float] = None # Optional float field class Config: from_attributes = True + @field_validator("directory", mode="after") + @classmethod + def directory_characters(cls, v): + logger.debug(f"Validating 'directory' field with initial value: {repr(v)}") + + # Default directory value if empty + if not v: # Handles None or empty cases + default_value = "{sgPuck}/{sgPosition}" + logger.warning( + f"'directory' field is empty or None. 
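The normalization above is easiest to read off a few examples — a hedged sketch, values illustrative:

from pydantic import ValidationError
from app.schemas import DataCollectionParameters

# Empty -> default macro path
assert DataCollectionParameters(directory=None).directory == "{sgPuck}/{sgPosition}"

# Surrounding slashes trimmed, spaces replaced, macros accepted
fixed = DataCollectionParameters(directory="/my dir/{sgPuck}/").directory
assert fixed == "my_dir/{sgPuck}"

# Disallowed characters still fail validation
try:
    DataCollectionParameters(directory="bad|value")
except ValidationError as e:
    print(e.errors()[0]["msg"])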
+    @field_validator("aperture", mode="before")
+    @classmethod
+    def aperture_selection(cls, v):
+        if v is not None:
+            try:
+                v = int(float(v))
+                if v not in {1, 2, 3}:
+                    raise ValueError(f" '{v}' is not valid. Value must be 1, 2, or 3.")
+            except (ValueError, TypeError) as e:
+                raise ValueError(
+                    f" '{v}' is not valid. Value must be 1, 2, or 3."
+                ) from e
+        return v
+
+    @field_validator("oscillation", "targetresolution", mode="before")
+    @classmethod
+    def positive_float_validator(cls, v):
+        logger.debug(f"Running positive_float_validator for value: {v}")
+        if v is not None:
+            try:
+                v = float(v)
+                if v <= 0:
+                    logger.error(f"Validation failed: '{v}' is not greater than 0.")
+                    raise ValueError(
+                        f"'{v}' is not valid. Value must be a positive float."
+                    )
+            except (ValueError, TypeError) as e:
+                logger.error(f"Validation failed: '{v}' caused error {str(e)}")
+                raise ValueError(
+                    f"'{v}' is not valid. Value must be a positive float."
+                ) from e
+        logger.debug(f"Validation succeeded for value: {v}")
+        return v
+
+    @field_validator("exposure", mode="before")
+    @classmethod
+    def exposure_in_range(cls, v):
+        if v is not None:
+            try:
+                v = float(v)
+                if not (0 <= v <= 1):
+                    raise ValueError(
+                        f" '{v}' is not valid. Value must be a float between 0 and 1."
+                    )
+            except (ValueError, TypeError) as e:
+                raise ValueError(
+                    f" '{v}' is not valid. Value must be a float between 0 and 1."
+                ) from e
+        return v
+
+    @field_validator("totalrange", mode="before")
+    @classmethod
+    def totalrange_in_range(cls, v):
+        if v is not None:
+            try:
+                v = int(v)
+                if not (0 <= v <= 360):
+                    raise ValueError(
+                        f" '{v}' is not valid. "
+                        f"Value must be an integer between 0 and 360."
+                    )
+            except (ValueError, TypeError) as e:
+                raise ValueError(
+                    f" '{v}' is not valid. "
+                    f"Value must be an integer between 0 and 360."
+                ) from e
+        return v
+
+    @field_validator("transmission", mode="before")
+    @classmethod
+    def transmission_fraction(cls, v):
+        if v is not None:
+            try:
+                v = int(v)
+                if not (0 <= v <= 100):
+                    raise ValueError(
+                        f" '{v}' is not valid. "
+                        f"Value must be an integer between 0 and 100."
+                    )
+            except (ValueError, TypeError) as e:
+                raise ValueError(
+                    f" '{v}' is not valid. "
+                    f"Value must be an integer between 0 and 100."
+                ) from e
+        return v
+
+    @field_validator("datacollectiontype", mode="before")
+    @classmethod
+    def datacollectiontype_allowed(cls, v):
+        allowed = {"standard"}  # Other types of data collection might be added later
+        if v and v.lower() not in allowed:
+            raise ValueError(f" '{v}' is not valid. Value must be one of {allowed}.")
+        return v
+
+    @field_validator("processingpipeline", mode="before")
+    @classmethod
+    def processingpipeline_allowed(cls, v):
+        allowed = {"gopy", "autoproc", "xia2dials"}
+        if v and v.lower() not in allowed:
+            raise ValueError(f" '{v}' is not valid. Value must be one of {allowed}.")
+        return v
+
+    @field_validator("spacegroupnumber", mode="before")
+    @classmethod
+    def spacegroupnumber_allowed(cls, v):
+        if v is not None:
+            try:
+                v = int(v)
+                if not (1 <= v <= 230):
+                    raise ValueError(
+                        f" '{v}' is not valid. "
+                        f"Value must be an integer between 1 and 230."
+                    )
+            except (ValueError, TypeError) as e:
+                raise ValueError(
+                    f" '{v}' is not valid. "
+                    f"Value must be an integer between 1 and 230."
+                ) from e
+        return v
+
+    @field_validator("cellparameters", mode="before")
+    @classmethod
+    def cellparameters_format(cls, v):
+        if v:
+            values = [float(i) for i in v.split(",")]
+            if len(values) != 6 or any(val <= 0 for val in values):
+                raise ValueError(
+                    f" '{v}' is not valid. "
+                    f"Value must be a set of six positive floats or integers."
+                )
+        return v
+
+    # @field_validator("rescutkey", "rescutvalue", mode="before")
+    # @classmethod
+    # def rescutkey_value_pair(cls, values):
+    #     rescutkey = values.get("rescutkey")
+    #     rescutvalue = values.get("rescutvalue")
+    #     if rescutkey and rescutvalue:
+    #         if rescutkey not in {"is", "cchalf"}:
+    #             raise ValueError("Rescutkey must be either 'is' or 'cchalf'")
+    #         if not isinstance(rescutvalue, float) or rescutvalue <= 0:
+    #             raise ValueError(
+    #                 "Rescutvalue must be a positive float if rescutkey is provided"
+    #             )
+    #     return values
+
+    @field_validator("trustedhigh", mode="before")
+    @classmethod
+    def trustedhigh_allowed(cls, v):
+        if v is not None:
+            try:
+                v = float(v)
+                if not (0 <= v <= 2.0):
+                    raise ValueError(
+                        f" '{v}' is not valid. "
+                        f"Value must be a float between 0 and 2.0."
+                    )
+            except (ValueError, TypeError) as e:
+                raise ValueError(
+                    f" '{v}' is not valid. "
+                    f"Value must be a float between 0 and 2.0."
+                ) from e
+        return v
+
+    @field_validator("chiphiangles", mode="before")
+    @classmethod
+    def chiphiangles_allowed(cls, v):
+        if v is not None:
+            try:
+                v = float(v)
+                if not (0 <= v <= 30):
+                    raise ValueError(
+                        f" '{v}' is not valid. "
+                        f"Value must be a float between 0 and 30."
+                    )
+            except (ValueError, TypeError) as e:
+                raise ValueError(
+                    f" '{v}' is not valid. Value must be a float between 0 and 30."
+                ) from e
+        return v
+
+    @field_validator("dose", mode="before")
+    @classmethod
+    def dose_positive(cls, v):
+        if v is not None:
+            try:
+                v = float(v)
+                if v <= 0:
+                    raise ValueError(
+                        f" '{v}' is not valid. Value must be a positive float."
+                    )
+            except (ValueError, TypeError) as e:
+                raise ValueError(
+                    f" '{v}' is not valid. Value must be a positive float."
+                ) from e
+        return v
+
 
 class SampleEventCreate(BaseModel):
     event_type: str
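Since these validators run in mode="before", they accept the raw strings that come out of a spreadsheet cell and coerce them before the type check — a small sketch with illustrative values, not from the patch:

from pydantic import ValidationError
from app.schemas import DataCollectionParameters

params = DataCollectionParameters(
    exposure="0.8",                           # str -> float, must be in 0..1
    totalrange="180",                         # str -> int, must be in 0..360
    cellparameters="78.4,78.4,37.8,90,90,90"  # six positive numbers
)
print(params.exposure, params.totalrange)  # 0.8 180

try:
    DataCollectionParameters(exposure=1.5)  # outside the 0..1 range
except ValidationError as e:
    print(e.errors()[0]["msg"])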
diff --git a/backend/app/services/spreadsheet_service.py b/backend/app/services/spreadsheet_service.py
index 3cc6936..b107faf 100644
--- a/backend/app/services/spreadsheet_service.py
+++ b/backend/app/services/spreadsheet_service.py
@@ -18,6 +18,49 @@ class SampleSpreadsheetImporter:
         self.filename = None
         self.model = None
 
+    def get_expected_type(self, column_name: str) -> type:
+        """
+        Returns the expected data type for a given column name.
+        """
+        # Define a mapping of column names to expected types
+        column_type_mapping = {
+            "dewarname": str,
+            "puckname": str,
+            "pucktype": str,
+            "crystalname": str,
+            "positioninpuck": int,
+            "priority": int,
+            "comments": str,
+            "proteinname": str,
+            "directory": str,
+            "oscillation": float,
+            "exposure": float,
+            "totalrange": int,
+            "transmission": int,
+            "targetresolution": float,
+            "aperture": str,
+            "datacollectiontype": str,
+            "processingpipeline": str,
+            "spacegroupnumber": int,
+            "cellparameters": str,
+            "rescutkey": str,
+            "rescutvalue": float,
+            "userresolution": float,
+            "pdbid": str,
+            "autoprocfull": bool,
+            "procfull": bool,
+            "adpenabled": bool,
+            "noano": bool,
+            "ffcscampaign": bool,
+            "trustedhigh": float,
+            "autoprocextraparams": str,
+            "chiphiangles": float,
+            "dose": float,
+        }
+
+        # Return the mapped type if the column exists, else default to str
+        return column_type_mapping.get(column_name, str)
+
     def _clean_value(self, value, expected_type=None):
         """Clean value by converting it to the expected type and handle edge cases."""
         if value is None:
@@ -139,13 +182,13 @@ class SampleSpreadsheetImporter:
                 continue
 
             # Record raw data for later use
-            raw_data.append({"row_num": index + 4, "data": row})
+            raw_data.append({"row_num": index + 4, "data": list(row)})
 
-            # Pad the row to ensure it has the expected number of columns
+            # Ensure row has the expected number of columns
             if len(row) < expected_columns:
                 row = list(row) + [None] * (expected_columns - len(row))
 
-            # Prepare the record with the cleaned values
+            # Prepare the record with cleaned values
             record = {
                 "dewarname": self._clean_value(row[0], str),
                 "puckname": self._clean_value(row[1], str),
@@ -154,8 +197,10 @@ class SampleSpreadsheetImporter:
                 "positioninpuck": self._clean_value(row[4], int),
                 "priority": self._clean_value(row[5], int),
                 "comments": self._clean_value(row[6], str),
-                "directory": self._clean_value(row[7], str),
                 "proteinname": self._clean_value(row[8], str),
+            }
+            record["data_collection_parameters"] = {
+                "directory": self._clean_value(row[7], str),
                 "oscillation": self._clean_value(row[9], float),
                 "aperture": self._clean_value(row[10], str),
                 "exposure": self._clean_value(row[11], float),
@@ -182,69 +227,45 @@ class SampleSpreadsheetImporter:
             }
 
             try:
+                # Validate the record
                 validated_record = SpreadsheetModel(**record)
-                # Update the raw data with assigned default values
-                if (
-                    validated_record.directory == "{sgPuck}/{sgPosition}"
-                    and row[7] is None
-                ):
-                    row_list = list(row)
-                    row_list[
-                        7
-                    ] = validated_record.directory  # Set the field to the default value
-                    raw_data[-1]["data"] = row_list
-                    raw_data[-1][
-                        "default_set"
-                    ] = True  # Mark this row as having a default value assigned
+
+                # Get the corrected `directory`
+                corrected_directory = (
+                    validated_record.data_collection_parameters.directory
+                )
+
+                # Update `raw_data` to reflect the corrected value
+                raw_data[-1]["data"][
+                    7
+                ] = corrected_directory  # Replace directory in raw data
+                raw_data[-1][
+                    "directory"
+                ] = corrected_directory  # Add a top-level "directory" key
+                raw_data[-1]["default_set"] = (
+                    corrected_directory == "{sgPuck}/{sgPosition}"
+                )
+
+                # Add validated record to the model
                 model.append(validated_record)
-                logger.debug(f"Row {index + 4} processed and validated successfully")
+
             except ValidationError as e:
                 logger.error(f"Validation error in row {index + 4}: {e}")
                 for error in e.errors():
-                    field = error["loc"][0]
+                    field_path = error["loc"]
                     msg = error["msg"]
-                    # Map field name (which is the key in `record`) to its index in the
-                    # row
-                    field_to_col = {
-                        "dewarname": 0,
-                        "puckname": 1,
-                        "pucktype": 2,
-                        "crystalname": 3,
-                        "positioninpuck": 4,
-                        "priority": 5,
-                        "comments": 6,
-                        "directory": 7,
-                        "proteinname": 8,
-                        "oscillation": 9,
-                        "aperture": 10,
-                        "exposure": 11,
-                        "totalrange": 12,
-                        "transmission": 13,
-                        "dose": 14,
-                        "targetresolution": 15,
-                        "datacollectiontype": 16,
-                        "processingpipeline": 17,
-                        "spacegroupnumber": 18,
-                        "cellparameters": 19,
-                        "rescutkey": 20,
-                        "rescutvalue": 21,
-                        "userresolution": 22,
-                        "pdbid": 23,
-                        "autoprocfull": 24,
-                        "procfull": 25,
-                        "adpenabled": 26,
-                        "noano": 27,
-                        "ffcscampaign": 28,
-                        "trustedhigh": 29,
-                        "autoprocextraparams": 30,
-                        "chiphiangles": 31,
-                    }
-                    column_index = field_to_col[field]
+
+                    if field_path[0] == "data_collection_parameters":
+                        subfield = field_path[1]
+                        column_index = headers.index(subfield)
+                    else:
+                        field = field_path[0]
+                        column_index = headers.index(field)
+
                     error_info = {
                         "row": index + 4,
                         "cell": column_index,
-                        "value": row[column_index],  # Value that caused the error
+                        "value": row[column_index],
                         "message": msg,
                     }
                     errors.append(error_info)
diff --git a/frontend/src/components/SpreadsheetTable.tsx b/frontend/src/components/SpreadsheetTable.tsx
index 7d57e48..f7d0b0c 100644
--- a/frontend/src/components/SpreadsheetTable.tsx
+++ b/frontend/src/components/SpreadsheetTable.tsx
@@ -79,16 +79,17 @@ const SpreadsheetTable = ({
   const errorMap = generateErrorMap(localErrors);
 
   useEffect(() => {
-    const initialNonEditableCells = new Set();
+    const updatedNonEditableCells = new Set();
     raw_data.forEach((row, rowIndex) => {
       headers.forEach((_, colIndex) => {
         const key = `${row.row_num}-${headers[colIndex]}`;
         if (!errorMap.has(key)) {
-          initialNonEditableCells.add(`${rowIndex}-${colIndex}`);
+          updatedNonEditableCells.add(`${rowIndex}-${colIndex}`);
         }
       });
     });
-    setNonEditableCells(initialNonEditableCells);
+    setNonEditableCells(updatedNonEditableCells);
+    console.log("Recalculated nonEditableCells:", updatedNonEditableCells);
   }, [raw_data, headers, errorMap]);
 
   const handleCellEdit = async (rowIndex, colIndex) => {
@@ -97,14 +98,15 @@ const SpreadsheetTable = ({
     const currentRow = updatedRawData[rowIndex];
     const newValue = editingCell[`${rowIndex}-${colIndex}`];
 
-    if (newValue === undefined) return;
+    if (newValue === undefined) return; // Ensure value is provided
 
+    // Prepare for validation request
     if (!currentRow.data) {
       currentRow.data = [];
     }
-    currentRow.data[colIndex] = newValue;
 
+    // Reset editing state
     setEditingCell((prev) => {
       const updated = { ...prev };
       delete updated[`${rowIndex}-${colIndex}`];
       return updated;
     });
@@ -115,27 +117,46 @@ const SpreadsheetTable = ({
       const response = await SpreadsheetService.validateCellValidateCellPost({
         row: currentRow.row_num,
         column: columnName,
-        value: newValue
+        value: newValue,
       });
 
-      if (response.is_valid !== undefined) {
+      if (response && response.is_valid !== undefined) {
         if (response.is_valid) {
+          // Handle validation success (remove error)
+          const correctedValue = response.corrected_value ?? newValue;
+          currentRow.data[colIndex] = correctedValue;
+          updatedRawData[rowIndex] = currentRow;
+
+          setRawData(updatedRawData); // Update table data
+
+          // Remove error associated with this cell
           const updatedErrors = localErrors.filter(
             (error) => !(error.row === currentRow.row_num && error.cell === colIndex)
           );
           setLocalErrors(updatedErrors);
+
+          // Update non-editable state
           setNonEditableCells((prev) => new Set([...prev, `${rowIndex}-${colIndex}`]));
         } else {
+          // Handle validation failure (add error)
+          const errorMessage = response.message || "Invalid value.";
+          const newError = {
+            row: currentRow.row_num,
+            cell: colIndex,
+            message: errorMessage,
+          };
+
           const updatedErrors = [
-            ...localErrors,
-            { row: currentRow.row_num, cell: colIndex, message: response.message || 'Invalid value.' }
+            ...localErrors.filter(
+              (error) => !(error.row === newError.row && error.cell === newError.cell)
+            ), // Avoid duplicates
+            newError,
           ];
           setLocalErrors(updatedErrors);
         }
+      } else {
+        console.error("Unexpected response from backend:", response);
       }
-      setRawData(updatedRawData);
     } catch (error) {
-      console.error('Validation failed:', error);
+      console.error("Validation request failed:", error);
     }
   };
 
@@ -431,46 +452,56 @@ const SpreadsheetTable = ({
                 {headers.map((header, colIndex) => {
                   const key = `${row.row_num}-${header}`;
                   const errorMessage = errorMap.get(key);
-                  const isInvalid = !!errorMessage;
-                  const cellValue = (row.data && row.data[colIndex]) || "";
+                  const isInvalid = !!errorMap.get(`${row.row_num}-${headers[colIndex]}`);
+                  const cellValue = row.data[colIndex];
                   const editingValue = editingCell[`${rowIndex}-${colIndex}`];
-                  const isReadonly = !isInvalid;
-
-                  const isDefaultAssigned = colIndex === 7 && row.default_set; // Directory column (index 7) and marked as default_set
-
+                  const isReadonly = !isInvalid && editingCell[`${rowIndex}-${colIndex}`] === undefined;
+                  const isCorrected = colIndex === 7 && row.corrected; // Corrected field exists and is true
+                  const isDefaultAssigned = colIndex === 7 && row.default_set; // Default-assigned field exists and is true
 
                   return (
-                    <TableCell key={colIndex}>
+                    <TableCell
+                      key={colIndex}
+                      sx={{
+                        backgroundColor: isCorrected
+                          ? "warning.light" // highlight corrected values
+                          : isDefaultAssigned
+                          ? "info.light" // highlight default-assigned values
+                          : "inherit",
+                      }}
+                    >
                       {isInvalid ? (
                         <TextField
                           value={editingValue !== undefined ? editingValue : cellValue}
                           onChange={(e) =>
                             setEditingCell({
                               ...editingCell,
                               [`${rowIndex}-${colIndex}`]: e.target.value,
                             })
                           }
-                          onKeyDown={(e) => {
-                            if (e.key === "Enter") {
-                              handleCellEdit(rowIndex, colIndex);
-                            }
-                          }}
                           onBlur={() => handleCellBlur(rowIndex, colIndex)}
                           error={isInvalid}
                           fullWidth
                           variant="outlined"
                           size="small"
-                          disabled={isReadonly}
+                          disabled={!isInvalid}
                         />
                       ) : (
-                        cellValue
+                        cellValue // This should reflect the updated 'raw_data'
                       )}
                     </TableCell>
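Finally, the two row flags the table colors by can be derived in isolation — a minimal sketch of the same per-row logic upload_file applies, with illustrative values:

from app.schemas import DataCollectionParameters

raw_value = "my dir/{sgPuck}"
corrected_value = DataCollectionParameters(directory=raw_value).directory

row_flags = {
    "corrected": corrected_value != raw_value,                  # True: space -> "_"
    "default_set": corrected_value == "{sgPuck}/{sgPosition}",  # False here
}
print(row_flags)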