From 92306fcfa6aec33c7ddc01c620633e9dc106aa2a Mon Sep 17 00:00:00 2001
From: GotthardG <51994228+GotthardG@users.noreply.github.com>
Date: Tue, 7 Jan 2025 15:45:08 +0100
Subject: [PATCH] Add column type mapping and enhance validation

Introduced a backend mapping of columns to their expected types, improving
validation and error handling. Updated the UI to highlight default and
corrected values, and added more detailed validation for data collection
parameters.
---
 backend/app/routers/spreadsheet.py           |  97 ++++--
 backend/app/sample_models.py                 | 276 +----------------
 backend/app/schemas.py                       | 309 +++++++++++++++++--
 backend/app/services/spreadsheet_service.py  | 135 ++++----
 frontend/src/components/SpreadsheetTable.tsx |  87 ++++--
 5 files changed, 503 insertions(+), 401 deletions(-)

diff --git a/backend/app/routers/spreadsheet.py b/backend/app/routers/spreadsheet.py
index d93733c..b9897a9 100644
--- a/backend/app/routers/spreadsheet.py
+++ b/backend/app/routers/spreadsheet.py
@@ -1,4 +1,5 @@
-from app.sample_models import SpreadsheetModel, SpreadsheetResponse
+from app.sample_models import SpreadsheetResponse
+from app.schemas import DataCollectionParameters
 from fastapi import APIRouter, UploadFile, File, HTTPException
 import logging
 from app.services.spreadsheet_service import (
@@ -39,6 +40,8 @@ async def download_template():
 @router.post("/upload", response_model=SpreadsheetResponse)
 async def upload_file(file: UploadFile = File(...)):
     """Process the uploaded spreadsheet and return validation results."""
+    from app.schemas import DataCollectionParameters
+
     try:
         logger.info(f"Received file: {file.filename}")
@@ -66,19 +69,41 @@ async def upload_file(file: UploadFile = File(...)):
         }
 
         # Construct the response model with the processed data
+        # Update raw_data with corrected directory values
+        updated_raw_data = []
+        for row in raw_data:
+            directory_value = row.get("directory") or row["data"][7]
+            try:
+                corrected_directory = DataCollectionParameters(
+                    directory=directory_value
+                ).directory
+                corrected = (
+                    directory_value != corrected_directory
+                )  # Check if a correction was made
+
+                row["data"][7] = corrected_directory
+                row["default_set"] = corrected_directory == "{sgPuck}/{sgPosition}"
+                row["corrected"] = corrected  # Mark the row as corrected or not
+                updated_raw_data.append(row)
+            except ValidationError as e:
+                logger.error(
+                    f"[Row {row['row_num']}] Error validating directory: {e.errors()}"
+                )
+
         response_data = SpreadsheetResponse(
             data=validated_model,
             errors=errors,
-            raw_data=raw_data,
+            raw_data=updated_raw_data,
             dewars_count=len(dewars),
             dewars=list(dewars),
             pucks_count=len(pucks),
             pucks=list(pucks),
             samples_count=len(samples),
             samples=list(samples),
-            headers=headers,  # Include headers in the response
+            headers=headers,
         )
 
+        logger.debug(f"Final updated_raw_data sent in response: {updated_raw_data}")
+
         # Store row data for future use
         for idx, row in enumerate(validated_model):
             row_num = idx + 4  # Adjust row numbering if necessary
@@ -110,30 +135,60 @@ async def validate_cell(data: dict):
     col_name = data.get("column")
     value = data.get("value")
 
+    logger.info(f"Validating cell row {row_num}, column {col_name}, value {value}")
+
     # Get the full data for the row
     current_row_data = row_storage.get_row(row_num)
 
-    # Update the cell value
-    current_row_data[col_name] = importer._clean_value(
-        value, importer.get_expected_type(col_name)
-    )
-
-    # Temporarily store the updated row data
-    row_storage.set_row(row_num, current_row_data)
-
-    logger.info(f"Validating cell: row {row_num}, column {col_name}, value {value}")
+    if not current_row_data:
+        logger.error(f"No data found for row {row_num}")
+        raise HTTPException(status_code=404, detail=f"No data found for row {row_num}")
 
     try:
-        # Ensure we're using the full row data context for validation
-        SpreadsheetModel(
-            **current_row_data
-        )  # Instantiates the Pydantic model, performing validation
-        logger.info(f"Validation succeeded for row {row_num}, column {col_name}")
-        return {"is_valid": True, "message": ""}
+        # Determine the expected type for the column
+        expected_type = importer.get_expected_type(col_name)
+
+        # Clean and validate the specific field
+        cleaned_value = importer._clean_value(value, expected_type)
+        current_row_data[col_name] = cleaned_value  # Update raw data
+
+        # If the column belongs to the nested `data_collection_parameters`
+        if col_name in DataCollectionParameters.model_fields:
+            # Ensure current_nested is a Pydantic model
+            nested_data = current_row_data.get("data_collection_parameters")
+
+            if isinstance(
+                nested_data, dict
+            ):  # If it's a dict, convert it to a Pydantic model
+                current_nested = DataCollectionParameters(**nested_data)
+            elif isinstance(
+                nested_data, DataCollectionParameters
+            ):  # Already a valid model
+                current_nested = nested_data
+            else:  # If it's None or anything else, create a new instance
+                current_nested = DataCollectionParameters()
+
+            # Convert the model to a dictionary, update the specific field, and
+            # re-create the Pydantic model
+            nested_params = current_nested.model_dump()
+            nested_params[col_name] = cleaned_value  # Update the nested field
+            current_row_data["data_collection_parameters"] = DataCollectionParameters(
+                **nested_params
+            )
+
+        return {"is_valid": True, "message": "", "corrected_value": cleaned_value}
     except ValidationError as e:
-        # Extract the first error message
-        message = e.errors()[0]["msg"]
+        # Handle and log errors
+        logger.error(f"Validation error details: {e.errors()}")
+        column_error = next(
+            (err for err in e.errors() if err.get("loc")[0] == col_name), None
+        )
+        message = column_error["msg"] if column_error else "Validation failed."
         logger.error(
-            f"Validation failed for row {row_num}, column {col_name}: {message}"
+            f"Validation failed for row {row_num}, column {col_name}. Error: {message}"
        )
         return {"is_valid": False, "message": message}
+    except Exception as e:
+        # Log unexpected issues
+        logger.error(f"Unexpected error during validation: {str(e)}")
+        raise HTTPException(status_code=500, detail=f"Error validating cell: {str(e)}")
diff --git a/backend/app/sample_models.py b/backend/app/sample_models.py
index 4dc5497..9f53f33 100644
--- a/backend/app/sample_models.py
+++ b/backend/app/sample_models.py
@@ -4,6 +4,7 @@ from typing import Any, Optional, List, Dict
 from pydantic import BaseModel, Field, field_validator
 from typing_extensions import Annotated
 import logging
+from app.schemas import DataCollectionParameters
 
 logging.basicConfig(level=logging.DEBUG)
 logger = logging.getLogger(__name__)
@@ -27,45 +28,8 @@ class SpreadsheetModel(BaseModel):
     positioninpuck: int  # Only accept positive integers between 1 and 16
     priority: Optional[int]
     comments: Optional[str]
-    directory: Optional[str]
     proteinname: Optional[str] = ""  # Alphanumeric validation
-    oscillation: Optional[float] = None  # Only accept positive float
-    exposure: Optional[float] = None  # Only accept positive floats between 0 and 1
-    totalrange: Optional[int] = None  # Only accept positive integers between 0 and 360
-    transmission: Optional[
-        int
-    ] = None  # Only accept positive integers between 0 and 100
-    targetresolution: Optional[float] = None  # Only accept positive float
-    aperture: Optional[str] = None  # Optional string field
-    datacollectiontype: Optional[
-        str
-    ] = None  # Only accept "standard", other types might be added later
-    processingpipeline: Optional[
-        str
-    ] = ""  # Only accept "gopy", "autoproc", "xia2dials"
-    spacegroupnumber: Optional[
-        int
-    ] = None  # Only accept positive integers between 1 and 230
-    cellparameters: Optional[
-        str
-    ] = None  # Must be a set of six positive floats or integers
-    rescutkey: Optional[str] = None  # Only accept "is" or "cchalf"
-    rescutvalue: Optional[
-        float
-    ] = None  # Must be a positive float if rescutkey is provided
-    userresolution: Optional[float] = None
-    pdbid: Optional[
-        str
-    ] = ""  # Accepts either the format of the protein data bank code or {provided}
-    autoprocfull: Optional[bool] = None
-    procfull: Optional[bool] = None
-    adpenabled: Optional[bool] = None
-    noano: Optional[bool] = None
-    ffcscampaign: Optional[bool] = None
-    trustedhigh: Optional[float] = None  # Should be a float between 0 and 2.0
-    autoprocextraparams: Optional[str] = None  # Optional string field
-    chiphiangles: Optional[float] = None  # Optional float field between 0 and 30
-    dose: Optional[float] = None  # Optional float field
+    data_collection_parameters: Optional[DataCollectionParameters] = None
 
     # Add pucktype validation
     @field_validator("pucktype", mode="before")
@@ -99,50 +63,6 @@ class SpreadsheetModel(BaseModel):
             ), f" '{v}' is not valid. Only alphanumeric and . _ + - characters allowed."
         return v
 
-    @field_validator("directory", mode="before")
-    @classmethod
-    def directory_characters(cls, v):
-        logger.debug(f"Validating 'directory' field with value: {repr(v)}")
-
-        # Assign default value if v is None or empty
-        if not v:
-            default_value = "{sgPuck}/{sgPosition}"
-            logger.warning(
-                f"'directory' field is empty or None. Assigning default value: "
-                f"{default_value}"
-            )
-            return default_value
-
-        v = str(v).strip("/").replace(" ", "_")
-        if "\n" in v:
-            raise ValueError(f"'{v}' is not valid. Newline character detected.")
-
-        # Replace valid macros for consistency
-        valid_macros = [
-            "{date}",
-            "{prefix}",
-            "{sgPuck}",
-            "{sgPosition}",
-            "{beamline}",
-            "{sgPrefix}",
-            "{sgPriority}",
-            "{protein}",
-            "{method}",
-        ]
-        pattern = re.compile("|".join(re.escape(macro) for macro in valid_macros))
-        v = pattern.sub("macro", v)
-
-        # Ensure only allowed characters are in the directory value
-        allowed_chars = "[a-z0-9_.+-]"
-        directory_re = re.compile(
-            f"^(({allowed_chars}*|{allowed_chars}+)*/*)*$", re.IGNORECASE
-        )
-        if not directory_re.match(v):
-            raise ValueError(
-                f"'{v}' is not valid. Value must be a valid path or macro."
-            )
-        return v
-
     @field_validator("positioninpuck", mode="before")
     @classmethod
     def positioninpuck_possible(cls, v):
@@ -168,198 +88,6 @@ class SpreadsheetModel(BaseModel):
             ) from e
         return v
 
-    @field_validator("aperture", mode="before")
-    @classmethod
-    def aperture_selection(cls, v):
-        if v is not None:
-            try:
-                v = int(float(v))
-                if v not in {1, 2, 3}:
-                    raise ValueError(f" '{v}' is not valid. Value must be 1, 2, or 3.")
-            except (ValueError, TypeError) as e:
-                raise ValueError(
-                    f" '{v}' is not valid. Value must be 1, 2, or 3."
-                ) from e
-        return v
-
-    @field_validator("oscillation", "targetresolution", mode="before")
-    @classmethod
-    def positive_float_validator(cls, v):
-        if v is not None:
-            try:
-                v = float(v)
-                if v <= 0:
-                    raise ValueError(
-                        f" '{v}' is not valid. Value must be a positive float."
-                    )
-            except (ValueError, TypeError) as e:
-                raise ValueError(
-                    f" '{v}' is not valid. Value must be a positive float."
-                ) from e
-        return v
-
-    @field_validator("exposure", mode="before")
-    @classmethod
-    def exposure_in_range(cls, v):
-        if v is not None:
-            try:
-                v = float(v)
-                if not (0 <= v <= 1):
-                    raise ValueError(
-                        f" '{v}' is not valid. Value must be a float between 0 and 1."
-                    )
-            except (ValueError, TypeError) as e:
-                raise ValueError(
-                    f" '{v}' is not valid. Value must be a float between 0 and 1."
-                ) from e
-        return v
-
-    @field_validator("totalrange", mode="before")
-    @classmethod
-    def totalrange_in_range(cls, v):
-        if v is not None:
-            try:
-                v = int(v)
-                if not (0 <= v <= 360):
-                    raise ValueError(
-                        f" '{v}' is not valid."
-                        f"Value must be an integer between 0 and 360."
-                    )
-            except (ValueError, TypeError) as e:
-                raise ValueError(
-                    f" '{v}' is not valid."
-                    f"Value must be an integer between 0 and 360."
-                ) from e
-        return v
-
-    @field_validator("transmission", mode="before")
-    @classmethod
-    def transmission_fraction(cls, v):
-        if v is not None:
-            try:
-                v = int(v)
-                if not (0 <= v <= 100):
-                    raise ValueError(
-                        f" '{v}' is not valid."
-                        f"Value must be an integer between 0 and 100."
-                    )
-            except (ValueError, TypeError) as e:
-                raise ValueError(
-                    f" '{v}' is not valid."
-                    f"Value must be an integer between 0 and 100."
-                ) from e
-        return v
-
-    @field_validator("datacollectiontype", mode="before")
-    @classmethod
-    def datacollectiontype_allowed(cls, v):
-        allowed = {"standard"}  # Other types of data collection might be added later
-        if v and v.lower() not in allowed:
-            raise ValueError(f" '{v}' is not valid." f"Value must be one of {allowed}.")
-        return v
-
-    @field_validator("processingpipeline", mode="before")
-    @classmethod
-    def processingpipeline_allowed(cls, v):
-        allowed = {"gopy", "autoproc", "xia2dials"}
-        if v and v.lower() not in allowed:
-            raise ValueError(f" '{v}' is not valid." f"Value must be one of {allowed}.")
-        return v
-
-    @field_validator("spacegroupnumber", mode="before")
-    @classmethod
-    def spacegroupnumber_allowed(cls, v):
-        if v is not None:
-            try:
-                v = int(v)
-                if not (1 <= v <= 230):
-                    raise ValueError(
-                        f" '{v}' is not valid."
-                        f"Value must be an integer between 1 and 230."
-                    )
-            except (ValueError, TypeError) as e:
-                raise ValueError(
-                    f" '{v}' is not valid."
-                    f"Value must be an integer between 1 and 230."
-                ) from e
-        return v
-
-    @field_validator("cellparameters", mode="before")
-    @classmethod
-    def cellparameters_format(cls, v):
-        if v:
-            values = [float(i) for i in v.split(",")]
-            if len(values) != 6 or any(val <= 0 for val in values):
-                raise ValueError(
-                    f" '{v}' is not valid."
-                    f"Value must be a set of six positive floats or integers."
-                )
-        return v
-
-    # @field_validator("rescutkey", "rescutvalue", mode="before")
-    # @classmethod
-    # def rescutkey_value_pair(cls, values):
-    #     rescutkey = values.get("rescutkey")
-    #     rescutvalue = values.get("rescutvalue")
-    #     if rescutkey and rescutvalue:
-    #         if rescutkey not in {"is", "cchalf"}:
-    #             raise ValueError("Rescutkey must be either 'is' or 'cchalf'")
-    #         if not isinstance(rescutvalue, float) or rescutvalue <= 0:
-    #             raise ValueError(
-    #                 "Rescutvalue must be a positive float if rescutkey is provided"
-    #             )
-    #     return values
-
-    @field_validator("trustedhigh", mode="before")
-    @classmethod
-    def trustedhigh_allowed(cls, v):
-        if v is not None:
-            try:
-                v = float(v)
-                if not (0 <= v <= 2.0):
-                    raise ValueError(
-                        f" '{v}' is not valid."
-                        f"Value must be a float between 0 and 2.0."
-                    )
-            except (ValueError, TypeError) as e:
-                raise ValueError(
-                    f" '{v}' is not valid." f"Value must be a float between 0 and 2.0."
-                ) from e
-        return v
-
-    @field_validator("chiphiangles", mode="before")
-    @classmethod
-    def chiphiangles_allowed(cls, v):
-        if v is not None:
-            try:
-                v = float(v)
-                if not (0 <= v <= 30):
-                    raise ValueError(
-                        f" '{v}' is not valid."
-                        f"Value must be a float between 0 and 30."
-                    )
-            except (ValueError, TypeError) as e:
-                raise ValueError(
-                    f" '{v}' is not valid. Value must be a float between 0 and 30."
-                ) from e
-        return v
-
-    @field_validator("dose", mode="before")
-    @classmethod
-    def dose_positive(cls, v):
-        if v is not None:
-            try:
-                v = float(v)
-                if v <= 0:
-                    raise ValueError(
-                        f" '{v}' is not valid. Value must be a positive float."
-                    )
-            except (ValueError, TypeError) as e:
-                raise ValueError(
-                    f" '{v}' is not valid. Value must be a positive float."
-                ) from e
-        return v
-
 # class TELLModel(SpreadsheetModel):
 #     input_order: int
 #     samplemountcount: int = 0
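To see what the refactor means for callers, a minimal sketch of validating one record against the new nested shape — not part of the patch; the field values are illustrative:

from app.sample_models import SpreadsheetModel

record = {
    "dewarname": "DEWAR001",
    "puckname": "PUCK-1",
    "pucktype": "unipuck",
    "crystalname": "xtal-1",
    "positioninpuck": 1,
    "priority": 1,
    "comments": None,
    "proteinname": "Lysozyme",
    # Everything acquisition-related now lives in the nested model:
    "data_collection_parameters": {"exposure": "0.5", "directory": None},
}

validated = SpreadsheetModel(**record)
# The nested validator assigns the default directory when none is given:
assert validated.data_collection_parameters.directory == "{sgPuck}/{sgPosition}"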
f"Value must be one of {allowed}.") - return v - - @field_validator("spacegroupnumber", mode="before") - @classmethod - def spacegroupnumber_allowed(cls, v): - if v is not None: - try: - v = int(v) - if not (1 <= v <= 230): - raise ValueError( - f" '{v}' is not valid." - f"Value must be an integer between 1 and 230." - ) - except (ValueError, TypeError) as e: - raise ValueError( - f" '{v}' is not valid." - f"Value must be an integer between 1 and 230." - ) from e - return v - - @field_validator("cellparameters", mode="before") - @classmethod - def cellparameters_format(cls, v): - if v: - values = [float(i) for i in v.split(",")] - if len(values) != 6 or any(val <= 0 for val in values): - raise ValueError( - f" '{v}' is not valid." - f"Value must be a set of six positive floats or integers." - ) - return v - - # @field_validator("rescutkey", "rescutvalue", mode="before") - # @classmethod - # def rescutkey_value_pair(cls, values): - # rescutkey = values.get("rescutkey") - # rescutvalue = values.get("rescutvalue") - # if rescutkey and rescutvalue: - # if rescutkey not in {"is", "cchalf"}: - # raise ValueError("Rescutkey must be either 'is' or 'cchalf'") - # if not isinstance(rescutvalue, float) or rescutvalue <= 0: - # raise ValueError( - # "Rescutvalue must be a positive float if rescutkey is provided" - # ) - # return values - - @field_validator("trustedhigh", mode="before") - @classmethod - def trustedhigh_allowed(cls, v): - if v is not None: - try: - v = float(v) - if not (0 <= v <= 2.0): - raise ValueError( - f" '{v}' is not valid." - f"Value must be a float between 0 and 2.0." - ) - except (ValueError, TypeError) as e: - raise ValueError( - f" '{v}' is not valid." f"Value must be a float between 0 and 2.0." - ) from e - return v - - @field_validator("chiphiangles", mode="before") - @classmethod - def chiphiangles_allowed(cls, v): - if v is not None: - try: - v = float(v) - if not (0 <= v <= 30): - raise ValueError( - f" '{v}' is not valid." - f"Value must be a float between 0 and 30." - ) - except (ValueError, TypeError) as e: - raise ValueError( - f" '{v}' is not valid. Value must be a float between 0 and 30." - ) from e - return v - - @field_validator("dose", mode="before") - @classmethod - def dose_positive(cls, v): - if v is not None: - try: - v = float(v) - if v <= 0: - raise ValueError( - f" '{v}' is not valid. Value must be a positive float." - ) - except (ValueError, TypeError) as e: - raise ValueError( - f" '{v}' is not valid. Value must be a positive float." 
- ) from e - return v - # class TELLModel(SpreadsheetModel): # input_order: int # samplemountcount: int = 0 diff --git a/backend/app/schemas.py b/backend/app/schemas.py index c4da527..64429d7 100644 --- a/backend/app/schemas.py +++ b/backend/app/schemas.py @@ -1,7 +1,12 @@ from typing import List, Optional from datetime import datetime -from pydantic import BaseModel, EmailStr, constr, Field +from pydantic import BaseModel, EmailStr, constr, Field, field_validator from datetime import date +import logging +import re + +logging.basicConfig(level=logging.DEBUG) +logger = logging.getLogger(__name__) class loginToken(BaseModel): @@ -47,37 +52,299 @@ class DewarSerialNumber(DewarSerialNumberBase): class DataCollectionParameters(BaseModel): - priority: Optional[int] = None - comments: Optional[str] = None directory: Optional[str] = None - proteinname: Optional[str] = None - oscillation: Optional[float] = None - aperture: Optional[str] = None - exposure: Optional[float] = None - totalrange: Optional[int] = None - transmission: Optional[int] = None - dose: Optional[float] = None - targetresolution: Optional[float] = None - datacollectiontype: Optional[str] = None - processingpipeline: Optional[str] = None - spacegroupnumber: Optional[int] = None - cellparameters: Optional[str] = None - rescutkey: Optional[str] = None - rescutvalue: Optional[float] = None + oscillation: Optional[float] = None # Only accept positive float + exposure: Optional[float] = None # Only accept positive floats between 0 and 1 + totalrange: Optional[int] = None # Only accept positive integers between 0 and 360 + transmission: Optional[ + int + ] = None # Only accept positive integers between 0 and 100 + targetresolution: Optional[float] = None # Only accept positive float + aperture: Optional[str] = None # Optional string field + datacollectiontype: Optional[ + str + ] = None # Only accept "standard", other types might be added later + processingpipeline: Optional[ + str + ] = "" # Only accept "gopy", "autoproc", "xia2dials" + spacegroupnumber: Optional[ + int + ] = None # Only accept positive integers between 1 and 230 + cellparameters: Optional[ + str + ] = None # Must be a set of six positive floats or integers + rescutkey: Optional[str] = None # Only accept "is" or "cchalf" + rescutvalue: Optional[ + float + ] = None # Must be a positive float if rescutkey is provided userresolution: Optional[float] = None - pdbid: Optional[str] = None + pdbid: Optional[ + str + ] = "" # Accepts either the format of the protein data bank code or {provided} autoprocfull: Optional[bool] = None procfull: Optional[bool] = None adpenabled: Optional[bool] = None noano: Optional[bool] = None ffcscampaign: Optional[bool] = None - trustedhigh: Optional[float] = None - autoprocextraparams: Optional[str] = None - chiphiangles: Optional[float] = None + trustedhigh: Optional[float] = None # Should be a float between 0 and 2.0 + autoprocextraparams: Optional[str] = None # Optional string field + chiphiangles: Optional[float] = None # Optional float field between 0 and 30 + dose: Optional[float] = None # Optional float field class Config: from_attributes = True + @field_validator("directory", mode="after") + @classmethod + def directory_characters(cls, v): + logger.debug(f"Validating 'directory' field with initial value: {repr(v)}") + + # Default directory value if empty + if not v: # Handles None or empty cases + default_value = "{sgPuck}/{sgPosition}" + logger.warning( + f"'directory' field is empty or None. 
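The normalization above is easiest to read off a few examples — a hedged sketch, values illustrative:

from pydantic import ValidationError
from app.schemas import DataCollectionParameters

# Empty -> default macro path
assert DataCollectionParameters(directory=None).directory == "{sgPuck}/{sgPosition}"

# Surrounding slashes trimmed, spaces replaced, macros accepted
fixed = DataCollectionParameters(directory="/my dir/{sgPuck}/").directory
assert fixed == "my_dir/{sgPuck}"

# Disallowed characters still fail validation
try:
    DataCollectionParameters(directory="bad|value")
except ValidationError as e:
    print(e.errors()[0]["msg"])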
+    @field_validator("aperture", mode="before")
+    @classmethod
+    def aperture_selection(cls, v):
+        if v is not None:
+            try:
+                v = int(float(v))
+                if v not in {1, 2, 3}:
+                    raise ValueError(f" '{v}' is not valid. Value must be 1, 2, or 3.")
+            except (ValueError, TypeError) as e:
+                raise ValueError(
+                    f" '{v}' is not valid. Value must be 1, 2, or 3."
+                ) from e
+        return v
+
+    @field_validator("oscillation", "targetresolution", mode="before")
+    @classmethod
+    def positive_float_validator(cls, v):
+        logger.debug(f"Running positive_float_validator for value: {v}")
+        if v is not None:
+            try:
+                v = float(v)
+                if v <= 0:
+                    logger.error(f"Validation failed: '{v}' is not greater than 0.")
+                    raise ValueError(
+                        f"'{v}' is not valid. Value must be a positive float."
+                    )
+            except (ValueError, TypeError) as e:
+                logger.error(f"Validation failed: '{v}' caused error {str(e)}")
+                raise ValueError(
+                    f"'{v}' is not valid. Value must be a positive float."
+                ) from e
+        logger.debug(f"Validation succeeded for value: {v}")
+        return v
+
+    @field_validator("exposure", mode="before")
+    @classmethod
+    def exposure_in_range(cls, v):
+        if v is not None:
+            try:
+                v = float(v)
+                if not (0 <= v <= 1):
+                    raise ValueError(
+                        f" '{v}' is not valid. Value must be a float between 0 and 1."
+                    )
+            except (ValueError, TypeError) as e:
+                raise ValueError(
+                    f" '{v}' is not valid. Value must be a float between 0 and 1."
+                ) from e
+        return v
+
+    @field_validator("totalrange", mode="before")
+    @classmethod
+    def totalrange_in_range(cls, v):
+        if v is not None:
+            try:
+                v = int(v)
+                if not (0 <= v <= 360):
+                    raise ValueError(
+                        f" '{v}' is not valid. "
+                        f"Value must be an integer between 0 and 360."
+                    )
+            except (ValueError, TypeError) as e:
+                raise ValueError(
+                    f" '{v}' is not valid. "
+                    f"Value must be an integer between 0 and 360."
+                ) from e
+        return v
+
+    @field_validator("transmission", mode="before")
+    @classmethod
+    def transmission_fraction(cls, v):
+        if v is not None:
+            try:
+                v = int(v)
+                if not (0 <= v <= 100):
+                    raise ValueError(
+                        f" '{v}' is not valid. "
+                        f"Value must be an integer between 0 and 100."
+                    )
+            except (ValueError, TypeError) as e:
+                raise ValueError(
+                    f" '{v}' is not valid. "
+                    f"Value must be an integer between 0 and 100."
+                ) from e
+        return v
+
+    @field_validator("datacollectiontype", mode="before")
+    @classmethod
+    def datacollectiontype_allowed(cls, v):
+        allowed = {"standard"}  # Other types of data collection might be added later
+        if v and v.lower() not in allowed:
+            raise ValueError(f" '{v}' is not valid. Value must be one of {allowed}.")
+        return v
+
+    @field_validator("processingpipeline", mode="before")
+    @classmethod
+    def processingpipeline_allowed(cls, v):
+        allowed = {"gopy", "autoproc", "xia2dials"}
+        if v and v.lower() not in allowed:
+            raise ValueError(f" '{v}' is not valid. Value must be one of {allowed}.")
+        return v
+
+    @field_validator("spacegroupnumber", mode="before")
+    @classmethod
+    def spacegroupnumber_allowed(cls, v):
+        if v is not None:
+            try:
+                v = int(v)
+                if not (1 <= v <= 230):
+                    raise ValueError(
+                        f" '{v}' is not valid. "
+                        f"Value must be an integer between 1 and 230."
+                    )
+            except (ValueError, TypeError) as e:
+                raise ValueError(
+                    f" '{v}' is not valid. "
+                    f"Value must be an integer between 1 and 230."
+                ) from e
+        return v
+
+    @field_validator("cellparameters", mode="before")
+    @classmethod
+    def cellparameters_format(cls, v):
+        if v:
+            values = [float(i) for i in v.split(",")]
+            if len(values) != 6 or any(val <= 0 for val in values):
+                raise ValueError(
+                    f" '{v}' is not valid. "
+                    f"Value must be a set of six positive floats or integers."
+                )
+        return v
+
+    # @field_validator("rescutkey", "rescutvalue", mode="before")
+    # @classmethod
+    # def rescutkey_value_pair(cls, values):
+    #     rescutkey = values.get("rescutkey")
+    #     rescutvalue = values.get("rescutvalue")
+    #     if rescutkey and rescutvalue:
+    #         if rescutkey not in {"is", "cchalf"}:
+    #             raise ValueError("Rescutkey must be either 'is' or 'cchalf'")
+    #         if not isinstance(rescutvalue, float) or rescutvalue <= 0:
+    #             raise ValueError(
+    #                 "Rescutvalue must be a positive float if rescutkey is provided"
+    #             )
+    #     return values
+
+    @field_validator("trustedhigh", mode="before")
+    @classmethod
+    def trustedhigh_allowed(cls, v):
+        if v is not None:
+            try:
+                v = float(v)
+                if not (0 <= v <= 2.0):
+                    raise ValueError(
+                        f" '{v}' is not valid. "
+                        f"Value must be a float between 0 and 2.0."
+                    )
+            except (ValueError, TypeError) as e:
+                raise ValueError(
+                    f" '{v}' is not valid. "
+                    f"Value must be a float between 0 and 2.0."
+                ) from e
+        return v
+
+    @field_validator("chiphiangles", mode="before")
+    @classmethod
+    def chiphiangles_allowed(cls, v):
+        if v is not None:
+            try:
+                v = float(v)
+                if not (0 <= v <= 30):
+                    raise ValueError(
+                        f" '{v}' is not valid. "
+                        f"Value must be a float between 0 and 30."
+                    )
+            except (ValueError, TypeError) as e:
+                raise ValueError(
+                    f" '{v}' is not valid. Value must be a float between 0 and 30."
+                ) from e
+        return v
+
+    @field_validator("dose", mode="before")
+    @classmethod
+    def dose_positive(cls, v):
+        if v is not None:
+            try:
+                v = float(v)
+                if v <= 0:
+                    raise ValueError(
+                        f" '{v}' is not valid. Value must be a positive float."
+                    )
+            except (ValueError, TypeError) as e:
+                raise ValueError(
+                    f" '{v}' is not valid. Value must be a positive float."
+                ) from e
+        return v
+
 
 class SampleEventCreate(BaseModel):
     event_type: str
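Since these validators run in mode="before", they accept the raw strings that come out of a spreadsheet cell and coerce them before the type check — a small sketch with illustrative values, not from the patch:

from pydantic import ValidationError
from app.schemas import DataCollectionParameters

params = DataCollectionParameters(
    exposure="0.8",                           # str -> float, must be in 0..1
    totalrange="180",                         # str -> int, must be in 0..360
    cellparameters="78.4,78.4,37.8,90,90,90"  # six positive numbers
)
print(params.exposure, params.totalrange)  # 0.8 180

try:
    DataCollectionParameters(exposure=1.5)  # outside the 0..1 range
except ValidationError as e:
    print(e.errors()[0]["msg"])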
diff --git a/backend/app/services/spreadsheet_service.py b/backend/app/services/spreadsheet_service.py
index 3cc6936..b107faf 100644
--- a/backend/app/services/spreadsheet_service.py
+++ b/backend/app/services/spreadsheet_service.py
@@ -18,6 +18,49 @@ class SampleSpreadsheetImporter:
         self.filename = None
         self.model = None
 
+    def get_expected_type(self, column_name: str) -> type:
+        """
+        Returns the expected data type for a given column name.
+        """
+        # Define a mapping of column names to expected types
+        column_type_mapping = {
+            "dewarname": str,
+            "puckname": str,
+            "pucktype": str,
+            "crystalname": str,
+            "positioninpuck": int,
+            "priority": int,
+            "comments": str,
+            "proteinname": str,
+            "directory": str,
+            "oscillation": float,
+            "exposure": float,
+            "totalrange": int,
+            "transmission": int,
+            "targetresolution": float,
+            "aperture": str,
+            "datacollectiontype": str,
+            "processingpipeline": str,
+            "spacegroupnumber": int,
+            "cellparameters": str,
+            "rescutkey": str,
+            "rescutvalue": float,
+            "userresolution": float,
+            "pdbid": str,
+            "autoprocfull": bool,
+            "procfull": bool,
+            "adpenabled": bool,
+            "noano": bool,
+            "ffcscampaign": bool,
+            "trustedhigh": float,
+            "autoprocextraparams": str,
+            "chiphiangles": float,
+            "dose": float,
+        }
+
+        # Return the mapped type if the column exists, else default to str
+        return column_type_mapping.get(column_name, str)
+
     def _clean_value(self, value, expected_type=None):
         """Clean value by converting it to the expected type and handle edge cases."""
         if value is None:
@@ -139,13 +182,13 @@ class SampleSpreadsheetImporter:
                 continue
 
             # Record raw data for later use
-            raw_data.append({"row_num": index + 4, "data": row})
+            raw_data.append({"row_num": index + 4, "data": list(row)})
 
-            # Pad the row to ensure it has the expected number of columns
+            # Ensure row has the expected number of columns
             if len(row) < expected_columns:
                 row = list(row) + [None] * (expected_columns - len(row))
 
-            # Prepare the record with the cleaned values
+            # Prepare the record with cleaned values
             record = {
                 "dewarname": self._clean_value(row[0], str),
                 "puckname": self._clean_value(row[1], str),
@@ -154,8 +197,10 @@ class SampleSpreadsheetImporter:
                 "positioninpuck": self._clean_value(row[4], int),
                 "priority": self._clean_value(row[5], int),
                 "comments": self._clean_value(row[6], str),
-                "directory": self._clean_value(row[7], str),
                 "proteinname": self._clean_value(row[8], str),
+            }
+            record["data_collection_parameters"] = {
+                "directory": self._clean_value(row[7], str),
                 "oscillation": self._clean_value(row[9], float),
                 "aperture": self._clean_value(row[10], str),
                 "exposure": self._clean_value(row[11], float),
@@ -182,69 +227,45 @@ class SampleSpreadsheetImporter:
             }
 
             try:
+                # Validate the record
                 validated_record = SpreadsheetModel(**record)
-                # Update the raw data with assigned default values
-                if (
-                    validated_record.directory == "{sgPuck}/{sgPosition}"
-                    and row[7] is None
-                ):
-                    row_list = list(row)
-                    row_list[
-                        7
-                    ] = validated_record.directory  # Set the field to the default value
-                    raw_data[-1]["data"] = row_list
-                    raw_data[-1][
-                        "default_set"
-                    ] = True  # Mark this row as having a default value assigned
+
+                # Get the corrected `directory`
+                corrected_directory = (
+                    validated_record.data_collection_parameters.directory
+                )
+
+                # Update `raw_data` to reflect the corrected value
+                raw_data[-1]["data"][
+                    7
+                ] = corrected_directory  # Replace directory in raw data
+                raw_data[-1][
+                    "directory"
+                ] = corrected_directory  # Add a top-level "directory" key
+                raw_data[-1]["default_set"] = (
+                    corrected_directory == "{sgPuck}/{sgPosition}"
+                )
+
+                # Add validated record to the model
                 model.append(validated_record)
-                logger.debug(f"Row {index + 4} processed and validated successfully")
+
             except ValidationError as e:
                 logger.error(f"Validation error in row {index + 4}: {e}")
                 for error in e.errors():
-                    field = error["loc"][0]
+                    field_path = error["loc"]
                     msg = error["msg"]
-                    # Map field name (which is the key in `record`) to its index in the
-                    # row
-                    field_to_col = {
-                        "dewarname": 0,
-                        "puckname": 1,
-                        "pucktype": 2,
-                        "crystalname": 3,
-                        "positioninpuck": 4,
-                        "priority": 5,
-                        "comments": 6,
-                        "directory": 7,
-                        "proteinname": 8,
-                        "oscillation": 9,
-                        "aperture": 10,
-                        "exposure": 11,
-                        "totalrange": 12,
-                        "transmission": 13,
-                        "dose": 14,
-                        "targetresolution": 15,
-                        "datacollectiontype": 16,
-                        "processingpipeline": 17,
-                        "spacegroupnumber": 18,
-                        "cellparameters": 19,
-                        "rescutkey": 20,
-                        "rescutvalue": 21,
-                        "userresolution": 22,
-                        "pdbid": 23,
-                        "autoprocfull": 24,
-                        "procfull": 25,
-                        "adpenabled": 26,
-                        "noano": 27,
-                        "ffcscampaign": 28,
-                        "trustedhigh": 29,
-                        "autoprocextraparams": 30,
-                        "chiphiangles": 31,
-                    }
-                    column_index = field_to_col[field]
+
+                    if field_path[0] == "data_collection_parameters":
+                        subfield = field_path[1]
+                        column_index = headers.index(subfield)
+                    else:
+                        field = field_path[0]
+                        column_index = headers.index(field)
+
                     error_info = {
                         "row": index + 4,
                         "cell": column_index,
-                        "value": row[column_index],  # Value that caused the error
+                        "value": row[column_index],
                         "message": msg,
                     }
                     errors.append(error_info)
diff --git a/frontend/src/components/SpreadsheetTable.tsx b/frontend/src/components/SpreadsheetTable.tsx
index 7d57e48..f7d0b0c 100644
--- a/frontend/src/components/SpreadsheetTable.tsx
+++ b/frontend/src/components/SpreadsheetTable.tsx
@@ -79,16 +79,17 @@ const SpreadsheetTable = ({
   const errorMap = generateErrorMap(localErrors);
 
   useEffect(() => {
-    const initialNonEditableCells = new Set();
+    const updatedNonEditableCells = new Set();
     raw_data.forEach((row, rowIndex) => {
       headers.forEach((_, colIndex) => {
         const key = `${row.row_num}-${headers[colIndex]}`;
         if (!errorMap.has(key)) {
-          initialNonEditableCells.add(`${rowIndex}-${colIndex}`);
+          updatedNonEditableCells.add(`${rowIndex}-${colIndex}`);
         }
       });
     });
-    setNonEditableCells(initialNonEditableCells);
+    setNonEditableCells(updatedNonEditableCells);
+    console.log("Recalculated nonEditableCells:", updatedNonEditableCells);
   }, [raw_data, headers, errorMap]);
 
   const handleCellEdit = async (rowIndex, colIndex) => {
@@ -97,14 +98,15 @@ const SpreadsheetTable = ({
     const currentRow = updatedRawData[rowIndex];
     const newValue = editingCell[`${rowIndex}-${colIndex}`];
 
-    if (newValue === undefined) return;
+    if (newValue === undefined) return; // Ensure value is provided
 
+    // Prepare for validation request
     if (!currentRow.data) {
       currentRow.data = [];
     }
-    currentRow.data[colIndex] = newValue;
 
+    // Reset editing state
     setEditingCell((prev) => {
       const updated = { ...prev };
       delete updated[`${rowIndex}-${colIndex}`];
       return updated;
     });
@@ -115,27 +117,46 @@ const SpreadsheetTable = ({
       const response = await SpreadsheetService.validateCellValidateCellPost({
         row: currentRow.row_num,
         column: columnName,
-        value: newValue
+        value: newValue,
       });
 
-      if (response.is_valid !== undefined) {
+      if (response && response.is_valid !== undefined) {
         if (response.is_valid) {
+          // Handle validation success (remove error)
+          const correctedValue = response.corrected_value ?? newValue;
+          currentRow.data[colIndex] = correctedValue;
+          updatedRawData[rowIndex] = currentRow;
+
+          setRawData(updatedRawData); // Update table data
+
+          // Remove error associated with this cell
           const updatedErrors = localErrors.filter(
             (error) => !(error.row === currentRow.row_num && error.cell === colIndex)
           );
           setLocalErrors(updatedErrors);
+
+          // Update non-editable state
           setNonEditableCells((prev) => new Set([...prev, `${rowIndex}-${colIndex}`]));
         } else {
+          // Handle validation failure (add error)
+          const errorMessage = response.message || "Invalid value.";
+          const newError = {
+            row: currentRow.row_num,
+            cell: colIndex,
+            message: errorMessage,
+          };
+
           const updatedErrors = [
-            ...localErrors,
-            { row: currentRow.row_num, cell: colIndex, message: response.message || 'Invalid value.' }
+            ...localErrors.filter(
+              (error) => !(error.row === newError.row && error.cell === newError.cell)
+            ), // Avoid duplicates
+            newError,
           ];
           setLocalErrors(updatedErrors);
         }
+      } else {
+        console.error("Unexpected response from backend:", response);
       }
-      setRawData(updatedRawData);
     } catch (error) {
-      console.error('Validation failed:', error);
+      console.error("Validation request failed:", error);
     }
   };
 
@@ -431,46 +452,56 @@ const SpreadsheetTable = ({
                 {headers.map((header, colIndex) => {
                   const key = `${row.row_num}-${header}`;
                   const errorMessage = errorMap.get(key);
-                  const isInvalid = !!errorMessage;
-                  const cellValue = (row.data && row.data[colIndex]) || "";
+                  const isInvalid = !!errorMap.get(`${row.row_num}-${headers[colIndex]}`);
+                  const cellValue = row.data[colIndex];
                   const editingValue = editingCell[`${rowIndex}-${colIndex}`];
-                  const isReadonly = !isInvalid;
-
-                  const isDefaultAssigned = colIndex === 7 && row.default_set; // Directory column (index 7) and marked as default_set
-
+                  const isReadonly = !isInvalid && editingCell[`${rowIndex}-${colIndex}`] === undefined;
+                  const isCorrected = colIndex === 7 && row.corrected; // Corrected field exists and is true
+                  const isDefaultAssigned = colIndex === 7 && row.default_set; // Default-assigned field exists and is true
 
                   return (
-                    <TableCell key={colIndex}>
+                    <TableCell
+                      key={colIndex}
+                      sx={{
+                        backgroundColor: isCorrected
+                          ? "warning.light" // highlight corrected values
+                          : isDefaultAssigned
+                          ? "info.light" // highlight default-assigned values
+                          : "inherit",
+                      }}
+                    >
                       {isInvalid ? (
                         <TextField
                           value={editingValue !== undefined ? editingValue : cellValue}
                           onChange={(e) =>
                             setEditingCell({
                               ...editingCell,
                               [`${rowIndex}-${colIndex}`]: e.target.value,
                             })
                           }
-                          onKeyDown={(e) => {
-                            if (e.key === "Enter") {
-                              handleCellEdit(rowIndex, colIndex);
-                            }
-                          }}
                           onBlur={() => handleCellBlur(rowIndex, colIndex)}
                           error={isInvalid}
                           fullWidth
                           variant="outlined"
                           size="small"
-                          disabled={isReadonly}
+                          disabled={!isInvalid}
                         />
                       ) : (
-                        cellValue
+                        cellValue // This should reflect the updated 'raw_data'
                       )}
                     </TableCell>
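Finally, the two row flags the table colors by can be derived in isolation — a minimal sketch of the same per-row logic upload_file applies, with illustrative values:

from app.schemas import DataCollectionParameters

raw_value = "my dir/{sgPuck}"
corrected_value = DataCollectionParameters(directory=raw_value).directory

row_flags = {
    "corrected": corrected_value != raw_value,                  # True: space -> "_"
    "default_set": corrected_value == "{sgPuck}/{sgPosition}",  # False here
}
print(row_flags)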