Refactor spreadsheet processing to improve validation logic

Enhanced value cleaning and validation for spreadsheet data: rows are padded to the header length and every column is cleaned and corrected individually, rather than only the directory field. Improved user feedback with detailed per-column error messages and visual indicators for corrected or defaulted values. Simplified backend and frontend logic for better maintainability and usability.
GotthardG
2025-01-13 21:55:15 +01:00
parent f855930340
commit f6c19cc4da
5 changed files with 221 additions and 110 deletions
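
The diff below calls two importer helpers, get_expected_type and _clean_value, whose implementations are not shown in this hunk. As a rough sketch of what they might look like — an assumption for orientation only, not the actual code in this changeset:

    # Hypothetical sketch of the importer helpers used in the diff below.
    # The real implementations live elsewhere in the changeset and may differ.
    class SpreadsheetImporter:
        # Assumed mapping of column names to expected Python types.
        expected_types = {
            "priority": int,
            "directory": str,
            "comments": str,
        }

        def get_expected_type(self, col_name: str) -> type:
            # Fall back to str for columns without a declared type.
            return self.expected_types.get(col_name, str)

        def _clean_value(self, value, expected_type, col_name):
            # Normalize whitespace, coerce to the expected type, and apply
            # column-specific defaults; raise on input that cannot be repaired.
            if value is None or (isinstance(value, str) and not value.strip()):
                if col_name == "directory":
                    # Default template seen in the old code below.
                    return "{sgPuck}/{sgPosition}"
                raise ValueError(f"Missing value for column '{col_name}'")
            if isinstance(value, str):
                value = value.strip()
            try:
                return expected_type(value)
            except (ValueError, TypeError):
                raise ValueError(
                    f"Expected {expected_type.__name__} for '{col_name}', "
                    f"got {value!r}"
                )

The contract the new loop relies on is only that _clean_value returns a possibly-corrected value and raises ValueError or TypeError on input it cannot repair.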


@@ -40,7 +40,6 @@ async def download_template():
 @router.post("/upload", response_model=SpreadsheetResponse)
 async def upload_file(file: UploadFile = File(...)):
     """Process the uploaded spreadsheet and return validation results."""
-    from app.schemas import DataCollectionParameters
     try:
         logger.info(f"Received file: {file.filename}")
@@ -70,25 +69,92 @@ async def upload_file(file: UploadFile = File(...)):
         # Construct the response model with the processed data
         # Update raw_data with corrected directory values
         # Iterate through raw_data rows
         updated_raw_data = []
-        for row in raw_data:
-            directory_value = row.get("directory") or row["data"][7]
-            try:
-                corrected_directory = DataCollectionParameters(
-                    directory=directory_value
-                ).directory
-                corrected = (
-                    directory_value != corrected_directory
-                )  # Check if a correction was made
-                row["data"][7] = corrected_directory
-                row["default_set"] = corrected_directory == "{sgPuck}/{sgPosition}"
-                row["corrected"] = corrected  # Mark the row as corrected or not
-                updated_raw_data.append(row)
-            except ValidationError as e:
-                logger.error(
-                    f"[Row {row['row_num']}] Error validating directory: {e.errors()}"
+        for row in raw_data:
+            corrected = False  # Tracks changes made in this row.
+            corrected_columns = []  # Stores names of columns corrected.
+            default_set = row.get("default_set", False)
+
+            # Ensure raw data rows are padded to match the headers length.
+            if len(row["data"]) < len(headers):
+                padding_length = len(headers) - len(row["data"])
+                logger.info(
+                    f"Padding row {row.get('row_num')} with "
+                    f"{padding_length} None values."
+                )
+                row["data"].extend([None] * padding_length)
+
+            # Validate data and apply corrections column by column.
+            for col_index, col_name in enumerate(headers):
+                original_value = row["data"][col_index]
+                expected_type = importer.get_expected_type(col_name)
+                try:
+                    # Pass col_name explicitly to _clean_value
+                    cleaned_value = importer._clean_value(
+                        original_value, expected_type, col_name
+                    )
+                    # Check if a correction was applied
+                    if cleaned_value != original_value:
+                        corrected = True
+                        corrected_columns.append(col_name)
+                        # Update "directory" metadata explicitly, if applicable
+                        if col_name == "directory":
+                            row["directory"] = cleaned_value
+                        # Update the raw data with the corrected value
+                        row["data"][col_index] = cleaned_value
+                        # Log the correction
+                        logger.info(
+                            f"Corrected field '{col_name}' in row {row['row_num']}: "
+                            f"Original='{original_value}', Corrected='{cleaned_value}'"
+                        )
+                except (ValueError, TypeError) as e:
+                    # Handle and log validation errors specific to this column
+                    logger.error(
+                        f"Validation failed for row "
+                        f"{row['row_num']}, column '{col_name}': "
+                        f"{str(e)}"
+                    )
+                    errors.append(
+                        {
+                            "row": row["row_num"],
+                            "column": col_name,
+                            "value": original_value,
+                            "message": str(e),
+                        }
+                    )
+
+            # Special case: Check and handle if "directory" was auto-corrected.
+            if (
+                row.get("directory")
+                and len(row["data"]) > 7
+                and row["data"][7] != row["directory"]
+            ):
+                corrected = True
+                corrected_columns.append("directory")
+                row["data"][7] = row["directory"]
+
+            # Add correction metadata to the row if changes were made.
+            if corrected:
+                row["corrected"] = True
+                row["corrected_columns"] = corrected_columns
+            row["default_set"] = default_set
+
+            # Add the processed row to the updated data list.
+            updated_raw_data.append(row)
+
+        logger.info(
+            "Processing completed. "
+            f"Total rows processed: {len(raw_data)}, "
+            f"Rows corrected: {sum(1 for r in updated_raw_data if r.get('corrected'))}"
+        )
+
         response_data = SpreadsheetResponse(
             data=validated_model,
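
For reference, each processed row ends up carrying the correction metadata that the frontend presumably reads for its corrected/defaulted indicators. A sketch of one row's shape, with illustrative values rather than data from a real upload:

    # Illustrative shape of one processed row (values are made up).
    row = {
        "row_num": 4,
        "data": [...],  # cleaned cell values; index 7 mirrors "directory"
        "directory": "{sgPuck}/{sgPosition}",
        "corrected": True,                    # at least one cell was changed
        "corrected_columns": ["directory"],   # which columns were changed
        "default_set": True,  # directory fell back to the default template
    }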