now with the heidi v1 spreadsheet validator

This commit is contained in:
GotthardG
2024-11-05 23:13:36 +01:00
parent 376352672f
commit 91468da9ed
7 changed files with 195 additions and 229 deletions

View File

@ -3,7 +3,7 @@
from fastapi import FastAPI
from fastapi.middleware.cors import CORSMiddleware
from app.routers import address, contact, proposal, dewar, shipment, upload, puck, spreadsheet
from app.routers import address, contact, proposal, dewar, shipment, puck, spreadsheet
from app.database import Base, engine, SessionLocal, load_sample_data
app = FastAPI()
@ -37,7 +37,6 @@ app.include_router(address.router, prefix="/addresses", tags=["addresses"])
app.include_router(proposal.router, prefix="/proposals", tags=["proposals"])
app.include_router(dewar.router, prefix="/dewars", tags=["dewars"])
app.include_router(shipment.router, prefix="/shipments", tags=["shipments"])
app.include_router(upload.router, tags=["upload"])
app.include_router(puck.router, prefix="/pucks", tags=["pucks"])
app.include_router(spreadsheet.router, tags=["spreadsheet"])

View File

@ -1,59 +1,51 @@
# app/routers/spreadsheet.py
# app/routers/spreadsheet.py
from fastapi import APIRouter, UploadFile, File, HTTPException
from app.services.spreadsheet_service import SampleSpreadsheetImporter, SpreadsheetImportError
import logging
from app.services.spreadsheet_service import SampleSpreadsheetImporter, SpreadsheetImportError
router = APIRouter()
logger = logging.getLogger(__name__)
@router.post("/upload")
async def upload_file(file: UploadFile = File(...)):
    """Validate an uploaded ``.xlsx`` sample spreadsheet and summarise it.

    The file is handed to ``SampleSpreadsheetImporter.import_spreadsheet``; the
    response lists the distinct dewar, puck and crystal names found in the
    validated rows together with a count for each.

    Raises:
        HTTPException: 400 when the file name does not end in ``.xlsx`` or the
            importer raises ``SpreadsheetImportError``; 500 for any other
            failure.
    """
    try:
        logger.info(f"Received file: {file.filename}")

        # File type check. The filename may be missing, so fall back to an
        # empty string and answer with the same 400 instead of crashing.
        if not (file.filename or "").endswith('.xlsx'):
            logger.error("Invalid file format")
            raise HTTPException(status_code=400, detail="Invalid file format. Please upload an .xlsx file.")

        # Reading file
        importer = SampleSpreadsheetImporter()
        validated_model = importer.import_spreadsheet(file)
        logger.info(f"Validated model: {validated_model}")

        # Extract and respond with detailed information.
        dewars = {sample['dewarname'] for sample in validated_model if 'dewarname' in sample}
        pucks = {sample['puckname'] for sample in validated_model if 'puckname' in sample}
        samples = {sample['crystalname'] for sample in validated_model if 'crystalname' in sample}

        # Logging the sets of names
        logger.info(f"Dewar Names: {dewars}")
        logger.info(f"Puck Names: {pucks}")
        logger.info(f"Sample Names: {samples}")

        # Forming structured response (sets are not JSON serialisable, hence list()).
        response = {
            "dewars_count": len(dewars),
            "dewars": list(dewars),
            "pucks_count": len(pucks),
            "pucks": list(pucks),
            "samples_count": len(samples),
            "samples": list(samples)
        }

        logger.info(f"Returning response: {response}")
        return response
    except HTTPException:
        # HTTP errors raised on purpose above (the 400 for a wrong file type)
        # must reach the client unchanged; without this clause the generic
        # handler below would turn them into a 500.
        raise
    except SpreadsheetImportError as e:
        logger.error(f"Spreadsheet import error: {str(e)}")
        raise HTTPException(status_code=400, detail=str(e))
    except Exception as e:
        logger.error(f"Failed to process file: {str(e)}")
        raise HTTPException(status_code=500, detail=f"Failed to upload file. Please try again. {str(e)}")

View File

@ -1,35 +0,0 @@
# app/routers/upload.py
from fastapi import APIRouter, UploadFile, File, HTTPException
import os
router = APIRouter()
@router.post("/upload")
async def upload_file(file: UploadFile = File(...)):
if not file.filename.endswith('.xlsx'):
raise HTTPException(status_code=400, detail="Invalid file format. Please upload an .xlsx file.")
save_path = os.path.join("uploads", file.filename)
os.makedirs(os.path.dirname(save_path), exist_ok=True)
with open(save_path, "wb") as buffer:
buffer.write(await file.read())
# Validate the file (add your validation logic here)
is_valid, summary, error = validate_file(save_path)
if not is_valid:
raise HTTPException(status_code=400, detail=error)
return summary
def validate_file(file_path: str):
# Implement your file validation logic here
# For demo purpose, assuming it always succeeds
summary = {
"dewars": 5,
"pucks": 10,
"samples": 100,
}
return True, summary, None

View File

@ -1,13 +1,6 @@
"""
Data model and validation for mandatory and single sample rows from
spreadsheet. Can be imported by sample_importer.py and database services.
"""
import re
from typing import Any, Optional, Union
from pydantic import BaseModel, Field, validator
from pydantic import BaseModel, Field, field_validator, AliasChoices
from typing_extensions import Annotated
@ -27,7 +20,7 @@ class SpreadsheetModel(BaseModel):
),
]
positioninpuck: int
priority: Optional[str]
priority: Optional[int]
comments: Optional[str]
pinbarcode: Optional[str]
directory: Optional[str]
@ -55,20 +48,22 @@ class SpreadsheetModel(BaseModel):
autoprocextraparams: Any = ""
chiphiangles: Any = ""
@validator("dewarname", "puckname")
def dewarname_puckname_characters(cls, v, **kwargs):
@field_validator('dewarname', 'puckname', mode="before")
@classmethod
def dewarname_puckname_characters(cls, v):
if v:
assert (
len(str(v)) > 0
), f"""" {v} " is not valid.
value must be provided for all samples in spreadsheet."""
), f"""" {v} " is not valid. Value must be provided for all samples in the spreadsheet."""
v = str(v).replace(" ", "_")
if re.search("\n", v):
assert v.isalnum(), "is not valid. newline character detected."
v = re.sub(r"\.0$", "", v)
return v.upper()
@validator("crystalname")
def parameter_characters(cls, v, **kwargs):
@field_validator('crystalname', mode="before")
@classmethod
def parameter_characters(cls, v):
v = str(v).replace(" ", "_")
if re.search("\n", v):
assert v.isalnum(), "is not valid. newline character detected."
@ -78,8 +73,9 @@ class SpreadsheetModel(BaseModel):
v = re.sub(r"\.0$", "", v)
return v
@validator("directory")
def directory_characters(cls, v, **kwargs):
@field_validator('directory', mode="before")
@classmethod
def directory_characters(cls, v):
if v:
v = str(v).strip("/").replace(" ", "_")
if re.search("\n", v):
@ -116,8 +112,9 @@ class SpreadsheetModel(BaseModel):
)
return v
@validator("positioninpuck", pre=True)
def positioninpuck_possible(cls, v, **kwargs):
@field_validator('positioninpuck', mode="before")
@classmethod
def positioninpuck_possible(cls, v):
if v:
try:
v = int(float(v))
@ -134,31 +131,37 @@ class SpreadsheetModel(BaseModel):
raise ValueError("Value must be provided. Value must be from 1 to 16.")
return v
@validator("pucklocationindewar")
def pucklocationindewar_convert_to_int(cls, v, **kwargs):
return int(float(v)) if v else v
@field_validator('pucklocationindewar', mode="before")
@classmethod
def pucklocationindewar_convert_to_str(cls, v):
    """Normalise the puck location to the string form of an integer.

    The literal ``"Unipuck"`` is returned untouched. Any other value goes
    through ``float`` -> ``int`` -> ``str``, so ``"3.0"`` becomes ``"3"``.

    Raises:
        ValueError: if the value cannot be converted to a whole number.
    """
    if v == "Unipuck":
        return v
    try:
        return str(int(float(v)))
    except (ValueError, TypeError, OverflowError) as e:
        # float(None) raises TypeError and int(float("inf")) raises
        # OverflowError. Validators are expected to signal bad input with
        # ValueError, so every conversion failure is mapped onto one.
        raise ValueError(f"Value error, could not convert string to float: '{v}'") from e
@field_validator('priority', mode="before")
@classmethod
def priority_positive(cls, v):
    """Coerce the priority cell to a positive ``int``.

    ``None`` and blank strings mean "no priority given" and yield ``None``.
    Any other value is stringified, stripped, has a trailing ``.0`` removed
    and must then parse as an integer greater than zero.

    Raises:
        ValueError: if the value is present but not a positive integer.
    """
    if v is None:
        return v
    v = str(v).strip()
    if not v:
        # A blank cell is a missing priority, not an invalid one.
        # NOTE(review): assumes downstream code accepts None here - confirm.
        return None
    v = re.sub(r"\.0$", "", v)
    try:
        number = int(v)
    except (ValueError, TypeError) as e:
        raise ValueError(
            f" '{v}' is not valid. Value must be a positive integer."
        ) from e
    # Range check kept outside the try so this error is not caught and
    # re-wrapped by the handler above.
    if number <= 0:
        raise ValueError(
            f" '{v}' is not valid. Value must be a positive integer."
        )
    return number
@validator("aperture")
def aperture_selection(cls, v, **kwargs):
@field_validator('aperture', mode="before")
@classmethod
def aperture_selection(cls, v):
if v:
try:
v = int(float(v))
@ -174,15 +177,17 @@ class SpreadsheetModel(BaseModel):
) from e
return v
@validator(
@field_validator(
"oscillation",
"exposure",
"totalrange",
"targetresolution",
"rescutvalue",
"userresolution",
mode="before"
)
def parameter_positive_float(cls, v, **kwargs):
@classmethod
def parameter_positive_float(cls, v):
if v:
try:
v = float(v)
@ -198,8 +203,9 @@ class SpreadsheetModel(BaseModel):
) from e
return v
@validator("transmission")
def tranmission_fraction(cls, v, **kwargs):
@field_validator('transmission', mode="before")
@classmethod
def tranmission_fraction(cls, v):
if v:
try:
v = float(v)
@ -217,8 +223,9 @@ class SpreadsheetModel(BaseModel):
) from e
return v
@validator("datacollectiontype")
def datacollectiontype_allowed(cls, v, **kwargs):
@field_validator('datacollectiontype', mode="before")
@classmethod
def datacollectiontype_allowed(cls, v):
if v:
v = v.lower()
allowed = ["standard", "serial-xtal", "multi-orientation"]
@ -229,8 +236,9 @@ class SpreadsheetModel(BaseModel):
)
return v
@validator("processingpipeline")
def processingpipeline_allowed(cls, v, **kwargs):
@field_validator('processingpipeline', mode="before")
@classmethod
def processingpipeline_allowed(cls, v):
if v:
v = v.lower()
allowed = ["gopy", "autoproc", "xia2dials"]
@ -241,8 +249,9 @@ class SpreadsheetModel(BaseModel):
)
return v
@validator("spacegroupnumber")
def spacegroupnumber_integer(cls, v, **kwargs):
@field_validator('spacegroupnumber', mode="before")
@classmethod
def spacegroupnumber_integer(cls, v):
if v:
try:
v = int(float(v))
@ -258,8 +267,9 @@ class SpreadsheetModel(BaseModel):
) from e
return v
@validator("cellparameters")
def cellparameters_positive_floats(cls, v, **kwargs):
@field_validator('cellparameters', mode="before")
@classmethod
def cellparameters_positive_floats(cls, v):
if v:
splitted = str(v).split(" ")
if len(splitted) != 6:
@ -279,8 +289,9 @@ class SpreadsheetModel(BaseModel):
) from e
return v
@validator("rescutkey")
def rescutkey_allowed(cls, v, **kwargs):
@field_validator('rescutkey', mode="before")
@classmethod
def rescutkey_allowed(cls, v):
if v:
v = v.lower()
allowed = ["is", "cchalf"]
@ -288,8 +299,9 @@ class SpreadsheetModel(BaseModel):
raise ValueError(f"' {v} ' is not valid. value must be ' {allowed} '.")
return v
@validator("autoprocfull", "procfull", "adpenabled", "noano", "ffcscampaign")
def boolean_allowed(cls, v, **kwargs):
@field_validator('autoprocfull', 'procfull', 'adpenabled', 'noano', 'ffcscampaign', mode="before")
@classmethod
def boolean_allowed(cls, v):
if v:
v = v.title()
allowed = ["False", "True"]
@ -300,8 +312,9 @@ class SpreadsheetModel(BaseModel):
)
return v
@validator("trustedhigh")
def trusted_float(cls, v, **kwargs):
@field_validator('trustedhigh', mode="before")
@classmethod
def trusted_float(cls, v):
if v:
try:
v = float(v)
@ -319,8 +332,9 @@ class SpreadsheetModel(BaseModel):
) from e
return v
@validator("proteinname")
def proteinname_characters(cls, v, **kwargs):
@field_validator('proteinname', mode="before")
@classmethod
def proteinname_characters(cls, v):
if v:
v = str(v).replace(" ", "_")
if re.search("\n", v):
@ -331,12 +345,13 @@ class SpreadsheetModel(BaseModel):
v = re.sub(r"\.0$", "", v)
return v
@validator("chiphiangles")
def chiphiangles_value(cls, v, **kwargs):
@field_validator('chiphiangles', mode="before")
@classmethod
def chiphiangles_value(cls, v):
if v:
try:
v = str(v)
re.sub(r"(^\s*\[\s*|\s*\]\s*$)", "", v.strip())
v = re.sub(r"(^\s*\[\s*|\s*\]\s*$)", "", v.strip())
list_of_strings = re.findall(r"\(.*?\)", v)
list_of_tuples = []
for el in list_of_strings:
@ -352,7 +367,7 @@ class SpreadsheetModel(BaseModel):
) from e
return v
@validator(
@field_validator(
"priority",
"comments",
"pinbarcode",
@ -380,12 +395,19 @@ class SpreadsheetModel(BaseModel):
"ffcscampaign",
"autoprocextraparams",
"chiphiangles",
mode="before"
)
def set_default_emptystring(cls, v, **kwargs):
@classmethod
def set_default_emptystring(cls, v):
return v or ""
class Config:
anystr_strip_whitespace = True
str_strip_whitespace = True
aliases = {
'dewarname': 'dewarname',
'puckname': 'puckname',
'crystalname': 'crystalname',
}
class TELLModel(SpreadsheetModel):
@ -397,29 +419,3 @@ class TELLModel(SpreadsheetModel):
puck_number: int
prefix: Optional[str]
folder: Optional[str]
"""
Following params appended in teller.py for updating SDU sample model
class SDUTELLModel(TELLModel):
sdudaq: str
sdudiffcenter: str
sduopticalcenter: str
sdumount: str
sdusafetycheck: str
Following params returned in the format expected by tell.set_samples_info()
{
"userName": user,
"dewarName": sample["dewarname"],
"puckName": "", # FIXME at the moment this field is useless
"puckType": "Unipuck",
"puckAddress": sample["puckaddress"],
"puckBarcode": sample["puckname"],
"sampleBarcode": sample.get("pinbarcode", ""),
"sampleMountCount": sample["samplemountcount"],
"sampleName": sample["crystalname"],
"samplePosition": sample["positioninpuck"],
"sampleStatus": sample["samplestatus"],
}
"""

View File

@ -1,43 +1,58 @@
# app/services/spreadsheet_service.py
import logging
import openpyxl
from pydantic import ValidationError, parse_obj_as
from typing import List
from app.sample_models import SpreadsheetModel, TELLModel
from app.sample_models import SpreadsheetModel
from io import BytesIO
UNASSIGNED_PUCKADDRESS = "---"
logging.basicConfig(level=logging.INFO)
logging.basicConfig(level=logging.DEBUG) # Change to DEBUG level to see more logs
logger = logging.getLogger(__name__)
class SpreadsheetImportError(Exception):
    """Raised when an uploaded spreadsheet cannot be read or fails validation."""
class SampleSpreadsheetImporter:
def __init__(self):
self.filename = None
self.model = None
self.available_puck_positions = []
def _clean_value(self, value):
"""Clean value by converting it to the expected type and stripping whitespace for strings."""
if isinstance(value, str):
return value.strip()
elif isinstance(value, (float, int)):
return str(value) # Always return strings for priority field validation
return value
def import_spreadsheet(self, file):
    """Load the ``Samples`` worksheet of an uploaded workbook and process it.

    Args:
        file: upload object exposing ``filename`` and a binary ``file`` stream.

    Returns:
        Whatever ``process_spreadsheet`` returns for the worksheet.

    Raises:
        SpreadsheetImportError: if the upload is empty, the workbook cannot be
            opened, or it has no ``Samples`` worksheet.
    """
    # Reinitialize state
    self.available_puck_positions = [
        f"{s}{p}" for s in "ABCDEF" for p in range(1, 6)
    ]
    self.available_puck_positions.append(UNASSIGNED_PUCKADDRESS)
    self.model = []

    self.filename = file.filename
    logger.info(f"Importing spreadsheet from .xlsx file: {self.filename}")

    contents = file.file.read()
    file.file.seek(0)  # Reset file pointer to the beginning

    if not contents:
        logger.error("The uploaded file is empty.")
        raise SpreadsheetImportError("The uploaded file is empty.")

    # Only the workbook load sits inside the try: the worksheet check below
    # raises its own SpreadsheetImportError, which a broad handler would
    # otherwise catch and re-wrap as "Failed to read the file: ...".
    try:
        workbook = openpyxl.load_workbook(BytesIO(contents))
    except Exception as e:
        logger.error(f"Failed to read the file: {str(e)}")
        raise SpreadsheetImportError(f"Failed to read the file: {str(e)}") from e
    logger.debug("Workbook loaded successfully")

    if "Samples" not in workbook.sheetnames:
        logger.error("The file is missing 'Samples' worksheet.")
        raise SpreadsheetImportError("The file is missing 'Samples' worksheet.")

    return self.process_spreadsheet(workbook["Samples"])
@ -47,22 +62,47 @@ class SampleSpreadsheetImporter:
# Skip the first 3 rows
rows = list(sheet.iter_rows(min_row=4, values_only=True))
logger.debug(f"Starting to process {len(rows)} rows from the sheet")
for row in rows:
if not rows:
logger.error("The 'Samples' worksheet is empty.")
raise SpreadsheetImportError("The 'Samples' worksheet is empty.")
for index, row in enumerate(rows):
if not row or all(value is None for value in row):
logger.debug(f"Skipping empty row or row with all None values at index {index}.")
continue
try:
sample = {
'dewarname': self._clean_value(row[0]),
'puckname': self._clean_value(row[1]),
'crystalname': self._clean_value(row[4])
'pucklocationindewar': self._clean_value(row[2]) if len(row) > 2 else None,
'positioninpuck': self._clean_value(row[3]) if len(row) > 3 else None,
'crystalname': self._clean_value(row[4]),
'priority': self._clean_value(row[5]) if len(row) > 5 else None,
'comments': self._clean_value(row[6]) if len(row) > 6 else None,
'pinbarcode': self._clean_value(row[7]) if len(row) > 7 else None,
'directory': self._clean_value(row[8]) if len(row) > 8 else None,
}
except IndexError:
logger.error(f"Index error processing row at index {index}: Row has missing values.")
raise SpreadsheetImportError(f"Index error processing row at index {index}: Row has missing values.")
# Skip rows missing essential fields
if not sample['dewarname'] or not sample['puckname'] or not sample['crystalname']:
# Skip rows with missing required fields
logger.debug(f"Skipping row due to missing essential fields: {row}")
continue
model.append(sample)
logger.info(f"Sample processed: {sample}") # Adding log for each processed sample
logger.info(f"Sample processed: {sample}")
if not model:
logger.error("No valid samples found in the spreadsheet.")
raise SpreadsheetImportError("No valid samples found in the spreadsheet.")
logger.info(f"...finished import, got {len(model)} samples")
logger.debug(f"Model data: {model}")
self.model = model
try:
@ -77,16 +117,15 @@ class SampleSpreadsheetImporter:
model = self.model
logger.info(f"...validating {len(model)} samples")
# Log the model before validation
for sample in model:
logger.info(f"Validating sample: {sample}")
validated_model = self.data_model_validation(SpreadsheetModel, model)
# Log the validated model after validation
for sample in validated_model:
logger.info(f"Validated sample: {sample}")
logger.debug(f"Validated model data: {validated_model}")
return validated_model
@staticmethod
@ -94,13 +133,8 @@ class SampleSpreadsheetImporter:
try:
validated = parse_obj_as(List[data_model], model)
except ValidationError as e:
raise SpreadsheetImportError(f"{e.errors()[0]['loc'][2]} => {e.errors()[0]['msg']}")
logger.error(f"Validation error: {e.errors()}")
raise SpreadsheetImportError(f"{e.errors()[0]['loc']} => {e.errors()[0]['msg']}")
validated_model = [dict(value) for value in validated]
return validated_model
@staticmethod
def _clean_value(value):
if isinstance(value, str):
return value.strip()
return value # For other types (int, float, None, etc.), return value as is or handle accordingly

Binary file not shown.

View File

@ -13,7 +13,7 @@ import CloseIcon from '@mui/icons-material/Close';
import DownloadIcon from '@mui/icons-material/Download';
import UploadFileIcon from '@mui/icons-material/UploadFile';
import logo from '../assets/Heidi-logo.png';
import { OpenAPI, UploadService } from '../../openapi';
import { OpenAPI, SpreadsheetService } from '../../openapi';
import type { Body_upload_file_upload_post } from '../../openapi/models/Body_upload_file_upload_post';
interface UploadDialogProps {
@ -59,7 +59,7 @@ const UploadDialog: React.FC<UploadDialogProps> = ({ open, onClose }) => {
try {
// Use the generated OpenAPI client SpreadsheetService method
const response = await UploadService.uploadFileUploadPost(formData);
const response = await SpreadsheetService.uploadFileUploadPost(formData);
console.log('File summary response from backend:', response);
console.log('Dewars:', response.dewars);
@ -87,31 +87,13 @@ const UploadDialog: React.FC<UploadDialogProps> = ({ open, onClose }) => {
<Box display="flex" flexDirection="column" alignItems="center" mb={2}>
<img src={logo} alt="Logo" style={{ width: 200, marginBottom: 16 }} />
<Typography variant="subtitle1">Latest Spreadsheet Template Version 6</Typography>
<Typography variant="body2" color="textSecondary">
Last update: October 18, 2024
</Typography>
<Button
variant="outlined"
startIcon={<DownloadIcon />}
href="/path/to/template.xlsx"
download
sx={{ mt: 1 }}
>
<Typography variant="body2" color="textSecondary">Last update: October 18, 2024</Typography>
<Button variant="outlined" startIcon={<DownloadIcon />} href="/path/to/template.xlsx" download sx={{ mt: 1 }}>
Download XLSX
</Button>
<Typography variant="subtitle1" sx={{ mt: 3 }}>
Latest Spreadsheet Instructions Version 2.3
</Typography>
<Typography variant="body2" color="textSecondary">
Last updated: October 18, 2024
</Typography>
<Button
variant="outlined"
startIcon={<DownloadIcon />}
href="/path/to/instructions.pdf"
download
sx={{ mt: 1 }}
>
<Typography variant="subtitle1" sx={{ mt: 3 }}>Latest Spreadsheet Instructions Version 2.3</Typography>
<Typography variant="body2" color="textSecondary">Last updated: October 18, 2024</Typography>
<Button variant="outlined" startIcon={<DownloadIcon />} href="/path/to/instructions.pdf" download sx={{ mt: 1 }}>
Download PDF
</Button>
</Box>
@ -148,9 +130,7 @@ const UploadDialog: React.FC<UploadDialogProps> = ({ open, onClose }) => {
</Box>
</DialogContent>
<DialogActions>
<Button onClick={onClose} color="primary">
Close
</Button>
<Button onClick={onClose} color="primary">Close</Button>
</DialogActions>
</Dialog>
);