Add job cancellation handling and periodic cleanup logic

Introduce two new statuses, "to_cancel" and "cancelled", to improve job state tracking. Nullify `slurm_id` once a job is cancelled, and add a background thread that removes cancelled jobs older than 2 hours. The cleanup runs hourly to maintain database hygiene.
GotthardG committed 2025-05-02 10:48:54 +02:00
parent a1b857b78a
commit b13a3e23f4

2 changed files with 106 additions and 40 deletions
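
Only the first of the two changed files — the jobs streaming endpoint — is shown below; the cancellation statuses and cleanup thread described in the message live in the other file. As a rough sketch of what that logic might look like (the status strings, the helper names, and the use of updated_at as the age reference are assumptions drawn from the commit message, not the actual code):

import threading
import time
from datetime import datetime, timedelta

from app.dependencies import get_db
from app.models import Jobs as JobModel


def finalize_cancellation(db, job):
    # Move a job from "to_cancel" to "cancelled" and nullify its slurm_id,
    # per the commit message.
    job.status = "cancelled"
    job.slurm_id = None
    db.commit()


def cleanup_cancelled_jobs():
    # Delete jobs that have been cancelled for more than 2 hours.
    with next(get_db()) as db:
        cutoff = datetime.now() - timedelta(hours=2)
        stale = (
            db.query(JobModel)
            .filter(
                JobModel.status == "cancelled",
                JobModel.updated_at < cutoff,
            )
            .all()
        )
        for job in stale:
            db.delete(job)
        db.commit()


def start_cleanup_thread():
    # Run the cleanup hourly in a daemon thread so it exits with the app.
    def loop():
        while True:
            cleanup_cancelled_jobs()
            time.sleep(3600)

    threading.Thread(target=loop, daemon=True).start()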


@@ -5,7 +5,6 @@ from fastapi.encoders import jsonable_encoder
 from sqlalchemy.orm import Session
 from starlette.responses import StreamingResponse
 from app.models import (
-    JobStatus,
     Jobs as JobModel,
     ExperimentParameters as ExperimentParametersModel,
     Sample as SampleModel,
@@ -16,46 +15,46 @@ from app.dependencies import get_db
 router = APIRouter()
 
-async def job_event_generator(db: Session):
+async def job_event_generator(get_db):
     while True:
-        jobs = db.query(JobModel).filter(JobModel.status == JobStatus.TODO).all()
-        job_items = []
-        for job in jobs:
-            sample = db.query(SampleModel).filter_by(id=job.sample_id).first()
-            experiment = (
-                db.query(ExperimentParametersModel)
-                .filter(
-                    ExperimentParametersModel.sample_id == sample.id,
-                    ExperimentParametersModel.id == job.run_id,
-                )
-                .first()
-            )
-            job_item = JobsResponse(
-                job_id=job.id,
-                sample_id=sample.id,
-                run_id=job.run_id,
-                sample_name=sample.sample_name,
-                status=job.status,
-                type=experiment.type,
-                created_at=job.created_at,
-                updated_at=job.updated_at,
-                data_collection_parameters=sample.data_collection_parameters,
-                experiment_parameters=experiment.beamline_parameters
-                if experiment
-                else None,
-                filepath=experiment.dataset.get("filepath")
-                if experiment and experiment.dataset
-                else None,
-                slurm_id=job.slurm_id,
-            )
-            job_items.append(job_item)
-        if job_items:
-            # Use Pydantic's .json() for each item, if you need a fine structure, or:
-            serialized = jsonable_encoder(job_items)
-            yield f"data: {json.dumps(serialized)}\n\n"
+        # Open a new session for this iteration and close it at the end
+        with next(get_db()) as db:
+            jobs = db.query(JobModel).all()
+            job_items = []
+            for job in jobs:
+                sample = db.query(SampleModel).filter_by(id=job.sample_id).first()
+                experiment = (
+                    db.query(ExperimentParametersModel)
+                    .filter(
+                        ExperimentParametersModel.sample_id == sample.id,
+                        ExperimentParametersModel.id == job.run_id,
+                    )
+                    .first()
+                )
+                job_item = JobsResponse(
+                    job_id=job.id,
+                    sample_id=sample.id,
+                    run_id=job.run_id,
+                    sample_name=sample.sample_name,
+                    status=job.status,
+                    type=experiment.type if experiment else None,
+                    created_at=job.created_at,
+                    updated_at=job.updated_at,
+                    data_collection_parameters=sample.data_collection_parameters,
+                    experiment_parameters=experiment.beamline_parameters
+                    if experiment
+                    else None,
+                    filepath=experiment.dataset.get("filepath")
+                    if experiment and experiment.dataset
+                    else None,
+                    slurm_id=job.slurm_id,
+                )
+                job_items.append(job_item)
+            if job_items:
+                serialized = jsonable_encoder(job_items)
+                yield f"data: {json.dumps(serialized)}\n\n"
     await asyncio.sleep(5)
@@ -64,8 +63,13 @@ async def job_event_generator(db: Session):
 @router.get("/jobs/stream")
-async def stream_jobs(db: Session = Depends(get_db)):
-    return StreamingResponse(job_event_generator(db), media_type="text/event-stream")
+async def stream_jobs():
+    # Pass the dependency itself, not an active session
+    from app.dependencies import get_db
+
+    return StreamingResponse(
+        job_event_generator(get_db), media_type="text/event-stream"
+    )
 
 @router.post(
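
For reference, the `with next(get_db()) as db:` pattern above assumes `get_db` is the usual generator-style FastAPI dependency that yields a SQLAlchemy Session: `next()` pulls the session out of the generator, and since Session is itself a context manager, the `with` block closes it when the iteration ends. A minimal sketch of such a dependency (the real app/dependencies.py is not part of this diff, and the database URL is a placeholder):

from sqlalchemy import create_engine
from sqlalchemy.orm import sessionmaker

engine = create_engine("sqlite:///./app.db")  # placeholder URL
SessionLocal = sessionmaker(bind=engine)


def get_db():
    # Yield a session; FastAPI (or the caller) is responsible for closing it.
    db = SessionLocal()
    try:
        yield db
    finally:
        db.close()

Note that calling `next(get_db())` bypasses the generator's `finally` block, which is why the streaming code relies on the `with` statement, rather than the dependency itself, to close each session.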