diff --git a/backend/app/models.py b/backend/app/models.py index e97ff49..657a5ff 100644 --- a/backend/app/models.py +++ b/backend/app/models.py @@ -313,6 +313,8 @@ class JobStatus(str, enum.Enum): TODO = "todo" SUBMITTED = "submitted" DONE = "done" + TO_CANCEL = "to_cancel" + CANCELLED = "cancelled" FAILED = "failed" diff --git a/backend/app/routers/processing.py b/backend/app/routers/processing.py index 68017f5..4658f9a 100644 --- a/backend/app/routers/processing.py +++ b/backend/app/routers/processing.py @@ -80,6 +80,10 @@ def update_jobs_status(payload: JobsUpdate, db: Session = Depends(get_db)): raise HTTPException(status_code=404, detail="Job not found") + # If status is being updated to "cancelled" + if payload.status == "cancelled": + job.slurm_id = None + # Update the status job.status = payload.status job.slurm_id = payload.slurm_id @@ -93,3 +97,23 @@ def update_jobs_status(payload: JobsUpdate, db: Session = Depends(get_db)): # Return the updated job's info as response return JobsUpdate(job_id=job.id, status=job.status, slurm_id=job.slurm_id) + + +def cleanup_cancelled_jobs(db: Session): + from datetime import datetime + from datetime import timedelta + + """Delete jobs in 'cancelled' state for more than 2 hours.""" + cutoff = datetime.now() - timedelta(hours=2) + print( + f"Cleaning up cancelled jobs older than {cutoff} " + f"(current time: {datetime.now()})" + ) + old_jobs = ( + db.query(JobModel) + .filter(JobModel.status == "cancelled", JobModel.updated_at < cutoff) + .all() + ) + for job in old_jobs: + db.delete(job) + db.commit() diff --git a/backend/main.py b/backend/main.py index e0fbb06..328c368 100644 --- a/backend/main.py +++ b/backend/main.py @@ -122,6 +122,20 @@ if environment == "dev": ssl_heidi.generate_self_signed_cert(cert_path, key_path) +def cleanup_job_loop(): + import time + from app.dependencies import get_db + from app.routers.processing import cleanup_cancelled_jobs + + while True: + db = next(get_db()) + try: + cleanup_cancelled_jobs(db) + finally: + db.close() + time.sleep(3600) # every hour + + @asynccontextmanager async def lifespan(app: FastAPI): print("[INFO] Running application startup tasks...") @@ -174,6 +188,12 @@ async def lifespan(app: FastAPI): load_slots_data(db) + from threading import Thread + + # Start cleanup in background thread + thread = Thread(target=cleanup_job_loop, daemon=True) + thread.start() + yield finally: db.close()