Add job cancellation handling and periodic cleanup logic

Introduce new statuses, "to_cancel" and "cancelled", to improve job state tracking. Implement logic to nullify `slurm_id` for cancelled jobs and a background thread to clean up cancelled jobs older than 2 hours. Ensure periodic cleanup runs hourly to maintain database hygiene.
This commit is contained in:
GotthardG
2025-05-01 15:17:42 +02:00
parent 9e875c5a04
commit a1b857b78a
3 changed files with 46 additions and 0 deletions

View File

@ -313,6 +313,8 @@ class JobStatus(str, enum.Enum):
TODO = "todo"
SUBMITTED = "submitted"
DONE = "done"
TO_CANCEL = "to_cancel"
CANCELLED = "cancelled"
FAILED = "failed"

View File

@ -80,6 +80,10 @@ def update_jobs_status(payload: JobsUpdate, db: Session = Depends(get_db)):
raise HTTPException(status_code=404, detail="Job not found")
# If status is being updated to "cancelled"
if payload.status == "cancelled":
job.slurm_id = None
# Update the status
job.status = payload.status
job.slurm_id = payload.slurm_id
@ -93,3 +97,23 @@ def update_jobs_status(payload: JobsUpdate, db: Session = Depends(get_db)):
# Return the updated job's info as response
return JobsUpdate(job_id=job.id, status=job.status, slurm_id=job.slurm_id)
def cleanup_cancelled_jobs(db: Session):
    """Delete jobs that have been in the 'cancelled' state for more than 2 hours.

    Selects every ``JobModel`` row whose ``status`` equals ``"cancelled"`` and
    whose ``updated_at`` is earlier than two hours before the current time,
    deletes each of them through the given session, and commits once at the
    end.

    Args:
        db: Session used to query, delete, and commit. This function does not
            close it; that stays with the caller.
    """
    # Kept function-local, as in the original, so the block does not depend
    # on the module's top-of-file imports.
    from datetime import datetime, timedelta

    # Take a single clock reading so the logged "current time" is exactly the
    # instant the cutoff was derived from.
    # NOTE(review): datetime.now() is naive local time; this assumes
    # JobModel.updated_at is stored the same way -- TODO confirm.
    now = datetime.now()
    cutoff = now - timedelta(hours=2)
    print(
        f"Cleaning up cancelled jobs older than {cutoff} "
        f"(current time: {now})"
    )

    old_jobs = (
        db.query(JobModel)
        .filter(JobModel.status == "cancelled", JobModel.updated_at < cutoff)
        .all()
    )
    # Delete through the session (rather than a bulk query delete) so any
    # ORM-level cascades configured on the model still apply.
    for job in old_jobs:
        db.delete(job)
    # One commit for the whole batch: either every stale job goes or none.
    db.commit()