Files
2026-05-21 14:04:31 +02:00

110 lines
2.9 KiB
JSON

{
"metadata": {
"name": null,
"namespace": "default",
"uid": null,
"resourceVersion": "2",
"labels": {
"grafana.app/folder": "bf7geueotdmgwc",
"grafana.com/group": "10m-check-interval",
"grafana.com/group-index": "1"
},
"annotations": {
"grafana.app/folder": "bf7geueotdmgwc",
"grafana.app/updatedBy": "ff52p5ojz4b28c",
"grafana.app/updatedTimestamp": "2026-05-20T14:34:40Z",
"grafana.com/provenance": "",
"grafana.com/updateTimestamp": "2026-05-20T14:34:40Z",
"grafana.com/updatedBy": "ff52p5ojz4b28c"
}
},
"spec": {
"title": "SlurmNodeUnstable",
"trigger": {
"interval": "10m"
},
"labels": {
"component": "slurm",
"type": "state"
},
"annotations": {
"__dashboardUid__": "de7rpsq1merlin7slurm1",
"__panelId__": "7",
"description": "A node or group of nodes is in an unstable state and requires intervention by an administrator. This could be due to a hardware issue, leading to state notifications like down or unknown, or it can be causes by some scheduling issue, like misbehaving job.",
"summary": "A node or group of nodes is in an unstable state!"
},
"for": "10m0s",
"noDataState": "KeepLast",
"execErrState": "Error",
"notificationSettings": {
"receiver": "Merlin Alarms"
},
"expressions": {
"A": {
"relativeTimeRange": {
"from": "10m0s",
"to": "0s"
},
"datasourceUID": "merlin-mimir",
"model": {
"datasource": {
"type": "prometheus",
"uid": "merlin-mimir"
},
"editorMode": "code",
"expr": "sum(slurm_node_state{state=~\"inval|drain|drng|fail|failg|down|unk\"}) by (node,cluster,state)",
"instant": true,
"intervalMs": 1000,
"legendFormat": "__auto",
"maxDataPoints": 43200,
"range": false,
"refId": "A"
}
},
"C": {
"queryType": "expression",
"model": {
"conditions": [
{
"evaluator": {
"params": [
0
],
"type": "gt"
},
"operator": {
"type": "and"
},
"query": {
"params": [
"C"
]
},
"reducer": {
"params": [],
"type": "last"
},
"type": "query"
}
],
"datasource": {
"type": "__expr__",
"uid": "__expr__"
},
"expression": "A",
"intervalMs": 1000,
"maxDataPoints": 43200,
"refId": "C",
"type": "threshold"
},
"source": true
}
},
"panelRef": {
"dashboardUID": "de7rpsq1merlin7slurm1",
"panelID": 7
}
},
"status": {}
}