Files
grafana-dashb-save/dashboards/Merlin7/Slurm_Native_OpenMetrics.json
gitea-actions[bot] 76eacf4e2a grafana: update dashboards
Changed files: dashboards/Merlin7/Slurm_Native_OpenMetrics.json
2026-04-15 18:01:01 +00:00

3843 lines
96 KiB
JSON

{
"annotations": {
"list": [
{
"builtIn": 1,
"datasource": {
"type": "grafana",
"uid": "-- Grafana --"
},
"enable": true,
"hide": true,
"iconColor": "rgba(0, 211, 255, 1)",
"name": "Annotations & Alerts",
"type": "dashboard"
}
]
},
"description": "Comprehensive Slurm monitoring using slurmctld native OpenMetrics endpoint (port 6817). Covers cluster summary, job trends, per-node resources, per-partition status, per-user workloads, and scheduler internals (backfill, RPC latency, threads).",
"editable": true,
"fiscalYearStartMonth": 0,
"graphTooltip": 1,
"id": null,
"links": [],
"panels": [
{
"collapsed": false,
"gridPos": {
"h": 1,
"w": 24,
"x": 0,
"y": 0
},
"id": 100,
"panels": [],
"title": "Cluster Summary",
"type": "row"
},
{
"datasource": {
"type": "prometheus",
"uid": "merlin-mimir"
},
"fieldConfig": {
"defaults": {
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": 0
}
]
}
},
"overrides": []
},
"gridPos": {
"h": 4,
"w": 3,
"x": 0,
"y": 1
},
"id": 1,
"options": {
"colorMode": "background",
"graphMode": "none",
"justifyMode": "auto",
"orientation": "auto",
"percentChangeColorMode": "standard",
"reduceOptions": {
"calcs": [
"lastNotNull"
],
"fields": "",
"values": false
},
"showPercentChange": false,
"textMode": "auto",
"wideLayout": true
},
"pluginVersion": "12.1.1",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "merlin-mimir"
},
"editorMode": "code",
"expr": "slurm_jobs_running{instance=\"$slurm_instance\"}",
"range": true,
"refId": "A"
}
],
"title": "Running Jobs",
"type": "stat"
},
{
"datasource": {
"type": "prometheus",
"uid": "merlin-mimir"
},
"fieldConfig": {
"defaults": {
"mappings": [],
"max": 1,
"min": 0,
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "red",
"value": 0
},
{
"color": "yellow",
"value": 0.5
},
{
"color": "green",
"value": 0.7
}
]
},
"unit": "percentunit"
},
"overrides": []
},
"gridPos": {
"h": 4,
"w": 3,
"x": 3,
"y": 1
},
"id": 5,
"options": {
"colorMode": "background",
"graphMode": "none",
"justifyMode": "auto",
"orientation": "auto",
"percentChangeColorMode": "standard",
"reduceOptions": {
"calcs": [
"lastNotNull"
],
"fields": "",
"values": false
},
"showPercentChange": false,
"textMode": "auto",
"wideLayout": true
},
"pluginVersion": "12.1.1",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "merlin-mimir"
},
"editorMode": "code",
"expr": "sum(slurm_node_cpus_alloc{instance=\"$slurm_instance\"}) / sum(slurm_node_cpus{instance=\"$slurm_instance\"})",
"range": true,
"refId": "A"
}
],
"title": "CPU Utilz. (mc nodes)",
"type": "stat"
},
{
"datasource": {
"type": "prometheus",
"uid": "merlin-mimir"
},
"fieldConfig": {
"defaults": {
"mappings": [],
"max": 1,
"min": 0,
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": 0
},
{
"color": "yellow",
"value": 0.7
},
{
"color": "red",
"value": 0.9
}
]
},
"unit": "percentunit"
},
"overrides": []
},
"gridPos": {
"h": 4,
"w": 3,
"x": 6,
"y": 1
},
"id": 6,
"options": {
"colorMode": "background",
"graphMode": "none",
"justifyMode": "auto",
"orientation": "auto",
"percentChangeColorMode": "standard",
"reduceOptions": {
"calcs": [
"lastNotNull"
],
"fields": "",
"values": false
},
"showPercentChange": false,
"textMode": "auto",
"wideLayout": true
},
"pluginVersion": "12.1.1",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "merlin-mimir"
},
"editorMode": "code",
"expr": "sum(slurm_node_memory_alloc_bytes{instance=\"$slurm_instance\"}) / sum(slurm_node_memory_bytes{instance=\"$slurm_instance\"})",
"range": true,
"refId": "A"
}
],
"title": "Memory Utilz (mc nodes)",
"type": "stat"
},
{
"datasource": {
"type": "prometheus",
"uid": "merlin-mimir"
},
"fieldConfig": {
"defaults": {
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": 0
},
{
"color": "orange",
"value": 1000
},
{
"color": "red",
"value": 5000
}
]
}
},
"overrides": []
},
"gridPos": {
"h": 4,
"w": 3,
"x": 0,
"y": 5
},
"id": 2,
"options": {
"colorMode": "background",
"graphMode": "none",
"justifyMode": "auto",
"orientation": "auto",
"percentChangeColorMode": "standard",
"reduceOptions": {
"calcs": [
"lastNotNull"
],
"fields": "",
"values": false
},
"showPercentChange": false,
"textMode": "auto",
"wideLayout": true
},
"pluginVersion": "12.1.1",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "merlin-mimir"
},
"editorMode": "code",
"expr": "slurm_jobs_pending{instance=\"$slurm_instance\"}",
"range": true,
"refId": "A"
}
],
"title": "Pending Jobs",
"type": "stat"
},
{
"datasource": {
"type": "prometheus",
"uid": "merlin-mimir"
},
"fieldConfig": {
"defaults": {
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "blue",
"value": 0
}
]
}
},
"overrides": []
},
"gridPos": {
"h": 4,
"w": 3,
"x": 3,
"y": 5
},
"id": 4,
"options": {
"colorMode": "background",
"graphMode": "none",
"justifyMode": "auto",
"orientation": "auto",
"percentChangeColorMode": "standard",
"reduceOptions": {
"calcs": [
"lastNotNull"
],
"fields": "",
"values": false
},
"showPercentChange": false,
"textMode": "auto",
"wideLayout": true
},
"pluginVersion": "12.1.1",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "merlin-mimir"
},
"editorMode": "code",
"expr": "sum(slurm_node_cpus{instance=\"$slurm_instance\"})",
"range": true,
"refId": "A"
}
],
"title": "Total CPUs (mc nodes)",
"type": "stat"
},
{
"datasource": {
"type": "prometheus",
"uid": "merlin-mimir"
},
"fieldConfig": {
"defaults": {
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": 0
}
]
}
},
"overrides": [
{
"matcher": {
"id": "byName",
"options": "down"
},
"properties": [
{
"id": "thresholds",
"value": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": 0
},
{
"color": "red",
"value": 1
}
]
}
}
]
},
{
"matcher": {
"id": "byName",
"options": "drain"
},
"properties": [
{
"id": "thresholds",
"value": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": 0
},
{
"color": "red",
"value": 1
}
]
}
}
]
}
]
},
"gridPos": {
"h": 4,
"w": 4,
"x": 6,
"y": 5
},
"id": 7,
"options": {
"colorMode": "background",
"graphMode": "none",
"justifyMode": "auto",
"orientation": "auto",
"percentChangeColorMode": "standard",
"reduceOptions": {
"calcs": [
"lastNotNull"
],
"fields": "",
"values": false
},
"showPercentChange": false,
"textMode": "auto",
"wideLayout": true
},
"pluginVersion": "12.1.1",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "merlin-mimir"
},
"editorMode": "code",
"expr": "slurm_nodes_idle{instance=\"$slurm_instance\"}",
"legendFormat": "idle",
"range": true,
"refId": "A"
},
{
"datasource": {
"type": "prometheus",
"uid": "merlin-mimir"
},
"editorMode": "code",
"expr": "slurm_nodes_mixed{instance=\"$slurm_instance\"}",
"legendFormat": "mix",
"range": true,
"refId": "B"
},
{
"datasource": {
"type": "prometheus",
"uid": "merlin-mimir"
},
"editorMode": "code",
"expr": "slurm_nodes_alloc{instance=\"$slurm_instance\"}",
"legendFormat": "alloc",
"range": true,
"refId": "C"
},
{
"datasource": {
"type": "prometheus",
"uid": "merlin-mimir"
},
"editorMode": "code",
"expr": "slurm_nodes_down{instance=\"$slurm_instance\"}",
"legendFormat": "down",
"range": true,
"refId": "D"
},
{
"datasource": {
"type": "prometheus",
"uid": "merlin-mimir"
},
"editorMode": "code",
"expr": "slurm_nodes_draining{instance=\"$slurm_instance\"} + slurm_nodes_drained{instance=\"$slurm_instance\"} ",
"hide": false,
"instant": false,
"legendFormat": "drain",
"range": true,
"refId": "E"
}
],
"title": "Nodes (idle/mix/alloc/down)",
"type": "stat"
},
{
"datasource": {
"type": "prometheus",
"uid": "merlin-mimir"
},
"fieldConfig": {
"defaults": {
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": 0
},
{
"color": "yellow",
"value": 1
}
]
}
},
"overrides": []
},
"gridPos": {
"h": 4,
"w": 3,
"x": 0,
"y": 9
},
"id": 114,
"options": {
"colorMode": "background",
"graphMode": "none",
"justifyMode": "auto",
"orientation": "auto",
"percentChangeColorMode": "standard",
"reduceOptions": {
"calcs": [
"lastNotNull"
],
"fields": "",
"values": false
},
"showPercentChange": false,
"textMode": "auto",
"wideLayout": true
},
"pluginVersion": "12.1.1",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "merlin-mimir"
},
"editorMode": "code",
"expr": "slurm_jobs_hold{instance=\"$slurm_instance\"}",
"range": true,
"refId": "A"
}
],
"title": "Held Jobs",
"type": "stat"
},
{
"datasource": {
"type": "prometheus",
"uid": "merlin-mimir"
},
"fieldConfig": {
"defaults": {
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": 0
}
]
}
},
"overrides": []
},
"gridPos": {
"h": 4,
"w": 3,
"x": 3,
"y": 9
},
"id": 3,
"options": {
"colorMode": "background",
"graphMode": "none",
"justifyMode": "auto",
"orientation": "auto",
"percentChangeColorMode": "standard",
"reduceOptions": {
"calcs": [
"lastNotNull"
],
"fields": "",
"values": false
},
"showPercentChange": false,
"textMode": "auto",
"wideLayout": true
},
"pluginVersion": "12.1.1",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "merlin-mimir"
},
"editorMode": "code",
"expr": "sum(slurm_node_cpus_alloc{instance=\"$slurm_instance\"})",
"range": true,
"refId": "A"
}
],
"title": "Allocated CPUs",
"type": "stat"
},
{
"datasource": {
"type": "prometheus",
"uid": "merlin-mimir"
},
"fieldConfig": {
"defaults": {
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": 0
},
{
"color": "red",
"value": 100
}
]
}
},
"overrides": []
},
"gridPos": {
"h": 4,
"w": 3,
"x": 6,
"y": 9
},
"id": 8,
"options": {
"colorMode": "background",
"graphMode": "none",
"justifyMode": "auto",
"orientation": "auto",
"percentChangeColorMode": "standard",
"reduceOptions": {
"calcs": [
"lastNotNull"
],
"fields": "",
"values": false
},
"showPercentChange": false,
"textMode": "auto",
"wideLayout": true
},
"pluginVersion": "12.1.1",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "merlin-mimir"
},
"editorMode": "code",
"expr": "slurm_slurmdbd_queue_size{instance=\"$slurm_instance\"}",
"range": true,
"refId": "A"
}
],
"title": "DBD Queue",
"type": "stat"
},
{
"collapsed": false,
"gridPos": {
"h": 1,
"w": 24,
"x": 0,
"y": 13
},
"id": 112,
"panels": [],
"title": "Cluster summary graphs",
"type": "row"
},
{
"datasource": {
"type": "prometheus",
"uid": "merlin-mimir"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"barWidthFactor": 0.6,
"drawStyle": "line",
"fillOpacity": 0,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"insertNulls": false,
"lineInterpolation": "linear",
"lineStyle": {
"dash": [
10,
10
],
"fill": "dash"
},
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "auto",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "blue",
"value": 0
}
]
}
},
"overrides": [
{
"matcher": {
"id": "byName",
"options": "allocated CPUs"
},
"properties": [
{
"id": "custom.fillOpacity",
"value": 20
},
{
"id": "custom.lineStyle"
}
]
}
]
},
"gridPos": {
"h": 8,
"w": 9,
"x": 0,
"y": 14
},
"id": 108,
"options": {
"legend": {
"calcs": [],
"displayMode": "list",
"placement": "bottom",
"showLegend": true
},
"tooltip": {
"hideZeros": false,
"mode": "single",
"sort": "none"
}
},
"pluginVersion": "12.1.1",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "merlin-mimir"
},
"editorMode": "code",
"expr": "sum(slurm_node_cpus_alloc{instance=\"$slurm_instance\"})",
"legendFormat": "allocated CPUs",
"range": true,
"refId": "A"
},
{
"datasource": {
"type": "prometheus",
"uid": "merlin-mimir"
},
"editorMode": "code",
"expr": "sum(slurm_node_cpus{instance=\"$slurm_instance\"})",
"hide": false,
"instant": false,
"legendFormat": "Total",
"range": true,
"refId": "B"
}
],
"title": "Sum allocated CPUs",
"type": "timeseries"
},
{
"datasource": {
"type": "prometheus",
"uid": "merlin-mimir"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"barWidthFactor": 0.6,
"drawStyle": "line",
"fillOpacity": 0,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"insertNulls": false,
"lineInterpolation": "linear",
"lineStyle": {
"dash": [
10,
10
],
"fill": "dash"
},
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "auto",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "blue",
"value": 0
}
]
},
"unit": "mbytes"
},
"overrides": [
{
"matcher": {
"id": "byName",
"options": "allocated Memory"
},
"properties": [
{
"id": "custom.fillOpacity",
"value": 20
},
{
"id": "custom.lineStyle"
}
]
}
]
},
"gridPos": {
"h": 8,
"w": 9,
"x": 9,
"y": 14
},
"id": 109,
"options": {
"legend": {
"calcs": [],
"displayMode": "list",
"placement": "bottom",
"showLegend": true
},
"tooltip": {
"hideZeros": false,
"mode": "single",
"sort": "none"
}
},
"pluginVersion": "12.1.1",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "merlin-mimir"
},
"editorMode": "code",
"expr": "sum(slurm_node_memory_alloc_bytes{instance=\"$slurm_instance\"})",
"legendFormat": "allocated Memory",
"range": true,
"refId": "A"
},
{
"datasource": {
"type": "prometheus",
"uid": "merlin-mimir"
},
"editorMode": "code",
"expr": "sum(slurm_node_memory_bytes{instance=\"$slurm_instance\"})",
"hide": false,
"instant": false,
"legendFormat": "Total",
"range": true,
"refId": "B"
}
],
"title": "Sum Allocated Memory",
"type": "timeseries"
},
{
"datasource": {
"type": "prometheus",
"uid": "merlin-mimir"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"barWidthFactor": 0.6,
"drawStyle": "bars",
"fillOpacity": 30,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"insertNulls": false,
"lineInterpolation": "linear",
"lineWidth": 2,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "auto",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "normal"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": 0
},
{
"color": "red",
"value": 80
}
]
},
"unit": "short"
},
"overrides": [
{
"matcher": {
"id": "byName",
"options": "total mc CPUs"
},
"properties": [
{
"id": "custom.drawStyle",
"value": "line"
},
{
"id": "custom.fillOpacity",
"value": 0
},
{
"id": "custom.lineStyle",
"value": {
"dash": [
10,
10
],
"fill": "dash"
}
}
]
}
]
},
"gridPos": {
"h": 8,
"w": 9,
"x": 0,
"y": 22
},
"id": 111,
"options": {
"legend": {
"calcs": [],
"displayMode": "list",
"placement": "bottom",
"showLegend": true
},
"tooltip": {
"hideZeros": false,
"mode": "multi",
"sort": "none"
}
},
"pluginVersion": "12.1.1",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "merlin-mimir"
},
"editorMode": "code",
"expr": "slurm_partition_jobs_cpus_alloc{instance=\"$slurm_instance\"}",
"legendFormat": "{{partition}}",
"range": true,
"refId": "A"
},
{
"datasource": {
"type": "prometheus",
"uid": "merlin-mimir"
},
"editorMode": "code",
"expr": "sum(slurm_node_cpus{instance=\"$slurm_instance\"})",
"legendFormat": "total mc CPUs",
"range": true,
"refId": "B"
}
],
"title": "CPU Allocation by Partition",
"type": "timeseries"
},
{
"datasource": {
"type": "prometheus",
"uid": "merlin-mimir"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"barWidthFactor": 0.6,
"drawStyle": "bars",
"fillOpacity": 0,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"insertNulls": false,
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "auto",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "normal"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": 0
},
{
"color": "red",
"value": 80
}
]
}
},
"overrides": []
},
"gridPos": {
"h": 8,
"w": 9,
"x": 9,
"y": 22
},
"id": 113,
"options": {
"legend": {
"calcs": [],
"displayMode": "list",
"placement": "bottom",
"showLegend": true
},
"tooltip": {
"hideZeros": false,
"mode": "single",
"sort": "none"
}
},
"pluginVersion": "12.1.1",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "merlin-mimir"
},
"editorMode": "code",
"expr": "slurm_nodes_draining{instance=\"$slurm_instance\"}",
"instant": false,
"legendFormat": "{{__name__}}",
"range": true,
"refId": "A"
},
{
"datasource": {
"type": "prometheus",
"uid": "merlin-mimir"
},
"editorMode": "code",
"expr": "slurm_nodes_drained{instance=\"$slurm_instance\"}",
"hide": false,
"instant": false,
"legendFormat": "{{__name__}}",
"range": true,
"refId": "B"
},
{
"datasource": {
"type": "prometheus",
"uid": "merlin-mimir"
},
"editorMode": "code",
"expr": "slurm_nodes_down{instance=\"$slurm_instance\"}",
"hide": false,
"instant": false,
"legendFormat": "{{__name__}}",
"range": true,
"refId": "C"
}
],
"title": "Nodes drained/draining/down",
"type": "timeseries"
},
{
"collapsed": false,
"gridPos": {
"h": 1,
"w": 24,
"x": 0,
"y": 30
},
"id": 101,
"panels": [],
"title": "Job Trends",
"type": "row"
},
{
"datasource": {
"type": "prometheus",
"uid": "merlin-mimir"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"barWidthFactor": 0.6,
"drawStyle": "line",
"fillOpacity": 20,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"insertNulls": false,
"lineInterpolation": "linear",
"lineWidth": 2,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "auto",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": 0
},
{
"color": "red",
"value": 80
}
]
}
},
"overrides": [
{
"matcher": {
"id": "byName",
"options": "running"
},
"properties": [
{
"id": "color",
"value": {
"fixedColor": "green",
"mode": "fixed"
}
}
]
},
{
"matcher": {
"id": "byName",
"options": "pending"
},
"properties": [
{
"id": "color",
"value": {
"fixedColor": "orange",
"mode": "fixed"
}
}
]
},
{
"matcher": {
"id": "byName",
"options": "completing"
},
"properties": [
{
"id": "color",
"value": {
"fixedColor": "blue",
"mode": "fixed"
}
}
]
}
]
},
"gridPos": {
"h": 8,
"w": 12,
"x": 0,
"y": 31
},
"id": 10,
"options": {
"legend": {
"calcs": [],
"displayMode": "list",
"placement": "bottom",
"showLegend": true
},
"tooltip": {
"hideZeros": false,
"mode": "multi",
"sort": "none"
}
},
"pluginVersion": "12.1.1",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "merlin-mimir"
},
"editorMode": "code",
"expr": "slurm_jobs_running{instance=\"$slurm_instance\"}",
"legendFormat": "running",
"range": true,
"refId": "A"
},
{
"datasource": {
"type": "prometheus",
"uid": "merlin-mimir"
},
"editorMode": "code",
"expr": "slurm_jobs_pending{instance=\"$slurm_instance\"}",
"legendFormat": "pending",
"range": true,
"refId": "B"
},
{
"datasource": {
"type": "prometheus",
"uid": "merlin-mimir"
},
"editorMode": "code",
"expr": "slurm_jobs_completing{instance=\"$slurm_instance\"}",
"legendFormat": "completing",
"range": true,
"refId": "C"
},
{
"datasource": {
"type": "prometheus",
"uid": "merlin-mimir"
},
"editorMode": "code",
"expr": "slurm_jobs_suspended{instance=\"$slurm_instance\"}",
"legendFormat": "suspended",
"range": true,
"refId": "D"
},
{
"datasource": {
"type": "prometheus",
"uid": "merlin-mimir"
},
"editorMode": "code",
"expr": "slurm_jobs_configuring{instance=\"$slurm_instance\"}",
"legendFormat": "configuring",
"range": true,
"refId": "E"
}
],
"title": "Job State Trends",
"type": "timeseries"
},
{
"datasource": {
"type": "prometheus",
"uid": "merlin-mimir"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"barWidthFactor": 0.6,
"drawStyle": "line",
"fillOpacity": 20,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"insertNulls": false,
"lineInterpolation": "linear",
"lineWidth": 2,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "auto",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": 0
},
{
"color": "red",
"value": 80
}
]
}
},
"overrides": []
},
"gridPos": {
"h": 8,
"w": 12,
"x": 12,
"y": 31
},
"id": 11,
"options": {
"legend": {
"calcs": [],
"displayMode": "list",
"placement": "bottom",
"showLegend": true
},
"tooltip": {
"hideZeros": false,
"mode": "multi",
"sort": "none"
}
},
"pluginVersion": "12.1.1",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "merlin-mimir"
},
"editorMode": "code",
"expr": "rate(slurm_sdiag_jobs_completed{instance=\"$slurm_instance\"}[5m])*60",
"legendFormat": "completed",
"range": true,
"refId": "A"
},
{
"datasource": {
"type": "prometheus",
"uid": "merlin-mimir"
},
"editorMode": "code",
"expr": "rate(slurm_sdiag_jobs_started{instance=\"$slurm_instance\"}[5m])*60",
"legendFormat": "started",
"range": true,
"refId": "B"
},
{
"datasource": {
"type": "prometheus",
"uid": "merlin-mimir"
},
"editorMode": "code",
"expr": "rate(slurm_sdiag_jobs_submitted{instance=\"$slurm_instance\"}[5m])*60",
"legendFormat": "submitted",
"range": true,
"refId": "C"
},
{
"datasource": {
"type": "prometheus",
"uid": "merlin-mimir"
},
"editorMode": "code",
"expr": "rate(slurm_sdiag_jobs_failed{instance=\"$slurm_instance\"}[5m])*60",
"legendFormat": "failed",
"range": true,
"refId": "D"
},
{
"datasource": {
"type": "prometheus",
"uid": "merlin-mimir"
},
"editorMode": "code",
"expr": "rate(slurm_sdiag_jobs_canceled{instance=\"$slurm_instance\"}[5m])*60",
"legendFormat": "cancelled",
"range": true,
"refId": "E"
}
],
"title": "Job Throughput (rate/min)",
"type": "timeseries"
},
{
"collapsed": false,
"gridPos": {
"h": 1,
"w": 24,
"x": 0,
"y": 39
},
"id": 115,
"panels": [],
"title": "user monitoring",
"type": "row"
},
{
"datasource": {
"type": "prometheus",
"uid": "merlin-mimir"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"barWidthFactor": 0.6,
"drawStyle": "bars",
"fillOpacity": 0,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"insertNulls": false,
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "auto",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "normal"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": 0
},
{
"color": "red",
"value": 80
}
]
}
},
"overrides": []
},
"gridPos": {
"h": 8,
"w": 12,
"x": 0,
"y": 40
},
"id": 116,
"options": {
"legend": {
"calcs": [],
"displayMode": "list",
"placement": "bottom",
"showLegend": true
},
"tooltip": {
"hideZeros": false,
"mode": "single",
"sort": "none"
}
},
"pluginVersion": "12.1.1",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "merlin-mimir"
},
"editorMode": "code",
"expr": "slurm_user_jobs_running{instance=\"$slurm_instance\"}",
"instant": false,
"legendFormat": "{{username}}",
"range": true,
"refId": "A"
}
],
"title": "Jobs running per user",
"type": "timeseries"
},
{
"datasource": {
"type": "prometheus",
"uid": "merlin-mimir"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"barWidthFactor": 0.6,
"drawStyle": "bars",
"fillOpacity": 0,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"insertNulls": false,
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "auto",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "normal"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": 0
},
{
"color": "red",
"value": 80
}
]
}
},
"overrides": []
},
"gridPos": {
"h": 8,
"w": 12,
"x": 12,
"y": 40
},
"id": 120,
"options": {
"legend": {
"calcs": [],
"displayMode": "list",
"placement": "bottom",
"showLegend": true
},
"tooltip": {
"hideZeros": false,
"mode": "single",
"sort": "none"
}
},
"pluginVersion": "12.1.1",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "merlin-mimir"
},
"editorMode": "code",
"expr": "slurm_user_jobs_pending{instance=\"$slurm_instance\"}",
"instant": false,
"legendFormat": "{{username}}",
"range": true,
"refId": "A"
}
],
"title": "Pending jobs per user",
"type": "timeseries"
},
{
"datasource": {
"type": "prometheus",
"uid": "merlin-mimir"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"barWidthFactor": 0.6,
"drawStyle": "bars",
"fillOpacity": 0,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"insertNulls": false,
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "auto",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "normal"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": 0
},
{
"color": "red",
"value": 80
}
]
}
},
"overrides": []
},
"gridPos": {
"h": 8,
"w": 12,
"x": 0,
"y": 48
},
"id": 122,
"options": {
"legend": {
"calcs": [],
"displayMode": "list",
"placement": "bottom",
"showLegend": true
},
"tooltip": {
"hideZeros": false,
"mode": "single",
"sort": "none"
}
},
"pluginVersion": "12.1.1",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "merlin-mimir"
},
"editorMode": "code",
"expr": "slurm_user_jobs_cpus_alloc{instance=\"$slurm_instance\"} ",
"instant": false,
"legendFormat": "{{username}}",
"range": true,
"refId": "A"
}
],
"title": "CPUs allocated per user",
"type": "timeseries"
},
{
"datasource": {
"type": "prometheus",
"uid": "merlin-mimir"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"barWidthFactor": 0.6,
"drawStyle": "bars",
"fillOpacity": 0,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"insertNulls": false,
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "auto",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "normal"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": 0
},
{
"color": "red",
"value": 80
}
]
},
"unit": "mbytes"
},
"overrides": []
},
"gridPos": {
"h": 8,
"w": 12,
"x": 12,
"y": 48
},
"id": 119,
"options": {
"legend": {
"calcs": [],
"displayMode": "list",
"placement": "bottom",
"showLegend": true
},
"tooltip": {
"hideZeros": false,
"mode": "single",
"sort": "none"
}
},
"pluginVersion": "12.1.1",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "merlin-mimir"
},
"editorMode": "code",
"expr": "slurm_user_jobs_memory_alloc{instance=\"$slurm_instance\"}",
"instant": false,
"legendFormat": "{{username}}",
"range": true,
"refId": "A"
}
],
"title": "Allocated memory per user",
"type": "timeseries"
},
{
"datasource": {
"type": "prometheus",
"uid": "merlin-mimir"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"barWidthFactor": 0.6,
"drawStyle": "bars",
"fillOpacity": 0,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"insertNulls": false,
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "auto",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "normal"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": 0
},
{
"color": "red",
"value": 80
}
]
}
},
"overrides": []
},
"gridPos": {
"h": 8,
"w": 12,
"x": 0,
"y": 56
},
"id": 123,
"options": {
"legend": {
"calcs": [],
"displayMode": "list",
"placement": "bottom",
"showLegend": true
},
"tooltip": {
"hideZeros": false,
"mode": "single",
"sort": "none"
}
},
"pluginVersion": "12.1.1",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "merlin-mimir"
},
"editorMode": "code",
"expr": "slurm_user_jobs_cancelled{instance=\"$slurm_instance\"}",
"instant": false,
"legendFormat": "{{username}}",
"range": true,
"refId": "A"
}
],
"title": "Cancelled jobs per user",
"type": "timeseries"
},
{
"datasource": {
"type": "prometheus",
"uid": "merlin-mimir"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"barWidthFactor": 0.6,
"drawStyle": "bars",
"fillOpacity": 0,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"insertNulls": false,
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "auto",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "normal"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": 0
},
{
"color": "red",
"value": 80
}
]
}
},
"overrides": []
},
"gridPos": {
"h": 8,
"w": 12,
"x": 12,
"y": 56
},
"id": 121,
"options": {
"legend": {
"calcs": [],
"displayMode": "list",
"placement": "bottom",
"showLegend": true
},
"tooltip": {
"hideZeros": false,
"mode": "single",
"sort": "none"
}
},
"pluginVersion": "12.1.1",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "merlin-mimir"
},
"editorMode": "code",
"expr": "slurm_user_jobs_failed{instance=\"$slurm_instance\"}",
"instant": false,
"legendFormat": "{{username}}",
"range": true,
"refId": "A"
}
],
"title": "Failed jobs per user",
"type": "timeseries"
},
{
"datasource": {
"type": "prometheus",
"uid": "merlin-mimir"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"barWidthFactor": 0.6,
"drawStyle": "line",
"fillOpacity": 0,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"insertNulls": false,
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "auto",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": 0
},
{
"color": "red",
"value": 80
}
]
}
},
"overrides": []
},
"gridPos": {
"h": 8,
"w": 12,
"x": 0,
"y": 64
},
"id": 117,
"options": {
"legend": {
"calcs": [],
"displayMode": "list",
"placement": "bottom",
"showLegend": true
},
"tooltip": {
"hideZeros": false,
"mode": "single",
"sort": "none"
}
},
"pluginVersion": "12.1.1",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "merlin-mimir"
},
"editorMode": "code",
"expr": "slurm_user_jobs_cpus_alloc{instance=\"$slurm_instance\"} / slurm_user_jobs_running{instance=\"$slurm_instance\"}",
"instant": false,
"legendFormat": "{{username}}",
"range": true,
"refId": "A"
}
],
"title": "Users' avg CPUs per job",
"type": "timeseries"
},
{
"datasource": {
"type": "prometheus",
"uid": "merlin-mimir"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"barWidthFactor": 0.6,
"drawStyle": "line",
"fillOpacity": 0,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"insertNulls": false,
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "auto",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": 0
},
{
"color": "red",
"value": 80
}
]
},
"unit": "mbytes"
},
"overrides": []
},
"gridPos": {
"h": 8,
"w": 12,
"x": 12,
"y": 64
},
"id": 118,
"options": {
"legend": {
"calcs": [],
"displayMode": "list",
"placement": "bottom",
"showLegend": true
},
"tooltip": {
"hideZeros": false,
"mode": "single",
"sort": "none"
}
},
"pluginVersion": "12.1.1",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "merlin-mimir"
},
"editorMode": "code",
"expr": "slurm_user_jobs_memory_alloc{instance=\"$slurm_instance\"} / slurm_user_jobs_running{instance=\"$slurm_instance\"}",
"instant": false,
"legendFormat": "{{username}}",
"range": true,
"refId": "A"
}
],
"title": "Users' avg memory per job",
"type": "timeseries"
},
{
"collapsed": true,
"gridPos": {
"h": 1,
"w": 24,
"x": 0,
"y": 72
},
"id": 102,
"panels": [
{
"datasource": {
"type": "prometheus",
"uid": "merlin-mimir"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"barWidthFactor": 0.6,
"drawStyle": "line",
"fillOpacity": 0,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"insertNulls": false,
"lineInterpolation": "linear",
"lineWidth": 2,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "auto",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": 0
},
{
"color": "red",
"value": 80
}
]
},
"unit": "short"
},
"overrides": [
{
"matcher": {
"id": "byFrameRefID",
"options": "B"
},
"properties": [
{
"id": "custom.lineStyle",
"value": {
"dash": [
10,
10
],
"fill": "dash"
}
}
]
}
]
},
"gridPos": {
"h": 8,
"w": 12,
"x": 0,
"y": 241
},
"id": 20,
"options": {
"legend": {
"calcs": [],
"displayMode": "list",
"placement": "bottom",
"showLegend": true
},
"tooltip": {
"hideZeros": false,
"mode": "multi",
"sort": "none"
}
},
"pluginVersion": "12.1.1",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "merlin-mimir"
},
"editorMode": "code",
"expr": "slurm_node_cpus_alloc{instance=\"$slurm_instance\"}",
"legendFormat": "{{node}} (alloc)",
"range": true,
"refId": "A"
},
{
"datasource": {
"type": "prometheus",
"uid": "merlin-mimir"
},
"editorMode": "code",
"expr": "slurm_node_cpus{instance=\"$slurm_instance\"}",
"legendFormat": "{{node}} (total)",
"range": true,
"refId": "B"
}
],
"title": "CPU Allocation by Node",
"type": "timeseries"
},
{
"datasource": {
"type": "prometheus",
"uid": "merlin-mimir"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"barWidthFactor": 0.6,
"drawStyle": "line",
"fillOpacity": 0,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"insertNulls": false,
"lineInterpolation": "linear",
"lineWidth": 2,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "auto",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": 0
},
{
"color": "red",
"value": 80
}
]
},
"unit": "mbytes"
},
"overrides": [
{
"matcher": {
"id": "byFrameRefID",
"options": "B"
},
"properties": [
{
"id": "custom.lineStyle",
"value": {
"dash": [
10,
10
],
"fill": "dash"
}
}
]
}
]
},
"gridPos": {
"h": 8,
"w": 12,
"x": 12,
"y": 241
},
"id": 21,
"options": {
"legend": {
"calcs": [],
"displayMode": "list",
"placement": "bottom",
"showLegend": true
},
"tooltip": {
"hideZeros": false,
"mode": "multi",
"sort": "none"
}
},
"pluginVersion": "12.1.1",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "merlin-mimir"
},
"editorMode": "code",
"expr": "slurm_node_memory_alloc_bytes{instance=\"$slurm_instance\"}",
"legendFormat": "{{node}} (alloc)",
"range": true,
"refId": "A"
},
{
"datasource": {
"type": "prometheus",
"uid": "merlin-mimir"
},
"editorMode": "code",
"expr": "slurm_node_memory_bytes{instance=\"$slurm_instance\"}",
"legendFormat": "{{node}} (total)",
"range": true,
"refId": "B"
}
],
"title": "Memory Allocation by Node",
"type": "timeseries"
},
{
"datasource": {
"type": "prometheus",
"uid": "merlin-mimir"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"barWidthFactor": 0.6,
"drawStyle": "line",
"fillOpacity": 20,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"insertNulls": false,
"lineInterpolation": "linear",
"lineWidth": 2,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "auto",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [],
"max": 1,
"min": 0,
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": 0
},
{
"color": "yellow",
"value": 0.7
},
{
"color": "red",
"value": 0.9
}
]
},
"unit": "percentunit"
},
"overrides": []
},
"gridPos": {
"h": 8,
"w": 12,
"x": 0,
"y": 249
},
"id": 22,
"options": {
"legend": {
"calcs": [],
"displayMode": "list",
"placement": "bottom",
"showLegend": true
},
"tooltip": {
"hideZeros": false,
"mode": "multi",
"sort": "none"
}
},
"pluginVersion": "12.1.1",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "merlin-mimir"
},
"editorMode": "code",
"expr": "slurm_node_cpus_alloc{instance=\"$slurm_instance\"} / slurm_node_cpus{instance=\"$slurm_instance\"}",
"legendFormat": "{{node}}",
"range": true,
"refId": "A"
}
],
"title": "CPU Utilization by Node",
"type": "timeseries"
},
{
"datasource": {
"type": "prometheus",
"uid": "merlin-mimir"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"barWidthFactor": 0.6,
"drawStyle": "line",
"fillOpacity": 20,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"insertNulls": false,
"lineInterpolation": "linear",
"lineWidth": 2,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "auto",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [],
"max": 1,
"min": 0,
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": 0
},
{
"color": "yellow",
"value": 0.7
},
{
"color": "red",
"value": 0.9
}
]
},
"unit": "percentunit"
},
"overrides": []
},
"gridPos": {
"h": 8,
"w": 12,
"x": 12,
"y": 249
},
"id": 23,
"options": {
"legend": {
"calcs": [],
"displayMode": "list",
"placement": "bottom",
"showLegend": true
},
"tooltip": {
"hideZeros": false,
"mode": "multi",
"sort": "none"
}
},
"pluginVersion": "12.1.1",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "merlin-mimir"
},
"editorMode": "code",
"expr": "slurm_node_memory_alloc_bytes{instance=\"$slurm_instance\"} / slurm_node_memory_bytes{instance=\"$slurm_instance\"}",
"legendFormat": "{{node}}",
"range": true,
"refId": "A"
}
],
"title": "Memory Utilization by Node",
"type": "timeseries"
}
],
"title": "Per-Node Resources",
"type": "row"
},
{
"collapsed": true,
"gridPos": {
"h": 1,
"w": 24,
"x": 0,
"y": 73
},
"id": 103,
"panels": [
{
"datasource": {
"type": "prometheus",
"uid": "merlin-mimir"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"barWidthFactor": 0.6,
"drawStyle": "line",
"fillOpacity": 30,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"insertNulls": false,
"lineInterpolation": "linear",
"lineWidth": 2,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "auto",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": 0
},
{
"color": "red",
"value": 80
}
]
}
},
"overrides": []
},
"gridPos": {
"h": 8,
"w": 8,
"x": 0,
"y": 290
},
"id": 30,
"options": {
"legend": {
"calcs": [],
"displayMode": "list",
"placement": "bottom",
"showLegend": true
},
"tooltip": {
"hideZeros": false,
"mode": "multi",
"sort": "none"
}
},
"pluginVersion": "12.1.1",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "merlin-mimir"
},
"editorMode": "code",
"expr": "slurm_partition_jobs_running{instance=\"$slurm_instance\"}",
"legendFormat": "{{partition}}",
"range": true,
"refId": "A"
}
],
"title": "Running Jobs by Partition",
"type": "timeseries"
},
{
"datasource": {
"type": "prometheus",
"uid": "merlin-mimir"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"barWidthFactor": 0.6,
"drawStyle": "line",
"fillOpacity": 30,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"insertNulls": false,
"lineInterpolation": "linear",
"lineWidth": 2,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "auto",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": 0
},
{
"color": "red",
"value": 80
}
]
}
},
"overrides": []
},
"gridPos": {
"h": 8,
"w": 8,
"x": 8,
"y": 290
},
"id": 31,
"options": {
"legend": {
"calcs": [],
"displayMode": "list",
"placement": "bottom",
"showLegend": true
},
"tooltip": {
"hideZeros": false,
"mode": "multi",
"sort": "none"
}
},
"pluginVersion": "12.1.1",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "merlin-mimir"
},
"editorMode": "code",
"expr": "slurm_partition_jobs_pending{instance=\"$slurm_instance\"}",
"legendFormat": "{{partition}}",
"range": true,
"refId": "A"
}
],
"title": "Pending Jobs by Partition",
"type": "timeseries"
},
{
"datasource": {
"type": "prometheus",
"uid": "merlin-mimir"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"barWidthFactor": 0.6,
"drawStyle": "line",
"fillOpacity": 30,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"insertNulls": false,
"lineInterpolation": "linear",
"lineWidth": 2,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "auto",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": 0
},
{
"color": "red",
"value": 80
}
]
},
"unit": "short"
},
"overrides": []
},
"gridPos": {
"h": 8,
"w": 8,
"x": 16,
"y": 290
},
"id": 32,
"options": {
"legend": {
"calcs": [],
"displayMode": "list",
"placement": "bottom",
"showLegend": true
},
"tooltip": {
"hideZeros": false,
"mode": "multi",
"sort": "none"
}
},
"pluginVersion": "12.1.1",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "merlin-mimir"
},
"editorMode": "code",
"expr": "slurm_partition_jobs_cpus_alloc{instance=\"$slurm_instance\"}",
"legendFormat": "{{partition}} (alloc)",
"range": true,
"refId": "A"
},
{
"datasource": {
"type": "prometheus",
"uid": "merlin-mimir"
},
"expr": "slurm_partition_cpus{instance=\"$slurm_instance\"}",
"legendFormat": "{{partition}} (total)",
"refId": "B"
}
],
"title": "CPU Allocation by Partition",
"type": "timeseries"
}
],
"title": "Per-Partition Status",
"type": "row"
},
{
"collapsed": false,
"gridPos": {
"h": 1,
"w": 24,
"x": 0,
"y": 74
},
"id": 105,
"panels": [],
"title": "Scheduler Performance",
"type": "row"
},
{
"datasource": {
"type": "prometheus",
"uid": "merlin-mimir"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"barWidthFactor": 0.6,
"drawStyle": "line",
"fillOpacity": 10,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"insertNulls": false,
"lineInterpolation": "linear",
"lineWidth": 2,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "auto",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": 0
},
{
"color": "red",
"value": 80
}
]
},
"unit": "µs"
},
"overrides": []
},
"gridPos": {
"h": 8,
"w": 8,
"x": 0,
"y": 75
},
"id": 50,
"options": {
"legend": {
"calcs": [],
"displayMode": "list",
"placement": "bottom",
"showLegend": true
},
"tooltip": {
"hideZeros": false,
"mode": "multi",
"sort": "none"
}
},
"pluginVersion": "12.1.1",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "merlin-mimir"
},
"editorMode": "code",
"expr": "slurm_sched_mean_cycle{instance=\"$slurm_instance\"}",
"legendFormat": "main scheduler (mean)",
"range": true,
"refId": "A"
},
{
"datasource": {
"type": "prometheus",
"uid": "merlin-mimir"
},
"editorMode": "code",
"expr": "slurm_schedule_cycle_last{instance=\"$slurm_instance\"}",
"legendFormat": "main scheduler (last)",
"range": true,
"refId": "B"
},
{
"datasource": {
"type": "prometheus",
"uid": "merlin-mimir"
},
"editorMode": "code",
"expr": "slurm_bf_mean_cycle{instance=\"$slurm_instance\"}",
"legendFormat": "backfill (mean)",
"range": true,
"refId": "C"
},
{
"datasource": {
"type": "prometheus",
"uid": "merlin-mimir"
},
"editorMode": "code",
"expr": "slurm_bf_cycle_last{instance=\"$slurm_instance\"}",
"legendFormat": "backfill (last)",
"range": true,
"refId": "D"
}
],
"title": "Scheduler Cycle Time",
"type": "timeseries"
},
{
"datasource": {
"type": "prometheus",
"uid": "merlin-mimir"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"barWidthFactor": 0.6,
"drawStyle": "line",
"fillOpacity": 10,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"insertNulls": false,
"lineInterpolation": "linear",
"lineWidth": 2,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "auto",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": 0
},
{
"color": "red",
"value": 80
}
]
}
},
"overrides": []
},
"gridPos": {
"h": 8,
"w": 8,
"x": 8,
"y": 75
},
"id": 51,
"options": {
"legend": {
"calcs": [],
"displayMode": "list",
"placement": "bottom",
"showLegend": true
},
"tooltip": {
"hideZeros": false,
"mode": "multi",
"sort": "none"
}
},
"pluginVersion": "12.1.1",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "merlin-mimir"
},
"editorMode": "code",
"expr": "slurm_schedule_queue_len{instance=\"$slurm_instance\"}",
"legendFormat": "scheduler queue",
"range": true,
"refId": "A"
},
{
"datasource": {
"type": "prometheus",
"uid": "merlin-mimir"
},
"editorMode": "code",
"expr": "slurm_bf_queue_len{instance=\"$slurm_instance\"}",
"legendFormat": "backfill queue",
"range": true,
"refId": "B"
},
{
"datasource": {
"type": "prometheus",
"uid": "merlin-mimir"
},
"editorMode": "code",
"expr": "slurm_slurmdbd_queue_size{instance=\"$slurm_instance\"}",
"legendFormat": "slurmdbd queue",
"range": true,
"refId": "C"
},
{
"datasource": {
"type": "prometheus",
"uid": "merlin-mimir"
},
"editorMode": "code",
"expr": "slurm_agent_queue_size{instance=\"$slurm_instance\"}",
"legendFormat": "agent queue",
"range": true,
"refId": "D"
}
],
"title": "Queue Lengths",
"type": "timeseries"
},
{
"datasource": {
"type": "prometheus",
"uid": "merlin-mimir"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"barWidthFactor": 0.6,
"drawStyle": "line",
"fillOpacity": 10,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"insertNulls": false,
"lineInterpolation": "linear",
"lineWidth": 2,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "auto",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": 0
},
{
"color": "red",
"value": 80
}
]
}
},
"overrides": []
},
"gridPos": {
"h": 8,
"w": 8,
"x": 16,
"y": 75
},
"id": 52,
"options": {
"legend": {
"calcs": [],
"displayMode": "list",
"placement": "bottom",
"showLegend": true
},
"tooltip": {
"hideZeros": false,
"mode": "multi",
"sort": "none"
}
},
"pluginVersion": "12.1.1",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "merlin-mimir"
},
"editorMode": "code",
"expr": "rate(slurm_backfilled_jobs{instance=\"$slurm_instance\"}[5m])*60",
"legendFormat": "backfilled jobs/min",
"range": true,
"refId": "A"
},
{
"datasource": {
"type": "prometheus",
"uid": "merlin-mimir"
},
"editorMode": "code",
"expr": "rate(slurm_bf_cycle_cnt{instance=\"$slurm_instance\"}[5m])*60",
"legendFormat": "bf cycles/min",
"range": true,
"refId": "B"
},
{
"datasource": {
"type": "prometheus",
"uid": "merlin-mimir"
},
"editorMode": "code",
"expr": "slurm_bf_depth_mean{instance=\"$slurm_instance\"}",
"legendFormat": "bf depth (mean)",
"range": true,
"refId": "C"
},
{
"datasource": {
"type": "prometheus",
"uid": "merlin-mimir"
},
"editorMode": "code",
"expr": "slurm_bf_table_size{instance=\"$slurm_instance\"}",
"legendFormat": "bf table size",
"range": true,
"refId": "D"
}
],
"title": "Backfill Statistics",
"type": "timeseries"
},
{
"datasource": {
"type": "prometheus",
"uid": "merlin-mimir"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"barWidthFactor": 0.6,
"drawStyle": "line",
"fillOpacity": 10,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"insertNulls": false,
"lineInterpolation": "linear",
"lineWidth": 2,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "auto",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": 0
},
{
"color": "red",
"value": 80
}
]
},
"unit": "µs"
},
"overrides": []
},
"gridPos": {
"h": 8,
"w": 12,
"x": 0,
"y": 83
},
"id": 53,
"options": {
"legend": {
"calcs": [],
"displayMode": "list",
"placement": "bottom",
"showLegend": true
},
"tooltip": {
"hideZeros": false,
"mode": "multi",
"sort": "none"
}
},
"pluginVersion": "12.1.1",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "merlin-mimir"
},
"editorMode": "code",
"expr": "slurm_sdiag_latency{instance=\"$slurm_instance\"}",
"legendFormat": "sdiag latency",
"range": true,
"refId": "A"
}
],
"title": "RPC Latency",
"type": "timeseries"
},
{
"datasource": {
"type": "prometheus",
"uid": "merlin-mimir"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"barWidthFactor": 0.6,
"drawStyle": "line",
"fillOpacity": 10,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"insertNulls": false,
"lineInterpolation": "linear",
"lineWidth": 2,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "auto",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": 0
},
{
"color": "red",
"value": 80
}
]
}
},
"overrides": []
},
"gridPos": {
"h": 8,
"w": 12,
"x": 12,
"y": 83
},
"id": 54,
"options": {
"legend": {
"calcs": [],
"displayMode": "list",
"placement": "bottom",
"showLegend": true
},
"tooltip": {
"hideZeros": false,
"mode": "multi",
"sort": "none"
}
},
"pluginVersion": "12.1.1",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "merlin-mimir"
},
"editorMode": "code",
"expr": "slurm_server_thread_cnt{instance=\"$slurm_instance\"}",
"legendFormat": "slurmctld threads",
"range": true,
"refId": "A"
},
{
"datasource": {
"type": "prometheus",
"uid": "merlin-mimir"
},
"editorMode": "code",
"expr": "slurm_agent_thread_cnt{instance=\"$slurm_instance\"}",
"legendFormat": "agent threads",
"range": true,
"refId": "B"
},
{
"datasource": {
"type": "prometheus",
"uid": "merlin-mimir"
},
"editorMode": "code",
"expr": "slurm_agent_cnt{instance=\"$slurm_instance\"}",
"legendFormat": "agents",
"range": true,
"refId": "C"
}
],
"title": "Thread Count",
"type": "timeseries"
}
],
"preload": false,
"schemaVersion": 41,
"tags": [
"slurm",
"hpc",
"openmetrics",
"native"
],
"templating": {
"list": [
{
"current": {
"text": "merlin7-slurmctld01",
"value": "merlin7-slurmctld01"
},
"datasource": {
"type": "prometheus",
"uid": "merlin-mimir"
},
"definition": "label_values(slurm_jobs_running,instance)",
"description": "Cluster identified by Slurm scheduler",
"name": "slurm_instance",
"options": [],
"query": {
"qryType": 1,
"query": "label_values(slurm_jobs_running,instance)",
"refId": "PrometheusVariableQueryEditor-VariableQuery"
},
"refresh": 1,
"regex": "",
"type": "query"
}
]
},
"time": {
"from": "now-24h",
"to": "now"
},
"timepicker": {},
"timezone": "",
"title": "Slurm Native OpenMetrics",
"uid": "ba818a63-c187-4860-83ab-59e7e457b77f",
"version": 47
}