Files
grafana-dashb-save/dashboards/Merlin7/Merlin7_Slurm_Native_OpenMetrics.json
gitea-actions[bot] 226f8b4aaf grafana: update resources
Changed files: dashboards/General/lac_test.json,dashboards/Merlin7/Merlin7_Slurm_Native_OpenMetrics.json
2026-06-01 18:00:35 +00:00

4494 lines
113 KiB
JSON

{
"annotations": {
"list": [
{
"builtIn": 1,
"datasource": {
"type": "grafana",
"uid": "-- Grafana --"
},
"enable": true,
"hide": true,
"iconColor": "rgba(0, 211, 255, 1)",
"name": "Annotations & Alerts",
"type": "dashboard"
}
]
},
"description": "Slurm monitoring derived from slurmctld metrics and the open-source Slurm exporter metrics.",
"editable": true,
"fiscalYearStartMonth": 0,
"graphTooltip": 1,
"id": null,
"links": [
{
"asDropdown": true,
"icon": "external link",
"includeVars": false,
"keepTime": false,
"tags": [],
"targetBlank": true,
"title": "Commit to git",
"tooltip": "",
"type": "link",
"url": "https://gitea.psi.ch/HPCE/grafana-dashb-save/actions?workflow=save-grafana-dashboards.yaml&actor=0&status=0"
}
],
"liveNow": false,
"panels": [
{
"collapsed": false,
"gridPos": {
"h": 1,
"w": 24,
"x": 0,
"y": 0
},
"id": 127,
"title": "Cluster Summary",
"type": "row"
},
{
"datasource": {
"type": "prometheus",
"uid": "merlin-mimir"
},
"fieldConfig": {
"defaults": {
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": 0
}
]
}
}
},
"gridPos": {
"h": 4,
"w": 3,
"x": 0,
"y": 1
},
"id": 1,
"options": {
"colorMode": "background",
"graphMode": "none",
"justifyMode": "auto",
"orientation": "auto",
"percentChangeColorMode": "standard",
"reduceOptions": {
"calcs": [
"lastNotNull"
],
"fields": "",
"values": false
},
"showPercentChange": false,
"textMode": "auto",
"wideLayout": true
},
"pluginVersion": "13.0.1+security-01",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "merlin-mimir"
},
"editorMode": "code",
"expr": "slurm_jobs_running{slurm_cluster=\"$slurm_cluster\",instance=~\".*01\"}",
"range": true,
"refId": "A"
}
],
"title": "Running Jobs",
"type": "stat"
},
{
"datasource": {
"type": "prometheus",
"uid": "merlin-mimir"
},
"fieldConfig": {
"defaults": {
"max": 1,
"min": 0,
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "red",
"value": 0
},
{
"color": "yellow",
"value": 0.5
},
{
"color": "green",
"value": 0.7
}
]
},
"unit": "percentunit"
}
},
"gridPos": {
"h": 4,
"w": 3,
"x": 3,
"y": 1
},
"id": 5,
"options": {
"colorMode": "background",
"graphMode": "none",
"justifyMode": "auto",
"orientation": "auto",
"percentChangeColorMode": "standard",
"reduceOptions": {
"calcs": [
"lastNotNull"
],
"fields": "",
"values": false
},
"showPercentChange": false,
"textMode": "auto",
"wideLayout": true
},
"pluginVersion": "13.0.1+security-01",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "merlin-mimir"
},
"editorMode": "code",
"expr": "sum(slurm_node_cpus_alloc{slurm_cluster=\"$slurm_cluster\",instance=~\".*01\"}) / sum(slurm_node_cpus{slurm_cluster=\"$slurm_cluster\",instance=~\".*01\"})",
"range": true,
"refId": "A"
}
],
"title": "CPU Utilz.",
"type": "stat"
},
{
"datasource": {
"type": "prometheus",
"uid": "merlin-mimir"
},
"fieldConfig": {
"defaults": {
"max": 1,
"min": 0,
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": 0
},
{
"color": "yellow",
"value": 0.7
},
{
"color": "red",
"value": 0.9
}
]
},
"unit": "percentunit"
}
},
"gridPos": {
"h": 4,
"w": 3,
"x": 6,
"y": 1
},
"id": 6,
"options": {
"colorMode": "background",
"graphMode": "none",
"justifyMode": "auto",
"orientation": "auto",
"percentChangeColorMode": "standard",
"reduceOptions": {
"calcs": [
"lastNotNull"
],
"fields": "",
"values": false
},
"showPercentChange": false,
"textMode": "auto",
"wideLayout": true
},
"pluginVersion": "13.0.1+security-01",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "merlin-mimir"
},
"editorMode": "code",
"expr": "sum(slurm_node_memory_alloc_bytes{slurm_cluster=\"$slurm_cluster\",instance=~\".*01\"}) / sum(slurm_node_memory_bytes{slurm_cluster=\"$slurm_cluster\",instance=~\".*01\"})",
"range": true,
"refId": "A"
}
],
"title": "Memory Utilz.",
"type": "stat"
},
{
"datasource": {
"type": "prometheus",
"uid": "merlin-mimir"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"mappings": [
{
"options": {
"alloc": {
"color": "dark-green",
"index": 0
},
"comp": {
"color": "green",
"index": 4
},
"idle": {
"color": "blue",
"index": 1
},
"inval": {
"color": "light-red",
"index": 7
},
"maint": {
"color": "dark-orange",
"index": 6
},
"plnd": {
"color": "purple",
"index": 2
}
},
"type": "value"
},
{
"options": {
"pattern": "mix-?",
"result": {
"color": "yellow",
"index": 3
}
},
"type": "regex"
},
{
"options": {
"pattern": "dr[an]",
"result": {
"color": "dark-red",
"index": 5
}
},
"type": "regex"
}
],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "red",
"value": 0
}
]
}
}
},
"gridPos": {
"h": 12,
"w": 14,
"x": 10,
"y": 1
},
"id": 125,
"options": {
"colorMode": "background",
"graphMode": "area",
"justifyMode": "auto",
"orientation": "auto",
"percentChangeColorMode": "standard",
"reduceOptions": {
"calcs": [
"lastNotNull"
],
"fields": "/^state$/",
"limit": 150,
"values": true
},
"showPercentChange": false,
"textMode": "auto",
"wideLayout": true
},
"pluginVersion": "13.0.1+security-01",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "merlin-mimir"
},
"editorMode": "code",
"exemplar": false,
"expr": "slurm_node_state{cluster=\"$slurm_cluster\"}",
"instant": true,
"legendFormat": "__auto",
"range": false,
"refId": "B"
}
],
"title": "Node State",
"transformations": [
{
"id": "labelsToFields",
"options": {
"keepLabels": [
"state",
"node"
],
"mode": "columns"
}
},
{
"id": "filterFieldsByName",
"options": {
"byVariable": false,
"include": {
"names": [
"state",
"node"
]
}
}
}
],
"type": "stat"
},
{
"datasource": {
"type": "prometheus",
"uid": "merlin-mimir"
},
"fieldConfig": {
"defaults": {
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": 0
},
{
"color": "orange",
"value": 1000
},
{
"color": "red",
"value": 5000
}
]
}
}
},
"gridPos": {
"h": 4,
"w": 3,
"x": 0,
"y": 5
},
"id": 2,
"options": {
"colorMode": "background",
"graphMode": "none",
"justifyMode": "auto",
"orientation": "auto",
"percentChangeColorMode": "standard",
"reduceOptions": {
"calcs": [
"lastNotNull"
],
"fields": "",
"values": false
},
"showPercentChange": false,
"textMode": "auto",
"wideLayout": true
},
"pluginVersion": "13.0.1+security-01",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "merlin-mimir"
},
"editorMode": "code",
"expr": "slurm_jobs_pending{slurm_cluster=\"$slurm_cluster\",instance=~\".*01\"}",
"range": true,
"refId": "A"
}
],
"title": "Pending Jobs",
"type": "stat"
},
{
"datasource": {
"type": "prometheus",
"uid": "merlin-mimir"
},
"fieldConfig": {
"defaults": {
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "blue",
"value": 0
}
]
}
}
},
"gridPos": {
"h": 4,
"w": 3,
"x": 3,
"y": 5
},
"id": 4,
"options": {
"colorMode": "background",
"graphMode": "none",
"justifyMode": "auto",
"orientation": "auto",
"percentChangeColorMode": "standard",
"reduceOptions": {
"calcs": [
"lastNotNull"
],
"fields": "",
"values": false
},
"showPercentChange": false,
"textMode": "auto",
"wideLayout": true
},
"pluginVersion": "13.0.1+security-01",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "merlin-mimir"
},
"editorMode": "code",
"expr": "sum(slurm_node_cpus{slurm_cluster=\"$slurm_cluster\",instance=~\".*01\"})",
"range": true,
"refId": "A"
}
],
"title": "Total CPUs",
"type": "stat"
},
{
"datasource": {
"type": "prometheus",
"uid": "merlin-mimir"
},
"fieldConfig": {
"defaults": {
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": 0
}
]
}
},
"overrides": [
{
"matcher": {
"id": "byName",
"options": "down"
},
"properties": [
{
"id": "thresholds",
"value": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": 0
},
{
"color": "red",
"value": 1
}
]
}
}
]
},
{
"matcher": {
"id": "byName",
"options": "drain"
},
"properties": [
{
"id": "thresholds",
"value": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": 0
},
{
"color": "red",
"value": 1
}
]
}
}
]
},
{
"matcher": {
"id": "byName",
"options": "idle",
"scope": "series"
},
"properties": [
{
"id": "thresholds",
"value": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": 0
},
{
"color": "blue",
"value": 1
}
]
}
}
]
}
]
},
"gridPos": {
"h": 4,
"w": 4,
"x": 6,
"y": 5
},
"id": 7,
"options": {
"colorMode": "background",
"graphMode": "none",
"justifyMode": "auto",
"orientation": "auto",
"percentChangeColorMode": "standard",
"reduceOptions": {
"calcs": [
"lastNotNull"
],
"fields": "",
"values": false
},
"showPercentChange": false,
"textMode": "auto",
"wideLayout": true
},
"pluginVersion": "13.0.1+security-01",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "merlin-mimir"
},
"editorMode": "code",
"expr": "slurm_nodes_idle{slurm_cluster=\"$slurm_cluster\",instance=~\".*01\"}",
"legendFormat": "idle",
"range": true,
"refId": "A"
},
{
"datasource": {
"type": "prometheus",
"uid": "merlin-mimir"
},
"editorMode": "code",
"expr": "slurm_nodes_mixed{slurm_cluster=\"$slurm_cluster\",instance=~\".*01\"}",
"legendFormat": "mix",
"range": true,
"refId": "B"
},
{
"datasource": {
"type": "prometheus",
"uid": "merlin-mimir"
},
"editorMode": "code",
"expr": "slurm_nodes_alloc{slurm_cluster=\"$slurm_cluster\",instance=~\".*01\"}",
"legendFormat": "alloc",
"range": true,
"refId": "C"
},
{
"datasource": {
"type": "prometheus",
"uid": "merlin-mimir"
},
"editorMode": "code",
"expr": "slurm_nodes_down{slurm_cluster=\"$slurm_cluster\",instance=~\".*01\"}",
"legendFormat": "down",
"range": true,
"refId": "D"
},
{
"datasource": {
"type": "prometheus",
"uid": "merlin-mimir"
},
"editorMode": "code",
"expr": "slurm_nodes_draining{slurm_cluster=\"$slurm_cluster\",instance=~\".*01\"} + slurm_nodes_drained{slurm_cluster=\"$slurm_cluster\",instance=~\".*01\"} ",
"instant": false,
"legendFormat": "drain",
"range": true,
"refId": "E"
}
],
"title": "Nodes (idle/mix/alloc/down/drain)",
"type": "stat"
},
{
"datasource": {
"type": "prometheus",
"uid": "merlin-mimir"
},
"fieldConfig": {
"defaults": {
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": 0
},
{
"color": "yellow",
"value": 1
}
]
}
}
},
"gridPos": {
"h": 4,
"w": 3,
"x": 0,
"y": 9
},
"id": 114,
"options": {
"colorMode": "background",
"graphMode": "none",
"justifyMode": "auto",
"orientation": "auto",
"percentChangeColorMode": "standard",
"reduceOptions": {
"calcs": [
"lastNotNull"
],
"fields": "",
"values": false
},
"showPercentChange": false,
"textMode": "auto",
"wideLayout": true
},
"pluginVersion": "13.0.1+security-01",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "merlin-mimir"
},
"editorMode": "code",
"expr": "slurm_jobs_hold{slurm_cluster=\"$slurm_cluster\",instance=~\".*01\"}",
"range": true,
"refId": "A"
}
],
"title": "Held Jobs",
"type": "stat"
},
{
"datasource": {
"type": "prometheus",
"uid": "merlin-mimir"
},
"fieldConfig": {
"defaults": {
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": 0
}
]
}
}
},
"gridPos": {
"h": 4,
"w": 3,
"x": 3,
"y": 9
},
"id": 3,
"options": {
"colorMode": "background",
"graphMode": "none",
"justifyMode": "auto",
"orientation": "auto",
"percentChangeColorMode": "standard",
"reduceOptions": {
"calcs": [
"lastNotNull"
],
"fields": "",
"values": false
},
"showPercentChange": false,
"textMode": "auto",
"wideLayout": true
},
"pluginVersion": "13.0.1+security-01",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "merlin-mimir"
},
"editorMode": "code",
"expr": "sum(slurm_node_cpus_alloc{slurm_cluster=\"$slurm_cluster\",instance=~\".*01\"})",
"range": true,
"refId": "A"
}
],
"title": "Allocated CPUs",
"type": "stat"
},
{
"datasource": {
"type": "prometheus",
"uid": "merlin-mimir"
},
"fieldConfig": {
"defaults": {
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": 0
},
{
"color": "red",
"value": 100
}
]
}
}
},
"gridPos": {
"h": 4,
"w": 3,
"x": 6,
"y": 9
},
"id": 8,
"options": {
"colorMode": "background",
"graphMode": "none",
"justifyMode": "auto",
"orientation": "auto",
"percentChangeColorMode": "standard",
"reduceOptions": {
"calcs": [
"lastNotNull"
],
"fields": "",
"values": false
},
"showPercentChange": false,
"textMode": "auto",
"wideLayout": true
},
"pluginVersion": "13.0.1+security-01",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "merlin-mimir"
},
"editorMode": "code",
"expr": "slurm_slurmdbd_queue_size{slurm_cluster=\"$slurm_cluster\",instance=~\".*01\"}",
"range": true,
"refId": "A"
}
],
"title": "DBD Queue",
"type": "stat"
},
{
"collapsed": false,
"gridPos": {
"h": 1,
"w": 24,
"x": 0,
"y": 13
},
"id": 128,
"title": "Cluster summary graphs",
"type": "row"
},
{
"datasource": {
"type": "prometheus",
"uid": "merlin-mimir"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"axisSoftMin": 0,
"barAlignment": 0,
"barWidthFactor": 0.6,
"drawStyle": "line",
"fillOpacity": 0,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"insertNulls": false,
"lineInterpolation": "linear",
"lineStyle": {
"dash": [
10,
10
],
"fill": "dash"
},
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "auto",
"showValues": false,
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "blue",
"value": 0
}
]
}
},
"overrides": [
{
"matcher": {
"id": "byName",
"options": "allocated CPUs"
},
"properties": [
{
"id": "custom.fillOpacity",
"value": 20
},
{
"id": "custom.lineStyle",
"value": null
}
]
}
]
},
"gridPos": {
"h": 8,
"w": 7,
"x": 0,
"y": 14
},
"id": 108,
"options": {
"annotations": {
"clustering": -1,
"multiLane": false
},
"legend": {
"calcs": [],
"displayMode": "list",
"placement": "bottom",
"showLegend": true
},
"tooltip": {
"hideZeros": false,
"mode": "single",
"sort": "none"
}
},
"pluginVersion": "13.0.1+security-01",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "merlin-mimir"
},
"editorMode": "code",
"expr": "sum(slurm_node_cpus_alloc{slurm_cluster=\"$slurm_cluster\",instance=~\".*01\"})",
"legendFormat": "allocated CPUs",
"range": true,
"refId": "A"
},
{
"datasource": {
"type": "prometheus",
"uid": "merlin-mimir"
},
"editorMode": "code",
"expr": "sum(slurm_node_cpus{slurm_cluster=\"$slurm_cluster\",instance=~\".*01\"})",
"instant": false,
"legendFormat": "Total",
"range": true,
"refId": "B"
}
],
"title": "Sum Allocated CPUs",
"type": "timeseries"
},
{
"datasource": {
"type": "prometheus",
"uid": "merlin-mimir"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"axisSoftMin": 0,
"barAlignment": 0,
"barWidthFactor": 0.6,
"drawStyle": "line",
"fillOpacity": 0,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"insertNulls": false,
"lineInterpolation": "linear",
"lineStyle": {
"dash": [
10,
10
],
"fill": "dash"
},
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "auto",
"showValues": false,
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "blue",
"value": 0
}
]
},
"unit": "mbytes"
},
"overrides": [
{
"matcher": {
"id": "byName",
"options": "allocated Memory"
},
"properties": [
{
"id": "custom.fillOpacity",
"value": 20
},
{
"id": "custom.lineStyle",
"value": null
}
]
}
]
},
"gridPos": {
"h": 8,
"w": 7,
"x": 7,
"y": 14
},
"id": 109,
"options": {
"annotations": {
"clustering": -1,
"multiLane": false
},
"legend": {
"calcs": [],
"displayMode": "list",
"placement": "bottom",
"showLegend": true
},
"tooltip": {
"hideZeros": false,
"mode": "single",
"sort": "none"
}
},
"pluginVersion": "13.0.1+security-01",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "merlin-mimir"
},
"editorMode": "code",
"expr": "sum(slurm_node_memory_alloc_bytes{slurm_cluster=\"$slurm_cluster\",instance=~\".*01\"})",
"legendFormat": "allocated Memory",
"range": true,
"refId": "A"
},
{
"datasource": {
"type": "prometheus",
"uid": "merlin-mimir"
},
"editorMode": "code",
"expr": "sum(slurm_node_memory_bytes{slurm_cluster=\"$slurm_cluster\",instance=~\".*01\"})",
"instant": false,
"legendFormat": "Total",
"range": true,
"refId": "B"
}
],
"title": "Sum Allocated Memory",
"type": "timeseries"
},
{
"datasource": {
"type": "prometheus",
"uid": "merlin-mimir"
},
"description": "Show the level of allocation of available resources",
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"axisSoftMin": 0,
"barAlignment": 0,
"barWidthFactor": 0.6,
"drawStyle": "line",
"fillOpacity": 33,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"insertNulls": false,
"lineInterpolation": "linear",
"lineStyle": {
"fill": "solid"
},
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "auto",
"showValues": false,
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "blue",
"value": 0
}
]
},
"unit": "percentunit"
},
"overrides": [
{
"matcher": {
"id": "byName",
"options": "100%",
"scope": "series"
},
"properties": [
{
"id": "custom.fillOpacity",
"value": 0
},
{
"id": "custom.lineStyle",
"value": {
"dash": [
10,
10
],
"fill": "dash"
}
}
]
},
{
"matcher": {
"id": "byName",
"options": "100%"
},
"properties": [
{
"id": "color",
"value": {
"fixedColor": "yellow",
"mode": "fixed"
}
}
]
},
{
"matcher": {
"id": "byName",
"options": "%Memory"
},
"properties": [
{
"id": "color",
"value": {
"fixedColor": "blue",
"mode": "fixed"
}
}
]
},
{
"matcher": {
"id": "byName",
"options": "%CPU"
},
"properties": [
{
"id": "color",
"value": {
"fixedColor": "orange",
"mode": "fixed"
}
}
]
},
{
"matcher": {
"id": "byName",
"options": "A100 GPU"
},
"properties": [
{
"id": "color",
"value": {
"fixedColor": "green",
"mode": "fixed"
}
}
]
},
{
"matcher": {
"id": "byName",
"options": "g200 GPU"
},
"properties": [
{
"id": "color",
"value": {
"fixedColor": "semi-dark-purple",
"mode": "fixed"
}
}
]
}
]
},
"gridPos": {
"h": 8,
"w": 7,
"x": 14,
"y": 14
},
"id": 126,
"options": {
"annotations": {
"clustering": -1,
"multiLane": false
},
"legend": {
"calcs": [],
"displayMode": "list",
"placement": "bottom",
"showLegend": true
},
"tooltip": {
"hideZeros": false,
"mode": "multi",
"sort": "desc"
}
},
"pluginVersion": "13.0.1+security-01",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "merlin-mimir"
},
"editorMode": "code",
"expr": "sum(slurm_node_cpus_alloc{slurm_cluster=\"$slurm_cluster\",instance=~\".*01\"}) / sum(slurm_node_cpus{slurm_cluster=\"$slurm_cluster\",instance=~\".*01\"})",
"legendFormat": "%CPU",
"range": true,
"refId": "A"
},
{
"datasource": {
"type": "prometheus",
"uid": "merlin-mimir"
},
"editorMode": "code",
"expr": "sum(slurm_node_memory_alloc_bytes{slurm_cluster=\"$slurm_cluster\",instance=~\".*01\"}) / sum(slurm_node_memory_bytes{slurm_cluster=\"$slurm_cluster\",instance=~\".*01\"})",
"instant": false,
"legendFormat": "%Memory",
"range": true,
"refId": "B"
},
{
"datasource": {
"type": "prometheus",
"uid": "merlin-mimir"
},
"editorMode": "code",
"expr": "1",
"instant": false,
"legendFormat": "100%",
"range": true,
"refId": "C"
},
{
"datasource": {
"type": "prometheus",
"uid": "merlin-mimir"
},
"editorMode": "code",
"expr": "slurm_exporter_gpus_alloc{type=\"nvidia_a100-sxm4-80gb\",slurm_cluster=\"$slurm_cluster\"} / slurm_exporter_gpus_total{type=\"nvidia_a100-sxm4-80gb\",slurm_cluster=\"$slurm_cluster\"}",
"instant": false,
"legendFormat": "A100 GPU",
"range": true,
"refId": "D"
},
{
"datasource": {
"type": "prometheus",
"uid": "merlin-mimir"
},
"editorMode": "code",
"expr": "slurm_exporter_gpus_alloc{type=\"gh200\",slurm_cluster=\"$slurm_cluster\"} / slurm_exporter_gpus_total{type=\"gh200\",slurm_cluster=\"$slurm_cluster\"}",
"instant": false,
"legendFormat": "g200 GPU",
"range": true,
"refId": "E"
}
],
"title": "%Resources allocated",
"type": "timeseries"
},
{
"datasource": {
"type": "prometheus",
"uid": "merlin-mimir"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"barWidthFactor": 0.6,
"drawStyle": "bars",
"fillOpacity": 30,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"insertNulls": false,
"lineInterpolation": "linear",
"lineWidth": 2,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "auto",
"showValues": false,
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "normal"
},
"thresholdsStyle": {
"mode": "off"
}
},
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": 0
},
{
"color": "red",
"value": 80
}
]
},
"unit": "short"
},
"overrides": [
{
"matcher": {
"id": "byName",
"options": "total CPUs",
"scope": "series"
},
"properties": [
{
"id": "custom.drawStyle",
"value": "line"
},
{
"id": "custom.fillOpacity",
"value": 0
},
{
"id": "custom.lineStyle",
"value": {
"dash": [
10,
10
],
"fill": "dash"
}
},
{
"id": "color",
"value": {
"fixedColor": "yellow",
"mode": "fixed"
}
}
]
}
]
},
"gridPos": {
"h": 8,
"w": 9,
"x": 0,
"y": 22
},
"id": 111,
"options": {
"annotations": {
"clustering": -1,
"multiLane": false
},
"legend": {
"calcs": [],
"displayMode": "list",
"placement": "bottom",
"showLegend": true
},
"tooltip": {
"hideZeros": false,
"mode": "multi",
"sort": "desc"
}
},
"pluginVersion": "13.0.1+security-01",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "merlin-mimir"
},
"editorMode": "code",
"expr": "slurm_partition_jobs_cpus_alloc{slurm_cluster=\"$slurm_cluster\",instance=~\".*01\"}",
"legendFormat": "{{partition}}",
"range": true,
"refId": "A"
},
{
"datasource": {
"type": "prometheus",
"uid": "merlin-mimir"
},
"editorMode": "code",
"expr": "sum(slurm_node_cpus{slurm_cluster=\"$slurm_cluster\",instance=~\".*01\"})",
"legendFormat": "total CPUs",
"range": true,
"refId": "B"
}
],
"title": "CPU Allocation by Partition",
"type": "timeseries"
},
{
"datasource": {
"type": "prometheus",
"uid": "merlin-mimir"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"barWidthFactor": 0.6,
"drawStyle": "bars",
"fillOpacity": 0,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"insertNulls": false,
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "auto",
"showValues": false,
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "normal"
},
"thresholdsStyle": {
"mode": "off"
}
},
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": 0
},
{
"color": "red",
"value": 80
}
]
}
},
"overrides": [
{
"matcher": {
"id": "byName",
"options": "slurm_nodes_down"
},
"properties": [
{
"id": "color",
"value": {
"fixedColor": "dark-red",
"mode": "fixed"
}
}
]
},
{
"matcher": {
"id": "byName",
"options": "slurm_nodes_draining"
},
"properties": [
{
"id": "color",
"value": {
"fixedColor": "yellow",
"mode": "fixed"
}
}
]
},
{
"matcher": {
"id": "byName",
"options": "slurm_nodes_drained"
},
"properties": [
{
"id": "color",
"value": {
"fixedColor": "dark-orange",
"mode": "fixed"
}
}
]
}
]
},
"gridPos": {
"h": 8,
"w": 9,
"x": 9,
"y": 22
},
"id": 113,
"options": {
"annotations": {
"clustering": -1,
"multiLane": false
},
"legend": {
"calcs": [],
"displayMode": "list",
"placement": "bottom",
"showLegend": true
},
"tooltip": {
"hideZeros": false,
"mode": "single",
"sort": "none"
}
},
"pluginVersion": "13.0.1+security-01",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "merlin-mimir"
},
"editorMode": "code",
"expr": "slurm_nodes_draining{slurm_cluster=\"$slurm_cluster\",instance=~\".*01\"}",
"instant": false,
"legendFormat": "{{__name__}}",
"range": true,
"refId": "A"
},
{
"datasource": {
"type": "prometheus",
"uid": "merlin-mimir"
},
"editorMode": "code",
"expr": "slurm_nodes_drained{slurm_cluster=\"$slurm_cluster\",instance=~\".*01\"}",
"instant": false,
"legendFormat": "{{__name__}}",
"range": true,
"refId": "B"
},
{
"datasource": {
"type": "prometheus",
"uid": "merlin-mimir"
},
"editorMode": "code",
"expr": "slurm_nodes_down{slurm_cluster=\"$slurm_cluster\",instance=~\".*01\"}",
"instant": false,
"legendFormat": "{{__name__}}",
"range": true,
"refId": "C"
}
],
"title": "Nodes drained/draining/down",
"type": "timeseries"
},
{
"collapsed": false,
"gridPos": {
"h": 1,
"w": 24,
"x": 0,
"y": 30
},
"id": 129,
"title": "Job Trends",
"type": "row"
},
{
"datasource": {
"type": "prometheus",
"uid": "merlin-mimir"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"barWidthFactor": 0.6,
"drawStyle": "line",
"fillOpacity": 20,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"insertNulls": false,
"lineInterpolation": "linear",
"lineWidth": 2,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "auto",
"showValues": false,
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": 0
},
{
"color": "red",
"value": 80
}
]
}
},
"overrides": [
{
"matcher": {
"id": "byName",
"options": "running"
},
"properties": [
{
"id": "color",
"value": {
"fixedColor": "green",
"mode": "fixed"
}
}
]
},
{
"matcher": {
"id": "byName",
"options": "pending"
},
"properties": [
{
"id": "color",
"value": {
"fixedColor": "orange",
"mode": "fixed"
}
}
]
},
{
"matcher": {
"id": "byName",
"options": "completing"
},
"properties": [
{
"id": "color",
"value": {
"fixedColor": "blue",
"mode": "fixed"
}
}
]
}
]
},
"gridPos": {
"h": 8,
"w": 12,
"x": 0,
"y": 31
},
"id": 10,
"options": {
"annotations": {
"clustering": -1,
"multiLane": false
},
"legend": {
"calcs": [],
"displayMode": "list",
"placement": "bottom",
"showLegend": true
},
"tooltip": {
"hideZeros": false,
"mode": "multi",
"sort": "none"
}
},
"pluginVersion": "13.0.1+security-01",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "merlin-mimir"
},
"editorMode": "code",
"expr": "slurm_jobs_running{slurm_cluster=\"$slurm_cluster\",instance=~\".*01\"}",
"legendFormat": "running",
"range": true,
"refId": "A"
},
{
"datasource": {
"type": "prometheus",
"uid": "merlin-mimir"
},
"editorMode": "code",
"expr": "slurm_jobs_pending{slurm_cluster=\"$slurm_cluster\",instance=~\".*01\"}",
"legendFormat": "pending",
"range": true,
"refId": "B"
},
{
"datasource": {
"type": "prometheus",
"uid": "merlin-mimir"
},
"editorMode": "code",
"expr": "slurm_jobs_completing{slurm_cluster=\"$slurm_cluster\",instance=~\".*01\"}",
"legendFormat": "completing",
"range": true,
"refId": "C"
},
{
"datasource": {
"type": "prometheus",
"uid": "merlin-mimir"
},
"editorMode": "code",
"expr": "slurm_jobs_suspended{slurm_cluster=\"$slurm_cluster\",instance=~\".*01\"}",
"legendFormat": "suspended",
"range": true,
"refId": "D"
},
{
"datasource": {
"type": "prometheus",
"uid": "merlin-mimir"
},
"editorMode": "code",
"expr": "slurm_jobs_configuring{slurm_cluster=\"$slurm_cluster\",instance=~\".*01\"}",
"legendFormat": "configuring",
"range": true,
"refId": "E"
}
],
"title": "Job State Trends",
"type": "timeseries"
},
{
"datasource": {
"type": "prometheus",
"uid": "merlin-mimir"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"barWidthFactor": 0.6,
"drawStyle": "line",
"fillOpacity": 20,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"insertNulls": false,
"lineInterpolation": "linear",
"lineWidth": 2,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "auto",
"showValues": false,
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": 0
},
{
"color": "red",
"value": 80
}
]
}
}
},
"gridPos": {
"h": 8,
"w": 12,
"x": 12,
"y": 31
},
"id": 11,
"options": {
"annotations": {
"clustering": -1,
"multiLane": false
},
"legend": {
"calcs": [],
"displayMode": "list",
"placement": "bottom",
"showLegend": true
},
"tooltip": {
"hideZeros": false,
"mode": "multi",
"sort": "none"
}
},
"pluginVersion": "13.0.1+security-01",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "merlin-mimir"
},
"editorMode": "code",
"expr": "rate(slurm_sdiag_jobs_completed{slurm_cluster=\"$slurm_cluster\",instance=~\".*01\"}[5m])*60",
"legendFormat": "completed",
"range": true,
"refId": "A"
},
{
"datasource": {
"type": "prometheus",
"uid": "merlin-mimir"
},
"editorMode": "code",
"expr": "rate(slurm_sdiag_jobs_started{slurm_cluster=\"$slurm_cluster\",instance=~\".*01\"}[5m])*60",
"legendFormat": "started",
"range": true,
"refId": "B"
},
{
"datasource": {
"type": "prometheus",
"uid": "merlin-mimir"
},
"editorMode": "code",
"expr": "rate(slurm_sdiag_jobs_submitted{slurm_cluster=\"$slurm_cluster\",instance=~\".*01\"}[5m])*60",
"legendFormat": "submitted",
"range": true,
"refId": "C"
},
{
"datasource": {
"type": "prometheus",
"uid": "merlin-mimir"
},
"editorMode": "code",
"expr": "rate(slurm_sdiag_jobs_failed{slurm_cluster=\"$slurm_cluster\",instance=~\".*01\"}[5m])*60",
"legendFormat": "failed",
"range": true,
"refId": "D"
},
{
"datasource": {
"type": "prometheus",
"uid": "merlin-mimir"
},
"editorMode": "code",
"expr": "rate(slurm_sdiag_jobs_canceled{slurm_cluster=\"$slurm_cluster\",instance=~\".*01\"}[5m])*60",
"legendFormat": "cancelled",
"range": true,
"refId": "E"
}
],
"title": "Job Throughput (rate/min)",
"type": "timeseries"
},
{
"collapsed": false,
"gridPos": {
"h": 1,
"w": 24,
"x": 0,
"y": 39
},
"id": 130,
"title": "User monitoring",
"type": "row"
},
{
"datasource": {
"type": "prometheus",
"uid": "merlin-mimir"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"barWidthFactor": 0.6,
"drawStyle": "bars",
"fillOpacity": 0,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"insertNulls": false,
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "auto",
"showValues": false,
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "normal"
},
"thresholdsStyle": {
"mode": "off"
}
},
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": 0
},
{
"color": "red",
"value": 80
}
]
}
}
},
"gridPos": {
"h": 8,
"w": 12,
"x": 0,
"y": 40
},
"id": 123,
"options": {
"annotations": {
"clustering": -1,
"multiLane": false
},
"legend": {
"calcs": [],
"displayMode": "list",
"placement": "bottom",
"showLegend": true
},
"tooltip": {
"hideZeros": false,
"mode": "single",
"sort": "none"
}
},
"pluginVersion": "13.0.1+security-01",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "merlin-mimir"
},
"editorMode": "code",
"expr": "slurm_user_jobs_cancelled{slurm_cluster=\"$slurm_cluster\",instance=~\".*01\"}",
"instant": false,
"legendFormat": "{{username}}",
"range": true,
"refId": "A"
}
],
"title": "Cancelled jobs per user",
"type": "timeseries"
},
{
"datasource": {
"type": "prometheus",
"uid": "merlin-mimir"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"barWidthFactor": 0.6,
"drawStyle": "bars",
"fillOpacity": 0,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"insertNulls": false,
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "auto",
"showValues": false,
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "normal"
},
"thresholdsStyle": {
"mode": "off"
}
},
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": 0
},
{
"color": "red",
"value": 80
}
]
}
}
},
"gridPos": {
"h": 8,
"w": 12,
"x": 12,
"y": 40
},
"id": 121,
"options": {
"annotations": {
"clustering": -1,
"multiLane": false
},
"legend": {
"calcs": [],
"displayMode": "list",
"placement": "bottom",
"showLegend": true
},
"tooltip": {
"hideZeros": false,
"mode": "single",
"sort": "none"
}
},
"pluginVersion": "13.0.1+security-01",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "merlin-mimir"
},
"editorMode": "code",
"expr": "slurm_user_jobs_failed{slurm_cluster=\"$slurm_cluster\",instance=~\".*01\"}",
"instant": false,
"legendFormat": "{{username}}",
"range": true,
"refId": "A"
}
],
"title": "Failed jobs per user",
"type": "timeseries"
},
{
"datasource": {
"type": "prometheus",
"uid": "merlin-mimir"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"barWidthFactor": 0.6,
"drawStyle": "bars",
"fillOpacity": 0,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"insertNulls": false,
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "auto",
"showValues": false,
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "normal"
},
"thresholdsStyle": {
"mode": "off"
}
},
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": 0
},
{
"color": "red",
"value": 80
}
]
}
}
},
"gridPos": {
"h": 8,
"w": 12,
"x": 0,
"y": 48
},
"id": 116,
"options": {
"annotations": {
"clustering": -1,
"multiLane": false
},
"legend": {
"calcs": [],
"displayMode": "list",
"placement": "bottom",
"showLegend": true
},
"tooltip": {
"hideZeros": false,
"mode": "multi",
"sort": "desc"
}
},
"pluginVersion": "13.0.1+security-01",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "merlin-mimir"
},
"editorMode": "code",
"expr": "slurm_user_jobs_running{slurm_cluster=\"$slurm_cluster\",instance=~\".*01\"}",
"instant": false,
"legendFormat": "{{username}}",
"range": true,
"refId": "A"
}
],
"title": "Jobs running per user",
"type": "timeseries"
},
{
"datasource": {
"type": "prometheus",
"uid": "merlin-mimir"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"barWidthFactor": 0.6,
"drawStyle": "bars",
"fillOpacity": 0,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"insertNulls": false,
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "auto",
"showValues": false,
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "normal"
},
"thresholdsStyle": {
"mode": "off"
}
},
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": 0
},
{
"color": "red",
"value": 80
}
]
}
}
},
"gridPos": {
"h": 8,
"w": 12,
"x": 12,
"y": 48
},
"id": 120,
"options": {
"annotations": {
"clustering": -1,
"multiLane": false
},
"legend": {
"calcs": [],
"displayMode": "list",
"placement": "bottom",
"showLegend": true
},
"tooltip": {
"hideZeros": false,
"mode": "multi",
"sort": "desc"
}
},
"pluginVersion": "13.0.1+security-01",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "merlin-mimir"
},
"editorMode": "code",
"expr": "slurm_user_jobs_pending{slurm_cluster=\"$slurm_cluster\",instance=~\".*01\"}",
"instant": false,
"legendFormat": "{{username}}",
"range": true,
"refId": "A"
}
],
"title": "Pending jobs per user",
"type": "timeseries"
},
{
"datasource": {
"type": "prometheus",
"uid": "merlin-mimir"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"barWidthFactor": 0.6,
"drawStyle": "bars",
"fillOpacity": 0,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"insertNulls": false,
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "auto",
"showValues": false,
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "normal"
},
"thresholdsStyle": {
"mode": "off"
}
},
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": 0
},
{
"color": "red",
"value": 80
}
]
}
}
},
"gridPos": {
"h": 8,
"w": 12,
"x": 0,
"y": 56
},
"id": 122,
"options": {
"annotations": {
"clustering": -1,
"multiLane": false
},
"legend": {
"calcs": [],
"displayMode": "list",
"placement": "bottom",
"showLegend": true
},
"tooltip": {
"hideZeros": false,
"mode": "multi",
"sort": "desc"
}
},
"pluginVersion": "13.0.1+security-01",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "merlin-mimir"
},
"editorMode": "code",
"expr": "slurm_user_jobs_cpus_alloc{slurm_cluster=\"$slurm_cluster\",instance=~\".*01\"}",
"instant": false,
"legendFormat": "{{username}}",
"range": true,
"refId": "A"
}
],
"title": "CPUs allocated per user",
"type": "timeseries"
},
{
"datasource": {
"type": "prometheus",
"uid": "merlin-mimir"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"barWidthFactor": 0.6,
"drawStyle": "bars",
"fillOpacity": 0,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"insertNulls": false,
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "auto",
"showValues": false,
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "normal"
},
"thresholdsStyle": {
"mode": "off"
}
},
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": 0
},
{
"color": "red",
"value": 80
}
]
},
"unit": "mbytes"
}
},
"gridPos": {
"h": 8,
"w": 12,
"x": 12,
"y": 56
},
"id": 119,
"options": {
"annotations": {
"clustering": -1,
"multiLane": false
},
"legend": {
"calcs": [],
"displayMode": "list",
"placement": "bottom",
"showLegend": true
},
"tooltip": {
"hideZeros": false,
"mode": "multi",
"sort": "desc"
}
},
"pluginVersion": "13.0.1+security-01",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "merlin-mimir"
},
"editorMode": "code",
"expr": "slurm_user_jobs_memory_alloc{slurm_cluster=\"$slurm_cluster\",instance=~\".*01\"}",
"instant": false,
"legendFormat": "{{username}}",
"range": true,
"refId": "A"
}
],
"title": "Allocated memory per user",
"type": "timeseries"
},
{
"datasource": {
"type": "prometheus",
"uid": "merlin-mimir"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"barWidthFactor": 0.6,
"drawStyle": "line",
"fillOpacity": 0,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"insertNulls": false,
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "auto",
"showValues": false,
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": 0
},
{
"color": "red",
"value": 80
}
]
}
}
},
"gridPos": {
"h": 8,
"w": 12,
"x": 0,
"y": 64
},
"id": 117,
"options": {
"annotations": {
"clustering": -1,
"multiLane": false
},
"legend": {
"calcs": [],
"displayMode": "list",
"placement": "bottom",
"showLegend": true
},
"tooltip": {
"hideZeros": false,
"mode": "multi",
"sort": "desc"
}
},
"pluginVersion": "13.0.1+security-01",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "merlin-mimir"
},
"editorMode": "code",
"expr": "slurm_user_jobs_cpus_alloc{slurm_cluster=\"$slurm_cluster\",instance=~\".*01\"} / slurm_user_jobs_running{slurm_cluster=\"$slurm_cluster\",instance=~\".*01\"}",
"instant": false,
"legendFormat": "{{username}}",
"range": true,
"refId": "A"
}
],
"title": "Users' avg. CPUs per job",
"type": "timeseries"
},
{
"datasource": {
"type": "prometheus",
"uid": "merlin-mimir"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"barWidthFactor": 0.6,
"drawStyle": "line",
"fillOpacity": 0,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"insertNulls": false,
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "auto",
"showValues": false,
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": 0
},
{
"color": "red",
"value": 80
}
]
},
"unit": "mbytes"
}
},
"gridPos": {
"h": 8,
"w": 12,
"x": 12,
"y": 64
},
"id": 118,
"options": {
"annotations": {
"clustering": -1,
"multiLane": false
},
"legend": {
"calcs": [],
"displayMode": "list",
"placement": "bottom",
"showLegend": true
},
"tooltip": {
"hideZeros": false,
"mode": "multi",
"sort": "desc"
}
},
"pluginVersion": "13.0.1+security-01",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "merlin-mimir"
},
"editorMode": "code",
"expr": "slurm_user_jobs_memory_alloc{slurm_cluster=\"$slurm_cluster\",instance=~\".*01\"} / slurm_user_jobs_running{slurm_cluster=\"$slurm_cluster\",instance=~\".*01\"}",
"instant": false,
"legendFormat": "{{username}}",
"range": true,
"refId": "A"
}
],
"title": "Users' avg. memory per job",
"type": "timeseries"
},
{
"datasource": {
"type": "prometheus",
"uid": "merlin-mimir"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"barWidthFactor": 0.6,
"drawStyle": "line",
"fillOpacity": 0,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"insertNulls": false,
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "auto",
"showValues": false,
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": 0
},
{
"color": "red",
"value": 80
}
]
},
"unit": "mbytes"
}
},
"gridPos": {
"h": 8,
"w": 12,
"x": 12,
"y": 72
},
"id": 124,
"options": {
"annotations": {
"clustering": -1,
"multiLane": false
},
"legend": {
"calcs": [],
"displayMode": "list",
"placement": "bottom",
"showLegend": true
},
"tooltip": {
"hideZeros": false,
"mode": "multi",
"sort": "desc"
}
},
"pluginVersion": "13.0.1+security-01",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "merlin-mimir"
},
"editorMode": "code",
"expr": "slurm_user_jobs_memory_alloc{slurm_cluster=\"$slurm_cluster\",instance=~\".*01\"} / slurm_user_jobs_cpus_alloc{slurm_cluster=\"$slurm_cluster\",instance=~\".*01\"}",
"instant": false,
"legendFormat": "{{username}}",
"range": true,
"refId": "A"
}
],
"title": "Users' avg. memory per alloc. CPU",
"type": "timeseries"
},
{
"collapsed": true,
"gridPos": {
"h": 1,
"w": 24,
"x": 0,
"y": 80
},
"id": 131,
"panels": [
{
"datasource": {
"type": "prometheus",
"uid": "merlin-mimir"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"barWidthFactor": 0.6,
"drawStyle": "line",
"fillOpacity": 0,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"insertNulls": false,
"lineInterpolation": "linear",
"lineWidth": 2,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "auto",
"showValues": false,
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": 0
},
{
"color": "red",
"value": 80
}
]
},
"unit": "short"
},
"overrides": [
{
"matcher": {
"id": "byFrameRefID",
"options": "B"
},
"properties": [
{
"id": "custom.lineStyle",
"value": {
"dash": [
10,
10
],
"fill": "dash"
}
}
]
}
]
},
"gridPos": {
"h": 8,
"w": 12,
"x": 0,
"y": 81
},
"id": 20,
"options": {
"annotations": {
"clustering": -1,
"multiLane": false
},
"legend": {
"calcs": [],
"displayMode": "list",
"placement": "bottom",
"showLegend": true
},
"tooltip": {
"hideZeros": false,
"mode": "multi",
"sort": "none"
}
},
"pluginVersion": "13.0.0",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "merlin-mimir"
},
"editorMode": "code",
"expr": "slurm_node_cpus_alloc{slurm_cluster=\"$slurm_cluster\",instance=~\".*01\"}",
"legendFormat": "{{node}} (alloc)",
"range": true,
"refId": "A"
},
{
"datasource": {
"type": "prometheus",
"uid": "merlin-mimir"
},
"editorMode": "code",
"expr": "slurm_node_cpus{slurm_cluster=\"$slurm_cluster\",instance=~\".*01\"}",
"legendFormat": "{{node}} (total)",
"range": true,
"refId": "B"
}
],
"title": "CPU Allocation by Node",
"type": "timeseries"
},
{
"datasource": {
"type": "prometheus",
"uid": "merlin-mimir"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"barWidthFactor": 0.6,
"drawStyle": "line",
"fillOpacity": 0,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"insertNulls": false,
"lineInterpolation": "linear",
"lineWidth": 2,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "auto",
"showValues": false,
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": 0
},
{
"color": "red",
"value": 80
}
]
},
"unit": "bytes"
},
"overrides": [
{
"matcher": {
"id": "byFrameRefID",
"options": "B"
},
"properties": [
{
"id": "custom.lineStyle",
"value": {
"dash": [
10,
10
],
"fill": "dash"
}
}
]
}
]
},
"gridPos": {
"h": 8,
"w": 12,
"x": 12,
"y": 81
},
"id": 21,
"options": {
"annotations": {
"clustering": -1,
"multiLane": false
},
"legend": {
"calcs": [],
"displayMode": "list",
"placement": "bottom",
"showLegend": true
},
"tooltip": {
"hideZeros": false,
"mode": "multi",
"sort": "none"
}
},
"pluginVersion": "13.0.0",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "merlin-mimir"
},
"editorMode": "code",
"expr": "slurm_node_memory_alloc_bytes{slurm_cluster=\"$slurm_cluster\",instance=~\".*01\"}",
"legendFormat": "{{node}} (alloc)",
"range": true,
"refId": "A"
},
{
"datasource": {
"type": "prometheus",
"uid": "merlin-mimir"
},
"editorMode": "code",
"expr": "slurm_node_memory_bytes{slurm_cluster=\"$slurm_cluster\",instance=~\".*01\"}",
"legendFormat": "{{node}} (total)",
"range": true,
"refId": "B"
}
],
"title": "Memory Allocation by Node",
"type": "timeseries"
},
{
"datasource": {
"type": "prometheus",
"uid": "merlin-mimir"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"barWidthFactor": 0.6,
"drawStyle": "line",
"fillOpacity": 20,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"insertNulls": false,
"lineInterpolation": "linear",
"lineWidth": 2,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "auto",
"showValues": false,
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"max": 1,
"min": 0,
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": 0
},
{
"color": "yellow",
"value": 0.7
},
{
"color": "red",
"value": 0.9
}
]
},
"unit": "percentunit"
}
},
"gridPos": {
"h": 8,
"w": 12,
"x": 0,
"y": 89
},
"id": 22,
"options": {
"annotations": {
"clustering": -1,
"multiLane": false
},
"legend": {
"calcs": [],
"displayMode": "list",
"placement": "bottom",
"showLegend": true
},
"tooltip": {
"hideZeros": false,
"mode": "multi",
"sort": "none"
}
},
"pluginVersion": "13.0.0",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "merlin-mimir"
},
"editorMode": "code",
"expr": "slurm_node_cpus_alloc{slurm_cluster=\"$slurm_cluster\",instance=~\".*01\"} / slurm_node_cpus{slurm_cluster=\"$slurm_cluster\",instance=~\".*01\"}",
"legendFormat": "{{node}}",
"range": true,
"refId": "A"
}
],
"title": "CPU Utilization by Node",
"type": "timeseries"
},
{
"datasource": {
"type": "prometheus",
"uid": "merlin-mimir"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"barWidthFactor": 0.6,
"drawStyle": "line",
"fillOpacity": 20,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"insertNulls": false,
"lineInterpolation": "linear",
"lineWidth": 2,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "auto",
"showValues": false,
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"max": 1,
"min": 0,
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": 0
},
{
"color": "yellow",
"value": 0.7
},
{
"color": "red",
"value": 0.9
}
]
},
"unit": "percentunit"
}
},
"gridPos": {
"h": 8,
"w": 12,
"x": 12,
"y": 89
},
"id": 23,
"options": {
"annotations": {
"clustering": -1,
"multiLane": false
},
"legend": {
"calcs": [],
"displayMode": "list",
"placement": "bottom",
"showLegend": true
},
"tooltip": {
"hideZeros": false,
"mode": "multi",
"sort": "none"
}
},
"pluginVersion": "13.0.0",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "merlin-mimir"
},
"editorMode": "code",
"expr": "slurm_node_memory_alloc_bytes{slurm_cluster=\"$slurm_cluster\",instance=~\".*01\"} / slurm_node_memory_bytes{slurm_cluster=\"$slurm_cluster\",instance=~\".*01\"}",
"legendFormat": "{{node}}",
"range": true,
"refId": "A"
}
],
"title": "Memory Utilization by Node",
"type": "timeseries"
}
],
"title": "Per-Node Resources",
"type": "row"
},
{
"collapsed": false,
"gridPos": {
"h": 1,
"w": 24,
"x": 0,
"y": 81
},
"id": 132,
"title": "Per-Partition Status",
"type": "row"
},
{
"datasource": {
"type": "prometheus",
"uid": "merlin-mimir"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"barWidthFactor": 0.6,
"drawStyle": "line",
"fillOpacity": 30,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"insertNulls": false,
"lineInterpolation": "linear",
"lineWidth": 2,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "auto",
"showValues": false,
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "normal"
},
"thresholdsStyle": {
"mode": "off"
}
},
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": 0
},
{
"color": "red",
"value": 80
}
]
}
}
},
"gridPos": {
"h": 8,
"w": 8,
"x": 0,
"y": 82
},
"id": 30,
"options": {
"annotations": {
"clustering": -1,
"multiLane": false
},
"legend": {
"calcs": [],
"displayMode": "list",
"placement": "bottom",
"showLegend": true
},
"tooltip": {
"hideZeros": false,
"mode": "multi",
"sort": "desc"
}
},
"pluginVersion": "13.0.0",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "merlin-mimir"
},
"editorMode": "code",
"expr": "slurm_partition_jobs_running{slurm_cluster=\"$slurm_cluster\",instance=~\".*01\"}",
"legendFormat": "{{partition}}",
"range": true,
"refId": "A"
}
],
"title": "Running Jobs by Partition",
"type": "timeseries"
},
{
"datasource": {
"type": "prometheus",
"uid": "merlin-mimir"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"barWidthFactor": 0.6,
"drawStyle": "line",
"fillOpacity": 30,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"insertNulls": false,
"lineInterpolation": "linear",
"lineWidth": 2,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "auto",
"showValues": false,
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "normal"
},
"thresholdsStyle": {
"mode": "off"
}
},
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": 0
},
{
"color": "red",
"value": 80
}
]
}
}
},
"gridPos": {
"h": 8,
"w": 8,
"x": 8,
"y": 82
},
"id": 31,
"options": {
"annotations": {
"clustering": -1,
"multiLane": false
},
"legend": {
"calcs": [],
"displayMode": "list",
"placement": "bottom",
"showLegend": true
},
"tooltip": {
"hideZeros": false,
"mode": "multi",
"sort": "desc"
}
},
"pluginVersion": "13.0.0",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "merlin-mimir"
},
"editorMode": "code",
"expr": "slurm_partition_jobs_pending{slurm_cluster=\"$slurm_cluster\",instance=~\".*01\"}",
"legendFormat": "{{partition}}",
"range": true,
"refId": "A"
}
],
"title": "Pending Jobs by Partition",
"type": "timeseries"
},
{
"datasource": {
"type": "prometheus",
"uid": "merlin-mimir"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"barWidthFactor": 0.6,
"drawStyle": "line",
"fillOpacity": 30,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"insertNulls": false,
"lineInterpolation": "linear",
"lineWidth": 2,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "auto",
"showValues": false,
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "normal"
},
"thresholdsStyle": {
"mode": "off"
}
},
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": 0
},
{
"color": "red",
"value": 80
}
]
},
"unit": "short"
}
},
"gridPos": {
"h": 8,
"w": 8,
"x": 16,
"y": 82
},
"id": 32,
"options": {
"annotations": {
"clustering": -1,
"multiLane": false
},
"legend": {
"calcs": [],
"displayMode": "list",
"placement": "bottom",
"showLegend": true
},
"tooltip": {
"hideZeros": false,
"mode": "multi",
"sort": "desc"
}
},
"pluginVersion": "13.0.0",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "merlin-mimir"
},
"editorMode": "code",
"expr": "slurm_partition_jobs_cpus_alloc{slurm_cluster=\"$slurm_cluster\",instance=~\".*01\"}",
"legendFormat": "{{partition}}",
"range": true,
"refId": "A"
}
],
"title": "CPU Allocation by Partition",
"type": "timeseries"
},
{
"collapsed": false,
"gridPos": {
"h": 1,
"w": 24,
"x": 0,
"y": 90
},
"id": 133,
"title": "Scheduler Performance",
"type": "row"
},
{
"datasource": {
"type": "prometheus",
"uid": "merlin-mimir"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"barWidthFactor": 0.6,
"drawStyle": "line",
"fillOpacity": 10,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"insertNulls": false,
"lineInterpolation": "linear",
"lineWidth": 2,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "auto",
"showValues": false,
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": 0
},
{
"color": "red",
"value": 80
}
]
},
"unit": "µs"
}
},
"gridPos": {
"h": 8,
"w": 8,
"x": 0,
"y": 91
},
"id": 50,
"options": {
"annotations": {
"clustering": -1,
"multiLane": false
},
"legend": {
"calcs": [],
"displayMode": "list",
"placement": "bottom",
"showLegend": true
},
"tooltip": {
"hideZeros": false,
"mode": "multi",
"sort": "none"
}
},
"pluginVersion": "13.0.0",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "merlin-mimir"
},
"editorMode": "code",
"expr": "slurm_sched_mean_cycle{slurm_cluster=\"$slurm_cluster\",instance=~\".*01\"}",
"legendFormat": "main scheduler (mean)",
"range": true,
"refId": "A"
},
{
"datasource": {
"type": "prometheus",
"uid": "merlin-mimir"
},
"editorMode": "code",
"expr": "slurm_schedule_cycle_last{slurm_cluster=\"$slurm_cluster\",instance=~\".*01\"}",
"legendFormat": "main scheduler (last)",
"range": true,
"refId": "B"
},
{
"datasource": {
"type": "prometheus",
"uid": "merlin-mimir"
},
"editorMode": "code",
"expr": "slurm_bf_mean_cycle{slurm_cluster=\"$slurm_cluster\",instance=~\".*01\"}",
"legendFormat": "backfill (mean)",
"range": true,
"refId": "C"
},
{
"datasource": {
"type": "prometheus",
"uid": "merlin-mimir"
},
"editorMode": "code",
"expr": "slurm_bf_cycle_last{slurm_cluster=\"$slurm_cluster\",instance=~\".*01\"}",
"legendFormat": "backfill (last)",
"range": true,
"refId": "D"
}
],
"title": "Scheduler Cycle Time",
"type": "timeseries"
},
{
"datasource": {
"type": "prometheus",
"uid": "merlin-mimir"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"barWidthFactor": 0.6,
"drawStyle": "line",
"fillOpacity": 10,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"insertNulls": false,
"lineInterpolation": "linear",
"lineWidth": 2,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "auto",
"showValues": false,
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": 0
},
{
"color": "red",
"value": 80
}
]
}
}
},
"gridPos": {
"h": 8,
"w": 8,
"x": 8,
"y": 91
},
"id": 51,
"options": {
"annotations": {
"clustering": -1,
"multiLane": false
},
"legend": {
"calcs": [],
"displayMode": "list",
"placement": "bottom",
"showLegend": true
},
"tooltip": {
"hideZeros": false,
"mode": "multi",
"sort": "none"
}
},
"pluginVersion": "13.0.0",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "merlin-mimir"
},
"editorMode": "code",
"expr": "slurm_schedule_queue_len{slurm_cluster=\"$slurm_cluster\",instance=~\".*01\"}",
"legendFormat": "scheduler queue",
"range": true,
"refId": "A"
},
{
"datasource": {
"type": "prometheus",
"uid": "merlin-mimir"
},
"editorMode": "code",
"expr": "slurm_bf_queue_len{slurm_cluster=\"$slurm_cluster\",instance=~\".*01\"}",
"legendFormat": "backfill queue",
"range": true,
"refId": "B"
},
{
"datasource": {
"type": "prometheus",
"uid": "merlin-mimir"
},
"editorMode": "code",
"expr": "slurm_slurmdbd_queue_size{slurm_cluster=\"$slurm_cluster\",instance=~\".*01\"}",
"legendFormat": "slurmdbd queue",
"range": true,
"refId": "C"
},
{
"datasource": {
"type": "prometheus",
"uid": "merlin-mimir"
},
"editorMode": "code",
"expr": "slurm_agent_queue_size{slurm_cluster=\"$slurm_cluster\",instance=~\".*01\"}",
"legendFormat": "agent queue",
"range": true,
"refId": "D"
}
],
"title": "Queue Lengths",
"type": "timeseries"
},
{
"datasource": {
"type": "prometheus",
"uid": "merlin-mimir"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"barWidthFactor": 0.6,
"drawStyle": "line",
"fillOpacity": 10,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"insertNulls": false,
"lineInterpolation": "linear",
"lineWidth": 2,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "auto",
"showValues": false,
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": 0
},
{
"color": "red",
"value": 80
}
]
}
}
},
"gridPos": {
"h": 8,
"w": 8,
"x": 16,
"y": 91
},
"id": 52,
"options": {
"annotations": {
"clustering": -1,
"multiLane": false
},
"legend": {
"calcs": [],
"displayMode": "list",
"placement": "bottom",
"showLegend": true
},
"tooltip": {
"hideZeros": false,
"mode": "multi",
"sort": "none"
}
},
"pluginVersion": "13.0.0",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "merlin-mimir"
},
"editorMode": "code",
"expr": "rate(slurm_backfilled_jobs{slurm_cluster=\"$slurm_cluster\",instance=~\".*01\"}[5m])*60",
"legendFormat": "backfilled jobs/min",
"range": true,
"refId": "A"
},
{
"datasource": {
"type": "prometheus",
"uid": "merlin-mimir"
},
"editorMode": "code",
"expr": "rate(slurm_bf_cycle_cnt{slurm_cluster=\"$slurm_cluster\",instance=~\".*01\"}[5m])*60",
"legendFormat": "bf cycles/min",
"range": true,
"refId": "B"
},
{
"datasource": {
"type": "prometheus",
"uid": "merlin-mimir"
},
"editorMode": "code",
"expr": "slurm_bf_depth_mean{slurm_cluster=\"$slurm_cluster\",instance=~\".*01\"}",
"legendFormat": "bf depth (mean)",
"range": true,
"refId": "C"
},
{
"datasource": {
"type": "prometheus",
"uid": "merlin-mimir"
},
"editorMode": "code",
"expr": "slurm_bf_table_size{slurm_cluster=\"$slurm_cluster\",instance=~\".*01\"}",
"legendFormat": "bf table size",
"range": true,
"refId": "D"
}
],
"title": "Backfill Statistics",
"type": "timeseries"
},
{
"datasource": {
"type": "prometheus",
"uid": "merlin-mimir"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"barWidthFactor": 0.6,
"drawStyle": "line",
"fillOpacity": 10,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"insertNulls": false,
"lineInterpolation": "linear",
"lineWidth": 2,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "auto",
"showValues": false,
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": 0
},
{
"color": "red",
"value": 80
}
]
},
"unit": "µs"
}
},
"gridPos": {
"h": 8,
"w": 12,
"x": 0,
"y": 99
},
"id": 53,
"options": {
"annotations": {
"clustering": -1,
"multiLane": false
},
"legend": {
"calcs": [],
"displayMode": "list",
"placement": "bottom",
"showLegend": true
},
"tooltip": {
"hideZeros": false,
"mode": "multi",
"sort": "none"
}
},
"pluginVersion": "13.0.0",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "merlin-mimir"
},
"editorMode": "code",
"expr": "slurm_sdiag_latency{slurm_cluster=\"$slurm_cluster\",instance=~\".*01\"}",
"legendFormat": "sdiag latency",
"range": true,
"refId": "A"
}
],
"title": "RPC Latency",
"type": "timeseries"
},
{
"datasource": {
"type": "prometheus",
"uid": "merlin-mimir"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"barWidthFactor": 0.6,
"drawStyle": "line",
"fillOpacity": 10,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"insertNulls": false,
"lineInterpolation": "linear",
"lineWidth": 2,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "auto",
"showValues": false,
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": 0
},
{
"color": "red",
"value": 80
}
]
}
}
},
"gridPos": {
"h": 8,
"w": 12,
"x": 12,
"y": 99
},
"id": 54,
"options": {
"annotations": {
"clustering": -1,
"multiLane": false
},
"legend": {
"calcs": [],
"displayMode": "list",
"placement": "bottom",
"showLegend": true
},
"tooltip": {
"hideZeros": false,
"mode": "multi",
"sort": "none"
}
},
"pluginVersion": "13.0.0",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "merlin-mimir"
},
"editorMode": "code",
"expr": "slurm_server_thread_cnt{slurm_cluster=\"$slurm_cluster\",instance=~\".*01\"}",
"legendFormat": "slurmctld threads",
"range": true,
"refId": "A"
},
{
"datasource": {
"type": "prometheus",
"uid": "merlin-mimir"
},
"editorMode": "code",
"expr": "slurm_agent_thread_cnt{instance=\"merlin7-slurmctld01\"}",
"legendFormat": "agent threads",
"range": true,
"refId": "B"
},
{
"datasource": {
"type": "prometheus",
"uid": "merlin-mimir"
},
"editorMode": "code",
"expr": "slurm_agent_cnt{instance=\"merlin7-slurmctld01\"}",
"legendFormat": "agents",
"range": true,
"refId": "C"
}
],
"title": "Thread Count",
"type": "timeseries"
}
],
"preload": false,
"refresh": "",
"schemaVersion": 42,
"templating": {
"list": [
{
"allowCustomValue": false,
"current": {
"text": "merlin7",
"value": "merlin7"
},
"datasource": {
"type": "prometheus",
"uid": "merlin-mimir"
},
"definition": "label_values(slurm_node_cpus,cluster)",
"hide": 0,
"includeAll": false,
"label": "",
"multi": false,
"name": "slurm_cluster",
"options": [],
"query": {
"qryType": 1,
"query": "label_values(slurm_node_cpus,slurm_cluster)",
"refId": "PrometheusVariableQueryEditor-VariableQuery"
},
"refresh": 1,
"regex": "",
"regexApplyTo": "value",
"skipUrlSync": false,
"sort": 0,
"type": "query"
}
]
},
"time": {
"from": "now-6h",
"to": "now"
},
"timepicker": {
"refresh_intervals": [
"5s",
"10s",
"30s",
"1m",
"5m",
"15m",
"30m",
"1h",
"2h",
"1d"
]
},
"timezone": "browser",
"title": "Merlin7 Slurm Native OpenMetrics",
"uid": "de7rpsq1merlin7slurm1",
"version": 13
}