226f8b4aaf
Changed files: dashboards/General/lac_test.json,dashboards/Merlin7/Merlin7_Slurm_Native_OpenMetrics.json
4494 lines
113 KiB
JSON
4494 lines
113 KiB
JSON
{
|
|
"annotations": {
|
|
"list": [
|
|
{
|
|
"builtIn": 1,
|
|
"datasource": {
|
|
"type": "grafana",
|
|
"uid": "-- Grafana --"
|
|
},
|
|
"enable": true,
|
|
"hide": true,
|
|
"iconColor": "rgba(0, 211, 255, 1)",
|
|
"name": "Annotations & Alerts",
|
|
"type": "dashboard"
|
|
}
|
|
]
|
|
},
|
|
"description": "Slurm derived monitoring from slurmctrld metrics and the open source slurm exporter metrics.",
|
|
"editable": true,
|
|
"fiscalYearStartMonth": 0,
|
|
"graphTooltip": 1,
|
|
"id": null,
|
|
"links": [
|
|
{
|
|
"asDropdown": true,
|
|
"icon": "external link",
|
|
"includeVars": false,
|
|
"keepTime": false,
|
|
"tags": [],
|
|
"targetBlank": true,
|
|
"title": "Commit to git",
|
|
"tooltip": "",
|
|
"type": "link",
|
|
"url": "https://gitea.psi.ch/HPCE/grafana-dashb-save/actions?workflow=save-grafana-dashboards.yaml&actor=0&status=0"
|
|
}
|
|
],
|
|
"liveNow": false,
|
|
"panels": [
|
|
{
|
|
"collapsed": false,
|
|
"gridPos": {
|
|
"h": 1,
|
|
"w": 24,
|
|
"x": 0,
|
|
"y": 0
|
|
},
|
|
"id": 127,
|
|
"title": "Cluster Summary",
|
|
"type": "row"
|
|
},
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "merlin-mimir"
|
|
},
|
|
"fieldConfig": {
|
|
"defaults": {
|
|
"thresholds": {
|
|
"mode": "absolute",
|
|
"steps": [
|
|
{
|
|
"color": "green",
|
|
"value": 0
|
|
}
|
|
]
|
|
}
|
|
}
|
|
},
|
|
"gridPos": {
|
|
"h": 4,
|
|
"w": 3,
|
|
"x": 0,
|
|
"y": 1
|
|
},
|
|
"id": 1,
|
|
"options": {
|
|
"colorMode": "background",
|
|
"graphMode": "none",
|
|
"justifyMode": "auto",
|
|
"orientation": "auto",
|
|
"percentChangeColorMode": "standard",
|
|
"reduceOptions": {
|
|
"calcs": [
|
|
"lastNotNull"
|
|
],
|
|
"fields": "",
|
|
"values": false
|
|
},
|
|
"showPercentChange": false,
|
|
"textMode": "auto",
|
|
"wideLayout": true
|
|
},
|
|
"pluginVersion": "13.0.1+security-01",
|
|
"targets": [
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "merlin-mimir"
|
|
},
|
|
"editorMode": "code",
|
|
"expr": "slurm_jobs_running{slurm_cluster=\"$slurm_cluster\",instance=~\".*01\"}",
|
|
"range": true,
|
|
"refId": "A"
|
|
}
|
|
],
|
|
"title": "Running Jobs",
|
|
"type": "stat"
|
|
},
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "merlin-mimir"
|
|
},
|
|
"fieldConfig": {
|
|
"defaults": {
|
|
"max": 1,
|
|
"min": 0,
|
|
"thresholds": {
|
|
"mode": "absolute",
|
|
"steps": [
|
|
{
|
|
"color": "red",
|
|
"value": 0
|
|
},
|
|
{
|
|
"color": "yellow",
|
|
"value": 0.5
|
|
},
|
|
{
|
|
"color": "green",
|
|
"value": 0.7
|
|
}
|
|
]
|
|
},
|
|
"unit": "percentunit"
|
|
}
|
|
},
|
|
"gridPos": {
|
|
"h": 4,
|
|
"w": 3,
|
|
"x": 3,
|
|
"y": 1
|
|
},
|
|
"id": 5,
|
|
"options": {
|
|
"colorMode": "background",
|
|
"graphMode": "none",
|
|
"justifyMode": "auto",
|
|
"orientation": "auto",
|
|
"percentChangeColorMode": "standard",
|
|
"reduceOptions": {
|
|
"calcs": [
|
|
"lastNotNull"
|
|
],
|
|
"fields": "",
|
|
"values": false
|
|
},
|
|
"showPercentChange": false,
|
|
"textMode": "auto",
|
|
"wideLayout": true
|
|
},
|
|
"pluginVersion": "13.0.1+security-01",
|
|
"targets": [
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "merlin-mimir"
|
|
},
|
|
"editorMode": "code",
|
|
"expr": "sum(slurm_node_cpus_alloc{slurm_cluster=\"$slurm_cluster\",instance=~\".*01\"}) / sum(slurm_node_cpus{slurm_cluster=\"$slurm_cluster\",instance=~\".*01\"})",
|
|
"range": true,
|
|
"refId": "A"
|
|
}
|
|
],
|
|
"title": "CPU Utilz.",
|
|
"type": "stat"
|
|
},
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "merlin-mimir"
|
|
},
|
|
"fieldConfig": {
|
|
"defaults": {
|
|
"max": 1,
|
|
"min": 0,
|
|
"thresholds": {
|
|
"mode": "absolute",
|
|
"steps": [
|
|
{
|
|
"color": "green",
|
|
"value": 0
|
|
},
|
|
{
|
|
"color": "yellow",
|
|
"value": 0.7
|
|
},
|
|
{
|
|
"color": "red",
|
|
"value": 0.9
|
|
}
|
|
]
|
|
},
|
|
"unit": "percentunit"
|
|
}
|
|
},
|
|
"gridPos": {
|
|
"h": 4,
|
|
"w": 3,
|
|
"x": 6,
|
|
"y": 1
|
|
},
|
|
"id": 6,
|
|
"options": {
|
|
"colorMode": "background",
|
|
"graphMode": "none",
|
|
"justifyMode": "auto",
|
|
"orientation": "auto",
|
|
"percentChangeColorMode": "standard",
|
|
"reduceOptions": {
|
|
"calcs": [
|
|
"lastNotNull"
|
|
],
|
|
"fields": "",
|
|
"values": false
|
|
},
|
|
"showPercentChange": false,
|
|
"textMode": "auto",
|
|
"wideLayout": true
|
|
},
|
|
"pluginVersion": "13.0.1+security-01",
|
|
"targets": [
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "merlin-mimir"
|
|
},
|
|
"editorMode": "code",
|
|
"expr": "sum(slurm_node_memory_alloc_bytes{slurm_cluster=\"$slurm_cluster\",instance=~\".*01\"}) / sum(slurm_node_memory_bytes{slurm_cluster=\"$slurm_cluster\",instance=~\".*01\"})",
|
|
"range": true,
|
|
"refId": "A"
|
|
}
|
|
],
|
|
"title": "Memory Utilz.",
|
|
"type": "stat"
|
|
},
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "merlin-mimir"
|
|
},
|
|
"fieldConfig": {
|
|
"defaults": {
|
|
"color": {
|
|
"mode": "palette-classic"
|
|
},
|
|
"mappings": [
|
|
{
|
|
"options": {
|
|
"alloc": {
|
|
"color": "dark-green",
|
|
"index": 0
|
|
},
|
|
"comp": {
|
|
"color": "green",
|
|
"index": 4
|
|
},
|
|
"idle": {
|
|
"color": "blue",
|
|
"index": 1
|
|
},
|
|
"inval": {
|
|
"color": "light-red",
|
|
"index": 7
|
|
},
|
|
"maint": {
|
|
"color": "dark-orange",
|
|
"index": 6
|
|
},
|
|
"plnd": {
|
|
"color": "purple",
|
|
"index": 2
|
|
}
|
|
},
|
|
"type": "value"
|
|
},
|
|
{
|
|
"options": {
|
|
"pattern": "mix-?",
|
|
"result": {
|
|
"color": "yellow",
|
|
"index": 3
|
|
}
|
|
},
|
|
"type": "regex"
|
|
},
|
|
{
|
|
"options": {
|
|
"pattern": "dr[an]",
|
|
"result": {
|
|
"color": "dark-red",
|
|
"index": 5
|
|
}
|
|
},
|
|
"type": "regex"
|
|
}
|
|
],
|
|
"thresholds": {
|
|
"mode": "absolute",
|
|
"steps": [
|
|
{
|
|
"color": "red",
|
|
"value": 0
|
|
}
|
|
]
|
|
}
|
|
}
|
|
},
|
|
"gridPos": {
|
|
"h": 12,
|
|
"w": 14,
|
|
"x": 10,
|
|
"y": 1
|
|
},
|
|
"id": 125,
|
|
"options": {
|
|
"colorMode": "background",
|
|
"graphMode": "area",
|
|
"justifyMode": "auto",
|
|
"orientation": "auto",
|
|
"percentChangeColorMode": "standard",
|
|
"reduceOptions": {
|
|
"calcs": [
|
|
"lastNotNull"
|
|
],
|
|
"fields": "/^state$/",
|
|
"limit": 150,
|
|
"values": true
|
|
},
|
|
"showPercentChange": false,
|
|
"textMode": "auto",
|
|
"wideLayout": true
|
|
},
|
|
"pluginVersion": "13.0.1+security-01",
|
|
"targets": [
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "merlin-mimir"
|
|
},
|
|
"editorMode": "code",
|
|
"exemplar": false,
|
|
"expr": "slurm_node_state{cluster=\"$slurm_cluster\"}",
|
|
"instant": true,
|
|
"legendFormat": "__auto",
|
|
"range": false,
|
|
"refId": "B"
|
|
}
|
|
],
|
|
"title": "Node State",
|
|
"transformations": [
|
|
{
|
|
"id": "labelsToFields",
|
|
"options": {
|
|
"keepLabels": [
|
|
"state",
|
|
"node"
|
|
],
|
|
"mode": "columns"
|
|
}
|
|
},
|
|
{
|
|
"id": "filterFieldsByName",
|
|
"options": {
|
|
"byVariable": false,
|
|
"include": {
|
|
"names": [
|
|
"state",
|
|
"node"
|
|
]
|
|
}
|
|
}
|
|
}
|
|
],
|
|
"type": "stat"
|
|
},
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "merlin-mimir"
|
|
},
|
|
"fieldConfig": {
|
|
"defaults": {
|
|
"thresholds": {
|
|
"mode": "absolute",
|
|
"steps": [
|
|
{
|
|
"color": "green",
|
|
"value": 0
|
|
},
|
|
{
|
|
"color": "orange",
|
|
"value": 1000
|
|
},
|
|
{
|
|
"color": "red",
|
|
"value": 5000
|
|
}
|
|
]
|
|
}
|
|
}
|
|
},
|
|
"gridPos": {
|
|
"h": 4,
|
|
"w": 3,
|
|
"x": 0,
|
|
"y": 5
|
|
},
|
|
"id": 2,
|
|
"options": {
|
|
"colorMode": "background",
|
|
"graphMode": "none",
|
|
"justifyMode": "auto",
|
|
"orientation": "auto",
|
|
"percentChangeColorMode": "standard",
|
|
"reduceOptions": {
|
|
"calcs": [
|
|
"lastNotNull"
|
|
],
|
|
"fields": "",
|
|
"values": false
|
|
},
|
|
"showPercentChange": false,
|
|
"textMode": "auto",
|
|
"wideLayout": true
|
|
},
|
|
"pluginVersion": "13.0.1+security-01",
|
|
"targets": [
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "merlin-mimir"
|
|
},
|
|
"editorMode": "code",
|
|
"expr": "slurm_jobs_pending{slurm_cluster=\"$slurm_cluster\",instance=~\".*01\"}",
|
|
"range": true,
|
|
"refId": "A"
|
|
}
|
|
],
|
|
"title": "Pending Jobs",
|
|
"type": "stat"
|
|
},
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "merlin-mimir"
|
|
},
|
|
"fieldConfig": {
|
|
"defaults": {
|
|
"thresholds": {
|
|
"mode": "absolute",
|
|
"steps": [
|
|
{
|
|
"color": "blue",
|
|
"value": 0
|
|
}
|
|
]
|
|
}
|
|
}
|
|
},
|
|
"gridPos": {
|
|
"h": 4,
|
|
"w": 3,
|
|
"x": 3,
|
|
"y": 5
|
|
},
|
|
"id": 4,
|
|
"options": {
|
|
"colorMode": "background",
|
|
"graphMode": "none",
|
|
"justifyMode": "auto",
|
|
"orientation": "auto",
|
|
"percentChangeColorMode": "standard",
|
|
"reduceOptions": {
|
|
"calcs": [
|
|
"lastNotNull"
|
|
],
|
|
"fields": "",
|
|
"values": false
|
|
},
|
|
"showPercentChange": false,
|
|
"textMode": "auto",
|
|
"wideLayout": true
|
|
},
|
|
"pluginVersion": "13.0.1+security-01",
|
|
"targets": [
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "merlin-mimir"
|
|
},
|
|
"editorMode": "code",
|
|
"expr": "sum(slurm_node_cpus{slurm_cluster=\"$slurm_cluster\",instance=~\".*01\"})",
|
|
"range": true,
|
|
"refId": "A"
|
|
}
|
|
],
|
|
"title": "Total CPUs",
|
|
"type": "stat"
|
|
},
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "merlin-mimir"
|
|
},
|
|
"fieldConfig": {
|
|
"defaults": {
|
|
"thresholds": {
|
|
"mode": "absolute",
|
|
"steps": [
|
|
{
|
|
"color": "green",
|
|
"value": 0
|
|
}
|
|
]
|
|
}
|
|
},
|
|
"overrides": [
|
|
{
|
|
"matcher": {
|
|
"id": "byName",
|
|
"options": "down"
|
|
},
|
|
"properties": [
|
|
{
|
|
"id": "thresholds",
|
|
"value": {
|
|
"mode": "absolute",
|
|
"steps": [
|
|
{
|
|
"color": "green",
|
|
"value": 0
|
|
},
|
|
{
|
|
"color": "red",
|
|
"value": 1
|
|
}
|
|
]
|
|
}
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"matcher": {
|
|
"id": "byName",
|
|
"options": "drain"
|
|
},
|
|
"properties": [
|
|
{
|
|
"id": "thresholds",
|
|
"value": {
|
|
"mode": "absolute",
|
|
"steps": [
|
|
{
|
|
"color": "green",
|
|
"value": 0
|
|
},
|
|
{
|
|
"color": "red",
|
|
"value": 1
|
|
}
|
|
]
|
|
}
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"matcher": {
|
|
"id": "byName",
|
|
"options": "idle",
|
|
"scope": "series"
|
|
},
|
|
"properties": [
|
|
{
|
|
"id": "thresholds",
|
|
"value": {
|
|
"mode": "absolute",
|
|
"steps": [
|
|
{
|
|
"color": "green",
|
|
"value": 0
|
|
},
|
|
{
|
|
"color": "blue",
|
|
"value": 1
|
|
}
|
|
]
|
|
}
|
|
}
|
|
]
|
|
}
|
|
]
|
|
},
|
|
"gridPos": {
|
|
"h": 4,
|
|
"w": 4,
|
|
"x": 6,
|
|
"y": 5
|
|
},
|
|
"id": 7,
|
|
"options": {
|
|
"colorMode": "background",
|
|
"graphMode": "none",
|
|
"justifyMode": "auto",
|
|
"orientation": "auto",
|
|
"percentChangeColorMode": "standard",
|
|
"reduceOptions": {
|
|
"calcs": [
|
|
"lastNotNull"
|
|
],
|
|
"fields": "",
|
|
"values": false
|
|
},
|
|
"showPercentChange": false,
|
|
"textMode": "auto",
|
|
"wideLayout": true
|
|
},
|
|
"pluginVersion": "13.0.1+security-01",
|
|
"targets": [
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "merlin-mimir"
|
|
},
|
|
"editorMode": "code",
|
|
"expr": "slurm_nodes_idle{slurm_cluster=\"$slurm_cluster\",instance=~\".*01\"}",
|
|
"legendFormat": "idle",
|
|
"range": true,
|
|
"refId": "A"
|
|
},
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "merlin-mimir"
|
|
},
|
|
"editorMode": "code",
|
|
"expr": "slurm_nodes_mixed{slurm_cluster=\"$slurm_cluster\",instance=~\".*01\"}",
|
|
"legendFormat": "mix",
|
|
"range": true,
|
|
"refId": "B"
|
|
},
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "merlin-mimir"
|
|
},
|
|
"editorMode": "code",
|
|
"expr": "slurm_nodes_alloc{slurm_cluster=\"$slurm_cluster\",instance=~\".*01\"}",
|
|
"legendFormat": "alloc",
|
|
"range": true,
|
|
"refId": "C"
|
|
},
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "merlin-mimir"
|
|
},
|
|
"editorMode": "code",
|
|
"expr": "slurm_nodes_down{slurm_cluster=\"$slurm_cluster\",instance=~\".*01\"}",
|
|
"legendFormat": "down",
|
|
"range": true,
|
|
"refId": "D"
|
|
},
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "merlin-mimir"
|
|
},
|
|
"editorMode": "code",
|
|
"expr": "slurm_nodes_draining{slurm_cluster=\"$slurm_cluster\",instance=~\".*01\"} + slurm_nodes_drained{slurm_cluster=\"$slurm_cluster\",instance=~\".*01\"} ",
|
|
"instant": false,
|
|
"legendFormat": "drain",
|
|
"range": true,
|
|
"refId": "E"
|
|
}
|
|
],
|
|
"title": "Nodes (idle/mix/alloc/down)",
|
|
"type": "stat"
|
|
},
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "merlin-mimir"
|
|
},
|
|
"fieldConfig": {
|
|
"defaults": {
|
|
"thresholds": {
|
|
"mode": "absolute",
|
|
"steps": [
|
|
{
|
|
"color": "green",
|
|
"value": 0
|
|
},
|
|
{
|
|
"color": "yellow",
|
|
"value": 1
|
|
}
|
|
]
|
|
}
|
|
}
|
|
},
|
|
"gridPos": {
|
|
"h": 4,
|
|
"w": 3,
|
|
"x": 0,
|
|
"y": 9
|
|
},
|
|
"id": 114,
|
|
"options": {
|
|
"colorMode": "background",
|
|
"graphMode": "none",
|
|
"justifyMode": "auto",
|
|
"orientation": "auto",
|
|
"percentChangeColorMode": "standard",
|
|
"reduceOptions": {
|
|
"calcs": [
|
|
"lastNotNull"
|
|
],
|
|
"fields": "",
|
|
"values": false
|
|
},
|
|
"showPercentChange": false,
|
|
"textMode": "auto",
|
|
"wideLayout": true
|
|
},
|
|
"pluginVersion": "13.0.1+security-01",
|
|
"targets": [
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "merlin-mimir"
|
|
},
|
|
"editorMode": "code",
|
|
"expr": "slurm_jobs_hold{slurm_cluster=\"$slurm_cluster\",instance=~\".*01\"}",
|
|
"range": true,
|
|
"refId": "A"
|
|
}
|
|
],
|
|
"title": "Held Jobs",
|
|
"type": "stat"
|
|
},
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "merlin-mimir"
|
|
},
|
|
"fieldConfig": {
|
|
"defaults": {
|
|
"thresholds": {
|
|
"mode": "absolute",
|
|
"steps": [
|
|
{
|
|
"color": "green",
|
|
"value": 0
|
|
}
|
|
]
|
|
}
|
|
}
|
|
},
|
|
"gridPos": {
|
|
"h": 4,
|
|
"w": 3,
|
|
"x": 3,
|
|
"y": 9
|
|
},
|
|
"id": 3,
|
|
"options": {
|
|
"colorMode": "background",
|
|
"graphMode": "none",
|
|
"justifyMode": "auto",
|
|
"orientation": "auto",
|
|
"percentChangeColorMode": "standard",
|
|
"reduceOptions": {
|
|
"calcs": [
|
|
"lastNotNull"
|
|
],
|
|
"fields": "",
|
|
"values": false
|
|
},
|
|
"showPercentChange": false,
|
|
"textMode": "auto",
|
|
"wideLayout": true
|
|
},
|
|
"pluginVersion": "13.0.1+security-01",
|
|
"targets": [
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "merlin-mimir"
|
|
},
|
|
"editorMode": "code",
|
|
"expr": "sum(slurm_node_cpus_alloc{slurm_cluster=\"$slurm_cluster\",instance=~\".*01\"})",
|
|
"range": true,
|
|
"refId": "A"
|
|
}
|
|
],
|
|
"title": "Allocated CPUs",
|
|
"type": "stat"
|
|
},
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "merlin-mimir"
|
|
},
|
|
"fieldConfig": {
|
|
"defaults": {
|
|
"thresholds": {
|
|
"mode": "absolute",
|
|
"steps": [
|
|
{
|
|
"color": "green",
|
|
"value": 0
|
|
},
|
|
{
|
|
"color": "red",
|
|
"value": 100
|
|
}
|
|
]
|
|
}
|
|
}
|
|
},
|
|
"gridPos": {
|
|
"h": 4,
|
|
"w": 3,
|
|
"x": 6,
|
|
"y": 9
|
|
},
|
|
"id": 8,
|
|
"options": {
|
|
"colorMode": "background",
|
|
"graphMode": "none",
|
|
"justifyMode": "auto",
|
|
"orientation": "auto",
|
|
"percentChangeColorMode": "standard",
|
|
"reduceOptions": {
|
|
"calcs": [
|
|
"lastNotNull"
|
|
],
|
|
"fields": "",
|
|
"values": false
|
|
},
|
|
"showPercentChange": false,
|
|
"textMode": "auto",
|
|
"wideLayout": true
|
|
},
|
|
"pluginVersion": "13.0.1+security-01",
|
|
"targets": [
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "merlin-mimir"
|
|
},
|
|
"editorMode": "code",
|
|
"expr": "slurm_slurmdbd_queue_size{slurm_cluster=\"$slurm_cluster\",instance=~\".*01\"}",
|
|
"range": true,
|
|
"refId": "A"
|
|
}
|
|
],
|
|
"title": "DBD Queue",
|
|
"type": "stat"
|
|
},
|
|
{
|
|
"collapsed": false,
|
|
"gridPos": {
|
|
"h": 1,
|
|
"w": 24,
|
|
"x": 0,
|
|
"y": 13
|
|
},
|
|
"id": 128,
|
|
"title": "Cluster summary graphs",
|
|
"type": "row"
|
|
},
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "merlin-mimir"
|
|
},
|
|
"fieldConfig": {
|
|
"defaults": {
|
|
"color": {
|
|
"mode": "palette-classic"
|
|
},
|
|
"custom": {
|
|
"axisBorderShow": false,
|
|
"axisCenteredZero": false,
|
|
"axisColorMode": "text",
|
|
"axisLabel": "",
|
|
"axisPlacement": "auto",
|
|
"axisSoftMin": 0,
|
|
"barAlignment": 0,
|
|
"barWidthFactor": 0.6,
|
|
"drawStyle": "line",
|
|
"fillOpacity": 0,
|
|
"gradientMode": "none",
|
|
"hideFrom": {
|
|
"legend": false,
|
|
"tooltip": false,
|
|
"viz": false
|
|
},
|
|
"insertNulls": false,
|
|
"lineInterpolation": "linear",
|
|
"lineStyle": {
|
|
"dash": [
|
|
10,
|
|
10
|
|
],
|
|
"fill": "dash"
|
|
},
|
|
"lineWidth": 1,
|
|
"pointSize": 5,
|
|
"scaleDistribution": {
|
|
"type": "linear"
|
|
},
|
|
"showPoints": "auto",
|
|
"showValues": false,
|
|
"spanNulls": false,
|
|
"stacking": {
|
|
"group": "A",
|
|
"mode": "none"
|
|
},
|
|
"thresholdsStyle": {
|
|
"mode": "off"
|
|
}
|
|
},
|
|
"thresholds": {
|
|
"mode": "absolute",
|
|
"steps": [
|
|
{
|
|
"color": "blue",
|
|
"value": 0
|
|
}
|
|
]
|
|
}
|
|
},
|
|
"overrides": [
|
|
{
|
|
"matcher": {
|
|
"id": "byName",
|
|
"options": "allocated CPUs"
|
|
},
|
|
"properties": [
|
|
{
|
|
"id": "custom.fillOpacity",
|
|
"value": 20
|
|
},
|
|
{
|
|
"id": "custom.lineStyle",
|
|
"value": null
|
|
}
|
|
]
|
|
}
|
|
]
|
|
},
|
|
"gridPos": {
|
|
"h": 8,
|
|
"w": 7,
|
|
"x": 0,
|
|
"y": 14
|
|
},
|
|
"id": 108,
|
|
"options": {
|
|
"annotations": {
|
|
"clustering": -1,
|
|
"multiLane": false
|
|
},
|
|
"legend": {
|
|
"calcs": [],
|
|
"displayMode": "list",
|
|
"placement": "bottom",
|
|
"showLegend": true
|
|
},
|
|
"tooltip": {
|
|
"hideZeros": false,
|
|
"mode": "single",
|
|
"sort": "none"
|
|
}
|
|
},
|
|
"pluginVersion": "13.0.1+security-01",
|
|
"targets": [
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "merlin-mimir"
|
|
},
|
|
"editorMode": "code",
|
|
"expr": "sum(slurm_node_cpus_alloc{slurm_cluster=\"$slurm_cluster\",instance=~\".*01\"})",
|
|
"legendFormat": "allocated CPUs",
|
|
"range": true,
|
|
"refId": "A"
|
|
},
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "merlin-mimir"
|
|
},
|
|
"editorMode": "code",
|
|
"expr": "sum(slurm_node_cpus{slurm_cluster=\"$slurm_cluster\",instance=~\".*01\"})",
|
|
"instant": false,
|
|
"legendFormat": "Total",
|
|
"range": true,
|
|
"refId": "B"
|
|
}
|
|
],
|
|
"title": "Sum Allocated CPUs",
|
|
"type": "timeseries"
|
|
},
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "merlin-mimir"
|
|
},
|
|
"fieldConfig": {
|
|
"defaults": {
|
|
"color": {
|
|
"mode": "palette-classic"
|
|
},
|
|
"custom": {
|
|
"axisBorderShow": false,
|
|
"axisCenteredZero": false,
|
|
"axisColorMode": "text",
|
|
"axisLabel": "",
|
|
"axisPlacement": "auto",
|
|
"axisSoftMin": 0,
|
|
"barAlignment": 0,
|
|
"barWidthFactor": 0.6,
|
|
"drawStyle": "line",
|
|
"fillOpacity": 0,
|
|
"gradientMode": "none",
|
|
"hideFrom": {
|
|
"legend": false,
|
|
"tooltip": false,
|
|
"viz": false
|
|
},
|
|
"insertNulls": false,
|
|
"lineInterpolation": "linear",
|
|
"lineStyle": {
|
|
"dash": [
|
|
10,
|
|
10
|
|
],
|
|
"fill": "dash"
|
|
},
|
|
"lineWidth": 1,
|
|
"pointSize": 5,
|
|
"scaleDistribution": {
|
|
"type": "linear"
|
|
},
|
|
"showPoints": "auto",
|
|
"showValues": false,
|
|
"spanNulls": false,
|
|
"stacking": {
|
|
"group": "A",
|
|
"mode": "none"
|
|
},
|
|
"thresholdsStyle": {
|
|
"mode": "off"
|
|
}
|
|
},
|
|
"thresholds": {
|
|
"mode": "absolute",
|
|
"steps": [
|
|
{
|
|
"color": "blue",
|
|
"value": 0
|
|
}
|
|
]
|
|
},
|
|
"unit": "mbytes"
|
|
},
|
|
"overrides": [
|
|
{
|
|
"matcher": {
|
|
"id": "byName",
|
|
"options": "allocated Memory"
|
|
},
|
|
"properties": [
|
|
{
|
|
"id": "custom.fillOpacity",
|
|
"value": 20
|
|
},
|
|
{
|
|
"id": "custom.lineStyle",
|
|
"value": null
|
|
}
|
|
]
|
|
}
|
|
]
|
|
},
|
|
"gridPos": {
|
|
"h": 8,
|
|
"w": 7,
|
|
"x": 7,
|
|
"y": 14
|
|
},
|
|
"id": 109,
|
|
"options": {
|
|
"annotations": {
|
|
"clustering": -1,
|
|
"multiLane": false
|
|
},
|
|
"legend": {
|
|
"calcs": [],
|
|
"displayMode": "list",
|
|
"placement": "bottom",
|
|
"showLegend": true
|
|
},
|
|
"tooltip": {
|
|
"hideZeros": false,
|
|
"mode": "single",
|
|
"sort": "none"
|
|
}
|
|
},
|
|
"pluginVersion": "13.0.1+security-01",
|
|
"targets": [
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "merlin-mimir"
|
|
},
|
|
"editorMode": "code",
|
|
"expr": "sum(slurm_node_memory_alloc_bytes{slurm_cluster=\"$slurm_cluster\",instance=~\".*01\"})",
|
|
"legendFormat": "allocated Memory",
|
|
"range": true,
|
|
"refId": "A"
|
|
},
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "merlin-mimir"
|
|
},
|
|
"editorMode": "code",
|
|
"expr": "sum(slurm_node_memory_bytes{slurm_cluster=\"$slurm_cluster\",instance=~\".*01\"})",
|
|
"instant": false,
|
|
"legendFormat": "Total",
|
|
"range": true,
|
|
"refId": "B"
|
|
}
|
|
],
|
|
"title": "Sum Allocated Memory",
|
|
"type": "timeseries"
|
|
},
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "merlin-mimir"
|
|
},
|
|
"description": "Show the level of allocation of available resources",
|
|
"fieldConfig": {
|
|
"defaults": {
|
|
"color": {
|
|
"mode": "palette-classic"
|
|
},
|
|
"custom": {
|
|
"axisBorderShow": false,
|
|
"axisCenteredZero": false,
|
|
"axisColorMode": "text",
|
|
"axisLabel": "",
|
|
"axisPlacement": "auto",
|
|
"axisSoftMin": 0,
|
|
"barAlignment": 0,
|
|
"barWidthFactor": 0.6,
|
|
"drawStyle": "line",
|
|
"fillOpacity": 33,
|
|
"gradientMode": "none",
|
|
"hideFrom": {
|
|
"legend": false,
|
|
"tooltip": false,
|
|
"viz": false
|
|
},
|
|
"insertNulls": false,
|
|
"lineInterpolation": "linear",
|
|
"lineStyle": {
|
|
"fill": "solid"
|
|
},
|
|
"lineWidth": 1,
|
|
"pointSize": 5,
|
|
"scaleDistribution": {
|
|
"type": "linear"
|
|
},
|
|
"showPoints": "auto",
|
|
"showValues": false,
|
|
"spanNulls": false,
|
|
"stacking": {
|
|
"group": "A",
|
|
"mode": "none"
|
|
},
|
|
"thresholdsStyle": {
|
|
"mode": "off"
|
|
}
|
|
},
|
|
"thresholds": {
|
|
"mode": "absolute",
|
|
"steps": [
|
|
{
|
|
"color": "blue",
|
|
"value": 0
|
|
}
|
|
]
|
|
},
|
|
"unit": "percentunit"
|
|
},
|
|
"overrides": [
|
|
{
|
|
"matcher": {
|
|
"id": "byName",
|
|
"options": "100%",
|
|
"scope": "series"
|
|
},
|
|
"properties": [
|
|
{
|
|
"id": "custom.fillOpacity",
|
|
"value": 0
|
|
},
|
|
{
|
|
"id": "custom.lineStyle",
|
|
"value": {
|
|
"dash": [
|
|
10,
|
|
10
|
|
],
|
|
"fill": "dash"
|
|
}
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"matcher": {
|
|
"id": "byName",
|
|
"options": "100%"
|
|
},
|
|
"properties": [
|
|
{
|
|
"id": "color",
|
|
"value": {
|
|
"fixedColor": "yellow",
|
|
"mode": "fixed"
|
|
}
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"matcher": {
|
|
"id": "byName",
|
|
"options": "%Memory"
|
|
},
|
|
"properties": [
|
|
{
|
|
"id": "color",
|
|
"value": {
|
|
"fixedColor": "blue",
|
|
"mode": "fixed"
|
|
}
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"matcher": {
|
|
"id": "byName",
|
|
"options": "%CPU"
|
|
},
|
|
"properties": [
|
|
{
|
|
"id": "color",
|
|
"value": {
|
|
"fixedColor": "orange",
|
|
"mode": "fixed"
|
|
}
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"matcher": {
|
|
"id": "byName",
|
|
"options": "A100 GPU"
|
|
},
|
|
"properties": [
|
|
{
|
|
"id": "color",
|
|
"value": {
|
|
"fixedColor": "green",
|
|
"mode": "fixed"
|
|
}
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"matcher": {
|
|
"id": "byName",
|
|
"options": "g200 GPU"
|
|
},
|
|
"properties": [
|
|
{
|
|
"id": "color",
|
|
"value": {
|
|
"fixedColor": "semi-dark-purple",
|
|
"mode": "fixed"
|
|
}
|
|
}
|
|
]
|
|
}
|
|
]
|
|
},
|
|
"gridPos": {
|
|
"h": 8,
|
|
"w": 7,
|
|
"x": 14,
|
|
"y": 14
|
|
},
|
|
"id": 126,
|
|
"options": {
|
|
"annotations": {
|
|
"clustering": -1,
|
|
"multiLane": false
|
|
},
|
|
"legend": {
|
|
"calcs": [],
|
|
"displayMode": "list",
|
|
"placement": "bottom",
|
|
"showLegend": true
|
|
},
|
|
"tooltip": {
|
|
"hideZeros": false,
|
|
"mode": "multi",
|
|
"sort": "desc"
|
|
}
|
|
},
|
|
"pluginVersion": "13.0.1+security-01",
|
|
"targets": [
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "merlin-mimir"
|
|
},
|
|
"editorMode": "code",
|
|
"expr": "sum(slurm_node_cpus_alloc{slurm_cluster=\"$slurm_cluster\",instance=~\".*01\"}) / sum(slurm_node_cpus{slurm_cluster=\"$slurm_cluster\",instance=~\".*01\"})",
|
|
"legendFormat": "%CPU",
|
|
"range": true,
|
|
"refId": "A"
|
|
},
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "merlin-mimir"
|
|
},
|
|
"editorMode": "code",
|
|
"expr": "sum(slurm_node_memory_alloc_bytes{slurm_cluster=\"$slurm_cluster\",instance=~\".*01\"}) / sum(slurm_node_memory_bytes{slurm_cluster=\"$slurm_cluster\",instance=~\".*01\"})",
|
|
"instant": false,
|
|
"legendFormat": "%Memory",
|
|
"range": true,
|
|
"refId": "B"
|
|
},
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "merlin-mimir"
|
|
},
|
|
"editorMode": "code",
|
|
"expr": "1",
|
|
"instant": false,
|
|
"legendFormat": "100%",
|
|
"range": true,
|
|
"refId": "C"
|
|
},
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "merlin-mimir"
|
|
},
|
|
"editorMode": "code",
|
|
"expr": "slurm_exporter_gpus_alloc{type=\"nvidia_a100-sxm4-80gb\",slurm_cluster=\"$slurm_cluster\"} / slurm_exporter_gpus_total{type=\"nvidia_a100-sxm4-80gb\",slurm_cluster=\"$slurm_cluster\"}",
|
|
"instant": false,
|
|
"legendFormat": "A100 GPU",
|
|
"range": true,
|
|
"refId": "D"
|
|
},
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "merlin-mimir"
|
|
},
|
|
"editorMode": "code",
|
|
"expr": "slurm_exporter_gpus_alloc{type=\"gh200\",slurm_cluster=\"$slurm_cluster\"} / slurm_exporter_gpus_total{type=\"gh200\",slurm_cluster=\"$slurm_cluster\"}",
|
|
"instant": false,
|
|
"legendFormat": "g200 GPU",
|
|
"range": true,
|
|
"refId": "E"
|
|
}
|
|
],
|
|
"title": "%Resources allocated",
|
|
"type": "timeseries"
|
|
},
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "merlin-mimir"
|
|
},
|
|
"fieldConfig": {
|
|
"defaults": {
|
|
"color": {
|
|
"mode": "palette-classic"
|
|
},
|
|
"custom": {
|
|
"axisBorderShow": false,
|
|
"axisCenteredZero": false,
|
|
"axisColorMode": "text",
|
|
"axisLabel": "",
|
|
"axisPlacement": "auto",
|
|
"barAlignment": 0,
|
|
"barWidthFactor": 0.6,
|
|
"drawStyle": "bars",
|
|
"fillOpacity": 30,
|
|
"gradientMode": "none",
|
|
"hideFrom": {
|
|
"legend": false,
|
|
"tooltip": false,
|
|
"viz": false
|
|
},
|
|
"insertNulls": false,
|
|
"lineInterpolation": "linear",
|
|
"lineWidth": 2,
|
|
"pointSize": 5,
|
|
"scaleDistribution": {
|
|
"type": "linear"
|
|
},
|
|
"showPoints": "auto",
|
|
"showValues": false,
|
|
"spanNulls": false,
|
|
"stacking": {
|
|
"group": "A",
|
|
"mode": "normal"
|
|
},
|
|
"thresholdsStyle": {
|
|
"mode": "off"
|
|
}
|
|
},
|
|
"thresholds": {
|
|
"mode": "absolute",
|
|
"steps": [
|
|
{
|
|
"color": "green",
|
|
"value": 0
|
|
},
|
|
{
|
|
"color": "red",
|
|
"value": 80
|
|
}
|
|
]
|
|
},
|
|
"unit": "short"
|
|
},
|
|
"overrides": [
|
|
{
|
|
"matcher": {
|
|
"id": "byName",
|
|
"options": "total CPUs",
|
|
"scope": "series"
|
|
},
|
|
"properties": [
|
|
{
|
|
"id": "custom.drawStyle",
|
|
"value": "line"
|
|
},
|
|
{
|
|
"id": "custom.fillOpacity",
|
|
"value": 0
|
|
},
|
|
{
|
|
"id": "custom.lineStyle",
|
|
"value": {
|
|
"dash": [
|
|
10,
|
|
10
|
|
],
|
|
"fill": "dash"
|
|
}
|
|
},
|
|
{
|
|
"id": "color",
|
|
"value": {
|
|
"fixedColor": "yellow",
|
|
"mode": "fixed"
|
|
}
|
|
}
|
|
]
|
|
}
|
|
]
|
|
},
|
|
"gridPos": {
|
|
"h": 8,
|
|
"w": 9,
|
|
"x": 0,
|
|
"y": 22
|
|
},
|
|
"id": 111,
|
|
"options": {
|
|
"annotations": {
|
|
"clustering": -1,
|
|
"multiLane": false
|
|
},
|
|
"legend": {
|
|
"calcs": [],
|
|
"displayMode": "list",
|
|
"placement": "bottom",
|
|
"showLegend": true
|
|
},
|
|
"tooltip": {
|
|
"hideZeros": false,
|
|
"mode": "multi",
|
|
"sort": "desc"
|
|
}
|
|
},
|
|
"pluginVersion": "13.0.1+security-01",
|
|
"targets": [
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "merlin-mimir"
|
|
},
|
|
"editorMode": "code",
|
|
"expr": "slurm_partition_jobs_cpus_alloc{slurm_cluster=\"$slurm_cluster\",instance=~\".*01\"}",
|
|
"legendFormat": "{{partition}}",
|
|
"range": true,
|
|
"refId": "A"
|
|
},
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "merlin-mimir"
|
|
},
|
|
"editorMode": "code",
|
|
"expr": "sum(slurm_node_cpus{slurm_cluster=\"$slurm_cluster\",instance=~\".*01\"})",
|
|
"legendFormat": "total CPUs",
|
|
"range": true,
|
|
"refId": "B"
|
|
}
|
|
],
|
|
"title": "CPU Allocation by Partition",
|
|
"type": "timeseries"
|
|
},
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "merlin-mimir"
|
|
},
|
|
"fieldConfig": {
|
|
"defaults": {
|
|
"color": {
|
|
"mode": "palette-classic"
|
|
},
|
|
"custom": {
|
|
"axisBorderShow": false,
|
|
"axisCenteredZero": false,
|
|
"axisColorMode": "text",
|
|
"axisLabel": "",
|
|
"axisPlacement": "auto",
|
|
"barAlignment": 0,
|
|
"barWidthFactor": 0.6,
|
|
"drawStyle": "bars",
|
|
"fillOpacity": 0,
|
|
"gradientMode": "none",
|
|
"hideFrom": {
|
|
"legend": false,
|
|
"tooltip": false,
|
|
"viz": false
|
|
},
|
|
"insertNulls": false,
|
|
"lineInterpolation": "linear",
|
|
"lineWidth": 1,
|
|
"pointSize": 5,
|
|
"scaleDistribution": {
|
|
"type": "linear"
|
|
},
|
|
"showPoints": "auto",
|
|
"showValues": false,
|
|
"spanNulls": false,
|
|
"stacking": {
|
|
"group": "A",
|
|
"mode": "normal"
|
|
},
|
|
"thresholdsStyle": {
|
|
"mode": "off"
|
|
}
|
|
},
|
|
"thresholds": {
|
|
"mode": "absolute",
|
|
"steps": [
|
|
{
|
|
"color": "green",
|
|
"value": 0
|
|
},
|
|
{
|
|
"color": "red",
|
|
"value": 80
|
|
}
|
|
]
|
|
}
|
|
},
|
|
"overrides": [
|
|
{
|
|
"matcher": {
|
|
"id": "byName",
|
|
"options": "slurm_nodes_down"
|
|
},
|
|
"properties": [
|
|
{
|
|
"id": "color",
|
|
"value": {
|
|
"fixedColor": "dark-red",
|
|
"mode": "fixed"
|
|
}
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"matcher": {
|
|
"id": "byName",
|
|
"options": "slurm_nodes_draining"
|
|
},
|
|
"properties": [
|
|
{
|
|
"id": "color",
|
|
"value": {
|
|
"fixedColor": "yellow",
|
|
"mode": "fixed"
|
|
}
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"matcher": {
|
|
"id": "byName",
|
|
"options": "slurm_nodes_drained"
|
|
},
|
|
"properties": [
|
|
{
|
|
"id": "color",
|
|
"value": {
|
|
"fixedColor": "dark-orange",
|
|
"mode": "fixed"
|
|
}
|
|
}
|
|
]
|
|
}
|
|
]
|
|
},
|
|
"gridPos": {
|
|
"h": 8,
|
|
"w": 9,
|
|
"x": 9,
|
|
"y": 22
|
|
},
|
|
"id": 113,
|
|
"options": {
|
|
"annotations": {
|
|
"clustering": -1,
|
|
"multiLane": false
|
|
},
|
|
"legend": {
|
|
"calcs": [],
|
|
"displayMode": "list",
|
|
"placement": "bottom",
|
|
"showLegend": true
|
|
},
|
|
"tooltip": {
|
|
"hideZeros": false,
|
|
"mode": "single",
|
|
"sort": "none"
|
|
}
|
|
},
|
|
"pluginVersion": "13.0.1+security-01",
|
|
"targets": [
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "merlin-mimir"
|
|
},
|
|
"editorMode": "code",
|
|
"expr": "slurm_nodes_draining{slurm_cluster=\"$slurm_cluster\",instance=~\".*01\"}",
|
|
"instant": false,
|
|
"legendFormat": "{{__name__}}",
|
|
"range": true,
|
|
"refId": "A"
|
|
},
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "merlin-mimir"
|
|
},
|
|
"editorMode": "code",
|
|
"expr": "slurm_nodes_drained{slurm_cluster=\"$slurm_cluster\",instance=~\".*01\"}",
|
|
"instant": false,
|
|
"legendFormat": "{{__name__}}",
|
|
"range": true,
|
|
"refId": "B"
|
|
},
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "merlin-mimir"
|
|
},
|
|
"editorMode": "code",
|
|
"expr": "slurm_nodes_down{slurm_cluster=\"$slurm_cluster\",instance=~\".*01\"}",
|
|
"instant": false,
|
|
"legendFormat": "{{__name__}}",
|
|
"range": true,
|
|
"refId": "C"
|
|
}
|
|
],
|
|
"title": "Nodes drained/draining/down",
|
|
"type": "timeseries"
|
|
},
|
|
{
|
|
"collapsed": false,
|
|
"gridPos": {
|
|
"h": 1,
|
|
"w": 24,
|
|
"x": 0,
|
|
"y": 30
|
|
},
|
|
"id": 129,
|
|
"title": "Job Trends",
|
|
"type": "row"
|
|
},
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "merlin-mimir"
|
|
},
|
|
"fieldConfig": {
|
|
"defaults": {
|
|
"color": {
|
|
"mode": "palette-classic"
|
|
},
|
|
"custom": {
|
|
"axisBorderShow": false,
|
|
"axisCenteredZero": false,
|
|
"axisColorMode": "text",
|
|
"axisLabel": "",
|
|
"axisPlacement": "auto",
|
|
"barAlignment": 0,
|
|
"barWidthFactor": 0.6,
|
|
"drawStyle": "line",
|
|
"fillOpacity": 20,
|
|
"gradientMode": "none",
|
|
"hideFrom": {
|
|
"legend": false,
|
|
"tooltip": false,
|
|
"viz": false
|
|
},
|
|
"insertNulls": false,
|
|
"lineInterpolation": "linear",
|
|
"lineWidth": 2,
|
|
"pointSize": 5,
|
|
"scaleDistribution": {
|
|
"type": "linear"
|
|
},
|
|
"showPoints": "auto",
|
|
"showValues": false,
|
|
"spanNulls": false,
|
|
"stacking": {
|
|
"group": "A",
|
|
"mode": "none"
|
|
},
|
|
"thresholdsStyle": {
|
|
"mode": "off"
|
|
}
|
|
},
|
|
"thresholds": {
|
|
"mode": "absolute",
|
|
"steps": [
|
|
{
|
|
"color": "green",
|
|
"value": 0
|
|
},
|
|
{
|
|
"color": "red",
|
|
"value": 80
|
|
}
|
|
]
|
|
}
|
|
},
|
|
"overrides": [
|
|
{
|
|
"matcher": {
|
|
"id": "byName",
|
|
"options": "running"
|
|
},
|
|
"properties": [
|
|
{
|
|
"id": "color",
|
|
"value": {
|
|
"fixedColor": "green",
|
|
"mode": "fixed"
|
|
}
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"matcher": {
|
|
"id": "byName",
|
|
"options": "pending"
|
|
},
|
|
"properties": [
|
|
{
|
|
"id": "color",
|
|
"value": {
|
|
"fixedColor": "orange",
|
|
"mode": "fixed"
|
|
}
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"matcher": {
|
|
"id": "byName",
|
|
"options": "completing"
|
|
},
|
|
"properties": [
|
|
{
|
|
"id": "color",
|
|
"value": {
|
|
"fixedColor": "blue",
|
|
"mode": "fixed"
|
|
}
|
|
}
|
|
]
|
|
}
|
|
]
|
|
},
|
|
"gridPos": {
|
|
"h": 8,
|
|
"w": 12,
|
|
"x": 0,
|
|
"y": 31
|
|
},
|
|
"id": 10,
|
|
"options": {
|
|
"annotations": {
|
|
"clustering": -1,
|
|
"multiLane": false
|
|
},
|
|
"legend": {
|
|
"calcs": [],
|
|
"displayMode": "list",
|
|
"placement": "bottom",
|
|
"showLegend": true
|
|
},
|
|
"tooltip": {
|
|
"hideZeros": false,
|
|
"mode": "multi",
|
|
"sort": "none"
|
|
}
|
|
},
|
|
"pluginVersion": "13.0.1+security-01",
|
|
"targets": [
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "merlin-mimir"
|
|
},
|
|
"editorMode": "code",
|
|
"expr": "slurm_jobs_running{slurm_cluster=\"$slurm_cluster\",instance=~\".*01\"}",
|
|
"legendFormat": "running",
|
|
"range": true,
|
|
"refId": "A"
|
|
},
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "merlin-mimir"
|
|
},
|
|
"editorMode": "code",
|
|
"expr": "slurm_jobs_pending{slurm_cluster=\"$slurm_cluster\",instance=~\".*01\"}",
|
|
"legendFormat": "pending",
|
|
"range": true,
|
|
"refId": "B"
|
|
},
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "merlin-mimir"
|
|
},
|
|
"editorMode": "code",
|
|
"expr": "slurm_jobs_completing{slurm_cluster=\"$slurm_cluster\",instance=~\".*01\"}",
|
|
"legendFormat": "completing",
|
|
"range": true,
|
|
"refId": "C"
|
|
},
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "merlin-mimir"
|
|
},
|
|
"editorMode": "code",
|
|
"expr": "slurm_jobs_suspended{slurm_cluster=\"$slurm_cluster\",instance=~\".*01\"}",
|
|
"legendFormat": "suspended",
|
|
"range": true,
|
|
"refId": "D"
|
|
},
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "merlin-mimir"
|
|
},
|
|
"editorMode": "code",
|
|
"expr": "slurm_jobs_configuring{slurm_cluster=\"$slurm_cluster\",instance=~\".*01\"}",
|
|
"legendFormat": "configuring",
|
|
"range": true,
|
|
"refId": "E"
|
|
}
|
|
],
|
|
"title": "Job State Trends",
|
|
"type": "timeseries"
|
|
},
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "merlin-mimir"
|
|
},
|
|
"fieldConfig": {
|
|
"defaults": {
|
|
"color": {
|
|
"mode": "palette-classic"
|
|
},
|
|
"custom": {
|
|
"axisBorderShow": false,
|
|
"axisCenteredZero": false,
|
|
"axisColorMode": "text",
|
|
"axisLabel": "",
|
|
"axisPlacement": "auto",
|
|
"barAlignment": 0,
|
|
"barWidthFactor": 0.6,
|
|
"drawStyle": "line",
|
|
"fillOpacity": 20,
|
|
"gradientMode": "none",
|
|
"hideFrom": {
|
|
"legend": false,
|
|
"tooltip": false,
|
|
"viz": false
|
|
},
|
|
"insertNulls": false,
|
|
"lineInterpolation": "linear",
|
|
"lineWidth": 2,
|
|
"pointSize": 5,
|
|
"scaleDistribution": {
|
|
"type": "linear"
|
|
},
|
|
"showPoints": "auto",
|
|
"showValues": false,
|
|
"spanNulls": false,
|
|
"stacking": {
|
|
"group": "A",
|
|
"mode": "none"
|
|
},
|
|
"thresholdsStyle": {
|
|
"mode": "off"
|
|
}
|
|
},
|
|
"thresholds": {
|
|
"mode": "absolute",
|
|
"steps": [
|
|
{
|
|
"color": "green",
|
|
"value": 0
|
|
},
|
|
{
|
|
"color": "red",
|
|
"value": 80
|
|
}
|
|
]
|
|
}
|
|
}
|
|
},
|
|
"gridPos": {
|
|
"h": 8,
|
|
"w": 12,
|
|
"x": 12,
|
|
"y": 31
|
|
},
|
|
"id": 11,
|
|
"options": {
|
|
"annotations": {
|
|
"clustering": -1,
|
|
"multiLane": false
|
|
},
|
|
"legend": {
|
|
"calcs": [],
|
|
"displayMode": "list",
|
|
"placement": "bottom",
|
|
"showLegend": true
|
|
},
|
|
"tooltip": {
|
|
"hideZeros": false,
|
|
"mode": "multi",
|
|
"sort": "none"
|
|
}
|
|
},
|
|
"pluginVersion": "13.0.1+security-01",
|
|
"targets": [
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "merlin-mimir"
|
|
},
|
|
"editorMode": "code",
|
|
"expr": "rate(slurm_sdiag_jobs_completed{slurm_cluster=\"$slurm_cluster\",instance=~\".*01\"}[5m])*60",
|
|
"legendFormat": "completed",
|
|
"range": true,
|
|
"refId": "A"
|
|
},
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "merlin-mimir"
|
|
},
|
|
"editorMode": "code",
|
|
"expr": "rate(slurm_sdiag_jobs_started{slurm_cluster=\"$slurm_cluster\",instance=~\".*01\"}[5m])*60",
|
|
"legendFormat": "started",
|
|
"range": true,
|
|
"refId": "B"
|
|
},
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "merlin-mimir"
|
|
},
|
|
"editorMode": "code",
|
|
"expr": "rate(slurm_sdiag_jobs_submitted{slurm_cluster=\"$slurm_cluster\",instance=~\".*01\"}[5m])*60",
|
|
"legendFormat": "submitted",
|
|
"range": true,
|
|
"refId": "C"
|
|
},
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "merlin-mimir"
|
|
},
|
|
"editorMode": "code",
|
|
"expr": "rate(slurm_sdiag_jobs_failed{slurm_cluster=\"$slurm_cluster\",instance=~\".*01\"}[5m])*60",
|
|
"legendFormat": "failed",
|
|
"range": true,
|
|
"refId": "D"
|
|
},
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "merlin-mimir"
|
|
},
|
|
"editorMode": "code",
|
|
"expr": "rate(slurm_sdiag_jobs_canceled{slurm_cluster=\"$slurm_cluster\",instance=~\".*01\"}[5m])*60",
|
|
"legendFormat": "cancelled",
|
|
"range": true,
|
|
"refId": "E"
|
|
}
|
|
],
|
|
"title": "Job Throughput (rate/min)",
|
|
"type": "timeseries"
|
|
},
|
|
{
|
|
"collapsed": false,
|
|
"gridPos": {
|
|
"h": 1,
|
|
"w": 24,
|
|
"x": 0,
|
|
"y": 39
|
|
},
|
|
"id": 130,
|
|
"title": "user monitoring",
|
|
"type": "row"
|
|
},
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "merlin-mimir"
|
|
},
|
|
"fieldConfig": {
|
|
"defaults": {
|
|
"color": {
|
|
"mode": "palette-classic"
|
|
},
|
|
"custom": {
|
|
"axisBorderShow": false,
|
|
"axisCenteredZero": false,
|
|
"axisColorMode": "text",
|
|
"axisLabel": "",
|
|
"axisPlacement": "auto",
|
|
"barAlignment": 0,
|
|
"barWidthFactor": 0.6,
|
|
"drawStyle": "bars",
|
|
"fillOpacity": 0,
|
|
"gradientMode": "none",
|
|
"hideFrom": {
|
|
"legend": false,
|
|
"tooltip": false,
|
|
"viz": false
|
|
},
|
|
"insertNulls": false,
|
|
"lineInterpolation": "linear",
|
|
"lineWidth": 1,
|
|
"pointSize": 5,
|
|
"scaleDistribution": {
|
|
"type": "linear"
|
|
},
|
|
"showPoints": "auto",
|
|
"showValues": false,
|
|
"spanNulls": false,
|
|
"stacking": {
|
|
"group": "A",
|
|
"mode": "normal"
|
|
},
|
|
"thresholdsStyle": {
|
|
"mode": "off"
|
|
}
|
|
},
|
|
"thresholds": {
|
|
"mode": "absolute",
|
|
"steps": [
|
|
{
|
|
"color": "green",
|
|
"value": 0
|
|
},
|
|
{
|
|
"color": "red",
|
|
"value": 80
|
|
}
|
|
]
|
|
}
|
|
}
|
|
},
|
|
"gridPos": {
|
|
"h": 8,
|
|
"w": 12,
|
|
"x": 0,
|
|
"y": 40
|
|
},
|
|
"id": 123,
|
|
"options": {
|
|
"annotations": {
|
|
"clustering": -1,
|
|
"multiLane": false
|
|
},
|
|
"legend": {
|
|
"calcs": [],
|
|
"displayMode": "list",
|
|
"placement": "bottom",
|
|
"showLegend": true
|
|
},
|
|
"tooltip": {
|
|
"hideZeros": false,
|
|
"mode": "single",
|
|
"sort": "none"
|
|
}
|
|
},
|
|
"pluginVersion": "13.0.1+security-01",
|
|
"targets": [
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "merlin-mimir"
|
|
},
|
|
"editorMode": "code",
|
|
"expr": "slurm_user_jobs_cancelled{slurm_cluster=\"$slurm_cluster\",instance=~\".*01\"}",
|
|
"instant": false,
|
|
"legendFormat": "{{username}}",
|
|
"range": true,
|
|
"refId": "A"
|
|
}
|
|
],
|
|
"title": "Cancelled jobs per user",
|
|
"type": "timeseries"
|
|
},
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "merlin-mimir"
|
|
},
|
|
"fieldConfig": {
|
|
"defaults": {
|
|
"color": {
|
|
"mode": "palette-classic"
|
|
},
|
|
"custom": {
|
|
"axisBorderShow": false,
|
|
"axisCenteredZero": false,
|
|
"axisColorMode": "text",
|
|
"axisLabel": "",
|
|
"axisPlacement": "auto",
|
|
"barAlignment": 0,
|
|
"barWidthFactor": 0.6,
|
|
"drawStyle": "bars",
|
|
"fillOpacity": 0,
|
|
"gradientMode": "none",
|
|
"hideFrom": {
|
|
"legend": false,
|
|
"tooltip": false,
|
|
"viz": false
|
|
},
|
|
"insertNulls": false,
|
|
"lineInterpolation": "linear",
|
|
"lineWidth": 1,
|
|
"pointSize": 5,
|
|
"scaleDistribution": {
|
|
"type": "linear"
|
|
},
|
|
"showPoints": "auto",
|
|
"showValues": false,
|
|
"spanNulls": false,
|
|
"stacking": {
|
|
"group": "A",
|
|
"mode": "normal"
|
|
},
|
|
"thresholdsStyle": {
|
|
"mode": "off"
|
|
}
|
|
},
|
|
"thresholds": {
|
|
"mode": "absolute",
|
|
"steps": [
|
|
{
|
|
"color": "green",
|
|
"value": 0
|
|
},
|
|
{
|
|
"color": "red",
|
|
"value": 80
|
|
}
|
|
]
|
|
}
|
|
}
|
|
},
|
|
"gridPos": {
|
|
"h": 8,
|
|
"w": 12,
|
|
"x": 12,
|
|
"y": 40
|
|
},
|
|
"id": 121,
|
|
"options": {
|
|
"annotations": {
|
|
"clustering": -1,
|
|
"multiLane": false
|
|
},
|
|
"legend": {
|
|
"calcs": [],
|
|
"displayMode": "list",
|
|
"placement": "bottom",
|
|
"showLegend": true
|
|
},
|
|
"tooltip": {
|
|
"hideZeros": false,
|
|
"mode": "single",
|
|
"sort": "none"
|
|
}
|
|
},
|
|
"pluginVersion": "13.0.1+security-01",
|
|
"targets": [
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "merlin-mimir"
|
|
},
|
|
"editorMode": "code",
|
|
"expr": "slurm_user_jobs_failed{slurm_cluster=\"$slurm_cluster\",instance=~\".*01\"}",
|
|
"instant": false,
|
|
"legendFormat": "{{username}}",
|
|
"range": true,
|
|
"refId": "A"
|
|
}
|
|
],
|
|
"title": "Failed jobs per user",
|
|
"type": "timeseries"
|
|
},
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "merlin-mimir"
|
|
},
|
|
"fieldConfig": {
|
|
"defaults": {
|
|
"color": {
|
|
"mode": "palette-classic"
|
|
},
|
|
"custom": {
|
|
"axisBorderShow": false,
|
|
"axisCenteredZero": false,
|
|
"axisColorMode": "text",
|
|
"axisLabel": "",
|
|
"axisPlacement": "auto",
|
|
"barAlignment": 0,
|
|
"barWidthFactor": 0.6,
|
|
"drawStyle": "bars",
|
|
"fillOpacity": 0,
|
|
"gradientMode": "none",
|
|
"hideFrom": {
|
|
"legend": false,
|
|
"tooltip": false,
|
|
"viz": false
|
|
},
|
|
"insertNulls": false,
|
|
"lineInterpolation": "linear",
|
|
"lineWidth": 1,
|
|
"pointSize": 5,
|
|
"scaleDistribution": {
|
|
"type": "linear"
|
|
},
|
|
"showPoints": "auto",
|
|
"showValues": false,
|
|
"spanNulls": false,
|
|
"stacking": {
|
|
"group": "A",
|
|
"mode": "normal"
|
|
},
|
|
"thresholdsStyle": {
|
|
"mode": "off"
|
|
}
|
|
},
|
|
"thresholds": {
|
|
"mode": "absolute",
|
|
"steps": [
|
|
{
|
|
"color": "green",
|
|
"value": 0
|
|
},
|
|
{
|
|
"color": "red",
|
|
"value": 80
|
|
}
|
|
]
|
|
}
|
|
}
|
|
},
|
|
"gridPos": {
|
|
"h": 8,
|
|
"w": 12,
|
|
"x": 0,
|
|
"y": 48
|
|
},
|
|
"id": 116,
|
|
"options": {
|
|
"annotations": {
|
|
"clustering": -1,
|
|
"multiLane": false
|
|
},
|
|
"legend": {
|
|
"calcs": [],
|
|
"displayMode": "list",
|
|
"placement": "bottom",
|
|
"showLegend": true
|
|
},
|
|
"tooltip": {
|
|
"hideZeros": false,
|
|
"mode": "multi",
|
|
"sort": "desc"
|
|
}
|
|
},
|
|
"pluginVersion": "13.0.1+security-01",
|
|
"targets": [
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "merlin-mimir"
|
|
},
|
|
"editorMode": "code",
|
|
"expr": "slurm_user_jobs_running{slurm_cluster=\"$slurm_cluster\",instance=~\".*01\"}",
|
|
"instant": false,
|
|
"legendFormat": "{{username}}",
|
|
"range": true,
|
|
"refId": "A"
|
|
}
|
|
],
|
|
"title": "Jobs running per user",
|
|
"type": "timeseries"
|
|
},
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "merlin-mimir"
|
|
},
|
|
"fieldConfig": {
|
|
"defaults": {
|
|
"color": {
|
|
"mode": "palette-classic"
|
|
},
|
|
"custom": {
|
|
"axisBorderShow": false,
|
|
"axisCenteredZero": false,
|
|
"axisColorMode": "text",
|
|
"axisLabel": "",
|
|
"axisPlacement": "auto",
|
|
"barAlignment": 0,
|
|
"barWidthFactor": 0.6,
|
|
"drawStyle": "bars",
|
|
"fillOpacity": 0,
|
|
"gradientMode": "none",
|
|
"hideFrom": {
|
|
"legend": false,
|
|
"tooltip": false,
|
|
"viz": false
|
|
},
|
|
"insertNulls": false,
|
|
"lineInterpolation": "linear",
|
|
"lineWidth": 1,
|
|
"pointSize": 5,
|
|
"scaleDistribution": {
|
|
"type": "linear"
|
|
},
|
|
"showPoints": "auto",
|
|
"showValues": false,
|
|
"spanNulls": false,
|
|
"stacking": {
|
|
"group": "A",
|
|
"mode": "normal"
|
|
},
|
|
"thresholdsStyle": {
|
|
"mode": "off"
|
|
}
|
|
},
|
|
"thresholds": {
|
|
"mode": "absolute",
|
|
"steps": [
|
|
{
|
|
"color": "green",
|
|
"value": 0
|
|
},
|
|
{
|
|
"color": "red",
|
|
"value": 80
|
|
}
|
|
]
|
|
}
|
|
}
|
|
},
|
|
"gridPos": {
|
|
"h": 8,
|
|
"w": 12,
|
|
"x": 12,
|
|
"y": 48
|
|
},
|
|
"id": 120,
|
|
"options": {
|
|
"annotations": {
|
|
"clustering": -1,
|
|
"multiLane": false
|
|
},
|
|
"legend": {
|
|
"calcs": [],
|
|
"displayMode": "list",
|
|
"placement": "bottom",
|
|
"showLegend": true
|
|
},
|
|
"tooltip": {
|
|
"hideZeros": false,
|
|
"mode": "multi",
|
|
"sort": "desc"
|
|
}
|
|
},
|
|
"pluginVersion": "13.0.1+security-01",
|
|
"targets": [
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "merlin-mimir"
|
|
},
|
|
"editorMode": "code",
|
|
"expr": "slurm_user_jobs_pending{slurm_cluster=\"$slurm_cluster\",instance=~\".*01\"}",
|
|
"instant": false,
|
|
"legendFormat": "{{username}}",
|
|
"range": true,
|
|
"refId": "A"
|
|
}
|
|
],
|
|
"title": "Pending jobs per user",
|
|
"type": "timeseries"
|
|
},
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "merlin-mimir"
|
|
},
|
|
"fieldConfig": {
|
|
"defaults": {
|
|
"color": {
|
|
"mode": "palette-classic"
|
|
},
|
|
"custom": {
|
|
"axisBorderShow": false,
|
|
"axisCenteredZero": false,
|
|
"axisColorMode": "text",
|
|
"axisLabel": "",
|
|
"axisPlacement": "auto",
|
|
"barAlignment": 0,
|
|
"barWidthFactor": 0.6,
|
|
"drawStyle": "bars",
|
|
"fillOpacity": 0,
|
|
"gradientMode": "none",
|
|
"hideFrom": {
|
|
"legend": false,
|
|
"tooltip": false,
|
|
"viz": false
|
|
},
|
|
"insertNulls": false,
|
|
"lineInterpolation": "linear",
|
|
"lineWidth": 1,
|
|
"pointSize": 5,
|
|
"scaleDistribution": {
|
|
"type": "linear"
|
|
},
|
|
"showPoints": "auto",
|
|
"showValues": false,
|
|
"spanNulls": false,
|
|
"stacking": {
|
|
"group": "A",
|
|
"mode": "normal"
|
|
},
|
|
"thresholdsStyle": {
|
|
"mode": "off"
|
|
}
|
|
},
|
|
"thresholds": {
|
|
"mode": "absolute",
|
|
"steps": [
|
|
{
|
|
"color": "green",
|
|
"value": 0
|
|
},
|
|
{
|
|
"color": "red",
|
|
"value": 80
|
|
}
|
|
]
|
|
}
|
|
}
|
|
},
|
|
"gridPos": {
|
|
"h": 8,
|
|
"w": 12,
|
|
"x": 0,
|
|
"y": 56
|
|
},
|
|
"id": 122,
|
|
"options": {
|
|
"annotations": {
|
|
"clustering": -1,
|
|
"multiLane": false
|
|
},
|
|
"legend": {
|
|
"calcs": [],
|
|
"displayMode": "list",
|
|
"placement": "bottom",
|
|
"showLegend": true
|
|
},
|
|
"tooltip": {
|
|
"hideZeros": false,
|
|
"mode": "multi",
|
|
"sort": "desc"
|
|
}
|
|
},
|
|
"pluginVersion": "13.0.1+security-01",
|
|
"targets": [
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "merlin-mimir"
|
|
},
|
|
"editorMode": "code",
|
|
"expr": "slurm_user_jobs_cpus_alloc{slurm_cluster=\"$slurm_cluster\",instance=~\".*01\"} ",
|
|
"instant": false,
|
|
"legendFormat": "{{username}}",
|
|
"range": true,
|
|
"refId": "A"
|
|
}
|
|
],
|
|
"title": "CPUs allocated per user",
|
|
"type": "timeseries"
|
|
},
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "merlin-mimir"
|
|
},
|
|
"fieldConfig": {
|
|
"defaults": {
|
|
"color": {
|
|
"mode": "palette-classic"
|
|
},
|
|
"custom": {
|
|
"axisBorderShow": false,
|
|
"axisCenteredZero": false,
|
|
"axisColorMode": "text",
|
|
"axisLabel": "",
|
|
"axisPlacement": "auto",
|
|
"barAlignment": 0,
|
|
"barWidthFactor": 0.6,
|
|
"drawStyle": "bars",
|
|
"fillOpacity": 0,
|
|
"gradientMode": "none",
|
|
"hideFrom": {
|
|
"legend": false,
|
|
"tooltip": false,
|
|
"viz": false
|
|
},
|
|
"insertNulls": false,
|
|
"lineInterpolation": "linear",
|
|
"lineWidth": 1,
|
|
"pointSize": 5,
|
|
"scaleDistribution": {
|
|
"type": "linear"
|
|
},
|
|
"showPoints": "auto",
|
|
"showValues": false,
|
|
"spanNulls": false,
|
|
"stacking": {
|
|
"group": "A",
|
|
"mode": "normal"
|
|
},
|
|
"thresholdsStyle": {
|
|
"mode": "off"
|
|
}
|
|
},
|
|
"thresholds": {
|
|
"mode": "absolute",
|
|
"steps": [
|
|
{
|
|
"color": "green",
|
|
"value": 0
|
|
},
|
|
{
|
|
"color": "red",
|
|
"value": 80
|
|
}
|
|
]
|
|
},
|
|
"unit": "mbytes"
|
|
}
|
|
},
|
|
"gridPos": {
|
|
"h": 8,
|
|
"w": 12,
|
|
"x": 12,
|
|
"y": 56
|
|
},
|
|
"id": 119,
|
|
"options": {
|
|
"annotations": {
|
|
"clustering": -1,
|
|
"multiLane": false
|
|
},
|
|
"legend": {
|
|
"calcs": [],
|
|
"displayMode": "list",
|
|
"placement": "bottom",
|
|
"showLegend": true
|
|
},
|
|
"tooltip": {
|
|
"hideZeros": false,
|
|
"mode": "multi",
|
|
"sort": "desc"
|
|
}
|
|
},
|
|
"pluginVersion": "13.0.1+security-01",
|
|
"targets": [
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "merlin-mimir"
|
|
},
|
|
"editorMode": "code",
|
|
"expr": "slurm_user_jobs_memory_alloc{slurm_cluster=\"$slurm_cluster\",instance=~\".*01\"}",
|
|
"instant": false,
|
|
"legendFormat": "{{username}}",
|
|
"range": true,
|
|
"refId": "A"
|
|
}
|
|
],
|
|
"title": "Allocated memory per user",
|
|
"type": "timeseries"
|
|
},
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "merlin-mimir"
|
|
},
|
|
"fieldConfig": {
|
|
"defaults": {
|
|
"color": {
|
|
"mode": "palette-classic"
|
|
},
|
|
"custom": {
|
|
"axisBorderShow": false,
|
|
"axisCenteredZero": false,
|
|
"axisColorMode": "text",
|
|
"axisLabel": "",
|
|
"axisPlacement": "auto",
|
|
"barAlignment": 0,
|
|
"barWidthFactor": 0.6,
|
|
"drawStyle": "line",
|
|
"fillOpacity": 0,
|
|
"gradientMode": "none",
|
|
"hideFrom": {
|
|
"legend": false,
|
|
"tooltip": false,
|
|
"viz": false
|
|
},
|
|
"insertNulls": false,
|
|
"lineInterpolation": "linear",
|
|
"lineWidth": 1,
|
|
"pointSize": 5,
|
|
"scaleDistribution": {
|
|
"type": "linear"
|
|
},
|
|
"showPoints": "auto",
|
|
"showValues": false,
|
|
"spanNulls": false,
|
|
"stacking": {
|
|
"group": "A",
|
|
"mode": "none"
|
|
},
|
|
"thresholdsStyle": {
|
|
"mode": "off"
|
|
}
|
|
},
|
|
"thresholds": {
|
|
"mode": "absolute",
|
|
"steps": [
|
|
{
|
|
"color": "green",
|
|
"value": 0
|
|
},
|
|
{
|
|
"color": "red",
|
|
"value": 80
|
|
}
|
|
]
|
|
}
|
|
}
|
|
},
|
|
"gridPos": {
|
|
"h": 8,
|
|
"w": 12,
|
|
"x": 0,
|
|
"y": 64
|
|
},
|
|
"id": 117,
|
|
"options": {
|
|
"annotations": {
|
|
"clustering": -1,
|
|
"multiLane": false
|
|
},
|
|
"legend": {
|
|
"calcs": [],
|
|
"displayMode": "list",
|
|
"placement": "bottom",
|
|
"showLegend": true
|
|
},
|
|
"tooltip": {
|
|
"hideZeros": false,
|
|
"mode": "multi",
|
|
"sort": "desc"
|
|
}
|
|
},
|
|
"pluginVersion": "13.0.1+security-01",
|
|
"targets": [
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "merlin-mimir"
|
|
},
|
|
"editorMode": "code",
|
|
"expr": "slurm_user_jobs_cpus_alloc{slurm_cluster=\"$slurm_cluster\",instance=~\".*01\"} / slurm_user_jobs_running{slurm_cluster=\"$slurm_cluster\",instance=~\".*01\"}",
|
|
"instant": false,
|
|
"legendFormat": "{{username}}",
|
|
"range": true,
|
|
"refId": "A"
|
|
}
|
|
],
|
|
"title": "Users' avg CPUs per job",
|
|
"type": "timeseries"
|
|
},
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "merlin-mimir"
|
|
},
|
|
"fieldConfig": {
|
|
"defaults": {
|
|
"color": {
|
|
"mode": "palette-classic"
|
|
},
|
|
"custom": {
|
|
"axisBorderShow": false,
|
|
"axisCenteredZero": false,
|
|
"axisColorMode": "text",
|
|
"axisLabel": "",
|
|
"axisPlacement": "auto",
|
|
"barAlignment": 0,
|
|
"barWidthFactor": 0.6,
|
|
"drawStyle": "line",
|
|
"fillOpacity": 0,
|
|
"gradientMode": "none",
|
|
"hideFrom": {
|
|
"legend": false,
|
|
"tooltip": false,
|
|
"viz": false
|
|
},
|
|
"insertNulls": false,
|
|
"lineInterpolation": "linear",
|
|
"lineWidth": 1,
|
|
"pointSize": 5,
|
|
"scaleDistribution": {
|
|
"type": "linear"
|
|
},
|
|
"showPoints": "auto",
|
|
"showValues": false,
|
|
"spanNulls": false,
|
|
"stacking": {
|
|
"group": "A",
|
|
"mode": "none"
|
|
},
|
|
"thresholdsStyle": {
|
|
"mode": "off"
|
|
}
|
|
},
|
|
"thresholds": {
|
|
"mode": "absolute",
|
|
"steps": [
|
|
{
|
|
"color": "green",
|
|
"value": 0
|
|
},
|
|
{
|
|
"color": "red",
|
|
"value": 80
|
|
}
|
|
]
|
|
},
|
|
"unit": "mbytes"
|
|
}
|
|
},
|
|
"gridPos": {
|
|
"h": 8,
|
|
"w": 12,
|
|
"x": 12,
|
|
"y": 64
|
|
},
|
|
"id": 118,
|
|
"options": {
|
|
"annotations": {
|
|
"clustering": -1,
|
|
"multiLane": false
|
|
},
|
|
"legend": {
|
|
"calcs": [],
|
|
"displayMode": "list",
|
|
"placement": "bottom",
|
|
"showLegend": true
|
|
},
|
|
"tooltip": {
|
|
"hideZeros": false,
|
|
"mode": "multi",
|
|
"sort": "desc"
|
|
}
|
|
},
|
|
"pluginVersion": "13.0.1+security-01",
|
|
"targets": [
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "merlin-mimir"
|
|
},
|
|
"editorMode": "code",
|
|
"expr": "slurm_user_jobs_memory_alloc{slurm_cluster=\"$slurm_cluster\",instance=~\".*01\"} / slurm_user_jobs_running{slurm_cluster=\"$slurm_cluster\",instance=~\".*01\"}",
|
|
"instant": false,
|
|
"legendFormat": "{{username}}",
|
|
"range": true,
|
|
"refId": "A"
|
|
}
|
|
],
|
|
"title": "Users' avg. memory per Job",
|
|
"type": "timeseries"
|
|
},
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "merlin-mimir"
|
|
},
|
|
"fieldConfig": {
|
|
"defaults": {
|
|
"color": {
|
|
"mode": "palette-classic"
|
|
},
|
|
"custom": {
|
|
"axisBorderShow": false,
|
|
"axisCenteredZero": false,
|
|
"axisColorMode": "text",
|
|
"axisLabel": "",
|
|
"axisPlacement": "auto",
|
|
"barAlignment": 0,
|
|
"barWidthFactor": 0.6,
|
|
"drawStyle": "line",
|
|
"fillOpacity": 0,
|
|
"gradientMode": "none",
|
|
"hideFrom": {
|
|
"legend": false,
|
|
"tooltip": false,
|
|
"viz": false
|
|
},
|
|
"insertNulls": false,
|
|
"lineInterpolation": "linear",
|
|
"lineWidth": 1,
|
|
"pointSize": 5,
|
|
"scaleDistribution": {
|
|
"type": "linear"
|
|
},
|
|
"showPoints": "auto",
|
|
"showValues": false,
|
|
"spanNulls": false,
|
|
"stacking": {
|
|
"group": "A",
|
|
"mode": "none"
|
|
},
|
|
"thresholdsStyle": {
|
|
"mode": "off"
|
|
}
|
|
},
|
|
"thresholds": {
|
|
"mode": "absolute",
|
|
"steps": [
|
|
{
|
|
"color": "green",
|
|
"value": 0
|
|
},
|
|
{
|
|
"color": "red",
|
|
"value": 80
|
|
}
|
|
]
|
|
},
|
|
"unit": "mbytes"
|
|
}
|
|
},
|
|
"gridPos": {
|
|
"h": 8,
|
|
"w": 12,
|
|
"x": 12,
|
|
"y": 72
|
|
},
|
|
"id": 124,
|
|
"options": {
|
|
"annotations": {
|
|
"clustering": -1,
|
|
"multiLane": false
|
|
},
|
|
"legend": {
|
|
"calcs": [],
|
|
"displayMode": "list",
|
|
"placement": "bottom",
|
|
"showLegend": true
|
|
},
|
|
"tooltip": {
|
|
"hideZeros": false,
|
|
"mode": "multi",
|
|
"sort": "desc"
|
|
}
|
|
},
|
|
"pluginVersion": "13.0.1+security-01",
|
|
"targets": [
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "merlin-mimir"
|
|
},
|
|
"editorMode": "code",
|
|
"expr": "slurm_user_jobs_memory_alloc{slurm_cluster=\"$slurm_cluster\",instance=~\".*01\"} / slurm_user_jobs_cpus_alloc{slurm_cluster=\"$slurm_cluster\",instance=~\".*01\"}",
|
|
"instant": false,
|
|
"legendFormat": "{{username}}",
|
|
"range": true,
|
|
"refId": "A"
|
|
}
|
|
],
|
|
"title": "Users' avg. memory per alloc. CPU",
|
|
"type": "timeseries"
|
|
},
|
|
{
|
|
"collapsed": true,
|
|
"gridPos": {
|
|
"h": 1,
|
|
"w": 24,
|
|
"x": 0,
|
|
"y": 80
|
|
},
|
|
"id": 131,
|
|
"panels": [
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "merlin-mimir"
|
|
},
|
|
"fieldConfig": {
|
|
"defaults": {
|
|
"color": {
|
|
"mode": "palette-classic"
|
|
},
|
|
"custom": {
|
|
"axisBorderShow": false,
|
|
"axisCenteredZero": false,
|
|
"axisColorMode": "text",
|
|
"axisLabel": "",
|
|
"axisPlacement": "auto",
|
|
"barAlignment": 0,
|
|
"barWidthFactor": 0.6,
|
|
"drawStyle": "line",
|
|
"fillOpacity": 0,
|
|
"gradientMode": "none",
|
|
"hideFrom": {
|
|
"legend": false,
|
|
"tooltip": false,
|
|
"viz": false
|
|
},
|
|
"insertNulls": false,
|
|
"lineInterpolation": "linear",
|
|
"lineWidth": 2,
|
|
"pointSize": 5,
|
|
"scaleDistribution": {
|
|
"type": "linear"
|
|
},
|
|
"showPoints": "auto",
|
|
"showValues": false,
|
|
"spanNulls": false,
|
|
"stacking": {
|
|
"group": "A",
|
|
"mode": "none"
|
|
},
|
|
"thresholdsStyle": {
|
|
"mode": "off"
|
|
}
|
|
},
|
|
"thresholds": {
|
|
"mode": "absolute",
|
|
"steps": [
|
|
{
|
|
"color": "green",
|
|
"value": 0
|
|
},
|
|
{
|
|
"color": "red",
|
|
"value": 80
|
|
}
|
|
]
|
|
},
|
|
"unit": "short"
|
|
},
|
|
"overrides": [
|
|
{
|
|
"matcher": {
|
|
"id": "byName",
|
|
"options": "slurm_node_cpus"
|
|
},
|
|
"properties": [
|
|
{
|
|
"id": "custom.lineStyle",
|
|
"value": {
|
|
"dash": [
|
|
10,
|
|
10
|
|
],
|
|
"fill": "dash"
|
|
}
|
|
}
|
|
]
|
|
}
|
|
]
|
|
},
|
|
"gridPos": {
|
|
"h": 8,
|
|
"w": 12,
|
|
"x": 0,
|
|
"y": 81
|
|
},
|
|
"id": 20,
|
|
"options": {
|
|
"annotations": {
|
|
"clustering": -1,
|
|
"multiLane": false
|
|
},
|
|
"legend": {
|
|
"calcs": [],
|
|
"displayMode": "list",
|
|
"placement": "bottom",
|
|
"showLegend": true
|
|
},
|
|
"tooltip": {
|
|
"hideZeros": false,
|
|
"mode": "multi",
|
|
"sort": "none"
|
|
}
|
|
},
|
|
"pluginVersion": "13.0.0",
|
|
"targets": [
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "merlin-mimir"
|
|
},
|
|
"editorMode": "code",
|
|
"expr": "slurm_node_cpus_alloc{slurm_cluster=\"$slurm_cluster\",instance=~\".*01\"}",
|
|
"legendFormat": "{{node}} (alloc)",
|
|
"range": true,
|
|
"refId": "A"
|
|
},
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "merlin-mimir"
|
|
},
|
|
"editorMode": "code",
|
|
"expr": "slurm_node_cpus{slurm_cluster=\"$slurm_cluster\",instance=~\".*01\"}",
|
|
"legendFormat": "{{node}} (total)",
|
|
"range": true,
|
|
"refId": "B"
|
|
}
|
|
],
|
|
"title": "CPU Allocation by Node",
|
|
"type": "timeseries"
|
|
},
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "merlin-mimir"
|
|
},
|
|
"fieldConfig": {
|
|
"defaults": {
|
|
"color": {
|
|
"mode": "palette-classic"
|
|
},
|
|
"custom": {
|
|
"axisBorderShow": false,
|
|
"axisCenteredZero": false,
|
|
"axisColorMode": "text",
|
|
"axisLabel": "",
|
|
"axisPlacement": "auto",
|
|
"barAlignment": 0,
|
|
"barWidthFactor": 0.6,
|
|
"drawStyle": "line",
|
|
"fillOpacity": 0,
|
|
"gradientMode": "none",
|
|
"hideFrom": {
|
|
"legend": false,
|
|
"tooltip": false,
|
|
"viz": false
|
|
},
|
|
"insertNulls": false,
|
|
"lineInterpolation": "linear",
|
|
"lineWidth": 2,
|
|
"pointSize": 5,
|
|
"scaleDistribution": {
|
|
"type": "linear"
|
|
},
|
|
"showPoints": "auto",
|
|
"showValues": false,
|
|
"spanNulls": false,
|
|
"stacking": {
|
|
"group": "A",
|
|
"mode": "none"
|
|
},
|
|
"thresholdsStyle": {
|
|
"mode": "off"
|
|
}
|
|
},
|
|
"thresholds": {
|
|
"mode": "absolute",
|
|
"steps": [
|
|
{
|
|
"color": "green",
|
|
"value": 0
|
|
},
|
|
{
|
|
"color": "red",
|
|
"value": 80
|
|
}
|
|
]
|
|
},
|
|
"unit": "mbytes"
|
|
},
|
|
"overrides": [
|
|
{
|
|
"matcher": {
|
|
"id": "byName",
|
|
"options": "slurm_node_memory_bytes"
|
|
},
|
|
"properties": [
|
|
{
|
|
"id": "custom.lineStyle",
|
|
"value": {
|
|
"dash": [
|
|
10,
|
|
10
|
|
],
|
|
"fill": "dash"
|
|
}
|
|
}
|
|
]
|
|
}
|
|
]
|
|
},
|
|
"gridPos": {
|
|
"h": 8,
|
|
"w": 12,
|
|
"x": 12,
|
|
"y": 81
|
|
},
|
|
"id": 21,
|
|
"options": {
|
|
"annotations": {
|
|
"clustering": -1,
|
|
"multiLane": false
|
|
},
|
|
"legend": {
|
|
"calcs": [],
|
|
"displayMode": "list",
|
|
"placement": "bottom",
|
|
"showLegend": true
|
|
},
|
|
"tooltip": {
|
|
"hideZeros": false,
|
|
"mode": "multi",
|
|
"sort": "none"
|
|
}
|
|
},
|
|
"pluginVersion": "13.0.0",
|
|
"targets": [
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "merlin-mimir"
|
|
},
|
|
"editorMode": "code",
|
|
"expr": "slurm_node_memory_alloc_bytes{slurm_cluster=\"$slurm_cluster\",instance=~\".*01\"}",
|
|
"legendFormat": "{{node}} (alloc)",
|
|
"range": true,
|
|
"refId": "A"
|
|
},
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "merlin-mimir"
|
|
},
|
|
"editorMode": "code",
|
|
"expr": "slurm_node_memory_bytes{slurm_cluster=\"$slurm_cluster\",instance=~\".*01\"}",
|
|
"legendFormat": "{{node}} (total)",
|
|
"range": true,
|
|
"refId": "B"
|
|
}
|
|
],
|
|
"title": "Memory Allocation by Node",
|
|
"type": "timeseries"
|
|
},
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "merlin-mimir"
|
|
},
|
|
"fieldConfig": {
|
|
"defaults": {
|
|
"color": {
|
|
"mode": "palette-classic"
|
|
},
|
|
"custom": {
|
|
"axisBorderShow": false,
|
|
"axisCenteredZero": false,
|
|
"axisColorMode": "text",
|
|
"axisLabel": "",
|
|
"axisPlacement": "auto",
|
|
"barAlignment": 0,
|
|
"barWidthFactor": 0.6,
|
|
"drawStyle": "line",
|
|
"fillOpacity": 20,
|
|
"gradientMode": "none",
|
|
"hideFrom": {
|
|
"legend": false,
|
|
"tooltip": false,
|
|
"viz": false
|
|
},
|
|
"insertNulls": false,
|
|
"lineInterpolation": "linear",
|
|
"lineWidth": 2,
|
|
"pointSize": 5,
|
|
"scaleDistribution": {
|
|
"type": "linear"
|
|
},
|
|
"showPoints": "auto",
|
|
"showValues": false,
|
|
"spanNulls": false,
|
|
"stacking": {
|
|
"group": "A",
|
|
"mode": "none"
|
|
},
|
|
"thresholdsStyle": {
|
|
"mode": "off"
|
|
}
|
|
},
|
|
"max": 1,
|
|
"min": 0,
|
|
"thresholds": {
|
|
"mode": "absolute",
|
|
"steps": [
|
|
{
|
|
"color": "green",
|
|
"value": 0
|
|
},
|
|
{
|
|
"color": "yellow",
|
|
"value": 0.7
|
|
},
|
|
{
|
|
"color": "red",
|
|
"value": 0.9
|
|
}
|
|
]
|
|
},
|
|
"unit": "percentunit"
|
|
}
|
|
},
|
|
"gridPos": {
|
|
"h": 8,
|
|
"w": 12,
|
|
"x": 0,
|
|
"y": 89
|
|
},
|
|
"id": 22,
|
|
"options": {
|
|
"annotations": {
|
|
"clustering": -1,
|
|
"multiLane": false
|
|
},
|
|
"legend": {
|
|
"calcs": [],
|
|
"displayMode": "list",
|
|
"placement": "bottom",
|
|
"showLegend": true
|
|
},
|
|
"tooltip": {
|
|
"hideZeros": false,
|
|
"mode": "multi",
|
|
"sort": "none"
|
|
}
|
|
},
|
|
"pluginVersion": "13.0.0",
|
|
"targets": [
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "merlin-mimir"
|
|
},
|
|
"editorMode": "code",
|
|
"expr": "slurm_node_cpus_alloc{slurm_cluster=\"$slurm_cluster\",instance=~\".*01\"} / slurm_node_cpus{slurm_cluster=\"$slurm_cluster\",instance=~\".*01\"}",
|
|
"legendFormat": "{{node}}",
|
|
"range": true,
|
|
"refId": "A"
|
|
}
|
|
],
|
|
"title": "CPU Utilization by Node",
|
|
"type": "timeseries"
|
|
},
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "merlin-mimir"
|
|
},
|
|
"fieldConfig": {
|
|
"defaults": {
|
|
"color": {
|
|
"mode": "palette-classic"
|
|
},
|
|
"custom": {
|
|
"axisBorderShow": false,
|
|
"axisCenteredZero": false,
|
|
"axisColorMode": "text",
|
|
"axisLabel": "",
|
|
"axisPlacement": "auto",
|
|
"barAlignment": 0,
|
|
"barWidthFactor": 0.6,
|
|
"drawStyle": "line",
|
|
"fillOpacity": 20,
|
|
"gradientMode": "none",
|
|
"hideFrom": {
|
|
"legend": false,
|
|
"tooltip": false,
|
|
"viz": false
|
|
},
|
|
"insertNulls": false,
|
|
"lineInterpolation": "linear",
|
|
"lineWidth": 2,
|
|
"pointSize": 5,
|
|
"scaleDistribution": {
|
|
"type": "linear"
|
|
},
|
|
"showPoints": "auto",
|
|
"showValues": false,
|
|
"spanNulls": false,
|
|
"stacking": {
|
|
"group": "A",
|
|
"mode": "none"
|
|
},
|
|
"thresholdsStyle": {
|
|
"mode": "off"
|
|
}
|
|
},
|
|
"max": 1,
|
|
"min": 0,
|
|
"thresholds": {
|
|
"mode": "absolute",
|
|
"steps": [
|
|
{
|
|
"color": "green",
|
|
"value": 0
|
|
},
|
|
{
|
|
"color": "yellow",
|
|
"value": 0.7
|
|
},
|
|
{
|
|
"color": "red",
|
|
"value": 0.9
|
|
}
|
|
]
|
|
},
|
|
"unit": "percentunit"
|
|
}
|
|
},
|
|
"gridPos": {
|
|
"h": 8,
|
|
"w": 12,
|
|
"x": 12,
|
|
"y": 89
|
|
},
|
|
"id": 23,
|
|
"options": {
|
|
"annotations": {
|
|
"clustering": -1,
|
|
"multiLane": false
|
|
},
|
|
"legend": {
|
|
"calcs": [],
|
|
"displayMode": "list",
|
|
"placement": "bottom",
|
|
"showLegend": true
|
|
},
|
|
"tooltip": {
|
|
"hideZeros": false,
|
|
"mode": "multi",
|
|
"sort": "none"
|
|
}
|
|
},
|
|
"pluginVersion": "13.0.0",
|
|
"targets": [
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "merlin-mimir"
|
|
},
|
|
"editorMode": "code",
|
|
"expr": "slurm_node_memory_alloc_bytes{slurm_cluster=\"$slurm_cluster\",instance=~\".*01\"} / slurm_node_memory_bytes{slurm_cluster=\"$slurm_cluster\",instance=~\".*01\"}",
|
|
"legendFormat": "{{node}}",
|
|
"range": true,
|
|
"refId": "A"
|
|
}
|
|
],
|
|
"title": "Memory Utilization by Node",
|
|
"type": "timeseries"
|
|
}
|
|
],
|
|
"title": "Per-Node Resources",
|
|
"type": "row"
|
|
},
|
|
{
|
|
"collapsed": false,
|
|
"gridPos": {
|
|
"h": 1,
|
|
"w": 24,
|
|
"x": 0,
|
|
"y": 81
|
|
},
|
|
"id": 132,
|
|
"title": "Per-Partition Status",
|
|
"type": "row"
|
|
},
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "merlin-mimir"
|
|
},
|
|
"fieldConfig": {
|
|
"defaults": {
|
|
"color": {
|
|
"mode": "palette-classic"
|
|
},
|
|
"custom": {
|
|
"axisBorderShow": false,
|
|
"axisCenteredZero": false,
|
|
"axisColorMode": "text",
|
|
"axisLabel": "",
|
|
"axisPlacement": "auto",
|
|
"barAlignment": 0,
|
|
"barWidthFactor": 0.6,
|
|
"drawStyle": "line",
|
|
"fillOpacity": 30,
|
|
"gradientMode": "none",
|
|
"hideFrom": {
|
|
"legend": false,
|
|
"tooltip": false,
|
|
"viz": false
|
|
},
|
|
"insertNulls": false,
|
|
"lineInterpolation": "linear",
|
|
"lineWidth": 2,
|
|
"pointSize": 5,
|
|
"scaleDistribution": {
|
|
"type": "linear"
|
|
},
|
|
"showPoints": "auto",
|
|
"showValues": false,
|
|
"spanNulls": false,
|
|
"stacking": {
|
|
"group": "A",
|
|
"mode": "normal"
|
|
},
|
|
"thresholdsStyle": {
|
|
"mode": "off"
|
|
}
|
|
},
|
|
"thresholds": {
|
|
"mode": "absolute",
|
|
"steps": [
|
|
{
|
|
"color": "green",
|
|
"value": 0
|
|
},
|
|
{
|
|
"color": "red",
|
|
"value": 80
|
|
}
|
|
]
|
|
}
|
|
}
|
|
},
|
|
"gridPos": {
|
|
"h": 8,
|
|
"w": 8,
|
|
"x": 0,
|
|
"y": 82
|
|
},
|
|
"id": 30,
|
|
"options": {
|
|
"annotations": {
|
|
"clustering": -1,
|
|
"multiLane": false
|
|
},
|
|
"legend": {
|
|
"calcs": [],
|
|
"displayMode": "list",
|
|
"placement": "bottom",
|
|
"showLegend": true
|
|
},
|
|
"tooltip": {
|
|
"hideZeros": false,
|
|
"mode": "multi",
|
|
"sort": "desc"
|
|
}
|
|
},
|
|
"pluginVersion": "13.0.0",
|
|
"targets": [
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "merlin-mimir"
|
|
},
|
|
"editorMode": "code",
|
|
"expr": "slurm_partition_jobs_running{slurm_cluster=\"$slurm_cluster\",instance=~\".*01\"}",
|
|
"legendFormat": "{{partition}}",
|
|
"range": true,
|
|
"refId": "A"
|
|
}
|
|
],
|
|
"title": "Running Jobs by Partition",
|
|
"type": "timeseries"
|
|
},
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "merlin-mimir"
|
|
},
|
|
"fieldConfig": {
|
|
"defaults": {
|
|
"color": {
|
|
"mode": "palette-classic"
|
|
},
|
|
"custom": {
|
|
"axisBorderShow": false,
|
|
"axisCenteredZero": false,
|
|
"axisColorMode": "text",
|
|
"axisLabel": "",
|
|
"axisPlacement": "auto",
|
|
"barAlignment": 0,
|
|
"barWidthFactor": 0.6,
|
|
"drawStyle": "line",
|
|
"fillOpacity": 30,
|
|
"gradientMode": "none",
|
|
"hideFrom": {
|
|
"legend": false,
|
|
"tooltip": false,
|
|
"viz": false
|
|
},
|
|
"insertNulls": false,
|
|
"lineInterpolation": "linear",
|
|
"lineWidth": 2,
|
|
"pointSize": 5,
|
|
"scaleDistribution": {
|
|
"type": "linear"
|
|
},
|
|
"showPoints": "auto",
|
|
"showValues": false,
|
|
"spanNulls": false,
|
|
"stacking": {
|
|
"group": "A",
|
|
"mode": "normal"
|
|
},
|
|
"thresholdsStyle": {
|
|
"mode": "off"
|
|
}
|
|
},
|
|
"thresholds": {
|
|
"mode": "absolute",
|
|
"steps": [
|
|
{
|
|
"color": "green",
|
|
"value": 0
|
|
},
|
|
{
|
|
"color": "red",
|
|
"value": 80
|
|
}
|
|
]
|
|
}
|
|
}
|
|
},
|
|
"gridPos": {
|
|
"h": 8,
|
|
"w": 8,
|
|
"x": 8,
|
|
"y": 82
|
|
},
|
|
"id": 31,
|
|
"options": {
|
|
"annotations": {
|
|
"clustering": -1,
|
|
"multiLane": false
|
|
},
|
|
"legend": {
|
|
"calcs": [],
|
|
"displayMode": "list",
|
|
"placement": "bottom",
|
|
"showLegend": true
|
|
},
|
|
"tooltip": {
|
|
"hideZeros": false,
|
|
"mode": "multi",
|
|
"sort": "desc"
|
|
}
|
|
},
|
|
"pluginVersion": "13.0.0",
|
|
"targets": [
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "merlin-mimir"
|
|
},
|
|
"editorMode": "code",
|
|
"expr": "slurm_partition_jobs_pending{slurm_cluster=\"$slurm_cluster\",instance=~\".*01\"}",
|
|
"legendFormat": "{{partition}}",
|
|
"range": true,
|
|
"refId": "A"
|
|
}
|
|
],
|
|
"title": "Pending Jobs by Partition",
|
|
"type": "timeseries"
|
|
},
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "merlin-mimir"
|
|
},
|
|
"fieldConfig": {
|
|
"defaults": {
|
|
"color": {
|
|
"mode": "palette-classic"
|
|
},
|
|
"custom": {
|
|
"axisBorderShow": false,
|
|
"axisCenteredZero": false,
|
|
"axisColorMode": "text",
|
|
"axisLabel": "",
|
|
"axisPlacement": "auto",
|
|
"barAlignment": 0,
|
|
"barWidthFactor": 0.6,
|
|
"drawStyle": "line",
|
|
"fillOpacity": 30,
|
|
"gradientMode": "none",
|
|
"hideFrom": {
|
|
"legend": false,
|
|
"tooltip": false,
|
|
"viz": false
|
|
},
|
|
"insertNulls": false,
|
|
"lineInterpolation": "linear",
|
|
"lineWidth": 2,
|
|
"pointSize": 5,
|
|
"scaleDistribution": {
|
|
"type": "linear"
|
|
},
|
|
"showPoints": "auto",
|
|
"showValues": false,
|
|
"spanNulls": false,
|
|
"stacking": {
|
|
"group": "A",
|
|
"mode": "normal"
|
|
},
|
|
"thresholdsStyle": {
|
|
"mode": "off"
|
|
}
|
|
},
|
|
"thresholds": {
|
|
"mode": "absolute",
|
|
"steps": [
|
|
{
|
|
"color": "green",
|
|
"value": 0
|
|
},
|
|
{
|
|
"color": "red",
|
|
"value": 80
|
|
}
|
|
]
|
|
},
|
|
"unit": "short"
|
|
}
|
|
},
|
|
"gridPos": {
|
|
"h": 8,
|
|
"w": 8,
|
|
"x": 16,
|
|
"y": 82
|
|
},
|
|
"id": 32,
|
|
"options": {
|
|
"annotations": {
|
|
"clustering": -1,
|
|
"multiLane": false
|
|
},
|
|
"legend": {
|
|
"calcs": [],
|
|
"displayMode": "list",
|
|
"placement": "bottom",
|
|
"showLegend": true
|
|
},
|
|
"tooltip": {
|
|
"hideZeros": false,
|
|
"mode": "multi",
|
|
"sort": "desc"
|
|
}
|
|
},
|
|
"pluginVersion": "13.0.0",
|
|
"targets": [
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "merlin-mimir"
|
|
},
|
|
"editorMode": "code",
|
|
"expr": "slurm_partition_jobs_cpus_alloc{slurm_cluster=\"$slurm_cluster\",instance=~\".*01\"}",
|
|
"legendFormat": "{{partition}}",
|
|
"range": true,
|
|
"refId": "A"
|
|
}
|
|
],
|
|
"title": "CPU Allocation by Partition",
|
|
"type": "timeseries"
|
|
},
|
|
{
|
|
"collapsed": false,
|
|
"gridPos": {
|
|
"h": 1,
|
|
"w": 24,
|
|
"x": 0,
|
|
"y": 90
|
|
},
|
|
"id": 133,
|
|
"title": "Scheduler Performance",
|
|
"type": "row"
|
|
},
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "merlin-mimir"
|
|
},
|
|
"fieldConfig": {
|
|
"defaults": {
|
|
"color": {
|
|
"mode": "palette-classic"
|
|
},
|
|
"custom": {
|
|
"axisBorderShow": false,
|
|
"axisCenteredZero": false,
|
|
"axisColorMode": "text",
|
|
"axisLabel": "",
|
|
"axisPlacement": "auto",
|
|
"barAlignment": 0,
|
|
"barWidthFactor": 0.6,
|
|
"drawStyle": "line",
|
|
"fillOpacity": 10,
|
|
"gradientMode": "none",
|
|
"hideFrom": {
|
|
"legend": false,
|
|
"tooltip": false,
|
|
"viz": false
|
|
},
|
|
"insertNulls": false,
|
|
"lineInterpolation": "linear",
|
|
"lineWidth": 2,
|
|
"pointSize": 5,
|
|
"scaleDistribution": {
|
|
"type": "linear"
|
|
},
|
|
"showPoints": "auto",
|
|
"showValues": false,
|
|
"spanNulls": false,
|
|
"stacking": {
|
|
"group": "A",
|
|
"mode": "none"
|
|
},
|
|
"thresholdsStyle": {
|
|
"mode": "off"
|
|
}
|
|
},
|
|
"thresholds": {
|
|
"mode": "absolute",
|
|
"steps": [
|
|
{
|
|
"color": "green",
|
|
"value": 0
|
|
},
|
|
{
|
|
"color": "red",
|
|
"value": 80
|
|
}
|
|
]
|
|
},
|
|
"unit": "µs"
|
|
}
|
|
},
|
|
"gridPos": {
|
|
"h": 8,
|
|
"w": 8,
|
|
"x": 0,
|
|
"y": 91
|
|
},
|
|
"id": 50,
|
|
"options": {
|
|
"annotations": {
|
|
"clustering": -1,
|
|
"multiLane": false
|
|
},
|
|
"legend": {
|
|
"calcs": [],
|
|
"displayMode": "list",
|
|
"placement": "bottom",
|
|
"showLegend": true
|
|
},
|
|
"tooltip": {
|
|
"hideZeros": false,
|
|
"mode": "multi",
|
|
"sort": "none"
|
|
}
|
|
},
|
|
"pluginVersion": "13.0.0",
|
|
"targets": [
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "merlin-mimir"
|
|
},
|
|
"editorMode": "code",
|
|
"expr": "slurm_sched_mean_cycle{slurm_cluster=\"$slurm_cluster\",instance=~\".*01\"}",
|
|
"legendFormat": "main scheduler (mean)",
|
|
"range": true,
|
|
"refId": "A"
|
|
},
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "merlin-mimir"
|
|
},
|
|
"editorMode": "code",
|
|
"expr": "slurm_schedule_cycle_last{slurm_cluster=\"$slurm_cluster\",instance=~\".*01\"}",
|
|
"legendFormat": "main scheduler (last)",
|
|
"range": true,
|
|
"refId": "B"
|
|
},
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "merlin-mimir"
|
|
},
|
|
"editorMode": "code",
|
|
"expr": "slurm_bf_mean_cycle{slurm_cluster=\"$slurm_cluster\",instance=~\".*01\"}",
|
|
"legendFormat": "backfill (mean)",
|
|
"range": true,
|
|
"refId": "C"
|
|
},
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "merlin-mimir"
|
|
},
|
|
"editorMode": "code",
|
|
"expr": "slurm_bf_cycle_last{slurm_cluster=\"$slurm_cluster\",instance=~\".*01\"}",
|
|
"legendFormat": "backfill (last)",
|
|
"range": true,
|
|
"refId": "D"
|
|
}
|
|
],
|
|
"title": "Scheduler Cycle Time",
|
|
"type": "timeseries"
|
|
},
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "merlin-mimir"
|
|
},
|
|
"fieldConfig": {
|
|
"defaults": {
|
|
"color": {
|
|
"mode": "palette-classic"
|
|
},
|
|
"custom": {
|
|
"axisBorderShow": false,
|
|
"axisCenteredZero": false,
|
|
"axisColorMode": "text",
|
|
"axisLabel": "",
|
|
"axisPlacement": "auto",
|
|
"barAlignment": 0,
|
|
"barWidthFactor": 0.6,
|
|
"drawStyle": "line",
|
|
"fillOpacity": 10,
|
|
"gradientMode": "none",
|
|
"hideFrom": {
|
|
"legend": false,
|
|
"tooltip": false,
|
|
"viz": false
|
|
},
|
|
"insertNulls": false,
|
|
"lineInterpolation": "linear",
|
|
"lineWidth": 2,
|
|
"pointSize": 5,
|
|
"scaleDistribution": {
|
|
"type": "linear"
|
|
},
|
|
"showPoints": "auto",
|
|
"showValues": false,
|
|
"spanNulls": false,
|
|
"stacking": {
|
|
"group": "A",
|
|
"mode": "none"
|
|
},
|
|
"thresholdsStyle": {
|
|
"mode": "off"
|
|
}
|
|
},
|
|
"thresholds": {
|
|
"mode": "absolute",
|
|
"steps": [
|
|
{
|
|
"color": "green",
|
|
"value": 0
|
|
},
|
|
{
|
|
"color": "red",
|
|
"value": 80
|
|
}
|
|
]
|
|
}
|
|
}
|
|
},
|
|
"gridPos": {
|
|
"h": 8,
|
|
"w": 8,
|
|
"x": 8,
|
|
"y": 91
|
|
},
|
|
"id": 51,
|
|
"options": {
|
|
"annotations": {
|
|
"clustering": -1,
|
|
"multiLane": false
|
|
},
|
|
"legend": {
|
|
"calcs": [],
|
|
"displayMode": "list",
|
|
"placement": "bottom",
|
|
"showLegend": true
|
|
},
|
|
"tooltip": {
|
|
"hideZeros": false,
|
|
"mode": "multi",
|
|
"sort": "none"
|
|
}
|
|
},
|
|
"pluginVersion": "13.0.0",
|
|
"targets": [
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "merlin-mimir"
|
|
},
|
|
"editorMode": "code",
|
|
"expr": "slurm_schedule_queue_len{slurm_cluster=\"$slurm_cluster\",instance=~\".*01\"}",
|
|
"legendFormat": "scheduler queue",
|
|
"range": true,
|
|
"refId": "A"
|
|
},
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "merlin-mimir"
|
|
},
|
|
"editorMode": "code",
|
|
"expr": "slurm_bf_queue_len{slurm_cluster=\"$slurm_cluster\",instance=~\".*01\"}",
|
|
"legendFormat": "backfill queue",
|
|
"range": true,
|
|
"refId": "B"
|
|
},
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "merlin-mimir"
|
|
},
|
|
"editorMode": "code",
|
|
"expr": "slurm_slurmdbd_queue_size{slurm_cluster=\"$slurm_cluster\",instance=~\".*01\"}",
|
|
"legendFormat": "slurmdbd queue",
|
|
"range": true,
|
|
"refId": "C"
|
|
},
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "merlin-mimir"
|
|
},
|
|
"editorMode": "code",
|
|
"expr": "slurm_agent_queue_size{slurm_cluster=\"$slurm_cluster\",instance=~\".*01\"}",
|
|
"legendFormat": "agent queue",
|
|
"range": true,
|
|
"refId": "D"
|
|
}
|
|
],
|
|
"title": "Queue Lengths",
|
|
"type": "timeseries"
|
|
},
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "merlin-mimir"
|
|
},
|
|
"fieldConfig": {
|
|
"defaults": {
|
|
"color": {
|
|
"mode": "palette-classic"
|
|
},
|
|
"custom": {
|
|
"axisBorderShow": false,
|
|
"axisCenteredZero": false,
|
|
"axisColorMode": "text",
|
|
"axisLabel": "",
|
|
"axisPlacement": "auto",
|
|
"barAlignment": 0,
|
|
"barWidthFactor": 0.6,
|
|
"drawStyle": "line",
|
|
"fillOpacity": 10,
|
|
"gradientMode": "none",
|
|
"hideFrom": {
|
|
"legend": false,
|
|
"tooltip": false,
|
|
"viz": false
|
|
},
|
|
"insertNulls": false,
|
|
"lineInterpolation": "linear",
|
|
"lineWidth": 2,
|
|
"pointSize": 5,
|
|
"scaleDistribution": {
|
|
"type": "linear"
|
|
},
|
|
"showPoints": "auto",
|
|
"showValues": false,
|
|
"spanNulls": false,
|
|
"stacking": {
|
|
"group": "A",
|
|
"mode": "none"
|
|
},
|
|
"thresholdsStyle": {
|
|
"mode": "off"
|
|
}
|
|
},
|
|
"thresholds": {
|
|
"mode": "absolute",
|
|
"steps": [
|
|
{
|
|
"color": "green",
|
|
"value": 0
|
|
},
|
|
{
|
|
"color": "red",
|
|
"value": 80
|
|
}
|
|
]
|
|
}
|
|
}
|
|
},
|
|
"gridPos": {
|
|
"h": 8,
|
|
"w": 8,
|
|
"x": 16,
|
|
"y": 91
|
|
},
|
|
"id": 52,
|
|
"options": {
|
|
"annotations": {
|
|
"clustering": -1,
|
|
"multiLane": false
|
|
},
|
|
"legend": {
|
|
"calcs": [],
|
|
"displayMode": "list",
|
|
"placement": "bottom",
|
|
"showLegend": true
|
|
},
|
|
"tooltip": {
|
|
"hideZeros": false,
|
|
"mode": "multi",
|
|
"sort": "none"
|
|
}
|
|
},
|
|
"pluginVersion": "13.0.0",
|
|
"targets": [
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "merlin-mimir"
|
|
},
|
|
"editorMode": "code",
|
|
"expr": "rate(slurm_backfilled_jobs{slurm_cluster=\"$slurm_cluster\",instance=~\".*01\"}[5m])*60",
|
|
"legendFormat": "backfilled jobs/min",
|
|
"range": true,
|
|
"refId": "A"
|
|
},
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "merlin-mimir"
|
|
},
|
|
"editorMode": "code",
|
|
"expr": "rate(slurm_bf_cycle_cnt{slurm_cluster=\"$slurm_cluster\",instance=~\".*01\"}[5m])*60",
|
|
"legendFormat": "bf cycles/min",
|
|
"range": true,
|
|
"refId": "B"
|
|
},
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "merlin-mimir"
|
|
},
|
|
"editorMode": "code",
|
|
"expr": "slurm_bf_depth_mean{slurm_cluster=\"$slurm_cluster\",instance=~\".*01\"}",
|
|
"legendFormat": "bf depth (mean)",
|
|
"range": true,
|
|
"refId": "C"
|
|
},
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "merlin-mimir"
|
|
},
|
|
"editorMode": "code",
|
|
"expr": "slurm_bf_table_size{slurm_cluster=\"$slurm_cluster\",instance=~\".*01\"}",
|
|
"legendFormat": "bf table size",
|
|
"range": true,
|
|
"refId": "D"
|
|
}
|
|
],
|
|
"title": "Backfill Statistics",
|
|
"type": "timeseries"
|
|
},
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "merlin-mimir"
|
|
},
|
|
"fieldConfig": {
|
|
"defaults": {
|
|
"color": {
|
|
"mode": "palette-classic"
|
|
},
|
|
"custom": {
|
|
"axisBorderShow": false,
|
|
"axisCenteredZero": false,
|
|
"axisColorMode": "text",
|
|
"axisLabel": "",
|
|
"axisPlacement": "auto",
|
|
"barAlignment": 0,
|
|
"barWidthFactor": 0.6,
|
|
"drawStyle": "line",
|
|
"fillOpacity": 10,
|
|
"gradientMode": "none",
|
|
"hideFrom": {
|
|
"legend": false,
|
|
"tooltip": false,
|
|
"viz": false
|
|
},
|
|
"insertNulls": false,
|
|
"lineInterpolation": "linear",
|
|
"lineWidth": 2,
|
|
"pointSize": 5,
|
|
"scaleDistribution": {
|
|
"type": "linear"
|
|
},
|
|
"showPoints": "auto",
|
|
"showValues": false,
|
|
"spanNulls": false,
|
|
"stacking": {
|
|
"group": "A",
|
|
"mode": "none"
|
|
},
|
|
"thresholdsStyle": {
|
|
"mode": "off"
|
|
}
|
|
},
|
|
"thresholds": {
|
|
"mode": "absolute",
|
|
"steps": [
|
|
{
|
|
"color": "green",
|
|
"value": 0
|
|
},
|
|
{
|
|
"color": "red",
|
|
"value": 80
|
|
}
|
|
]
|
|
},
|
|
"unit": "µs"
|
|
}
|
|
},
|
|
"gridPos": {
|
|
"h": 8,
|
|
"w": 12,
|
|
"x": 0,
|
|
"y": 99
|
|
},
|
|
"id": 53,
|
|
"options": {
|
|
"annotations": {
|
|
"clustering": -1,
|
|
"multiLane": false
|
|
},
|
|
"legend": {
|
|
"calcs": [],
|
|
"displayMode": "list",
|
|
"placement": "bottom",
|
|
"showLegend": true
|
|
},
|
|
"tooltip": {
|
|
"hideZeros": false,
|
|
"mode": "multi",
|
|
"sort": "none"
|
|
}
|
|
},
|
|
"pluginVersion": "13.0.0",
|
|
"targets": [
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "merlin-mimir"
|
|
},
|
|
"editorMode": "code",
|
|
"expr": "slurm_sdiag_latency{slurm_cluster=\"$slurm_cluster\",instance=~\".*01\"}",
|
|
"legendFormat": "sdiag latency",
|
|
"range": true,
|
|
"refId": "A"
|
|
}
|
|
],
|
|
"title": "RPC Latency",
|
|
"type": "timeseries"
|
|
},
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "merlin-mimir"
|
|
},
|
|
"fieldConfig": {
|
|
"defaults": {
|
|
"color": {
|
|
"mode": "palette-classic"
|
|
},
|
|
"custom": {
|
|
"axisBorderShow": false,
|
|
"axisCenteredZero": false,
|
|
"axisColorMode": "text",
|
|
"axisLabel": "",
|
|
"axisPlacement": "auto",
|
|
"barAlignment": 0,
|
|
"barWidthFactor": 0.6,
|
|
"drawStyle": "line",
|
|
"fillOpacity": 10,
|
|
"gradientMode": "none",
|
|
"hideFrom": {
|
|
"legend": false,
|
|
"tooltip": false,
|
|
"viz": false
|
|
},
|
|
"insertNulls": false,
|
|
"lineInterpolation": "linear",
|
|
"lineWidth": 2,
|
|
"pointSize": 5,
|
|
"scaleDistribution": {
|
|
"type": "linear"
|
|
},
|
|
"showPoints": "auto",
|
|
"showValues": false,
|
|
"spanNulls": false,
|
|
"stacking": {
|
|
"group": "A",
|
|
"mode": "none"
|
|
},
|
|
"thresholdsStyle": {
|
|
"mode": "off"
|
|
}
|
|
},
|
|
"thresholds": {
|
|
"mode": "absolute",
|
|
"steps": [
|
|
{
|
|
"color": "green",
|
|
"value": 0
|
|
},
|
|
{
|
|
"color": "red",
|
|
"value": 80
|
|
}
|
|
]
|
|
}
|
|
}
|
|
},
|
|
"gridPos": {
|
|
"h": 8,
|
|
"w": 12,
|
|
"x": 12,
|
|
"y": 99
|
|
},
|
|
"id": 54,
|
|
"options": {
|
|
"annotations": {
|
|
"clustering": -1,
|
|
"multiLane": false
|
|
},
|
|
"legend": {
|
|
"calcs": [],
|
|
"displayMode": "list",
|
|
"placement": "bottom",
|
|
"showLegend": true
|
|
},
|
|
"tooltip": {
|
|
"hideZeros": false,
|
|
"mode": "multi",
|
|
"sort": "none"
|
|
}
|
|
},
|
|
"pluginVersion": "13.0.0",
|
|
"targets": [
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "merlin-mimir"
|
|
},
|
|
"editorMode": "code",
|
|
"expr": "slurm_server_thread_cnt{slurm_cluster=\"$slurm_cluster\",instance=~\".*01\"}",
|
|
"legendFormat": "slurmctld threads",
|
|
"range": true,
|
|
"refId": "A"
|
|
},
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "merlin-mimir"
|
|
},
|
|
"editorMode": "code",
|
|
"expr": "slurm_agent_thread_cnt{instance=\"merlin7-slurmctld01\"}",
|
|
"legendFormat": "agent threads",
|
|
"range": true,
|
|
"refId": "B"
|
|
},
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "merlin-mimir"
|
|
},
|
|
"editorMode": "code",
|
|
"expr": "slurm_agent_cnt{instance=\"merlin7-slurmctld01\"}",
|
|
"legendFormat": "agents",
|
|
"range": true,
|
|
"refId": "C"
|
|
}
|
|
],
|
|
"title": "Thread Count",
|
|
"type": "timeseries"
|
|
}
|
|
],
|
|
"preload": false,
|
|
"refresh": "",
|
|
"schemaVersion": 42,
|
|
"templating": {
|
|
"list": [
|
|
{
|
|
"allowCustomValue": false,
|
|
"current": {
|
|
"text": "merlin7",
|
|
"value": "merlin7"
|
|
},
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "merlin-mimir"
|
|
},
|
|
"definition": "label_values(slurm_node_cpus,cluster)",
|
|
"hide": 0,
|
|
"includeAll": false,
|
|
"label": "",
|
|
"multi": false,
|
|
"name": "slurm_cluster",
|
|
"options": [],
|
|
"query": {
|
|
"qryType": 1,
|
|
"query": "label_values(slurm_node_cpus,slurm_cluster)",
|
|
"refId": "PrometheusVariableQueryEditor-VariableQuery"
|
|
},
|
|
"refresh": 1,
|
|
"regex": "",
|
|
"regexApplyTo": "value",
|
|
"skipUrlSync": false,
|
|
"sort": 0,
|
|
"type": "query"
|
|
}
|
|
]
|
|
},
|
|
"time": {
|
|
"from": "now-6h",
|
|
"to": "now"
|
|
},
|
|
"timepicker": {
|
|
"refresh_intervals": [
|
|
"5s",
|
|
"10s",
|
|
"30s",
|
|
"1m",
|
|
"5m",
|
|
"15m",
|
|
"30m",
|
|
"1h",
|
|
"2h",
|
|
"1d"
|
|
]
|
|
},
|
|
"timezone": "browser",
|
|
"title": "Merlin7 Slurm Native OpenMetrics",
|
|
"uid": "de7rpsq1merlin7slurm1",
|
|
"version": 13
|
|
}
|