2144 lines
53 KiB
JSON
2144 lines
53 KiB
JSON
{
|
|
"annotations": {
|
|
"list": [
|
|
{
|
|
"builtIn": 1,
|
|
"datasource": {
|
|
"type": "datasource",
|
|
"uid": "grafana"
|
|
},
|
|
"enable": true,
|
|
"hide": true,
|
|
"iconColor": "rgba(0, 211, 255, 1)",
|
|
"name": "Annotations & Alerts",
|
|
"type": "dashboard"
|
|
}
|
|
]
|
|
},
|
|
"description": "",
|
|
"editable": true,
|
|
"fiscalYearStartMonth": 0,
|
|
"gnetId": 14574,
|
|
"graphTooltip": 0,
|
|
"id": 34,
|
|
"links": [
|
|
{
|
|
"asDropdown": true,
|
|
"icon": "external link",
|
|
"includeVars": false,
|
|
"keepTime": false,
|
|
"tags": [
|
|
"Zogg"
|
|
],
|
|
"targetBlank": false,
|
|
"title": "Dashboards",
|
|
"tooltip": "",
|
|
"type": "dashboards",
|
|
"url": ""
|
|
}
|
|
],
|
|
"liveNow": false,
|
|
"panels": [
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "b00a2176-d15e-49e1-9a3c-e7cc3783ca10"
|
|
},
|
|
"description": "The official product name of the GPU. This is an alphanumeric string. For all products.",
|
|
"fieldConfig": {
|
|
"defaults": {
|
|
"color": {
|
|
"mode": "thresholds"
|
|
},
|
|
"decimals": 2,
|
|
"mappings": [],
|
|
"thresholds": {
|
|
"mode": "absolute",
|
|
"steps": [
|
|
{
|
|
"color": "green",
|
|
"value": null
|
|
}
|
|
]
|
|
},
|
|
"unit": "none"
|
|
},
|
|
"overrides": []
|
|
},
|
|
"gridPos": {
|
|
"h": 3,
|
|
"w": 4,
|
|
"x": 0,
|
|
"y": 0
|
|
},
|
|
"id": 23,
|
|
"options": {
|
|
"colorMode": "value",
|
|
"graphMode": "none",
|
|
"justifyMode": "auto",
|
|
"orientation": "auto",
|
|
"reduceOptions": {
|
|
"calcs": [
|
|
"last"
|
|
],
|
|
"fields": "",
|
|
"values": false
|
|
},
|
|
"text": {},
|
|
"textMode": "name"
|
|
},
|
|
"pluginVersion": "10.1.4",
|
|
"targets": [
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "b00a2176-d15e-49e1-9a3c-e7cc3783ca10"
|
|
},
|
|
"exemplar": true,
|
|
"expr": "nvidia_smi_gpu_info{uuid=\"$gpu\"}",
|
|
"instant": true,
|
|
"interval": "",
|
|
"legendFormat": "{{name}}",
|
|
"refId": "A"
|
|
}
|
|
],
|
|
"title": "Name",
|
|
"type": "stat"
|
|
},
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "b00a2176-d15e-49e1-9a3c-e7cc3783ca10"
|
|
},
|
|
"description": "The current performance state for the GPU. States range from P0 (maximum performance) to P12 (minimum performance).",
|
|
"fieldConfig": {
|
|
"defaults": {
|
|
"color": {
|
|
"mode": "thresholds"
|
|
},
|
|
"decimals": 0,
|
|
"mappings": [
|
|
{
|
|
"options": {
|
|
"": {
|
|
"text": ""
|
|
}
|
|
},
|
|
"type": "value"
|
|
}
|
|
],
|
|
"thresholds": {
|
|
"mode": "absolute",
|
|
"steps": [
|
|
{
|
|
"color": "green",
|
|
"value": null
|
|
}
|
|
]
|
|
},
|
|
"unit": "prefix:P"
|
|
},
|
|
"overrides": []
|
|
},
|
|
"gridPos": {
|
|
"h": 3,
|
|
"w": 2,
|
|
"x": 4,
|
|
"y": 0
|
|
},
|
|
"id": 22,
|
|
"options": {
|
|
"colorMode": "value",
|
|
"graphMode": "none",
|
|
"justifyMode": "auto",
|
|
"orientation": "auto",
|
|
"reduceOptions": {
|
|
"calcs": [
|
|
"last"
|
|
],
|
|
"fields": "",
|
|
"values": false
|
|
},
|
|
"text": {},
|
|
"textMode": "value"
|
|
},
|
|
"pluginVersion": "10.1.4",
|
|
"targets": [
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "b00a2176-d15e-49e1-9a3c-e7cc3783ca10"
|
|
},
|
|
"exemplar": true,
|
|
"expr": "nvidia_smi_pstate{uuid=\"$gpu\"}",
|
|
"interval": "",
|
|
"legendFormat": "",
|
|
"refId": "A"
|
|
}
|
|
],
|
|
"title": "P-State",
|
|
"type": "stat"
|
|
},
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "b00a2176-d15e-49e1-9a3c-e7cc3783ca10"
|
|
},
|
|
"description": "Percent of time over the past sample period during which one or more kernels was executing on the GPU.\nThe sample period may be between 1 second and 1/6 second depending on the product.",
|
|
"fieldConfig": {
|
|
"defaults": {
|
|
"color": {
|
|
"mode": "thresholds"
|
|
},
|
|
"mappings": [],
|
|
"max": 1,
|
|
"min": 0,
|
|
"thresholds": {
|
|
"mode": "percentage",
|
|
"steps": [
|
|
{
|
|
"color": "green",
|
|
"value": null
|
|
},
|
|
{
|
|
"color": "#EAB839",
|
|
"value": 70
|
|
},
|
|
{
|
|
"color": "red",
|
|
"value": 90
|
|
}
|
|
]
|
|
},
|
|
"unit": "percentunit"
|
|
},
|
|
"overrides": []
|
|
},
|
|
"gridPos": {
|
|
"h": 5,
|
|
"w": 3,
|
|
"x": 6,
|
|
"y": 0
|
|
},
|
|
"id": 6,
|
|
"options": {
|
|
"orientation": "auto",
|
|
"reduceOptions": {
|
|
"calcs": [
|
|
"last"
|
|
],
|
|
"fields": "",
|
|
"values": false
|
|
},
|
|
"showThresholdLabels": false,
|
|
"showThresholdMarkers": true,
|
|
"text": {}
|
|
},
|
|
"pluginVersion": "10.1.4",
|
|
"targets": [
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "b00a2176-d15e-49e1-9a3c-e7cc3783ca10"
|
|
},
|
|
"exemplar": true,
|
|
"expr": "nvidia_smi_utilization_gpu_ratio{uuid=\"$gpu\"}",
|
|
"interval": "",
|
|
"legendFormat": "{{uuid}}",
|
|
"refId": "A"
|
|
}
|
|
],
|
|
"title": "GPU Utilization %",
|
|
"transformations": [],
|
|
"type": "gauge"
|
|
},
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "b00a2176-d15e-49e1-9a3c-e7cc3783ca10"
|
|
},
|
|
"description": "The last measured power draw for the entire board, in watts. Only available if power management is supported. This reading is accurate to within +/- 5 watts / The software power limit in watts.",
|
|
"fieldConfig": {
|
|
"defaults": {
|
|
"color": {
|
|
"mode": "thresholds"
|
|
},
|
|
"mappings": [],
|
|
"max": 1,
|
|
"min": 0,
|
|
"thresholds": {
|
|
"mode": "percentage",
|
|
"steps": [
|
|
{
|
|
"color": "green",
|
|
"value": null
|
|
},
|
|
{
|
|
"color": "#EAB839",
|
|
"value": 70
|
|
},
|
|
{
|
|
"color": "red",
|
|
"value": 90
|
|
}
|
|
]
|
|
},
|
|
"unit": "percentunit"
|
|
},
|
|
"overrides": []
|
|
},
|
|
"gridPos": {
|
|
"h": 5,
|
|
"w": 3,
|
|
"x": 9,
|
|
"y": 0
|
|
},
|
|
"id": 21,
|
|
"options": {
|
|
"orientation": "auto",
|
|
"reduceOptions": {
|
|
"calcs": [
|
|
"last"
|
|
],
|
|
"fields": "",
|
|
"values": false
|
|
},
|
|
"showThresholdLabels": false,
|
|
"showThresholdMarkers": true,
|
|
"text": {}
|
|
},
|
|
"pluginVersion": "10.1.4",
|
|
"targets": [
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "b00a2176-d15e-49e1-9a3c-e7cc3783ca10"
|
|
},
|
|
"exemplar": true,
|
|
"expr": "nvidia_smi_power_draw_watts{uuid=\"$gpu\"} / nvidia_smi_power_default_limit_watts{uuid=\"$gpu\"}",
|
|
"interval": "",
|
|
"legendFormat": "",
|
|
"refId": "A"
|
|
}
|
|
],
|
|
"title": "Power Draw %",
|
|
"type": "gauge"
|
|
},
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "b00a2176-d15e-49e1-9a3c-e7cc3783ca10"
|
|
},
|
|
"description": "The fan speed value is the percent of the product's maximum noise tolerance fan speed that the device's fan is currently intended to run at. This value may exceed 100% in certain cases. Note: The reported speed is the intended fan speed. If the fan is physically blocked and unable to spin, this output will not match the actual fan speed. Many parts do not report fan speeds because they rely on cooling via fans in the surrounding enclosure.\n",
|
|
"fieldConfig": {
|
|
"defaults": {
|
|
"color": {
|
|
"mode": "thresholds"
|
|
},
|
|
"mappings": [],
|
|
"max": 1,
|
|
"min": 0,
|
|
"thresholds": {
|
|
"mode": "percentage",
|
|
"steps": [
|
|
{
|
|
"color": "green",
|
|
"value": null
|
|
},
|
|
{
|
|
"color": "#EAB839",
|
|
"value": 70
|
|
},
|
|
{
|
|
"color": "red",
|
|
"value": 90
|
|
}
|
|
]
|
|
},
|
|
"unit": "percentunit"
|
|
},
|
|
"overrides": []
|
|
},
|
|
"gridPos": {
|
|
"h": 5,
|
|
"w": 3,
|
|
"x": 12,
|
|
"y": 0
|
|
},
|
|
"id": 4,
|
|
"options": {
|
|
"orientation": "auto",
|
|
"reduceOptions": {
|
|
"calcs": [
|
|
"last"
|
|
],
|
|
"fields": "",
|
|
"values": false
|
|
},
|
|
"showThresholdLabels": false,
|
|
"showThresholdMarkers": true,
|
|
"text": {}
|
|
},
|
|
"pluginVersion": "10.1.4",
|
|
"targets": [
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "b00a2176-d15e-49e1-9a3c-e7cc3783ca10"
|
|
},
|
|
"exemplar": true,
|
|
"expr": "nvidia_smi_fan_speed_ratio{uuid=\"$gpu\"}",
|
|
"interval": "",
|
|
"legendFormat": "",
|
|
"refId": "A"
|
|
}
|
|
],
|
|
"title": "Fan Speed %",
|
|
"type": "gauge"
|
|
},
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "b00a2176-d15e-49e1-9a3c-e7cc3783ca10"
|
|
},
|
|
"description": "Core GPU temperature, in degrees C.",
|
|
"fieldConfig": {
|
|
"defaults": {
|
|
"color": {
|
|
"mode": "thresholds"
|
|
},
|
|
"mappings": [],
|
|
"max": 100,
|
|
"min": 0,
|
|
"thresholds": {
|
|
"mode": "absolute",
|
|
"steps": [
|
|
{
|
|
"color": "green",
|
|
"value": null
|
|
},
|
|
{
|
|
"color": "#EAB839",
|
|
"value": 70
|
|
},
|
|
{
|
|
"color": "red",
|
|
"value": 80
|
|
}
|
|
]
|
|
},
|
|
"unit": "celsius"
|
|
},
|
|
"overrides": []
|
|
},
|
|
"gridPos": {
|
|
"h": 5,
|
|
"w": 3,
|
|
"x": 15,
|
|
"y": 0
|
|
},
|
|
"id": 16,
|
|
"options": {
|
|
"orientation": "auto",
|
|
"reduceOptions": {
|
|
"calcs": [
|
|
"last"
|
|
],
|
|
"fields": "",
|
|
"values": false
|
|
},
|
|
"showThresholdLabels": false,
|
|
"showThresholdMarkers": true,
|
|
"text": {}
|
|
},
|
|
"pluginVersion": "10.1.4",
|
|
"targets": [
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "b00a2176-d15e-49e1-9a3c-e7cc3783ca10"
|
|
},
|
|
"exemplar": true,
|
|
"expr": "nvidia_smi_temperature_gpu{uuid=\"$gpu\"}",
|
|
"interval": "",
|
|
"legendFormat": "{{uuid}}",
|
|
"refId": "A"
|
|
}
|
|
],
|
|
"title": "Temperature",
|
|
"type": "gauge"
|
|
},
|
|
{
|
|
"aliasColors": {},
|
|
"bars": false,
|
|
"dashLength": 10,
|
|
"dashes": false,
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "b00a2176-d15e-49e1-9a3c-e7cc3783ca10"
|
|
},
|
|
"description": "Percent of time over the past sample period during which global (device) memory was being read or written.\nThe sample period may be between 1 second and 1/6 second depending on the product.",
|
|
"fieldConfig": {
|
|
"defaults": {
|
|
"unit": "percentunit"
|
|
},
|
|
"overrides": []
|
|
},
|
|
"fill": 1,
|
|
"fillGradient": 0,
|
|
"gridPos": {
|
|
"h": 5,
|
|
"w": 6,
|
|
"x": 18,
|
|
"y": 0
|
|
},
|
|
"hiddenSeries": false,
|
|
"id": 11,
|
|
"legend": {
|
|
"avg": false,
|
|
"current": false,
|
|
"max": false,
|
|
"min": false,
|
|
"show": false,
|
|
"total": false,
|
|
"values": false
|
|
},
|
|
"lines": true,
|
|
"linewidth": 1,
|
|
"nullPointMode": "null",
|
|
"options": {
|
|
"alertThreshold": true
|
|
},
|
|
"percentage": false,
|
|
"pluginVersion": "10.1.4",
|
|
"pointradius": 2,
|
|
"points": false,
|
|
"renderer": "flot",
|
|
"seriesOverrides": [],
|
|
"spaceLength": 10,
|
|
"stack": false,
|
|
"steppedLine": false,
|
|
"targets": [
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "b00a2176-d15e-49e1-9a3c-e7cc3783ca10"
|
|
},
|
|
"exemplar": true,
|
|
"expr": "nvidia_smi_utilization_memory_ratio{uuid=\"$gpu\"}",
|
|
"interval": "",
|
|
"legendFormat": "{{uuid}}",
|
|
"refId": "A"
|
|
}
|
|
],
|
|
"thresholds": [
|
|
{
|
|
"$$hashKey": "object:1370",
|
|
"colorMode": "warning",
|
|
"fill": true,
|
|
"line": true,
|
|
"op": "gt",
|
|
"value": 0.7,
|
|
"yaxis": "left"
|
|
},
|
|
{
|
|
"$$hashKey": "object:1376",
|
|
"colorMode": "critical",
|
|
"fill": true,
|
|
"line": true,
|
|
"op": "gt",
|
|
"value": 0.9,
|
|
"yaxis": "left"
|
|
}
|
|
],
|
|
"timeRegions": [],
|
|
"title": "Memory Utilization %",
|
|
"tooltip": {
|
|
"shared": true,
|
|
"sort": 0,
|
|
"value_type": "individual"
|
|
},
|
|
"type": "graph",
|
|
"xaxis": {
|
|
"mode": "time",
|
|
"show": true,
|
|
"values": []
|
|
},
|
|
"yaxes": [
|
|
{
|
|
"$$hashKey": "object:1352",
|
|
"format": "percentunit",
|
|
"label": "",
|
|
"logBase": 1,
|
|
"max": "1",
|
|
"min": "0",
|
|
"show": true
|
|
},
|
|
{
|
|
"$$hashKey": "object:1353",
|
|
"format": "short",
|
|
"logBase": 1,
|
|
"show": true
|
|
}
|
|
],
|
|
"yaxis": {
|
|
"align": false
|
|
}
|
|
},
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "b00a2176-d15e-49e1-9a3c-e7cc3783ca10"
|
|
},
|
|
"description": "The version of the installed NVIDIA display driver. This is an alphanumeric string.",
|
|
"fieldConfig": {
|
|
"defaults": {
|
|
"color": {
|
|
"mode": "thresholds"
|
|
},
|
|
"decimals": 2,
|
|
"mappings": [],
|
|
"thresholds": {
|
|
"mode": "absolute",
|
|
"steps": [
|
|
{
|
|
"color": "green",
|
|
"value": null
|
|
}
|
|
]
|
|
},
|
|
"unit": "none"
|
|
},
|
|
"overrides": []
|
|
},
|
|
"gridPos": {
|
|
"h": 2,
|
|
"w": 3,
|
|
"x": 0,
|
|
"y": 3
|
|
},
|
|
"id": 14,
|
|
"options": {
|
|
"colorMode": "value",
|
|
"graphMode": "none",
|
|
"justifyMode": "auto",
|
|
"orientation": "auto",
|
|
"reduceOptions": {
|
|
"calcs": [
|
|
"last"
|
|
],
|
|
"fields": "",
|
|
"values": false
|
|
},
|
|
"text": {},
|
|
"textMode": "name"
|
|
},
|
|
"pluginVersion": "10.1.4",
|
|
"targets": [
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "b00a2176-d15e-49e1-9a3c-e7cc3783ca10"
|
|
},
|
|
"exemplar": true,
|
|
"expr": "nvidia_smi_gpu_info{uuid=\"$gpu\"}",
|
|
"instant": true,
|
|
"interval": "",
|
|
"legendFormat": "{{driver_version}}",
|
|
"refId": "A"
|
|
}
|
|
],
|
|
"title": "Driver Version",
|
|
"type": "stat"
|
|
},
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "b00a2176-d15e-49e1-9a3c-e7cc3783ca10"
|
|
},
|
|
"description": "The BIOS of the GPU board.",
|
|
"fieldConfig": {
|
|
"defaults": {
|
|
"color": {
|
|
"mode": "thresholds"
|
|
},
|
|
"decimals": 2,
|
|
"mappings": [],
|
|
"thresholds": {
|
|
"mode": "absolute",
|
|
"steps": [
|
|
{
|
|
"color": "green",
|
|
"value": null
|
|
}
|
|
]
|
|
},
|
|
"unit": "none"
|
|
},
|
|
"overrides": []
|
|
},
|
|
"gridPos": {
|
|
"h": 2,
|
|
"w": 3,
|
|
"x": 3,
|
|
"y": 3
|
|
},
|
|
"id": 34,
|
|
"options": {
|
|
"colorMode": "value",
|
|
"graphMode": "none",
|
|
"justifyMode": "auto",
|
|
"orientation": "auto",
|
|
"reduceOptions": {
|
|
"calcs": [
|
|
"last"
|
|
],
|
|
"fields": "",
|
|
"values": false
|
|
},
|
|
"text": {},
|
|
"textMode": "name"
|
|
},
|
|
"pluginVersion": "10.1.4",
|
|
"targets": [
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "b00a2176-d15e-49e1-9a3c-e7cc3783ca10"
|
|
},
|
|
"exemplar": true,
|
|
"expr": "nvidia_smi_gpu_info{uuid=\"$gpu\"}",
|
|
"instant": true,
|
|
"interval": "",
|
|
"legendFormat": "{{vbios_version}}",
|
|
"refId": "A"
|
|
}
|
|
],
|
|
"title": "Vbios Version",
|
|
"type": "stat"
|
|
},
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "b00a2176-d15e-49e1-9a3c-e7cc3783ca10"
|
|
},
|
|
"description": "Information about factors that are reducing the frequency of clocks. If all throttle reasons are returned as \"Not Active\" it means that clocks are running as high as possible.",
|
|
"fieldConfig": {
|
|
"defaults": {
|
|
"color": {
|
|
"mode": "thresholds"
|
|
},
|
|
"decimals": 0,
|
|
"mappings": [
|
|
{
|
|
"options": {
|
|
"0": {
|
|
"text": "Not Active"
|
|
},
|
|
"1": {
|
|
"text": "Active"
|
|
}
|
|
},
|
|
"type": "value"
|
|
}
|
|
],
|
|
"thresholds": {
|
|
"mode": "absolute",
|
|
"steps": [
|
|
{
|
|
"color": "green",
|
|
"value": null
|
|
}
|
|
]
|
|
},
|
|
"unit": "none"
|
|
},
|
|
"overrides": []
|
|
},
|
|
"gridPos": {
|
|
"h": 5,
|
|
"w": 6,
|
|
"x": 0,
|
|
"y": 5
|
|
},
|
|
"id": 32,
|
|
"links": [],
|
|
"options": {
|
|
"displayMode": "gradient",
|
|
"minVizHeight": 10,
|
|
"minVizWidth": 0,
|
|
"orientation": "horizontal",
|
|
"reduceOptions": {
|
|
"calcs": [
|
|
"last"
|
|
],
|
|
"fields": "",
|
|
"values": false
|
|
},
|
|
"showUnfilled": true,
|
|
"text": {},
|
|
"valueMode": "color"
|
|
},
|
|
"pluginVersion": "10.1.4",
|
|
"targets": [
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "b00a2176-d15e-49e1-9a3c-e7cc3783ca10"
|
|
},
|
|
"exemplar": true,
|
|
"expr": "nvidia_smi_clocks_throttle_reasons_gpu_idle{uuid=\"$gpu\"}",
|
|
"instant": false,
|
|
"interval": "",
|
|
"legendFormat": "Idle",
|
|
"refId": "A"
|
|
},
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "b00a2176-d15e-49e1-9a3c-e7cc3783ca10"
|
|
},
|
|
"exemplar": true,
|
|
"expr": "nvidia_smi_clocks_throttle_reasons_hw_thermal_slowdown{uuid=\"$gpu\"}",
|
|
"hide": false,
|
|
"interval": "",
|
|
"legendFormat": "HW Thermal Slowdown",
|
|
"refId": "B"
|
|
},
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "b00a2176-d15e-49e1-9a3c-e7cc3783ca10"
|
|
},
|
|
"exemplar": true,
|
|
"expr": "nvidia_smi_clocks_throttle_reasons_sw_power_cap{uuid=\"$gpu\"}",
|
|
"hide": false,
|
|
"interval": "",
|
|
"legendFormat": "SW Power Cap",
|
|
"refId": "C"
|
|
},
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "b00a2176-d15e-49e1-9a3c-e7cc3783ca10"
|
|
},
|
|
"exemplar": true,
|
|
"expr": "nvidia_smi_clocks_throttle_reasons_applications_clocks_setting{uuid=\"$gpu\"}",
|
|
"hide": false,
|
|
"interval": "",
|
|
"legendFormat": "App Clocks Setting",
|
|
"refId": "D"
|
|
},
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "b00a2176-d15e-49e1-9a3c-e7cc3783ca10"
|
|
},
|
|
"exemplar": true,
|
|
"expr": "nvidia_smi_clocks_throttle_reasons_hw_power_brake_slowdown{uuid=\"$gpu\"}",
|
|
"hide": false,
|
|
"interval": "",
|
|
"legendFormat": "HW Power Brake",
|
|
"refId": "E"
|
|
},
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "b00a2176-d15e-49e1-9a3c-e7cc3783ca10"
|
|
},
|
|
"exemplar": true,
|
|
"expr": "nvidia_smi_clocks_throttle_reasons_sw_thermal_slowdown{uuid=\"$gpu\"}",
|
|
"hide": false,
|
|
"interval": "",
|
|
"legendFormat": "SW Thermal Slowdown",
|
|
"refId": "F"
|
|
},
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "b00a2176-d15e-49e1-9a3c-e7cc3783ca10"
|
|
},
|
|
"exemplar": true,
|
|
"expr": "nvidia_smi_clocks_throttle_reasons_sync_boost{uuid=\"$gpu\"}",
|
|
"hide": false,
|
|
"interval": "",
|
|
"legendFormat": "Sync Boost",
|
|
"refId": "G"
|
|
}
|
|
],
|
|
"title": "Throttle Reasons",
|
|
"type": "bargauge"
|
|
},
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "b00a2176-d15e-49e1-9a3c-e7cc3783ca10"
|
|
},
|
|
"description": "Current frequency of graphics (shader) clock\n/\nMaximum frequency of graphics (shader) clock.\n",
|
|
"fieldConfig": {
|
|
"defaults": {
|
|
"color": {
|
|
"mode": "thresholds"
|
|
},
|
|
"mappings": [],
|
|
"max": 1,
|
|
"min": 0,
|
|
"thresholds": {
|
|
"mode": "percentage",
|
|
"steps": [
|
|
{
|
|
"color": "green",
|
|
"value": null
|
|
},
|
|
{
|
|
"color": "#EAB839",
|
|
"value": 70
|
|
},
|
|
{
|
|
"color": "red",
|
|
"value": 90
|
|
}
|
|
]
|
|
},
|
|
"unit": "percentunit"
|
|
},
|
|
"overrides": []
|
|
},
|
|
"gridPos": {
|
|
"h": 5,
|
|
"w": 3,
|
|
"x": 6,
|
|
"y": 5
|
|
},
|
|
"id": 20,
|
|
"options": {
|
|
"orientation": "auto",
|
|
"reduceOptions": {
|
|
"calcs": [
|
|
"last"
|
|
],
|
|
"fields": "",
|
|
"values": false
|
|
},
|
|
"showThresholdLabels": false,
|
|
"showThresholdMarkers": true,
|
|
"text": {}
|
|
},
|
|
"pluginVersion": "10.1.4",
|
|
"targets": [
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "b00a2176-d15e-49e1-9a3c-e7cc3783ca10"
|
|
},
|
|
"exemplar": true,
|
|
"expr": "nvidia_smi_clocks_current_graphics_clock_hz{uuid=\"$gpu\"} / nvidia_smi_clocks_max_graphics_clock_hz{uuid=\"$gpu\"}",
|
|
"interval": "",
|
|
"legendFormat": "",
|
|
"refId": "A"
|
|
}
|
|
],
|
|
"title": "GPU Clock Speed %",
|
|
"type": "gauge"
|
|
},
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "b00a2176-d15e-49e1-9a3c-e7cc3783ca10"
|
|
},
|
|
"description": "Current frequency of memory clock / Maximum frequency of memory clock.",
|
|
"fieldConfig": {
|
|
"defaults": {
|
|
"color": {
|
|
"mode": "thresholds"
|
|
},
|
|
"mappings": [],
|
|
"max": 1,
|
|
"min": 0,
|
|
"thresholds": {
|
|
"mode": "percentage",
|
|
"steps": [
|
|
{
|
|
"color": "green",
|
|
"value": null
|
|
},
|
|
{
|
|
"color": "#EAB839",
|
|
"value": 70
|
|
},
|
|
{
|
|
"color": "red",
|
|
"value": 90
|
|
}
|
|
]
|
|
},
|
|
"unit": "percentunit"
|
|
},
|
|
"overrides": []
|
|
},
|
|
"gridPos": {
|
|
"h": 5,
|
|
"w": 3,
|
|
"x": 9,
|
|
"y": 5
|
|
},
|
|
"id": 33,
|
|
"options": {
|
|
"orientation": "auto",
|
|
"reduceOptions": {
|
|
"calcs": [
|
|
"last"
|
|
],
|
|
"fields": "",
|
|
"values": false
|
|
},
|
|
"showThresholdLabels": false,
|
|
"showThresholdMarkers": true,
|
|
"text": {}
|
|
},
|
|
"pluginVersion": "10.1.4",
|
|
"targets": [
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "b00a2176-d15e-49e1-9a3c-e7cc3783ca10"
|
|
},
|
|
"exemplar": true,
|
|
"expr": "nvidia_smi_clocks_current_memory_clock_hz{uuid=\"$gpu\"} / nvidia_smi_clocks_max_memory_clock_hz{uuid=\"$gpu\"}",
|
|
"interval": "",
|
|
"legendFormat": "",
|
|
"refId": "A"
|
|
}
|
|
],
|
|
"title": "Memory Clock Speed %",
|
|
"type": "gauge"
|
|
},
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "b00a2176-d15e-49e1-9a3c-e7cc3783ca10"
|
|
},
|
|
"description": "Total memory allocated by active contexts / Total installed GPU memory.",
|
|
"fieldConfig": {
|
|
"defaults": {
|
|
"color": {
|
|
"mode": "thresholds"
|
|
},
|
|
"mappings": [],
|
|
"max": 1,
|
|
"min": 0,
|
|
"thresholds": {
|
|
"mode": "percentage",
|
|
"steps": [
|
|
{
|
|
"color": "green",
|
|
"value": null
|
|
},
|
|
{
|
|
"color": "#EAB839",
|
|
"value": 70
|
|
},
|
|
{
|
|
"color": "red",
|
|
"value": 90
|
|
}
|
|
]
|
|
},
|
|
"unit": "percentunit"
|
|
},
|
|
"overrides": []
|
|
},
|
|
"gridPos": {
|
|
"h": 5,
|
|
"w": 3,
|
|
"x": 12,
|
|
"y": 5
|
|
},
|
|
"id": 25,
|
|
"options": {
|
|
"orientation": "auto",
|
|
"reduceOptions": {
|
|
"calcs": [
|
|
"last"
|
|
],
|
|
"fields": "",
|
|
"values": false
|
|
},
|
|
"showThresholdLabels": false,
|
|
"showThresholdMarkers": true,
|
|
"text": {}
|
|
},
|
|
"pluginVersion": "10.1.4",
|
|
"targets": [
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "b00a2176-d15e-49e1-9a3c-e7cc3783ca10"
|
|
},
|
|
"exemplar": true,
|
|
"expr": "nvidia_smi_memory_used_bytes{uuid=\"$gpu\"} / nvidia_smi_memory_total_bytes{uuid=\"$gpu\"}",
|
|
"interval": "",
|
|
"legendFormat": "",
|
|
"refId": "A"
|
|
}
|
|
],
|
|
"title": "Memory Allocation %",
|
|
"type": "gauge"
|
|
},
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "b00a2176-d15e-49e1-9a3c-e7cc3783ca10"
|
|
},
|
|
"description": "Percent of time over the past sample period during which global (device) memory was being read or written.\nThe sample period may be between 1 second and 1/6 second depending on the product.",
|
|
"fieldConfig": {
|
|
"defaults": {
|
|
"color": {
|
|
"mode": "thresholds"
|
|
},
|
|
"mappings": [],
|
|
"max": 1,
|
|
"min": 0,
|
|
"thresholds": {
|
|
"mode": "percentage",
|
|
"steps": [
|
|
{
|
|
"color": "green",
|
|
"value": null
|
|
},
|
|
{
|
|
"color": "#EAB839",
|
|
"value": 70
|
|
},
|
|
{
|
|
"color": "red",
|
|
"value": 90
|
|
}
|
|
]
|
|
},
|
|
"unit": "percentunit"
|
|
},
|
|
"overrides": []
|
|
},
|
|
"gridPos": {
|
|
"h": 5,
|
|
"w": 3,
|
|
"x": 15,
|
|
"y": 5
|
|
},
|
|
"id": 7,
|
|
"options": {
|
|
"orientation": "auto",
|
|
"reduceOptions": {
|
|
"calcs": [
|
|
"last"
|
|
],
|
|
"fields": "",
|
|
"values": false
|
|
},
|
|
"showThresholdLabels": false,
|
|
"showThresholdMarkers": true,
|
|
"text": {}
|
|
},
|
|
"pluginVersion": "10.1.4",
|
|
"targets": [
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "b00a2176-d15e-49e1-9a3c-e7cc3783ca10"
|
|
},
|
|
"exemplar": true,
|
|
"expr": "nvidia_smi_utilization_memory_ratio{uuid=\"$gpu\"}",
|
|
"interval": "",
|
|
"legendFormat": "",
|
|
"refId": "A"
|
|
}
|
|
],
|
|
"title": "Memory Utilization %",
|
|
"type": "gauge"
|
|
},
|
|
{
|
|
"aliasColors": {},
|
|
"bars": false,
|
|
"dashLength": 10,
|
|
"dashes": false,
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "b00a2176-d15e-49e1-9a3c-e7cc3783ca10"
|
|
},
|
|
"description": "Percent of time over the past sample period during which one or more kernels was executing on the GPU.\nThe sample period may be between 1 second and 1/6 second depending on the product.",
|
|
"fieldConfig": {
|
|
"defaults": {
|
|
"unit": "percentunit"
|
|
},
|
|
"overrides": []
|
|
},
|
|
"fill": 1,
|
|
"fillGradient": 0,
|
|
"gridPos": {
|
|
"h": 5,
|
|
"w": 6,
|
|
"x": 18,
|
|
"y": 5
|
|
},
|
|
"hiddenSeries": false,
|
|
"id": 10,
|
|
"legend": {
|
|
"avg": false,
|
|
"current": false,
|
|
"max": false,
|
|
"min": false,
|
|
"show": false,
|
|
"total": false,
|
|
"values": false
|
|
},
|
|
"lines": true,
|
|
"linewidth": 1,
|
|
"nullPointMode": "null",
|
|
"options": {
|
|
"alertThreshold": true
|
|
},
|
|
"percentage": false,
|
|
"pluginVersion": "10.1.4",
|
|
"pointradius": 2,
|
|
"points": false,
|
|
"renderer": "flot",
|
|
"seriesOverrides": [],
|
|
"spaceLength": 10,
|
|
"stack": false,
|
|
"steppedLine": false,
|
|
"targets": [
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "b00a2176-d15e-49e1-9a3c-e7cc3783ca10"
|
|
},
|
|
"exemplar": true,
|
|
"expr": "nvidia_smi_utilization_gpu_ratio{uuid=\"$gpu\"}",
|
|
"interval": "",
|
|
"legendFormat": "",
|
|
"refId": "A"
|
|
}
|
|
],
|
|
"thresholds": [
|
|
{
|
|
"$$hashKey": "object:1370",
|
|
"colorMode": "warning",
|
|
"fill": true,
|
|
"line": true,
|
|
"op": "gt",
|
|
"value": 0.7,
|
|
"yaxis": "left"
|
|
},
|
|
{
|
|
"$$hashKey": "object:1376",
|
|
"colorMode": "critical",
|
|
"fill": true,
|
|
"line": true,
|
|
"op": "gt",
|
|
"value": 0.9,
|
|
"yaxis": "left"
|
|
}
|
|
],
|
|
"timeRegions": [],
|
|
"title": "GPU Utilization %",
|
|
"tooltip": {
|
|
"shared": true,
|
|
"sort": 0,
|
|
"value_type": "individual"
|
|
},
|
|
"type": "graph",
|
|
"xaxis": {
|
|
"mode": "time",
|
|
"show": true,
|
|
"values": []
|
|
},
|
|
"yaxes": [
|
|
{
|
|
"$$hashKey": "object:1352",
|
|
"format": "percentunit",
|
|
"label": "",
|
|
"logBase": 1,
|
|
"max": "1",
|
|
"min": "0",
|
|
"show": true
|
|
},
|
|
{
|
|
"$$hashKey": "object:1353",
|
|
"format": "short",
|
|
"logBase": 1,
|
|
"show": true
|
|
}
|
|
],
|
|
"yaxis": {
|
|
"align": false
|
|
}
|
|
},
|
|
{
|
|
"aliasColors": {},
|
|
"bars": false,
|
|
"dashLength": 10,
|
|
"dashes": false,
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "b00a2176-d15e-49e1-9a3c-e7cc3783ca10"
|
|
},
|
|
"description": "Total memory allocated by active contexts.",
|
|
"fieldConfig": {
|
|
"defaults": {
|
|
"unit": "decbytes"
|
|
},
|
|
"overrides": []
|
|
},
|
|
"fill": 1,
|
|
"fillGradient": 0,
|
|
"gridPos": {
|
|
"h": 5,
|
|
"w": 6,
|
|
"x": 0,
|
|
"y": 10
|
|
},
|
|
"hiddenSeries": false,
|
|
"id": 17,
|
|
"legend": {
|
|
"avg": false,
|
|
"current": false,
|
|
"max": false,
|
|
"min": false,
|
|
"show": false,
|
|
"total": false,
|
|
"values": false
|
|
},
|
|
"lines": true,
|
|
"linewidth": 1,
|
|
"nullPointMode": "null",
|
|
"options": {
|
|
"alertThreshold": true
|
|
},
|
|
"percentage": false,
|
|
"pluginVersion": "10.1.4",
|
|
"pointradius": 2,
|
|
"points": false,
|
|
"renderer": "flot",
|
|
"seriesOverrides": [],
|
|
"spaceLength": 10,
|
|
"stack": false,
|
|
"steppedLine": false,
|
|
"targets": [
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "b00a2176-d15e-49e1-9a3c-e7cc3783ca10"
|
|
},
|
|
"exemplar": true,
|
|
"expr": "nvidia_smi_memory_used_bytes{uuid=\"$gpu\"}",
|
|
"interval": "",
|
|
"legendFormat": "{{uuid}}",
|
|
"refId": "A"
|
|
}
|
|
],
|
|
"thresholds": [],
|
|
"timeRegions": [],
|
|
"title": "Memory Allocation",
|
|
"tooltip": {
|
|
"shared": true,
|
|
"sort": 0,
|
|
"value_type": "individual"
|
|
},
|
|
"type": "graph",
|
|
"xaxis": {
|
|
"mode": "time",
|
|
"show": true,
|
|
"values": []
|
|
},
|
|
"yaxes": [
|
|
{
|
|
"$$hashKey": "object:1352",
|
|
"format": "decbytes",
|
|
"label": "",
|
|
"logBase": 1,
|
|
"min": "0",
|
|
"show": true
|
|
},
|
|
{
|
|
"$$hashKey": "object:1353",
|
|
"format": "short",
|
|
"logBase": 1,
|
|
"show": true
|
|
}
|
|
],
|
|
"yaxis": {
|
|
"align": false
|
|
}
|
|
},
|
|
{
|
|
"aliasColors": {},
|
|
"bars": false,
|
|
"dashLength": 10,
|
|
"dashes": false,
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "b00a2176-d15e-49e1-9a3c-e7cc3783ca10"
|
|
},
|
|
"description": "Core GPU temperature, in degrees C.",
|
|
"fieldConfig": {
|
|
"defaults": {
|
|
"unit": "celsius"
|
|
},
|
|
"overrides": []
|
|
},
|
|
"fill": 1,
|
|
"fillGradient": 0,
|
|
"gridPos": {
|
|
"h": 5,
|
|
"w": 6,
|
|
"x": 6,
|
|
"y": 10
|
|
},
|
|
"hiddenSeries": false,
|
|
"id": 15,
|
|
"legend": {
|
|
"avg": false,
|
|
"current": false,
|
|
"max": false,
|
|
"min": false,
|
|
"show": false,
|
|
"total": false,
|
|
"values": false
|
|
},
|
|
"lines": true,
|
|
"linewidth": 1,
|
|
"nullPointMode": "null",
|
|
"options": {
|
|
"alertThreshold": true
|
|
},
|
|
"percentage": false,
|
|
"pluginVersion": "10.1.4",
|
|
"pointradius": 2,
|
|
"points": false,
|
|
"renderer": "flot",
|
|
"seriesOverrides": [],
|
|
"spaceLength": 10,
|
|
"stack": false,
|
|
"steppedLine": false,
|
|
"targets": [
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "b00a2176-d15e-49e1-9a3c-e7cc3783ca10"
|
|
},
|
|
"exemplar": true,
|
|
"expr": "nvidia_smi_temperature_gpu{uuid=\"$gpu\"}",
|
|
"interval": "",
|
|
"legendFormat": "{{uuid}}",
|
|
"refId": "A"
|
|
}
|
|
],
|
|
"thresholds": [
|
|
{
|
|
"$$hashKey": "object:1805",
|
|
"colorMode": "warning",
|
|
"fill": true,
|
|
"line": true,
|
|
"op": "gt",
|
|
"value": 70,
|
|
"yaxis": "left"
|
|
},
|
|
{
|
|
"$$hashKey": "object:1811",
|
|
"colorMode": "critical",
|
|
"fill": true,
|
|
"line": true,
|
|
"op": "gt",
|
|
"value": 80,
|
|
"yaxis": "left"
|
|
}
|
|
],
|
|
"timeRegions": [],
|
|
"title": "Temperature",
|
|
"tooltip": {
|
|
"shared": true,
|
|
"sort": 0,
|
|
"value_type": "individual"
|
|
},
|
|
"type": "graph",
|
|
"xaxis": {
|
|
"mode": "time",
|
|
"show": true,
|
|
"values": []
|
|
},
|
|
"yaxes": [
|
|
{
|
|
"$$hashKey": "object:1761",
|
|
"format": "celsius",
|
|
"label": "",
|
|
"logBase": 1,
|
|
"max": "100",
|
|
"min": "0",
|
|
"show": true
|
|
},
|
|
{
|
|
"$$hashKey": "object:1762",
|
|
"format": "short",
|
|
"logBase": 1,
|
|
"show": true
|
|
}
|
|
],
|
|
"yaxis": {
|
|
"align": false
|
|
}
|
|
},
|
|
{
|
|
"aliasColors": {},
|
|
"bars": false,
|
|
"dashLength": 10,
|
|
"dashes": false,
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "b00a2176-d15e-49e1-9a3c-e7cc3783ca10"
|
|
},
|
|
"description": "The last measured power draw for the entire board, in watts. Only available if power management is supported. This reading is accurate to within +/- 5 watts.",
|
|
"fieldConfig": {
|
|
"defaults": {
|
|
"unit": "watt"
|
|
},
|
|
"overrides": []
|
|
},
|
|
"fill": 1,
|
|
"fillGradient": 0,
|
|
"gridPos": {
|
|
"h": 5,
|
|
"w": 6,
|
|
"x": 12,
|
|
"y": 10
|
|
},
|
|
"hiddenSeries": false,
|
|
"id": 8,
|
|
"legend": {
|
|
"avg": false,
|
|
"current": false,
|
|
"max": false,
|
|
"min": false,
|
|
"show": false,
|
|
"total": false,
|
|
"values": false
|
|
},
|
|
"lines": true,
|
|
"linewidth": 1,
|
|
"nullPointMode": "null",
|
|
"options": {
|
|
"alertThreshold": true
|
|
},
|
|
"percentage": false,
|
|
"pluginVersion": "10.1.4",
|
|
"pointradius": 2,
|
|
"points": false,
|
|
"renderer": "flot",
|
|
"seriesOverrides": [],
|
|
"spaceLength": 10,
|
|
"stack": false,
|
|
"steppedLine": false,
|
|
"targets": [
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "b00a2176-d15e-49e1-9a3c-e7cc3783ca10"
|
|
},
|
|
"exemplar": true,
|
|
"expr": "nvidia_smi_power_draw_watts{uuid=\"$gpu\"}",
|
|
"interval": "",
|
|
"legendFormat": "{{uuid}}",
|
|
"refId": "A"
|
|
}
|
|
],
|
|
"thresholds": [],
|
|
"timeRegions": [],
|
|
"title": "Power Draw",
|
|
"tooltip": {
|
|
"shared": true,
|
|
"sort": 0,
|
|
"value_type": "individual"
|
|
},
|
|
"type": "graph",
|
|
"xaxis": {
|
|
"mode": "time",
|
|
"show": true,
|
|
"values": []
|
|
},
|
|
"yaxes": [
|
|
{
|
|
"$$hashKey": "object:658",
|
|
"format": "watt",
|
|
"logBase": 1,
|
|
"min": "0",
|
|
"show": true
|
|
},
|
|
{
|
|
"$$hashKey": "object:659",
|
|
"format": "short",
|
|
"logBase": 1,
|
|
"show": true
|
|
}
|
|
],
|
|
"yaxis": {
|
|
"align": false
|
|
}
|
|
},
|
|
{
|
|
"aliasColors": {},
|
|
"bars": false,
|
|
"dashLength": 10,
|
|
"dashes": false,
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "b00a2176-d15e-49e1-9a3c-e7cc3783ca10"
|
|
},
|
|
"description": "The fan speed value is the percent of the product's maximum noise tolerance fan speed that the device's fan is currently intended to run at. This value may exceed 100% in certain cases. Note: The reported speed is the intended fan speed. If the fan is physically blocked and unable to spin, this output will not match the actual fan speed. Many parts do not report fan speeds because they rely on cooling via fans in the surrounding enclosure.\n",
|
|
"fieldConfig": {
|
|
"defaults": {
|
|
"unit": "percentunit"
|
|
},
|
|
"overrides": []
|
|
},
|
|
"fill": 1,
|
|
"fillGradient": 0,
|
|
"gridPos": {
|
|
"h": 5,
|
|
"w": 6,
|
|
"x": 18,
|
|
"y": 10
|
|
},
|
|
"hiddenSeries": false,
|
|
"id": 9,
|
|
"legend": {
|
|
"avg": false,
|
|
"current": false,
|
|
"max": false,
|
|
"min": false,
|
|
"show": false,
|
|
"total": false,
|
|
"values": false
|
|
},
|
|
"lines": true,
|
|
"linewidth": 1,
|
|
"nullPointMode": "null",
|
|
"options": {
|
|
"alertThreshold": true
|
|
},
|
|
"percentage": false,
|
|
"pluginVersion": "10.1.4",
|
|
"pointradius": 2,
|
|
"points": false,
|
|
"renderer": "flot",
|
|
"seriesOverrides": [],
|
|
"spaceLength": 10,
|
|
"stack": false,
|
|
"steppedLine": false,
|
|
"targets": [
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "b00a2176-d15e-49e1-9a3c-e7cc3783ca10"
|
|
},
|
|
"exemplar": true,
|
|
"expr": "nvidia_smi_fan_speed_ratio{uuid=\"$gpu\"}",
|
|
"interval": "",
|
|
"legendFormat": "{{uuid}}",
|
|
"refId": "A"
|
|
}
|
|
],
|
|
"thresholds": [
|
|
{
|
|
"$$hashKey": "object:1168",
|
|
"colorMode": "critical",
|
|
"fill": true,
|
|
"line": true,
|
|
"op": "gt",
|
|
"value": 0.9,
|
|
"yaxis": "left"
|
|
},
|
|
{
|
|
"$$hashKey": "object:1174",
|
|
"colorMode": "warning",
|
|
"fill": true,
|
|
"line": true,
|
|
"op": "gt",
|
|
"value": 0.7,
|
|
"yaxis": "left"
|
|
}
|
|
],
|
|
"timeRegions": [],
|
|
"title": "Fan Speed %",
|
|
"tooltip": {
|
|
"shared": true,
|
|
"sort": 0,
|
|
"value_type": "individual"
|
|
},
|
|
"type": "graph",
|
|
"xaxis": {
|
|
"mode": "time",
|
|
"show": true,
|
|
"values": []
|
|
},
|
|
"yaxes": [
|
|
{
|
|
"$$hashKey": "object:1275",
|
|
"format": "percentunit",
|
|
"logBase": 1,
|
|
"max": "1",
|
|
"min": "0",
|
|
"show": true
|
|
},
|
|
{
|
|
"$$hashKey": "object:1276",
|
|
"format": "short",
|
|
"logBase": 1,
|
|
"show": true
|
|
}
|
|
],
|
|
"yaxis": {
|
|
"align": false
|
|
}
|
|
},
|
|
{
|
|
"aliasColors": {},
|
|
"bars": false,
|
|
"dashLength": 10,
|
|
"dashes": false,
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "b00a2176-d15e-49e1-9a3c-e7cc3783ca10"
|
|
},
|
|
"description": "Current frequency of graphics (shader) clock.",
|
|
"fieldConfig": {
|
|
"defaults": {
|
|
"unit": "hertz"
|
|
},
|
|
"overrides": []
|
|
},
|
|
"fill": 1,
|
|
"fillGradient": 0,
|
|
"gridPos": {
|
|
"h": 5,
|
|
"w": 6,
|
|
"x": 0,
|
|
"y": 15
|
|
},
|
|
"hiddenSeries": false,
|
|
"id": 12,
|
|
"legend": {
|
|
"avg": false,
|
|
"current": false,
|
|
"max": false,
|
|
"min": false,
|
|
"show": false,
|
|
"total": false,
|
|
"values": false
|
|
},
|
|
"lines": true,
|
|
"linewidth": 1,
|
|
"nullPointMode": "null",
|
|
"options": {
|
|
"alertThreshold": true
|
|
},
|
|
"percentage": false,
|
|
"pluginVersion": "10.1.4",
|
|
"pointradius": 2,
|
|
"points": false,
|
|
"renderer": "flot",
|
|
"seriesOverrides": [],
|
|
"spaceLength": 10,
|
|
"stack": false,
|
|
"steppedLine": false,
|
|
"targets": [
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "b00a2176-d15e-49e1-9a3c-e7cc3783ca10"
|
|
},
|
|
"exemplar": true,
|
|
"expr": "nvidia_smi_clocks_current_graphics_clock_hz{uuid=\"$gpu\"}",
|
|
"format": "time_series",
|
|
"interval": "",
|
|
"legendFormat": "{{uuid}}",
|
|
"refId": "A"
|
|
}
|
|
],
|
|
"thresholds": [],
|
|
"timeRegions": [],
|
|
"title": "Graphics Clock Speed",
|
|
"tooltip": {
|
|
"shared": true,
|
|
"sort": 0,
|
|
"value_type": "individual"
|
|
},
|
|
"type": "graph",
|
|
"xaxis": {
|
|
"mode": "time",
|
|
"show": true,
|
|
"values": []
|
|
},
|
|
"yaxes": [
|
|
{
|
|
"$$hashKey": "object:1642",
|
|
"format": "hertz",
|
|
"logBase": 1,
|
|
"min": "0",
|
|
"show": true
|
|
},
|
|
{
|
|
"$$hashKey": "object:1643",
|
|
"format": "short",
|
|
"logBase": 1,
|
|
"show": true
|
|
}
|
|
],
|
|
"yaxis": {
|
|
"align": false
|
|
}
|
|
},
|
|
{
|
|
"aliasColors": {},
|
|
"bars": false,
|
|
"dashLength": 10,
|
|
"dashes": false,
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "b00a2176-d15e-49e1-9a3c-e7cc3783ca10"
|
|
},
|
|
"description": "Current frequency of video encoder/decoder clock.",
|
|
"fieldConfig": {
|
|
"defaults": {
|
|
"unit": "hertz"
|
|
},
|
|
"overrides": []
|
|
},
|
|
"fill": 1,
|
|
"fillGradient": 0,
|
|
"gridPos": {
|
|
"h": 5,
|
|
"w": 6,
|
|
"x": 6,
|
|
"y": 15
|
|
},
|
|
"hiddenSeries": false,
|
|
"id": 19,
|
|
"legend": {
|
|
"avg": false,
|
|
"current": false,
|
|
"max": false,
|
|
"min": false,
|
|
"show": false,
|
|
"total": false,
|
|
"values": false
|
|
},
|
|
"lines": true,
|
|
"linewidth": 1,
|
|
"nullPointMode": "null",
|
|
"options": {
|
|
"alertThreshold": true
|
|
},
|
|
"percentage": false,
|
|
"pluginVersion": "10.1.4",
|
|
"pointradius": 2,
|
|
"points": false,
|
|
"renderer": "flot",
|
|
"seriesOverrides": [],
|
|
"spaceLength": 10,
|
|
"stack": false,
|
|
"steppedLine": false,
|
|
"targets": [
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "b00a2176-d15e-49e1-9a3c-e7cc3783ca10"
|
|
},
|
|
"exemplar": true,
|
|
"expr": "nvidia_smi_clocks_current_video_clock_hz{uuid=\"$gpu\"}",
|
|
"format": "time_series",
|
|
"interval": "",
|
|
"legendFormat": "{{uuid}}",
|
|
"refId": "A"
|
|
}
|
|
],
|
|
"thresholds": [],
|
|
"timeRegions": [],
|
|
"title": "Video Clock Speed",
|
|
"tooltip": {
|
|
"shared": true,
|
|
"sort": 0,
|
|
"value_type": "individual"
|
|
},
|
|
"type": "graph",
|
|
"xaxis": {
|
|
"mode": "time",
|
|
"show": true,
|
|
"values": []
|
|
},
|
|
"yaxes": [
|
|
{
|
|
"$$hashKey": "object:1642",
|
|
"format": "hertz",
|
|
"logBase": 1,
|
|
"min": "0",
|
|
"show": true
|
|
},
|
|
{
|
|
"$$hashKey": "object:1643",
|
|
"format": "short",
|
|
"logBase": 1,
|
|
"show": true
|
|
}
|
|
],
|
|
"yaxis": {
|
|
"align": false
|
|
}
|
|
},
|
|
{
|
|
"aliasColors": {},
|
|
"bars": false,
|
|
"dashLength": 10,
|
|
"dashes": false,
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "b00a2176-d15e-49e1-9a3c-e7cc3783ca10"
|
|
},
|
|
"description": "Current frequency of SM (Streaming Multiprocessor) clock.",
|
|
"fieldConfig": {
|
|
"defaults": {
|
|
"unit": "hertz"
|
|
},
|
|
"overrides": []
|
|
},
|
|
"fill": 1,
|
|
"fillGradient": 0,
|
|
"gridPos": {
|
|
"h": 5,
|
|
"w": 6,
|
|
"x": 12,
|
|
"y": 15
|
|
},
|
|
"hiddenSeries": false,
|
|
"id": 24,
|
|
"legend": {
|
|
"avg": false,
|
|
"current": false,
|
|
"max": false,
|
|
"min": false,
|
|
"show": false,
|
|
"total": false,
|
|
"values": false
|
|
},
|
|
"lines": true,
|
|
"linewidth": 1,
|
|
"nullPointMode": "null",
|
|
"options": {
|
|
"alertThreshold": true
|
|
},
|
|
"percentage": false,
|
|
"pluginVersion": "10.1.4",
|
|
"pointradius": 2,
|
|
"points": false,
|
|
"renderer": "flot",
|
|
"seriesOverrides": [],
|
|
"spaceLength": 10,
|
|
"stack": false,
|
|
"steppedLine": false,
|
|
"targets": [
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "b00a2176-d15e-49e1-9a3c-e7cc3783ca10"
|
|
},
|
|
"exemplar": true,
|
|
"expr": "nvidia_smi_clocks_current_sm_clock_hz{uuid=\"$gpu\"}",
|
|
"format": "time_series",
|
|
"interval": "",
|
|
"legendFormat": "{{uuid}}",
|
|
"refId": "A"
|
|
}
|
|
],
|
|
"thresholds": [],
|
|
"timeRegions": [],
|
|
"title": "SM Clock Speed",
|
|
"tooltip": {
|
|
"shared": true,
|
|
"sort": 0,
|
|
"value_type": "individual"
|
|
},
|
|
"type": "graph",
|
|
"xaxis": {
|
|
"mode": "time",
|
|
"show": true,
|
|
"values": []
|
|
},
|
|
"yaxes": [
|
|
{
|
|
"$$hashKey": "object:1642",
|
|
"format": "hertz",
|
|
"logBase": 1,
|
|
"min": "0",
|
|
"show": true
|
|
},
|
|
{
|
|
"$$hashKey": "object:1643",
|
|
"format": "short",
|
|
"logBase": 1,
|
|
"show": true
|
|
}
|
|
],
|
|
"yaxis": {
|
|
"align": false
|
|
}
|
|
},
|
|
{
|
|
"aliasColors": {},
|
|
"bars": false,
|
|
"dashLength": 10,
|
|
"dashes": false,
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "b00a2176-d15e-49e1-9a3c-e7cc3783ca10"
|
|
},
|
|
"description": "Current frequency of memory clock.",
|
|
"fieldConfig": {
|
|
"defaults": {
|
|
"unit": "hertz"
|
|
},
|
|
"overrides": []
|
|
},
|
|
"fill": 1,
|
|
"fillGradient": 0,
|
|
"gridPos": {
|
|
"h": 5,
|
|
"w": 6,
|
|
"x": 18,
|
|
"y": 15
|
|
},
|
|
"hiddenSeries": false,
|
|
"id": 18,
|
|
"legend": {
|
|
"avg": false,
|
|
"current": false,
|
|
"max": false,
|
|
"min": false,
|
|
"show": false,
|
|
"total": false,
|
|
"values": false
|
|
},
|
|
"lines": true,
|
|
"linewidth": 1,
|
|
"nullPointMode": "null",
|
|
"options": {
|
|
"alertThreshold": true
|
|
},
|
|
"percentage": false,
|
|
"pluginVersion": "10.1.4",
|
|
"pointradius": 2,
|
|
"points": false,
|
|
"renderer": "flot",
|
|
"seriesOverrides": [],
|
|
"spaceLength": 10,
|
|
"stack": false,
|
|
"steppedLine": false,
|
|
"targets": [
|
|
{
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "b00a2176-d15e-49e1-9a3c-e7cc3783ca10"
|
|
},
|
|
"exemplar": true,
|
|
"expr": "nvidia_smi_clocks_current_memory_clock_hz{uuid=\"$gpu\"}",
|
|
"format": "time_series",
|
|
"interval": "",
|
|
"legendFormat": "{{uuid}}",
|
|
"refId": "A"
|
|
}
|
|
],
|
|
"thresholds": [],
|
|
"timeRegions": [],
|
|
"title": "Memory Clock Speed",
|
|
"tooltip": {
|
|
"shared": true,
|
|
"sort": 0,
|
|
"value_type": "individual"
|
|
},
|
|
"type": "graph",
|
|
"xaxis": {
|
|
"mode": "time",
|
|
"show": true,
|
|
"values": []
|
|
},
|
|
"yaxes": [
|
|
{
|
|
"$$hashKey": "object:1642",
|
|
"format": "hertz",
|
|
"logBase": 1,
|
|
"min": "0",
|
|
"show": true
|
|
},
|
|
{
|
|
"$$hashKey": "object:1643",
|
|
"format": "short",
|
|
"logBase": 1,
|
|
"show": true
|
|
}
|
|
],
|
|
"yaxis": {
|
|
"align": false
|
|
}
|
|
}
|
|
],
|
|
"refresh": "5m",
|
|
"schemaVersion": 38,
|
|
"style": "dark",
|
|
"tags": [
|
|
"Zogg",
|
|
"Prometheus",
|
|
"NVidia"
|
|
],
|
|
"templating": {
|
|
"list": [
|
|
{
|
|
"current": {
|
|
"selected": true,
|
|
"text": "49c2588d-2f3e-db78-bca6-fcf74589911f",
|
|
"value": "49c2588d-2f3e-db78-bca6-fcf74589911f"
|
|
},
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "b00a2176-d15e-49e1-9a3c-e7cc3783ca10"
|
|
},
|
|
"definition": "label_values(nvidia_smi_index, uuid)",
|
|
"hide": 0,
|
|
"includeAll": false,
|
|
"label": "GPU",
|
|
"multi": false,
|
|
"name": "gpu",
|
|
"options": [],
|
|
"query": {
|
|
"query": "label_values(nvidia_smi_index, uuid)",
|
|
"refId": "StandardVariableQuery"
|
|
},
|
|
"refresh": 1,
|
|
"regex": "",
|
|
"skipUrlSync": false,
|
|
"sort": 1,
|
|
"tagValuesQuery": "",
|
|
"tagsQuery": "",
|
|
"type": "query",
|
|
"useTags": false
|
|
}
|
|
]
|
|
},
|
|
"time": {
|
|
"from": "now-1h",
|
|
"to": "now"
|
|
},
|
|
"timepicker": {},
|
|
"timezone": "",
|
|
"title": "Nvidia",
|
|
"uid": "ce3ec1ef-3d84-45ef-b608-aa1fde340639",
|
|
"version": 5,
|
|
"weekStart": ""
|
|
} |