{ "annotations": { "list": [ { "builtIn": 1, "datasource": { "type": "datasource", "uid": "grafana" }, "enable": true, "hide": true, "iconColor": "rgba(0, 211, 255, 1)", "name": "Annotations & Alerts", "type": "dashboard" } ] }, "description": "", "editable": true, "fiscalYearStartMonth": 0, "gnetId": 14574, "graphTooltip": 0, "id": 34, "links": [ { "asDropdown": true, "icon": "external link", "includeVars": false, "keepTime": false, "tags": [ "Zogg" ], "targetBlank": false, "title": "Dashboards", "tooltip": "", "type": "dashboards", "url": "" } ], "liveNow": false, "panels": [ { "datasource": { "type": "prometheus", "uid": "b00a2176-d15e-49e1-9a3c-e7cc3783ca10" }, "description": "The official product name of the GPU. This is an alphanumeric string. For all products.", "fieldConfig": { "defaults": { "color": { "mode": "thresholds" }, "decimals": 2, "mappings": [], "thresholds": { "mode": "absolute", "steps": [ { "color": "green", "value": null } ] }, "unit": "none" }, "overrides": [] }, "gridPos": { "h": 3, "w": 4, "x": 0, "y": 0 }, "id": 23, "options": { "colorMode": "value", "graphMode": "none", "justifyMode": "auto", "orientation": "auto", "reduceOptions": { "calcs": [ "last" ], "fields": "", "values": false }, "text": {}, "textMode": "name" }, "pluginVersion": "10.1.4", "targets": [ { "datasource": { "type": "prometheus", "uid": "b00a2176-d15e-49e1-9a3c-e7cc3783ca10" }, "exemplar": true, "expr": "nvidia_smi_gpu_info{uuid=\"$gpu\"}", "instant": true, "interval": "", "legendFormat": "{{name}}", "refId": "A" } ], "title": "Name", "type": "stat" }, { "datasource": { "type": "prometheus", "uid": "b00a2176-d15e-49e1-9a3c-e7cc3783ca10" }, "description": "The current performance state for the GPU. 
States range from P0 (maximum performance) to P12 (minimum performance).", "fieldConfig": { "defaults": { "color": { "mode": "thresholds" }, "decimals": 0, "mappings": [ { "options": { "": { "text": "" } }, "type": "value" } ], "thresholds": { "mode": "absolute", "steps": [ { "color": "green", "value": null } ] }, "unit": "prefix:P" }, "overrides": [] }, "gridPos": { "h": 3, "w": 2, "x": 4, "y": 0 }, "id": 22, "options": { "colorMode": "value", "graphMode": "none", "justifyMode": "auto", "orientation": "auto", "reduceOptions": { "calcs": [ "last" ], "fields": "", "values": false }, "text": {}, "textMode": "value" }, "pluginVersion": "10.1.4", "targets": [ { "datasource": { "type": "prometheus", "uid": "b00a2176-d15e-49e1-9a3c-e7cc3783ca10" }, "exemplar": true, "expr": "nvidia_smi_pstate{uuid=\"$gpu\"}", "interval": "", "legendFormat": "", "refId": "A" } ], "title": "P-State", "type": "stat" }, { "datasource": { "type": "prometheus", "uid": "b00a2176-d15e-49e1-9a3c-e7cc3783ca10" }, "description": "Percent of time over the past sample period during which one or more kernels was executing on the GPU.\nThe sample period may be between 1 second and 1/6 second depending on the product.", "fieldConfig": { "defaults": { "color": { "mode": "thresholds" }, "mappings": [], "max": 1, "min": 0, "thresholds": { "mode": "percentage", "steps": [ { "color": "green", "value": null }, { "color": "#EAB839", "value": 70 }, { "color": "red", "value": 90 } ] }, "unit": "percentunit" }, "overrides": [] }, "gridPos": { "h": 5, "w": 3, "x": 6, "y": 0 }, "id": 6, "options": { "orientation": "auto", "reduceOptions": { "calcs": [ "last" ], "fields": "", "values": false }, "showThresholdLabels": false, "showThresholdMarkers": true, "text": {} }, "pluginVersion": "10.1.4", "targets": [ { "datasource": { "type": "prometheus", "uid": "b00a2176-d15e-49e1-9a3c-e7cc3783ca10" }, "exemplar": true, "expr": "nvidia_smi_utilization_gpu_ratio{uuid=\"$gpu\"}", "interval": "", "legendFormat": "{{uuid}}", 
"refId": "A" } ], "title": "GPU Utilization %", "transformations": [], "type": "gauge" }, { "datasource": { "type": "prometheus", "uid": "b00a2176-d15e-49e1-9a3c-e7cc3783ca10" }, "description": "The last measured power draw for the entire board, in watts (only available if power management is supported; accurate to within +/- 5 watts), divided by the default power limit, in watts.", "fieldConfig": { "defaults": { "color": { "mode": "thresholds" }, "mappings": [], "max": 1, "min": 0, "thresholds": { "mode": "percentage", "steps": [ { "color": "green", "value": null }, { "color": "#EAB839", "value": 70 }, { "color": "red", "value": 90 } ] }, "unit": "percentunit" }, "overrides": [] }, "gridPos": { "h": 5, "w": 3, "x": 9, "y": 0 }, "id": 21, "options": { "orientation": "auto", "reduceOptions": { "calcs": [ "last" ], "fields": "", "values": false }, "showThresholdLabels": false, "showThresholdMarkers": true, "text": {} }, "pluginVersion": "10.1.4", "targets": [ { "datasource": { "type": "prometheus", "uid": "b00a2176-d15e-49e1-9a3c-e7cc3783ca10" }, "exemplar": true, "expr": "nvidia_smi_power_draw_watts{uuid=\"$gpu\"} / nvidia_smi_power_default_limit_watts{uuid=\"$gpu\"}", "interval": "", "legendFormat": "", "refId": "A" } ], "title": "Power Draw %", "type": "gauge" }, { "datasource": { "type": "prometheus", "uid": "b00a2176-d15e-49e1-9a3c-e7cc3783ca10" }, "description": "The fan speed value is the percent of the product's maximum noise tolerance fan speed that the device's fan is currently intended to run at. This value may exceed 100% in certain cases. Note: The reported speed is the intended fan speed. If the fan is physically blocked and unable to spin, this output will not match the actual fan speed. 
Many parts do not report fan speeds because they rely on cooling via fans in the surrounding enclosure.\n", "fieldConfig": { "defaults": { "color": { "mode": "thresholds" }, "mappings": [], "max": 1, "min": 0, "thresholds": { "mode": "percentage", "steps": [ { "color": "green", "value": null }, { "color": "#EAB839", "value": 70 }, { "color": "red", "value": 90 } ] }, "unit": "percentunit" }, "overrides": [] }, "gridPos": { "h": 5, "w": 3, "x": 12, "y": 0 }, "id": 4, "options": { "orientation": "auto", "reduceOptions": { "calcs": [ "last" ], "fields": "", "values": false }, "showThresholdLabels": false, "showThresholdMarkers": true, "text": {} }, "pluginVersion": "10.1.4", "targets": [ { "datasource": { "type": "prometheus", "uid": "b00a2176-d15e-49e1-9a3c-e7cc3783ca10" }, "exemplar": true, "expr": "nvidia_smi_fan_speed_ratio{uuid=\"$gpu\"}", "interval": "", "legendFormat": "", "refId": "A" } ], "title": "Fan Speed %", "type": "gauge" }, { "datasource": { "type": "prometheus", "uid": "b00a2176-d15e-49e1-9a3c-e7cc3783ca10" }, "description": "Core GPU temperature. 
Reported in degrees C.", "fieldConfig": { "defaults": { "color": { "mode": "thresholds" }, "mappings": [], "max": 100, "min": 0, "thresholds": { "mode": "absolute", "steps": [ { "color": "green", "value": null }, { "color": "#EAB839", "value": 70 }, { "color": "red", "value": 80 } ] }, "unit": "celsius" }, "overrides": [] }, "gridPos": { "h": 5, "w": 3, "x": 15, "y": 0 }, "id": 16, "options": { "orientation": "auto", "reduceOptions": { "calcs": [ "last" ], "fields": "", "values": false }, "showThresholdLabels": false, "showThresholdMarkers": true, "text": {} }, "pluginVersion": "10.1.4", "targets": [ { "datasource": { "type": "prometheus", "uid": "b00a2176-d15e-49e1-9a3c-e7cc3783ca10" }, "exemplar": true, "expr": "nvidia_smi_temperature_gpu{uuid=\"$gpu\"}", "interval": "", "legendFormat": "{{uuid}}", "refId": "A" } ], "title": "Temperature", "type": "gauge" }, { "aliasColors": {}, "bars": false, "dashLength": 10, "dashes": false, "datasource": { "type": "prometheus", "uid": "b00a2176-d15e-49e1-9a3c-e7cc3783ca10" }, "description": "Percent of time over the past sample period during which global (device) memory was being read or written.\nThe sample period may be between 1 second and 1/6 second depending on the product.", "fieldConfig": { "defaults": { "unit": "percentunit" }, "overrides": [] }, "fill": 1, "fillGradient": 0, "gridPos": { "h": 5, "w": 6, "x": 18, "y": 0 }, "hiddenSeries": false, "id": 11, "legend": { "avg": false, "current": false, "max": false, "min": false, "show": false, "total": false, "values": false }, "lines": true, "linewidth": 1, "nullPointMode": "null", "options": { "alertThreshold": true }, "percentage": false, "pluginVersion": "10.1.4", "pointradius": 2, "points": false, "renderer": "flot", "seriesOverrides": [], "spaceLength": 10, "stack": false, "steppedLine": false, "targets": [ { "datasource": { "type": "prometheus", "uid": "b00a2176-d15e-49e1-9a3c-e7cc3783ca10" }, "exemplar": true, "expr": "nvidia_smi_utilization_memory_ratio{uuid=\"$gpu\"}", 
"interval": "", "legendFormat": "{{uuid}}", "refId": "A" } ], "thresholds": [ { "$$hashKey": "object:1370", "colorMode": "warning", "fill": true, "line": true, "op": "gt", "value": 0.7, "yaxis": "left" }, { "$$hashKey": "object:1376", "colorMode": "critical", "fill": true, "line": true, "op": "gt", "value": 0.9, "yaxis": "left" } ], "timeRegions": [], "title": "Memory Utilization %", "tooltip": { "shared": true, "sort": 0, "value_type": "individual" }, "type": "graph", "xaxis": { "mode": "time", "show": true, "values": [] }, "yaxes": [ { "$$hashKey": "object:1352", "format": "percentunit", "label": "", "logBase": 1, "max": "1", "min": "0", "show": true }, { "$$hashKey": "object:1353", "format": "short", "logBase": 1, "show": true } ], "yaxis": { "align": false } }, { "datasource": { "type": "prometheus", "uid": "b00a2176-d15e-49e1-9a3c-e7cc3783ca10" }, "description": "The version of the installed NVIDIA display driver. This is an alphanumeric string.", "fieldConfig": { "defaults": { "color": { "mode": "thresholds" }, "decimals": 2, "mappings": [], "thresholds": { "mode": "absolute", "steps": [ { "color": "green", "value": null } ] }, "unit": "none" }, "overrides": [] }, "gridPos": { "h": 2, "w": 3, "x": 0, "y": 3 }, "id": 14, "options": { "colorMode": "value", "graphMode": "none", "justifyMode": "auto", "orientation": "auto", "reduceOptions": { "calcs": [ "last" ], "fields": "", "values": false }, "text": {}, "textMode": "name" }, "pluginVersion": "10.1.4", "targets": [ { "datasource": { "type": "prometheus", "uid": "b00a2176-d15e-49e1-9a3c-e7cc3783ca10" }, "exemplar": true, "expr": "nvidia_smi_gpu_info{uuid=\"$gpu\"}", "instant": true, "interval": "", "legendFormat": "{{driver_version}}", "refId": "A" } ], "title": "Driver Version", "type": "stat" }, { "datasource": { "type": "prometheus", "uid": "b00a2176-d15e-49e1-9a3c-e7cc3783ca10" }, "description": "The BIOS of the GPU board.", "fieldConfig": { "defaults": { "color": { "mode": "thresholds" }, "decimals": 2, 
"mappings": [], "thresholds": { "mode": "absolute", "steps": [ { "color": "green", "value": null } ] }, "unit": "none" }, "overrides": [] }, "gridPos": { "h": 2, "w": 3, "x": 3, "y": 3 }, "id": 34, "options": { "colorMode": "value", "graphMode": "none", "justifyMode": "auto", "orientation": "auto", "reduceOptions": { "calcs": [ "last" ], "fields": "", "values": false }, "text": {}, "textMode": "name" }, "pluginVersion": "10.1.4", "targets": [ { "datasource": { "type": "prometheus", "uid": "b00a2176-d15e-49e1-9a3c-e7cc3783ca10" }, "exemplar": true, "expr": "nvidia_smi_gpu_info{uuid=\"$gpu\"}", "instant": true, "interval": "", "legendFormat": "{{vbios_version}}", "refId": "A" } ], "title": "Vbios Version", "type": "stat" }, { "datasource": { "type": "prometheus", "uid": "b00a2176-d15e-49e1-9a3c-e7cc3783ca10" }, "description": "Information about factors that are reducing the frequency of clocks. If all throttle reasons are returned as \"Not Active\" it means that clocks are running as high as possible.", "fieldConfig": { "defaults": { "color": { "mode": "thresholds" }, "decimals": 0, "mappings": [ { "options": { "0": { "text": "Not Active" }, "1": { "text": "Active" } }, "type": "value" } ], "thresholds": { "mode": "absolute", "steps": [ { "color": "green", "value": null } ] }, "unit": "none" }, "overrides": [] }, "gridPos": { "h": 5, "w": 6, "x": 0, "y": 5 }, "id": 32, "links": [], "options": { "displayMode": "gradient", "minVizHeight": 10, "minVizWidth": 0, "orientation": "horizontal", "reduceOptions": { "calcs": [ "last" ], "fields": "", "values": false }, "showUnfilled": true, "text": {}, "valueMode": "color" }, "pluginVersion": "10.1.4", "targets": [ { "datasource": { "type": "prometheus", "uid": "b00a2176-d15e-49e1-9a3c-e7cc3783ca10" }, "exemplar": true, "expr": "nvidia_smi_clocks_throttle_reasons_gpu_idle{uuid=\"$gpu\"}", "instant": false, "interval": "", "legendFormat": "Idle", "refId": "A" }, { "datasource": { "type": "prometheus", "uid": 
"b00a2176-d15e-49e1-9a3c-e7cc3783ca10" }, "exemplar": true, "expr": "nvidia_smi_clocks_throttle_reasons_hw_thermal_slowdown{uuid=\"$gpu\"}", "hide": false, "interval": "", "legendFormat": "HW Thermal Slowdown", "refId": "B" }, { "datasource": { "type": "prometheus", "uid": "b00a2176-d15e-49e1-9a3c-e7cc3783ca10" }, "exemplar": true, "expr": "nvidia_smi_clocks_throttle_reasons_sw_power_cap{uuid=\"$gpu\"}", "hide": false, "interval": "", "legendFormat": "SW Power Cap", "refId": "C" }, { "datasource": { "type": "prometheus", "uid": "b00a2176-d15e-49e1-9a3c-e7cc3783ca10" }, "exemplar": true, "expr": "nvidia_smi_clocks_throttle_reasons_applications_clocks_setting{uuid=\"$gpu\"}", "hide": false, "interval": "", "legendFormat": "App Clocks Setting", "refId": "D" }, { "datasource": { "type": "prometheus", "uid": "b00a2176-d15e-49e1-9a3c-e7cc3783ca10" }, "exemplar": true, "expr": "nvidia_smi_clocks_throttle_reasons_hw_power_brake_slowdown{uuid=\"$gpu\"}", "hide": false, "interval": "", "legendFormat": "HW Power Brake", "refId": "E" }, { "datasource": { "type": "prometheus", "uid": "b00a2176-d15e-49e1-9a3c-e7cc3783ca10" }, "exemplar": true, "expr": "nvidia_smi_clocks_throttle_reasons_sw_thermal_slowdown{uuid=\"$gpu\"}", "hide": false, "interval": "", "legendFormat": "SW Thermal Slowdown", "refId": "F" }, { "datasource": { "type": "prometheus", "uid": "b00a2176-d15e-49e1-9a3c-e7cc3783ca10" }, "exemplar": true, "expr": "nvidia_smi_clocks_throttle_reasons_sync_boost{uuid=\"$gpu\"}", "hide": false, "interval": "", "legendFormat": "Sync Boost", "refId": "G" } ], "title": "Throttle Reasons", "type": "bargauge" }, { "datasource": { "type": "prometheus", "uid": "b00a2176-d15e-49e1-9a3c-e7cc3783ca10" }, "description": "Current frequency of graphics (shader) clock\n/\nMaximum frequency of graphics (shader) clock.\n", "fieldConfig": { "defaults": { "color": { "mode": "thresholds" }, "mappings": [], "max": 1, "min": 0, "thresholds": { "mode": "percentage", "steps": [ { "color": "green", 
"value": null }, { "color": "#EAB839", "value": 70 }, { "color": "red", "value": 90 } ] }, "unit": "percentunit" }, "overrides": [] }, "gridPos": { "h": 5, "w": 3, "x": 6, "y": 5 }, "id": 20, "options": { "orientation": "auto", "reduceOptions": { "calcs": [ "last" ], "fields": "", "values": false }, "showThresholdLabels": false, "showThresholdMarkers": true, "text": {} }, "pluginVersion": "10.1.4", "targets": [ { "datasource": { "type": "prometheus", "uid": "b00a2176-d15e-49e1-9a3c-e7cc3783ca10" }, "exemplar": true, "expr": "nvidia_smi_clocks_current_graphics_clock_hz{uuid=\"$gpu\"} / nvidia_smi_clocks_max_graphics_clock_hz{uuid=\"$gpu\"}", "interval": "", "legendFormat": "", "refId": "A" } ], "title": "GPU Clock Speed %", "type": "gauge" }, { "datasource": { "type": "prometheus", "uid": "b00a2176-d15e-49e1-9a3c-e7cc3783ca10" }, "description": "Current frequency of memory clock / Maximum frequency of memory clock", "fieldConfig": { "defaults": { "color": { "mode": "thresholds" }, "mappings": [], "max": 1, "min": 0, "thresholds": { "mode": "percentage", "steps": [ { "color": "green", "value": null }, { "color": "#EAB839", "value": 70 }, { "color": "red", "value": 90 } ] }, "unit": "percentunit" }, "overrides": [] }, "gridPos": { "h": 5, "w": 3, "x": 9, "y": 5 }, "id": 33, "options": { "orientation": "auto", "reduceOptions": { "calcs": [ "last" ], "fields": "", "values": false }, "showThresholdLabels": false, "showThresholdMarkers": true, "text": {} }, "pluginVersion": "10.1.4", "targets": [ { "datasource": { "type": "prometheus", "uid": "b00a2176-d15e-49e1-9a3c-e7cc3783ca10" }, "exemplar": true, "expr": "nvidia_smi_clocks_current_memory_clock_hz{uuid=\"$gpu\"} / nvidia_smi_clocks_max_memory_clock_hz{uuid=\"$gpu\"}", "interval": "", "legendFormat": "", "refId": "A" } ], "title": "Memory Clock Speed %", "type": "gauge" }, { "datasource": { "type": "prometheus", "uid": "b00a2176-d15e-49e1-9a3c-e7cc3783ca10" }, "description": "Total memory allocated by active contexts / 
Total installed GPU memory.", "fieldConfig": { "defaults": { "color": { "mode": "thresholds" }, "mappings": [], "max": 1, "min": 0, "thresholds": { "mode": "percentage", "steps": [ { "color": "green", "value": null }, { "color": "#EAB839", "value": 70 }, { "color": "red", "value": 90 } ] }, "unit": "percentunit" }, "overrides": [] }, "gridPos": { "h": 5, "w": 3, "x": 12, "y": 5 }, "id": 25, "options": { "orientation": "auto", "reduceOptions": { "calcs": [ "last" ], "fields": "", "values": false }, "showThresholdLabels": false, "showThresholdMarkers": true, "text": {} }, "pluginVersion": "10.1.4", "targets": [ { "datasource": { "type": "prometheus", "uid": "b00a2176-d15e-49e1-9a3c-e7cc3783ca10" }, "exemplar": true, "expr": "nvidia_smi_memory_used_bytes{uuid=\"$gpu\"} / nvidia_smi_memory_total_bytes{uuid=\"$gpu\"}", "interval": "", "legendFormat": "", "refId": "A" } ], "title": "Memory Allocation %", "type": "gauge" }, { "datasource": { "type": "prometheus", "uid": "b00a2176-d15e-49e1-9a3c-e7cc3783ca10" }, "description": "Percent of time over the past sample period during which global (device) memory was being read or written.\nThe sample period may be between 1 second and 1/6 second depending on the product.", "fieldConfig": { "defaults": { "color": { "mode": "thresholds" }, "mappings": [], "max": 1, "min": 0, "thresholds": { "mode": "percentage", "steps": [ { "color": "green", "value": null }, { "color": "#EAB839", "value": 70 }, { "color": "red", "value": 90 } ] }, "unit": "percentunit" }, "overrides": [] }, "gridPos": { "h": 5, "w": 3, "x": 15, "y": 5 }, "id": 7, "options": { "orientation": "auto", "reduceOptions": { "calcs": [ "last" ], "fields": "", "values": false }, "showThresholdLabels": false, "showThresholdMarkers": true, "text": {} }, "pluginVersion": "10.1.4", "targets": [ { "datasource": { "type": "prometheus", "uid": "b00a2176-d15e-49e1-9a3c-e7cc3783ca10" }, "exemplar": true, "expr": "nvidia_smi_utilization_memory_ratio{uuid=\"$gpu\"}", "interval": "", 
"legendFormat": "", "refId": "A" } ], "title": "Memory Utilization %", "type": "gauge" }, { "aliasColors": {}, "bars": false, "dashLength": 10, "dashes": false, "datasource": { "type": "prometheus", "uid": "b00a2176-d15e-49e1-9a3c-e7cc3783ca10" }, "description": "Percent of time over the past sample period during which one or more kernels was executing on the GPU.\nThe sample period may be between 1 second and 1/6 second depending on the product.", "fieldConfig": { "defaults": { "unit": "percentunit" }, "overrides": [] }, "fill": 1, "fillGradient": 0, "gridPos": { "h": 5, "w": 6, "x": 18, "y": 5 }, "hiddenSeries": false, "id": 10, "legend": { "avg": false, "current": false, "max": false, "min": false, "show": false, "total": false, "values": false }, "lines": true, "linewidth": 1, "nullPointMode": "null", "options": { "alertThreshold": true }, "percentage": false, "pluginVersion": "10.1.4", "pointradius": 2, "points": false, "renderer": "flot", "seriesOverrides": [], "spaceLength": 10, "stack": false, "steppedLine": false, "targets": [ { "datasource": { "type": "prometheus", "uid": "b00a2176-d15e-49e1-9a3c-e7cc3783ca10" }, "exemplar": true, "expr": "nvidia_smi_utilization_gpu_ratio{uuid=\"$gpu\"}", "interval": "", "legendFormat": "", "refId": "A" } ], "thresholds": [ { "$$hashKey": "object:1370", "colorMode": "warning", "fill": true, "line": true, "op": "gt", "value": 0.7, "yaxis": "left" }, { "$$hashKey": "object:1376", "colorMode": "critical", "fill": true, "line": true, "op": "gt", "value": 0.9, "yaxis": "left" } ], "timeRegions": [], "title": "GPU Utilization %", "tooltip": { "shared": true, "sort": 0, "value_type": "individual" }, "type": "graph", "xaxis": { "mode": "time", "show": true, "values": [] }, "yaxes": [ { "$$hashKey": "object:1352", "format": "percentunit", "label": "", "logBase": 1, "max": "1", "min": "0", "show": true }, { "$$hashKey": "object:1353", "format": "short", "logBase": 1, "show": true } ], "yaxis": { "align": false } }, { "aliasColors": 
{}, "bars": false, "dashLength": 10, "dashes": false, "datasource": { "type": "prometheus", "uid": "b00a2176-d15e-49e1-9a3c-e7cc3783ca10" }, "description": "Total memory allocated by active contexts.", "fieldConfig": { "defaults": { "unit": "decbytes" }, "overrides": [] }, "fill": 1, "fillGradient": 0, "gridPos": { "h": 5, "w": 6, "x": 0, "y": 10 }, "hiddenSeries": false, "id": 17, "legend": { "avg": false, "current": false, "max": false, "min": false, "show": false, "total": false, "values": false }, "lines": true, "linewidth": 1, "nullPointMode": "null", "options": { "alertThreshold": true }, "percentage": false, "pluginVersion": "10.1.4", "pointradius": 2, "points": false, "renderer": "flot", "seriesOverrides": [], "spaceLength": 10, "stack": false, "steppedLine": false, "targets": [ { "datasource": { "type": "prometheus", "uid": "b00a2176-d15e-49e1-9a3c-e7cc3783ca10" }, "exemplar": true, "expr": "nvidia_smi_memory_used_bytes{uuid=\"$gpu\"}", "interval": "", "legendFormat": "{{uuid}}", "refId": "A" } ], "thresholds": [], "timeRegions": [], "title": "Memory Allocation", "tooltip": { "shared": true, "sort": 0, "value_type": "individual" }, "type": "graph", "xaxis": { "mode": "time", "show": true, "values": [] }, "yaxes": [ { "$$hashKey": "object:1352", "format": "decbytes", "label": "", "logBase": 1, "min": "0", "show": true }, { "$$hashKey": "object:1353", "format": "short", "logBase": 1, "show": true } ], "yaxis": { "align": false } }, { "aliasColors": {}, "bars": false, "dashLength": 10, "dashes": false, "datasource": { "type": "prometheus", "uid": "b00a2176-d15e-49e1-9a3c-e7cc3783ca10" }, "description": "Core GPU temperature. 
Reported in degrees C.", "fieldConfig": { "defaults": { "unit": "celsius" }, "overrides": [] }, "fill": 1, "fillGradient": 0, "gridPos": { "h": 5, "w": 6, "x": 6, "y": 10 }, "hiddenSeries": false, "id": 15, "legend": { "avg": false, "current": false, "max": false, "min": false, "show": false, "total": false, "values": false }, "lines": true, "linewidth": 1, "nullPointMode": "null", "options": { "alertThreshold": true }, "percentage": false, "pluginVersion": "10.1.4", "pointradius": 2, "points": false, "renderer": "flot", "seriesOverrides": [], "spaceLength": 10, "stack": false, "steppedLine": false, "targets": [ { "datasource": { "type": "prometheus", "uid": "b00a2176-d15e-49e1-9a3c-e7cc3783ca10" }, "exemplar": true, "expr": "nvidia_smi_temperature_gpu{uuid=\"$gpu\"}", "interval": "", "legendFormat": "{{uuid}}", "refId": "A" } ], "thresholds": [ { "$$hashKey": "object:1805", "colorMode": "warning", "fill": true, "line": true, "op": "gt", "value": 70, "yaxis": "left" }, { "$$hashKey": "object:1811", "colorMode": "critical", "fill": true, "line": true, "op": "gt", "value": 80, "yaxis": "left" } ], "timeRegions": [], "title": "Temperature", "tooltip": { "shared": true, "sort": 0, "value_type": "individual" }, "type": "graph", "xaxis": { "mode": "time", "show": true, "values": [] }, "yaxes": [ { "$$hashKey": "object:1761", "format": "celsius", "label": "", "logBase": 1, "max": "100", "min": "0", "show": true }, { "$$hashKey": "object:1762", "format": "short", "logBase": 1, "show": true } ], "yaxis": { "align": false } }, { "aliasColors": {}, "bars": false, "dashLength": 10, "dashes": false, "datasource": { "type": "prometheus", "uid": "b00a2176-d15e-49e1-9a3c-e7cc3783ca10" }, "description": "The last measured power draw for the entire board, in watts. Only available if power management is supported. 
This reading is accurate to within +/- 5 watts", "fieldConfig": { "defaults": { "unit": "watt" }, "overrides": [] }, "fill": 1, "fillGradient": 0, "gridPos": { "h": 5, "w": 6, "x": 12, "y": 10 }, "hiddenSeries": false, "id": 8, "legend": { "avg": false, "current": false, "max": false, "min": false, "show": false, "total": false, "values": false }, "lines": true, "linewidth": 1, "nullPointMode": "null", "options": { "alertThreshold": true }, "percentage": false, "pluginVersion": "10.1.4", "pointradius": 2, "points": false, "renderer": "flot", "seriesOverrides": [], "spaceLength": 10, "stack": false, "steppedLine": false, "targets": [ { "datasource": { "type": "prometheus", "uid": "b00a2176-d15e-49e1-9a3c-e7cc3783ca10" }, "exemplar": true, "expr": "nvidia_smi_power_draw_watts{uuid=\"$gpu\"}", "interval": "", "legendFormat": "{{uuid}}", "refId": "A" } ], "thresholds": [], "timeRegions": [], "title": "Power Draw", "tooltip": { "shared": true, "sort": 0, "value_type": "individual" }, "type": "graph", "xaxis": { "mode": "time", "show": true, "values": [] }, "yaxes": [ { "$$hashKey": "object:658", "format": "watt", "logBase": 1, "min": "0", "show": true }, { "$$hashKey": "object:659", "format": "short", "logBase": 1, "show": true } ], "yaxis": { "align": false } }, { "aliasColors": {}, "bars": false, "dashLength": 10, "dashes": false, "datasource": { "type": "prometheus", "uid": "b00a2176-d15e-49e1-9a3c-e7cc3783ca10" }, "description": "The fan speed value is the percent of the product's maximum noise tolerance fan speed that the device's fan is currently intended to run at. This value may exceed 100% in certain cases. Note: The reported speed is the intended fan speed. If the fan is physically blocked and unable to spin, this output will not match the actual fan speed. 
Many parts do not report fan speeds because they rely on cooling via fans in the surrounding enclosure.\n", "fieldConfig": { "defaults": { "unit": "percentunit" }, "overrides": [] }, "fill": 1, "fillGradient": 0, "gridPos": { "h": 5, "w": 6, "x": 18, "y": 10 }, "hiddenSeries": false, "id": 9, "legend": { "avg": false, "current": false, "max": false, "min": false, "show": false, "total": false, "values": false }, "lines": true, "linewidth": 1, "nullPointMode": "null", "options": { "alertThreshold": true }, "percentage": false, "pluginVersion": "10.1.4", "pointradius": 2, "points": false, "renderer": "flot", "seriesOverrides": [], "spaceLength": 10, "stack": false, "steppedLine": false, "targets": [ { "datasource": { "type": "prometheus", "uid": "b00a2176-d15e-49e1-9a3c-e7cc3783ca10" }, "exemplar": true, "expr": "nvidia_smi_fan_speed_ratio{uuid=\"$gpu\"}", "interval": "", "legendFormat": "{{uuid}}", "refId": "A" } ], "thresholds": [ { "$$hashKey": "object:1168", "colorMode": "critical", "fill": true, "line": true, "op": "gt", "value": 0.9, "yaxis": "left" }, { "$$hashKey": "object:1174", "colorMode": "warning", "fill": true, "line": true, "op": "gt", "value": 0.7, "yaxis": "left" } ], "timeRegions": [], "title": "Fan Speed %", "tooltip": { "shared": true, "sort": 0, "value_type": "individual" }, "type": "graph", "xaxis": { "mode": "time", "show": true, "values": [] }, "yaxes": [ { "$$hashKey": "object:1275", "format": "percentunit", "logBase": 1, "max": "1", "min": "0", "show": true }, { "$$hashKey": "object:1276", "format": "short", "logBase": 1, "show": true } ], "yaxis": { "align": false } }, { "aliasColors": {}, "bars": false, "dashLength": 10, "dashes": false, "datasource": { "type": "prometheus", "uid": "b00a2176-d15e-49e1-9a3c-e7cc3783ca10" }, "description": "Current frequency of graphics (shader) clock.", "fieldConfig": { "defaults": { "unit": "hertz" }, "overrides": [] }, "fill": 1, "fillGradient": 0, "gridPos": { "h": 5, "w": 6, "x": 0, "y": 15 }, 
"hiddenSeries": false, "id": 12, "legend": { "avg": false, "current": false, "max": false, "min": false, "show": false, "total": false, "values": false }, "lines": true, "linewidth": 1, "nullPointMode": "null", "options": { "alertThreshold": true }, "percentage": false, "pluginVersion": "10.1.4", "pointradius": 2, "points": false, "renderer": "flot", "seriesOverrides": [], "spaceLength": 10, "stack": false, "steppedLine": false, "targets": [ { "datasource": { "type": "prometheus", "uid": "b00a2176-d15e-49e1-9a3c-e7cc3783ca10" }, "exemplar": true, "expr": "nvidia_smi_clocks_current_graphics_clock_hz{uuid=\"$gpu\"}", "format": "time_series", "interval": "", "legendFormat": "{{uuid}}", "refId": "A" } ], "thresholds": [], "timeRegions": [], "title": "Graphics Clock Speed", "tooltip": { "shared": true, "sort": 0, "value_type": "individual" }, "type": "graph", "xaxis": { "mode": "time", "show": true, "values": [] }, "yaxes": [ { "$$hashKey": "object:1642", "format": "hertz", "logBase": 1, "min": "0", "show": true }, { "$$hashKey": "object:1643", "format": "short", "logBase": 1, "show": true } ], "yaxis": { "align": false } }, { "aliasColors": {}, "bars": false, "dashLength": 10, "dashes": false, "datasource": { "type": "prometheus", "uid": "b00a2176-d15e-49e1-9a3c-e7cc3783ca10" }, "description": "Current frequency of video encoder/decoder clock.", "fieldConfig": { "defaults": { "unit": "hertz" }, "overrides": [] }, "fill": 1, "fillGradient": 0, "gridPos": { "h": 5, "w": 6, "x": 6, "y": 15 }, "hiddenSeries": false, "id": 19, "legend": { "avg": false, "current": false, "max": false, "min": false, "show": false, "total": false, "values": false }, "lines": true, "linewidth": 1, "nullPointMode": "null", "options": { "alertThreshold": true }, "percentage": false, "pluginVersion": "10.1.4", "pointradius": 2, "points": false, "renderer": "flot", "seriesOverrides": [], "spaceLength": 10, "stack": false, "steppedLine": false, "targets": [ { "datasource": { "type": "prometheus", 
"uid": "b00a2176-d15e-49e1-9a3c-e7cc3783ca10" }, "exemplar": true, "expr": "nvidia_smi_clocks_current_video_clock_hz{uuid=\"$gpu\"}", "format": "time_series", "interval": "", "legendFormat": "{{uuid}}", "refId": "A" } ], "thresholds": [], "timeRegions": [], "title": "Video Clock Speed", "tooltip": { "shared": true, "sort": 0, "value_type": "individual" }, "type": "graph", "xaxis": { "mode": "time", "show": true, "values": [] }, "yaxes": [ { "$$hashKey": "object:1642", "format": "hertz", "logBase": 1, "min": "0", "show": true }, { "$$hashKey": "object:1643", "format": "short", "logBase": 1, "show": true } ], "yaxis": { "align": false } }, { "aliasColors": {}, "bars": false, "dashLength": 10, "dashes": false, "datasource": { "type": "prometheus", "uid": "b00a2176-d15e-49e1-9a3c-e7cc3783ca10" }, "description": "Current frequency of SM (Streaming Multiprocessor) clock.", "fieldConfig": { "defaults": { "unit": "hertz" }, "overrides": [] }, "fill": 1, "fillGradient": 0, "gridPos": { "h": 5, "w": 6, "x": 12, "y": 15 }, "hiddenSeries": false, "id": 24, "legend": { "avg": false, "current": false, "max": false, "min": false, "show": false, "total": false, "values": false }, "lines": true, "linewidth": 1, "nullPointMode": "null", "options": { "alertThreshold": true }, "percentage": false, "pluginVersion": "10.1.4", "pointradius": 2, "points": false, "renderer": "flot", "seriesOverrides": [], "spaceLength": 10, "stack": false, "steppedLine": false, "targets": [ { "datasource": { "type": "prometheus", "uid": "b00a2176-d15e-49e1-9a3c-e7cc3783ca10" }, "exemplar": true, "expr": "nvidia_smi_clocks_current_sm_clock_hz{uuid=\"$gpu\"}", "format": "time_series", "interval": "", "legendFormat": "{{uuid}}", "refId": "A" } ], "thresholds": [], "timeRegions": [], "title": "SM Clock Speed", "tooltip": { "shared": true, "sort": 0, "value_type": "individual" }, "type": "graph", "xaxis": { "mode": "time", "show": true, "values": [] }, "yaxes": [ { "$$hashKey": "object:1642", "format": "hertz", 
"logBase": 1, "min": "0", "show": true }, { "$$hashKey": "object:1643", "format": "short", "logBase": 1, "show": true } ], "yaxis": { "align": false } }, { "aliasColors": {}, "bars": false, "dashLength": 10, "dashes": false, "datasource": { "type": "prometheus", "uid": "b00a2176-d15e-49e1-9a3c-e7cc3783ca10" }, "description": "Current frequency of memory clock.", "fieldConfig": { "defaults": { "unit": "hertz" }, "overrides": [] }, "fill": 1, "fillGradient": 0, "gridPos": { "h": 5, "w": 6, "x": 18, "y": 15 }, "hiddenSeries": false, "id": 18, "legend": { "avg": false, "current": false, "max": false, "min": false, "show": false, "total": false, "values": false }, "lines": true, "linewidth": 1, "nullPointMode": "null", "options": { "alertThreshold": true }, "percentage": false, "pluginVersion": "10.1.4", "pointradius": 2, "points": false, "renderer": "flot", "seriesOverrides": [], "spaceLength": 10, "stack": false, "steppedLine": false, "targets": [ { "datasource": { "type": "prometheus", "uid": "b00a2176-d15e-49e1-9a3c-e7cc3783ca10" }, "exemplar": true, "expr": "nvidia_smi_clocks_current_memory_clock_hz{uuid=\"$gpu\"}", "format": "time_series", "interval": "", "legendFormat": "{{uuid}}", "refId": "A" } ], "thresholds": [], "timeRegions": [], "title": "Memory Clock Speed", "tooltip": { "shared": true, "sort": 0, "value_type": "individual" }, "type": "graph", "xaxis": { "mode": "time", "show": true, "values": [] }, "yaxes": [ { "$$hashKey": "object:1642", "format": "hertz", "logBase": 1, "min": "0", "show": true }, { "$$hashKey": "object:1643", "format": "short", "logBase": 1, "show": true } ], "yaxis": { "align": false } } ], "refresh": "5m", "schemaVersion": 38, "style": "dark", "tags": [ "Zogg", "Prometheus", "NVidia" ], "templating": { "list": [ { "current": { "selected": true, "text": "49c2588d-2f3e-db78-bca6-fcf74589911f", "value": "49c2588d-2f3e-db78-bca6-fcf74589911f" }, "datasource": { "type": "prometheus", "uid": "b00a2176-d15e-49e1-9a3c-e7cc3783ca10" }, 
"definition": "label_values(nvidia_smi_index, uuid)", "hide": 0, "includeAll": false, "label": "GPU", "multi": false, "name": "gpu", "options": [], "query": { "query": "label_values(nvidia_smi_index, uuid)", "refId": "StandardVariableQuery" }, "refresh": 1, "regex": "", "skipUrlSync": false, "sort": 1, "tagValuesQuery": "", "tagsQuery": "", "type": "query", "useTags": false } ] }, "time": { "from": "now-1h", "to": "now" }, "timepicker": {}, "timezone": "", "title": "Nvidia", "uid": "ce3ec1ef-3d84-45ef-b608-aa1fde340639", "version": 5, "weekStart": "" }