{
  "annotations": {
    "list": [
      {
        "builtIn": 1,
        "datasource": {
          "type": "grafana",
          "uid": "-- Grafana --"
        },
        "enable": true,
        "hide": true,
        "iconColor": "rgba(0, 211, 255, 1)",
        "name": "Annotations & Alerts",
        "type": "dashboard"
      }
    ]
  },
  "editable": true,
  "fiscalYearStartMonth": 0,
  "graphTooltip": 0,
  "id": null,
  "liveNow": false,
  "panels": [
    {
      "type": "row",
      "title": "Latency",
      "gridPos": {
        "h": 1,
        "w": 24,
        "x": 0,
        "y": 0
      },
      "id": 100,
      "collapsed": false,
      "panels": []
    },
    {
      "type": "heatmap",
      "title": "Inference duration heatmap",
      "id": 1,
      "datasource": {
        "type": "prometheus",
        "uid": "${DS_PROMETHEUS}"
      },
      "gridPos": {
        "h": 9,
        "w": 18,
        "x": 0,
        "y": 1
      },
      "fieldConfig": {
        "defaults": {
          "unit": "s"
        },
        "overrides": []
      },
      "options": {
        "calculate": false,
        "cellGap": 1,
        "color": {
          "mode": "scheme",
          "scheme": "Oranges",
          "steps": 64,
          "exponent": 0.5
        },
        "yAxis": {
          "unit": "s"
        },
        "tooltip": {
          "show": true
        }
      },
      "targets": [
        {
          "refId": "A",
          "datasource": {
            "type": "prometheus",
            "uid": "${DS_PROMETHEUS}"
          },
          "expr": "sum by (le) (rate(inference_duration_seconds_bucket[5m]))",
          "format": "heatmap",
          "legendFormat": "{{le}}"
        }
      ]
    },
    {
      "type": "stat",
      "title": "Current p50 / p95",
      "id": 2,
      "datasource": {
        "type": "prometheus",
        "uid": "${DS_PROMETHEUS}"
      },
      "gridPos": {
        "h": 9,
        "w": 6,
        "x": 18,
        "y": 1
      },
      "fieldConfig": {
        "defaults": {
          "unit": "s"
        },
        "overrides": []
      },
      "options": {
        "reduceOptions": {
          "calcs": [
            "lastNotNull"
          ],
          "fields": "",
          "values": false
        },
        "orientation": "vertical",
        "textMode": "auto",
        "colorMode": "value",
        "graphMode": "area"
      },
      "targets": [
        {
          "refId": "A",
          "datasource": {
            "type": "prometheus",
            "uid": "${DS_PROMETHEUS}"
          },
          "expr": "histogram_quantile(0.50, sum by (le) (rate(inference_duration_seconds_bucket[5m])))",
          "legendFormat": "p50"
        },
        {
          "refId": "B",
          "datasource": {
            "type": "prometheus",
            "uid": "${DS_PROMETHEUS}"
          },
          "expr": "histogram_quantile(0.95, sum by (le) (rate(inference_duration_seconds_bucket[5m])))",
          "legendFormat": "p95"
        }
      ]
    },
    {
      "type": "row",
      "title": "Throughput",
      "gridPos": {
        "h": 1,
        "w": 24,
        "x": 0,
        "y": 10
      },
      "id": 101,
      "collapsed": false,
      "panels": []
    },
    {
      "type": "timeseries",
      "title": "Tokens generated per second",
      "id": 3,
      "datasource": {
        "type": "prometheus",
        "uid": "${DS_PROMETHEUS}"
      },
      "gridPos": {
        "h": 8,
        "w": 24,
        "x": 0,
        "y": 11
      },
      "fieldConfig": {
        "defaults": {
          "unit": "short"
        },
        "overrides": []
      },
      "options": {
        "legend": {
          "displayMode": "list",
          "placement": "bottom"
        },
        "tooltip": {
          "mode": "multi"
        }
      },
      "targets": [
        {
          "refId": "A",
          "datasource": {
            "type": "prometheus",
            "uid": "${DS_PROMETHEUS}"
          },
          "expr": "sum(rate(tokens_generated_total[1m]))",
          "legendFormat": "tokens/s"
        }
      ]
    },
    {
      "type": "row",
      "title": "Workers",
      "gridPos": {
        "h": 1,
        "w": 24,
        "x": 0,
        "y": 19
      },
      "id": 102,
      "collapsed": false,
      "panels": []
    },
    {
      "type": "gauge",
      "title": "Worker state",
      "id": 4,
      "datasource": {
        "type": "prometheus",
        "uid": "${DS_PROMETHEUS}"
      },
      "gridPos": {
        "h": 8,
        "w": 12,
        "x": 0,
        "y": 20
      },
      "fieldConfig": {
        "defaults": {
          "unit": "short",
          "min": 0
        },
        "overrides": []
      },
      "options": {
        "reduceOptions": {
          "calcs": [
            "lastNotNull"
          ],
          "fields": "",
          "values": false
        },
        "orientation": "auto",
        "showThresholdLabels": false,
        "showThresholdMarkers": true
      },
      "targets": [
        {
          "refId": "A",
          "datasource": {
            "type": "prometheus",
            "uid": "${DS_PROMETHEUS}"
          },
          "expr": "sum(workers_total)",
          "legendFormat": "total"
        },
        {
          "refId": "B",
          "datasource": {
            "type": "prometheus",
            "uid": "${DS_PROMETHEUS}"
          },
          "expr": "sum(workers_busy)",
          "legendFormat": "busy"
        },
        {
          "refId": "C",
          "datasource": {
            "type": "prometheus",
            "uid": "${DS_PROMETHEUS}"
          },
          "expr": "sum(workers_available)",
          "legendFormat": "available"
        }
      ]
    },
    {
      "type": "timeseries",
      "title": "Worker utilization over time (busy / total)",
      "id": 5,
      "datasource": {
        "type": "prometheus",
        "uid": "${DS_PROMETHEUS}"
      },
      "gridPos": {
        "h": 8,
        "w": 12,
        "x": 12,
        "y": 20
      },
      "fieldConfig": {
        "defaults": {
          "unit": "percentunit",
          "min": 0,
          "max": 1
        },
        "overrides": []
      },
      "options": {
        "legend": {
          "displayMode": "list",
          "placement": "bottom"
        },
        "tooltip": {
          "mode": "multi"
        }
      },
      "targets": [
        {
          "refId": "A",
          "datasource": {
            "type": "prometheus",
            "uid": "${DS_PROMETHEUS}"
          },
          "expr": "sum(workers_busy) / clamp_min(sum(workers_total), 1)",
          "legendFormat": "utilization"
        }
      ]
    },
    {
      "type": "row",
      "title": "Active",
      "gridPos": {
        "h": 1,
        "w": 24,
        "x": 0,
        "y": 28
      },
      "id": 103,
      "collapsed": false,
      "panels": []
    },
    {
      "type": "stat",
      "title": "Active generations",
      "id": 6,
      "datasource": {
        "type": "prometheus",
        "uid": "${DS_PROMETHEUS}"
      },
      "gridPos": {
        "h": 6,
        "w": 24,
        "x": 0,
        "y": 29
      },
      "fieldConfig": {
        "defaults": {
          "unit": "short"
        },
        "overrides": []
      },
      "options": {
        "reduceOptions": {
          "calcs": [
            "lastNotNull"
          ],
          "fields": "",
          "values": false
        },
        "orientation": "auto",
        "textMode": "value_and_name",
        "colorMode": "value",
        "graphMode": "area"
      },
      "targets": [
        {
          "refId": "A",
          "datasource": {
            "type": "prometheus",
            "uid": "${DS_PROMETHEUS}"
          },
          "expr": "sum(active_generations)",
          "legendFormat": "active"
        }
      ]
    }
  ],
  "refresh": "15s",
  "schemaVersion": 39,
  "style": "dark",
  "tags": [
    "samosachaat",
    "inference"
  ],
  "templating": {
    "list": [
      {
        "current": {
          "selected": false,
          "text": "Prometheus",
          "value": "prometheus"
        },
        "hide": 0,
        "includeAll": false,
        "label": "Datasource",
        "multi": false,
        "name": "DS_PROMETHEUS",
        "options": [],
        "query": "prometheus",
        "queryValue": "",
        "refresh": 1,
        "regex": "",
        "skipUrlSync": false,
        "type": "datasource"
      }
    ]
  },
  "time": {
    "from": "now-1h",
    "to": "now"
  },
  "timepicker": {},
  "timezone": "",
  "title": "samosaChaat — Inference Service",
  "uid": "samosachaat-inference",
  "version": 1,
  "weekStart": ""
}