diff --git a/deployments/grafana/dashboards/http.json b/deployments/grafana/dashboards/http.json new file mode 100644 index 0000000..6c8afa6 --- /dev/null +++ b/deployments/grafana/dashboards/http.json @@ -0,0 +1,1385 @@ +{ + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": { + "type": "grafana", + "uid": "-- Grafana --" + }, + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "type": "dashboard" + } + ] + }, + "editable": true, + "fiscalYearStartMonth": 0, + "graphTooltip": 0, + "id": 17, + "links": [], + "panels": [ + { + "datasource": { + "type": "prometheus", + "uid": "edqp0a73uh2bka" + }, + "fieldConfig": { + "defaults": { + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 12, + "w": 6, + "x": 0, + "y": 0 + }, + "id": 1, + "options": { + "minVizHeight": 75, + "minVizWidth": 75, + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showThresholdLabels": false, + "showThresholdMarkers": true, + "sizing": "auto" + }, + "pluginVersion": "12.2.0-16979757807", + "targets": [ + { + "datasource": { + "uid": "Prometheus" + }, + "editorMode": "code", + "expr": "sum by (instance) (http_requests_in_progress_total{instance=~\"$instance\", job=~\"$job\", service_name=~\"$service\"})", + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "Current In-Progress Requests", + "type": "gauge" + }, + { + "datasource": { + "type": "prometheus", + "uid": "edqp0a73uh2bka" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "reqps" + }, + "overrides": [] + }, + "gridPos": { + "h": 6, + "w": 9, + "x": 6, + "y": 0 + }, + "id": 2, + "options": { + "legend": { + "calcs": [ + "mean", + "max" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true, + "sortBy": "Mean", + "sortDesc": true + }, + "tooltip": { + "hideZeros": true, + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "12.2.0-16979757807", + "targets": [ + { + "datasource": { + "uid": "Prometheus" + }, + "editorMode": "code", + "expr": "sum by(instance) (rate(http_requests_total{instance=~\"$instance\", job=~\"$job\", service_name=~\"$service\", method=~\"$method\", status_code=~\"$status_code\"}[$__rate_interval]))", + "hide": false, + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "Request Rate", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "edqp0a73uh2bka" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "s" + }, + "overrides": [] + }, + "gridPos": { + "h": 6, + "w": 9, + "x": 15, + "y": 0 + }, + "id": 4, + "options": { + "legend": { + "calcs": [ + "mean", + "max" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true, + "sortBy": "Mean", + "sortDesc": true + }, + "tooltip": { + "hideZeros": true, + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "12.2.0-16979757807", + "targets": [ + { + "datasource": { + "uid": "Prometheus" + }, + "editorMode": "code", + "expr": "sum by (method, path) (\n rate(http_request_duration_seconds_sum{instance=~\"$instance\", job=~\"$job\", service_name=~\"$service\", method=~\"$method\", status_code=~\"$status_code\"}[$__rate_interval])\n) \n/ \nsum by (method, path) (\n rate(http_request_duration_seconds_count{instance=~\"$instance\", job=~\"$job\", service_name=~\"$service\", method=~\"$method\", status_code=~\"$status_code\"}[$__rate_interval])\n)", + "legendFormat": "{{method}} {{path}}", + "range": true, + "refId": "A" + } + ], + "title": "Average Latency", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "edqp0a73uh2bka" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + }, + { + "color": "yellow", + "value": 50 + }, + { + "color": "red", + "value": 100 + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 6, + "w": 9, + "x": 6, + "y": 6 + }, + "id": 20, + "options": { + "legend": { + "calcs": [ + "mean", + "max" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true, + "sortBy": "Mean", + "sortDesc": true + }, + "tooltip": { + "hideZeros": false, + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "12.2.0-16979757807", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "edqp0a73uh2bka" + }, + "editorMode": "code", + "expr": "sum by(instance) (avg_over_time(http_requests_in_progress_total{instance=~\"$instance\", job=~\"$job\", service_name=~\"$service\"}[$__rate_interval]))", + "hide": false, + "instant": false, + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "Avg In-Progress Requests", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "edqp0a73uh2bka" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "s" + }, + "overrides": [] + }, + "gridPos": { + "h": 6, + "w": 9, + "x": 15, + "y": 6 + }, + "id": 5, + "options": { + "legend": { + "calcs": [ + "mean", + "max" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true, + "sortBy": "Mean", + "sortDesc": true + }, + "tooltip": { + "hideZeros": false, + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "12.2.0-16979757807", + "targets": [ + { + "datasource": { + "uid": "Prometheus" + }, + "editorMode": "code", + "expr": "histogram_quantile(0.99, sum by(method, path, le) (rate(http_request_duration_seconds_bucket{instance=~\"$instance\", job=~\"$job\", service_name=~\"$service\", method=~\"$method\", status_code=~\"$status_code\"}[$__rate_interval])))", + "format": "time_series", + "legendFormat": "{{method}} {{path}}", + "range": true, + "refId": "A" + } + ], + "title": "P99 Latency", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "edqp0a73uh2bka" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + } + }, + "mappings": [] + }, + "overrides": [] + }, + "gridPos": { + "h": 12, + "w": 6, + "x": 0, + "y": 12 + }, + "id": 8, + "options": { + "legend": { + "displayMode": "table", + "placement": "bottom", + "showLegend": true, + "values": [ + "percent" + ] + }, + "pieType": "pie", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "sort": "desc", + "tooltip": { + "hideZeros": false, + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "12.2.0-16979757807", + "targets": [ + { + "datasource": { + "uid": "Prometheus" + }, + "editorMode": "code", + "expr": "sum by(status_code) (rate(http_requests_total{instance=~\"$instance\", job=~\"$job\", service_name=~\"$service\", method=~\"$method\"}[$__rate_interval]))", + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "Requests by Status Code", + "type": "piechart" + }, + { + "datasource": { + "type": "prometheus", + "uid": "edqp0a73uh2bka" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "reqps" + }, + "overrides": [] + }, + "gridPos": { + "h": 6, + "w": 9, + "x": 6, + "y": 12 + }, + "id": 9, + "options": { + "legend": { + "calcs": [ + "mean", + "max" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true, + "sortBy": "Mean", + "sortDesc": true + }, + "tooltip": { + "hideZeros": true, + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "12.2.0-16979757807", + "targets": [ + { + "datasource": { + "uid": "Prometheus" + }, + "editorMode": "code", + "expr": "sum by(method) (rate(http_requests_total{instance=~\"$instance\", job=~\"$job\", service_name=~\"$service\", status_code=~\"$status_code\"}[$__rate_interval]))", + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "Requests by Method", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "edqp0a73uh2bka" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "s" + }, + "overrides": [] + }, + "gridPos": { + "h": 6, + "w": 9, + "x": 15, + "y": 12 + }, + "id": 6, + "options": { + "legend": { + "calcs": [ + "mean", + "max" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true, + "sortBy": "Mean", + "sortDesc": true + }, + "tooltip": { + "hideZeros": true, + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "12.2.0-16979757807", + "targets": [ + { + "datasource": { + "uid": "Prometheus" + }, + "editorMode": "code", + "expr": "histogram_quantile(0.95, sum by(method, path, le) (rate(http_request_duration_seconds_bucket{instance=~\"$instance\", job=~\"$job\", service_name=~\"$service\", method=~\"$method\", status_code=~\"$status_code\"}[$__rate_interval])))", + "legendFormat": "{{method}} {{path}}", + "range": true, + "refId": "A" + } + ], + "title": "P95 Latency", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "edqp0a73uh2bka" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 6, + "w": 9, + "x": 6, + "y": 18 + }, + "id": 3, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "hideZeros": false, + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "12.2.0-16979757807", + "targets": [ + { + "datasource": { + "uid": "Prometheus" + }, + "editorMode": "code", + "expr": "sum by (instance) (rate(http_requests_total{status_code=~\"5..\", instance=~\"$instance\", job=~\"$job\", service_name=~\"$service\", method=~\"$method\"}[$__rate_interval]))", + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "Error Rate", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "edqp0a73uh2bka" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "s" + }, + "overrides": [] + }, + "gridPos": { + "h": 6, + "w": 9, + "x": 15, + "y": 18 + }, + "id": 7, + "options": { + "legend": { + "calcs": [ + "mean", + "max" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true, + "sortBy": "Mean", + "sortDesc": true + }, + "tooltip": { + "hideZeros": true, + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "12.2.0-16979757807", + "targets": [ + { + "datasource": { + "uid": "Prometheus" + }, + "editorMode": "code", + "expr": "histogram_quantile(0.50, sum by(method, path, le) (rate(http_request_duration_seconds_bucket{instance=~\"$instance\", job=~\"$job\", service_name=~\"$service\", method=~\"$method\", status_code=~\"$status_code\"}[$__rate_interval])))", + "legendFormat": "{{method}} {{path}}", + "range": true, + "refId": "A" + } + ], + "title": "P50 Latency", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "edqp0a73uh2bka" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "fieldMinMax": false, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "reqps" + }, + "overrides": [] + }, + "gridPos": { + "h": 6, + "w": 6, + "x": 0, + "y": 24 + }, + "id": 10, + "options": { + "displayMode": "lcd", + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": false + }, + "maxVizHeight": 300, + "minVizHeight": 16, + "minVizWidth": 8, + "namePlacement": "top", + "orientation": "horizontal", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showUnfilled": true, + "sizing": "auto", + "valueMode": "color" + }, + "pluginVersion": "12.2.0-16979757807", + "targets": [ + { + "datasource": { + "uid": "Prometheus" + }, + "editorMode": "code", + "exemplar": false, + "expr": "topk(5, sum by(path) (rate(http_requests_total{instance=~\"$instance\", job=~\"$job\", service_name=~\"$service\", method=~\"$method\", status_code=~\"$status_code\"}[$__rate_interval])))", + "instant": true, + "legendFormat": "__auto", + "range": false, + "refId": "A" + } + ], + "title": "Top 5 Paths by Request Volume", + "type": "bargauge" + }, + { + "datasource": { + "type": "prometheus", + "uid": "edqp0a73uh2bka" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "fillOpacity": 80, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineWidth": 1, + "scaleDistribution": { + "type": "linear" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 6, + "w": 9, + "x": 6, + "y": 24 + }, + "id": 11, + "options": { + "barRadius": 0, + "barWidth": 0.97, + "fullHighlight": false, + "groupWidth": 0.7, + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": false + }, + "orientation": "auto", + "showValue": "auto", + "stacking": "none", + "tooltip": { + "hideZeros": true, + "mode": "multi", + "sort": "desc" + }, + "xTickLabelRotation": 0, + "xTickLabelSpacing": 100 + }, + "pluginVersion": "12.2.0-16979757807", + "targets": [ + { + "datasource": { + "uid": "Prometheus" + }, + "editorMode": "code", + "expr": "sum(increase(http_requests_total{instance=~\"$instance\", job=~\"$job\", service_name=~\"$service\", method=~\"$method\", status_code=~\"$status_code\"}[7d]))", + "legendFormat": "Requests", + "range": true, + "refId": "A" + } + ], + "title": "7-Day Request Volume", + "type": "barchart" + }, + { + "datasource": { + "type": "prometheus", + "uid": "edqp0a73uh2bka" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "s" + }, + "overrides": [] + }, + "gridPos": { + "h": 6, + "w": 9, + "x": 15, + "y": 24 + }, + "id": 12, + "options": { + "legend": { + "calcs": [ + "mean" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "hideZeros": true, + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "12.2.0-16979757807", + "targets": [ + { + "datasource": { + "uid": "Prometheus" + }, + "editorMode": "code", + "expr": "max_over_time(histogram_quantile(0.99, sum by(instance, le) (rate(http_request_duration_seconds_bucket{instance=~\"$instance\", job=~\"$job\", service_name=~\"$service\", method=~\"$method\", status_code=~\"$status_code\"}[$__rate_interval])))[7d:])", + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "7-Day P99 Latency", + "type": "timeseries" + } + ], + "preload": false, + "refresh": "auto", + "schemaVersion": 41, + "tags": [ + "http" + ], + "templating": { + "list": [ + { + "allowCustomValue": false, + "current": { + "text": "All", + "value": "$__all" + }, + "datasource": { + "type": "prometheus", + "uid": "edqp0a73uh2bka" + }, + "definition": "label_values(http_requests_total,instance)", + "includeAll": true, + "name": "instance", + "options": [], + "query": { + "qryType": 1, + "query": "label_values(http_requests_total,instance)", + "refId": "PrometheusVariableQueryEditor-VariableQuery" + }, + "refresh": 1, + "type": "query" + }, + { + "current": { + "text": "backend", + "value": "backend" + }, + "datasource": { + "type": "prometheus", + "uid": "edqp0a73uh2bka" + }, + "definition": "label_values(http_requests_total,job)", + "name": "job", + "options": [], + "query": { + "qryType": 1, + "query": "label_values(http_requests_total,job)", + "refId": "PrometheusVariableQueryEditor-VariableQuery" + }, + "refresh": 1, + "type": "query" + }, + { + "current": { + "text": "All", + "value": "$__all" + }, + "datasource": { + "type": "prometheus", + "uid": "edqp0a73uh2bka" + }, + "definition": "label_values(http_requests_total,service_name)", + "includeAll": true, + "name": "service", + "options": [], + "query": { + "qryType": 1, + "query": "label_values(http_requests_total,service_name)", + "refId": "PrometheusVariableQueryEditor-VariableQuery" + }, + "refresh": 1, + "type": "query" + }, + { + "current": { + "text": "All", + "value": "$__all" + }, + "datasource": { + "type": "prometheus", + "uid": "edqp0a73uh2bka" + }, + "definition": "label_values(http_requests_total,method)", + "includeAll": true, + "multi": true, + "name": "method", + "options": [], + "query": { + "qryType": 1, + "query": "label_values(http_requests_total,method)", + "refId": "PrometheusVariableQueryEditor-VariableQuery" + }, + "refresh": 1, + "type": "query" + }, + { + "current": { + "text": "All", + "value": "$__all" + }, + "datasource": { + "type": "prometheus", + "uid": "edqp0a73uh2bka" + }, + "definition": "label_values(http_requests_total,status_code)", + "includeAll": true, + "multi": true, + "name": "status_code", + "options": [], + "query": { + "qryType": 1, + "query": "label_values(http_requests_total,status_code)", + "refId": "PrometheusVariableQueryEditor-VariableQuery" + }, + "refresh": 1, + "sort": 3, + "type": "query" + } + ] + }, + "time": { + "from": "now-24h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ] + }, + "timezone": "", + "title": "HTTP Service Metrics", + "uid": "144165a0-ff09-4d45-8d15-ca39d0c488bb", + "version": 18 +} \ No newline at end of file diff --git a/deployments/prometheus/alerts/http-alerts.yml b/deployments/prometheus/alerts/http-alerts.yml new file mode 100644 index 0000000..3f59902 --- /dev/null +++ b/deployments/prometheus/alerts/http-alerts.yml @@ -0,0 +1,56 @@ +groups: + - name: http-alerts + rules: + - alert: HighHTTPErrorRateWarning + expr: sum(rate(http_requests_total{job="backend", status_code=~"5.."}[5m])) / sum(rate(http_requests_total{job="backend"}[5m])) > 0.05 + for: 5m + labels: + severity: warning + annotations: + summary: "High HTTP error rate (Warning)" + description: "The HTTP error rate has exceeded 5% over the last 5 minutes." + + - alert: HighHTTPErrorRateCritical + expr: sum(rate(http_requests_total{job="backend", status_code=~"5.."}[5m])) / sum(rate(http_requests_total{job="backend"}[5m])) > 0.1 + for: 5m + labels: + severity: critical + annotations: + summary: "High HTTP error rate (Critical)" + description: "The HTTP error rate has exceeded 10% over the last 5 minutes." + + - alert: HighHTTPLatencyWarning + expr: histogram_quantile(0.99, sum(rate(http_request_duration_seconds_bucket{job="backend"}[5m])) by (le)) > 0.3 + for: 5m + labels: + severity: warning + annotations: + summary: "High HTTP latency (p99) (Warning)" + description: "The p99 HTTP latency has exceeded 0.3 seconds over the last 5 minutes." + + - alert: HighHTTPLatencyCritical + expr: histogram_quantile(0.99, sum(rate(http_request_duration_seconds_bucket{job="backend"}[5m])) by (le)) > 1 + for: 5m + labels: + severity: critical + annotations: + summary: "High HTTP latency (p99) (Critical)" + description: "The p99 HTTP latency has exceeded 1 seconds over the last 5 minutes." + + - alert: IncreasedHTTPRequestVolumeWarning + expr: rate(http_requests_total{job="backend"}[1m]) > 2 * avg_over_time(rate(http_requests_total{job="backend"}[1m])[10m:1m]) + for: 5m + labels: + severity: warning + annotations: + summary: "Increased HTTP request volume (Warning)" + description: "The HTTP request volume has increased by 2x compared to the average of the last 10 minutes." + + - alert: IncreasedHTTPRequestVolumeCritical + expr: rate(http_requests_total{job="backend"}[1m]) > 5 * avg_over_time(rate(http_requests_total{job="backend"}[1m])[10m]) + for: 5m + labels: + severity: critical + annotations: + summary: "Increased HTTP request volume (Critical)" + description: "The HTTP request volume has increased by 5x compared to the average of the last 10 minutes."