diff --git a/README.md b/README.md index e5790a7..c8d408d 100644 --- a/README.md +++ b/README.md @@ -1,14 +1,11 @@ # PROMETHEUS & GRAFANA -Ce projet vise à monitorer un server avec une stack Docker. +Ce projet vise à monitorer des serveurs via Prometheus pour centraliser le scraping, Node Exporter pour les métriques de l'hôte, cAdvisor pour les métriques des conteneurs Docker et Grafana pour afficher les métriques sous forme de dashboards. + +![PROM](docs/prom.png) ## CONFIGURATION -- Configuration des accès via Caddy: -```bash -nano .env -``` - - Configuration de la boîte mail pour les alertes: ```bash nano alertmanager/alertmanager.yml @@ -19,10 +16,71 @@ nano alertmanager/alertmanager.yml nano alertmanager/alert.rules ``` +- Configuration des alertes: +```bash +nano alertmanager/alert.rules +``` + > Grafana est accessible via l'adresse: http://:3000 +### NODE EXPORTER TLS + +- Créer un enregistrement DNS pointant vers votre serveur +- Créer une configuration serveur pointant le DNS vers 127.0.0.1:9100 (port exposé par Node Exporter) [exemple pour nginx](docs/nginx-config) + +#### NODE EXPORTER HOST + +- Créer les certificats: + +```bash +openssl req -new -newkey rsa:4096 -days 365 -nodes -x509 -keyout gn-prod.key -out gn-prod.crt -subj "/C=FR/ST=PARIS/L=GarageNum/O=prom/CN=legaragenumerique.fr" -addext "subjectAltName = DNS:gnprod" +``` + +- Générer le hash du mot de passe: + +```bash +htpasswd -nBC 10 "" | tr -d ':\n'; echo +``` + +- Node exporter web.yml (/etc/node-exporter/web.yml): + +```yaml +tls_server_config: + cert_file: gn-prod.crt + key_file: gn-prod.key +basic_auth_users: + prometheus: <hash-bcrypt-généré-ci-dessus> +``` + +- Copier le certificat (gn-prod.crt, sans la clé privée) sur l'hôte Prometheus + +#### PROMETHEUS HOST + +- Configuration de prometheus.yml (/etc/prometheus/prometheus.yml): + +```yaml +scrape_configs: + - job_name: 'node-exporter-tls' + scheme: https + basic_auth: + username: prometheus + password: <mot-de-passe-en-clair> + tls_config: + ca_file: gn-prod.crt + insecure_skip_verify: true + static_configs: + - targets: ['node-exporter-ip:9100'] + labels: + instance: 
friendly-instance-name +``` + + + ## TO DO -- [ ] node exporter -- [ ] node exporter -> prometheus via https -- [ ] dashboard for Grafana amd64 +- [x] node exporter +- [x] node exporter -> prometheus via https +- [ ] dashboard for Grafana amd64: + - [x] host metrics + - [ ] cadvisor for docker diff --git a/docker-compose.yml b/docker-compose.yml index 4e04ba0..91f8d86 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -1,5 +1,6 @@ version: '2.1' + networks: monitor-net: driver: bridge @@ -10,6 +11,7 @@ volumes: services: +# METRICS GATHERER prometheus: image: prom/prometheus:v2.17.1 container_name: prometheus @@ -31,6 +33,7 @@ services: labels: org.label-schema.group: "monitoring" +# FOR ALERTS alertmanager: image: prom/alertmanager:v0.20.0 container_name: alertmanager @@ -48,6 +51,7 @@ services: labels: org.label-schema.group: "monitoring" +# FOR HOST METRICS nodeexporter: image: prom/node-exporter:v0.18.1 container_name: nodeexporter @@ -68,6 +72,7 @@ services: labels: org.label-schema.group: "monitoring" +# FOR DOCKER CONTAINERS cadvisor: image: gcr.io/cadvisor/cadvisor container_name: cadvisor @@ -85,6 +90,7 @@ services: labels: org.label-schema.group: "monitoring" +# FOR DASHBOARD DISPLAY grafana: image: grafana/grafana:6.7.2 container_name: grafana @@ -96,8 +102,8 @@ services: - GF_SECURITY_ADMIN_PASSWORD=${ADMIN_PASSWORD} - GF_USERS_ALLOW_SIGN_UP=false restart: unless-stopped - expose: - - 3000 + ports: + - "3000:3000" # host:container mapping, quoted so YAML keeps it a string networks: - monitor-net labels: @@ -113,24 +119,3 @@ services: - monitor-net labels: org.label-schema.group: "monitoring" - - caddy: - image: caddy:2.6.4 - container_name: caddy - ports: - - "3000:3000" - - "9090:9090" - - "9093:9093" - - "9091:9091" - volumes: - - ./caddy:/etc/caddy - env_file: - - .env - # environment: - # - ADMIN_USER=${ADMIN_USER} - # - ADMIN_PASSWORD=${ADMIN_PASSWORD} - restart: unless-stopped - networks: - - monitor-net - labels: - org.label-schema.group: "monitoring" diff --git a/docs/nginx-config 
b/docs/nginx-config new file mode 100644 index 0000000..c9e6c23 --- /dev/null +++ b/docs/nginx-config @@ -0,0 +1,34 @@ +upstream nodeexporter { + server 127.0.0.1:9100; + } + +server { + listen 80; + listen [::]:80; + server_name monitoring.mondomaine.tld; + + location / { + return 301 https://$host$request_uri; # never serve metrics over plain HTTP + } + +} + +server { + listen 443 ssl; + listen [::]:443 ssl; + server_name monitoring.mondomaine.tld; + + error_log /var/log/nginx/monitoring.mondomaine.tld-proxy-error.log; + access_log /var/log/nginx/monitoring.mondomaine.tld-proxy-access.log; + + ssl_certificate /etc/letsencrypt/live/monitoring.mondomaine.tld/fullchain.pem; + ssl_certificate_key /etc/letsencrypt/live/monitoring.mondomaine.tld/privkey.pem; + + ssl_protocols TLSv1.2 TLSv1.3; # TLS 1.0/1.1 are deprecated (RFC 8996) + ssl_ciphers HIGH:!aNULL:!MD5; + + location / { + proxy_pass http://nodeexporter; + } + +} \ No newline at end of file diff --git a/docs/prom.png b/docs/prom.png new file mode 100644 index 0000000..e521486 Binary files /dev/null and b/docs/prom.png differ diff --git a/grafana/dashboards/host-metrics.json b/grafana/dashboards/host-metrics.json new file mode 100644 index 0000000..cde4563 --- /dev/null +++ b/grafana/dashboards/host-metrics.json @@ -0,0 +1,1608 @@ +{ + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": "-- Grafana --", + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "limit": 100, + "name": "Annotations & Alerts", + "type": "dashboard" + } + ] + }, + "description": "Basic overview of linux host metrics, based on node_exporter", + "editable": true, + "gnetId": 10180, + "graphTooltip": 1, + "id": 5, + "iteration": 1692770536273, + "links": [], + "panels": [ + { + "collapsed": false, + "datasource": null, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 0 + }, + "id": 42, + "panels": [], + "title": "Host Overview", + "type": "row" + }, + { + "cacheTimeout": null, + "datasource": "Prometheus", + "description": "Time since last boot", + "fieldConfig": { + 
"defaults": { + "color": { + "mode": "thresholds" + }, + "decimals": 0, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "s" + }, + "overrides": [] + }, + "gridPos": { + "h": 4, + "w": 2, + "x": 0, + "y": 1 + }, + "id": 6, + "interval": null, + "links": [], + "maxDataPoints": 100, + "options": { + "colorMode": "none", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "horizontal", + "reduceOptions": { + "calcs": [ + "mean" + ], + "fields": "", + "values": false + }, + "text": {}, + "textMode": "auto" + }, + "pluginVersion": "8.0.2", + "repeat": "host", + "repeatDirection": "v", + "targets": [ + { + "expr": "node_time_seconds{instance=~\"$host\"} - node_boot_time_seconds{instance=~\"$host\"}", + "format": "time_series", + "hide": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "", + "refId": "A" + } + ], + "title": "Uptime | $job", + "type": "stat" + }, + { + "cacheTimeout": null, + "datasource": "Prometheus", + "description": "Number of processors", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 4, + "w": 2, + "x": 2, + "y": 1 + }, + "id": 2, + "interval": "", + "links": [], + "maxDataPoints": 100, + "options": { + "colorMode": "none", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "horizontal", + "reduceOptions": { + "calcs": [ + "mean" + ], + "fields": "", + "values": false + }, + "text": {}, + "textMode": "auto" + }, + "pluginVersion": "8.0.2", + "repeat": "host", + "repeatDirection": "v", + "targets": [ + { + "expr": "count(count(node_cpu_seconds_total{instance=~\"$host\"}) by (cpu))", + "format": "time_series", + "intervalFactor": 
1, + "legendFormat": "", + "refId": "A" + } + ], + "title": "Processors", + "type": "stat" + }, + { + "cacheTimeout": null, + "datasource": "Prometheus", + "description": "Amount of memory", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "decimals": 0, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "decbytes" + }, + "overrides": [] + }, + "gridPos": { + "h": 4, + "w": 2, + "x": 4, + "y": 1 + }, + "id": 4, + "interval": null, + "links": [], + "maxDataPoints": 100, + "options": { + "colorMode": "none", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "horizontal", + "reduceOptions": { + "calcs": [ + "mean" + ], + "fields": "", + "values": false + }, + "text": {}, + "textMode": "auto" + }, + "pluginVersion": "8.0.2", + "repeat": "host", + "repeatDirection": "v", + "targets": [ + { + "expr": "node_memory_MemTotal_bytes{instance=~\"$host\"}", + "format": "time_series", + "instant": true, + "intervalFactor": 1, + "legendFormat": "", + "refId": "A" + } + ], + "title": "RAM", + "type": "stat" + }, + { + "cacheTimeout": null, + "datasource": "Prometheus", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "decimals": 1, + "mappings": [], + "max": 1, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "#299c46", + "value": null + }, + { + "color": "rgba(237, 129, 40, 0.89)", + "value": 0.6 + }, + { + "color": "#d44a3a", + "value": 0.8 + } + ] + }, + "unit": "percentunit" + }, + "overrides": [] + }, + "gridPos": { + "h": 4, + "w": 3, + "x": 6, + "y": 1 + }, + "id": 34, + "interval": null, + "links": [], + "maxDataPoints": 100, + "options": { + "orientation": "horizontal", + "reduceOptions": { + "calcs": [ + "mean" + ], + "fields": "", + "values": false + }, + "showThresholdLabels": false, + "showThresholdMarkers": true, + "text": {} + }, + 
"pluginVersion": "8.0.2", + "repeat": "host", + "repeatDirection": "v", + "targets": [ + { + "expr": "1 - avg(irate(node_cpu_seconds_total{mode=\"idle\",instance=~\"$host\"}[5m]))", + "format": "time_series", + "instant": false, + "intervalFactor": 1, + "legendFormat": "", + "refId": "A" + } + ], + "title": "CPU Load", + "type": "gauge" + }, + { + "cacheTimeout": null, + "datasource": "Prometheus", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "decimals": 1, + "mappings": [], + "max": 1, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "#299c46", + "value": null + }, + { + "color": "rgba(237, 129, 40, 0.89)", + "value": 0.6 + }, + { + "color": "#d44a3a", + "value": 0.8 + } + ] + }, + "unit": "percentunit" + }, + "overrides": [] + }, + "gridPos": { + "h": 4, + "w": 3, + "x": 9, + "y": 1 + }, + "id": 35, + "interval": null, + "links": [], + "maxDataPoints": 100, + "options": { + "orientation": "horizontal", + "reduceOptions": { + "calcs": [ + "mean" + ], + "fields": "", + "values": false + }, + "showThresholdLabels": false, + "showThresholdMarkers": true, + "text": {} + }, + "pluginVersion": "8.0.2", + "repeat": "host", + "repeatDirection": "v", + "targets": [ + { + "exemplar": true, + "expr": "(1 - ((avg_over_time(node_memory_MemFree_bytes{instance=~\"$host\"}[10m]) + avg_over_time(node_memory_Cached_bytes{instance=~\"$host\"}[10m]) + avg_over_time(node_memory_Buffers_bytes{instance=~\"$host\"}[10m])) / avg_over_time(node_memory_MemTotal_bytes{instance=~\"$host\"}[10m])))\n", + "format": "time_series", + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "", + "refId": "A" + } + ], + "title": "Memory Use", + "type": "gauge" + }, + { + "cacheTimeout": null, + "datasource": "Prometheus", + "description": "Used disk space (fraction of total, all filesystems)", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "decimals": 1, + "mappings": [], + "max": 1, + "min": 0, + "thresholds": { + 
"mode": "absolute", + "steps": [ + { + "color": "#299c46", + "value": null + }, + { + "color": "rgba(237, 129, 40, 0.89)", + "value": 0.6 + }, + { + "color": "#d44a3a", + "value": 0.8 + } + ] + }, + "unit": "percentunit" + }, + "overrides": [] + }, + "gridPos": { + "h": 4, + "w": 3, + "x": 12, + "y": 1 + }, + "id": 8, + "interval": "", + "links": [], + "maxDataPoints": 100, + "options": { + "orientation": "horizontal", + "reduceOptions": { + "calcs": [ + "mean" + ], + "fields": "", + "values": false + }, + "showThresholdLabels": false, + "showThresholdMarkers": true, + "text": {} + }, + "pluginVersion": "8.0.2", + "repeat": "host", + "repeatDirection": "v", + "targets": [ + { + "expr": "1 - (sum(node_filesystem_free_bytes{instance=~\"$host\"}) / sum(node_filesystem_size_bytes{instance=~\"$host\"}))", + "format": "time_series", + "instant": false, + "intervalFactor": 1, + "legendFormat": "", + "refId": "A" + } + ], + "title": "Disk Used (Total)", + "type": "gauge" + }, + { + "cacheTimeout": null, + "datasource": "Prometheus", + "description": "Network traffic received in the last 24 hours", + "fieldConfig": { + "defaults": { + "color": { + "fixedColor": "rgb(31, 120, 193)", + "mode": "fixed" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "decbytes" + }, + "overrides": [] + }, + "gridPos": { + "h": 4, + "w": 2, + "x": 15, + "y": 1 + }, + "id": 10, + "interval": null, + "links": [], + "maxDataPoints": 100, + "options": { + "colorMode": "none", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "horizontal", + "reduceOptions": { + "calcs": [ + "mean" + ], + "fields": "", + "values": false + }, + "text": {}, + "textMode": "auto" + }, + "pluginVersion": "8.0.2", + "repeat": "host", + "repeatDirection": "v", + "targets": [ + { + "expr": "sum(increase(node_network_receive_bytes_total{instance=~\"$host\"}[24h]))", + "format": "time_series", + 
"intervalFactor": 1, + "legendFormat": "", + "refId": "A" + } + ], + "title": "Net IN (24h)", + "type": "stat" + }, + { + "cacheTimeout": null, + "datasource": "Prometheus", + "description": "Network traffic sent in the last 24 hours", + "fieldConfig": { + "defaults": { + "color": { + "fixedColor": "rgb(31, 120, 193)", + "mode": "fixed" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "decbytes" + }, + "overrides": [] + }, + "gridPos": { + "h": 4, + "w": 2, + "x": 17, + "y": 1 + }, + "id": 12, + "interval": null, + "links": [], + "maxDataPoints": 100, + "options": { + "colorMode": "none", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "horizontal", + "reduceOptions": { + "calcs": [ + "mean" + ], + "fields": "", + "values": false + }, + "text": {}, + "textMode": "auto" + }, + "pluginVersion": "8.0.2", + "repeat": "host", + "repeatDirection": "v", + "targets": [ + { + "expr": "sum(increase(node_network_transmit_bytes_total{instance=~\"$host\"}[24h]))", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "", + "refId": "A" + } + ], + "title": "Net OUT (24h)", + "type": "stat" + }, + { + "columns": [], + "datasource": "Prometheus", + "fontSize": "100%", + "gridPos": { + "h": 4, + "w": 5, + "x": 19, + "y": 1 + }, + "id": 37, + "links": [], + "pageSize": null, + "repeat": "host", + "repeatDirection": "v", + "scroll": true, + "showHeader": true, + "sort": { + "col": 0, + "desc": true + }, + "styles": [ + { + "$$hashKey": "object:503", + "alias": "Available", + "align": "auto", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 1, + "mappingType": 1, + "pattern": "Value", + "thresholds": [], + "type": "number", + "unit": "decbytes" + }, + { + "$$hashKey": "object:504", + "alias": "Mount", + 
"align": "auto", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "mappingType": 1, + "pattern": "mountpoint", + "thresholds": [], + "type": "string", + "unit": "short" + }, + { + "$$hashKey": "object:505", + "alias": "Type", + "align": "auto", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "mappingType": 1, + "pattern": "fstype", + "thresholds": [], + "type": "string", + "unit": "short" + }, + { + "$$hashKey": "object:506", + "alias": "", + "align": "auto", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "decimals": 2, + "pattern": "/.*/", + "thresholds": [], + "type": "hidden", + "unit": "short" + } + ], + "targets": [ + { + "exemplar": false, + "expr": "node_filesystem_free_bytes{mountpoint=\"/\",fstype!~\"(tmpfs|rootfs).*\",instance=~\"$host\"}", + "format": "table", + "instant": true, + "interval": "", + "intervalFactor": 1, + "legendFormat": "", + "refId": "A" + } + ], + "title": "Disk (Free)", + "transform": "table", + "type": "table-old" + }, + { + "collapsed": false, + "datasource": null, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 5 + }, + "id": 26, + "panels": [], + "repeat": null, + "title": "CPU Details", + "type": "row" + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 6, + "w": 24, + "x": 0, + "y": 6 + }, + "hiddenSeries": false, + "id": 14, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "sort": "current", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, 
+ "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": true, + "pluginVersion": "8.0.2", + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": "host", + "repeatDirection": "v", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum by (mode) (irate(node_cpu_seconds_total{instance=~\"$host\"}[5m]))", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{mode}}", + "refId": "B" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "CPU Load | $job", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "decimals": 1, + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "collapsed": false, + "datasource": null, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 12 + }, + "id": 24, + "panels": [], + "repeat": null, + "title": "Memory Details", + "type": "row" + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 6, + "w": 24, + "x": 0, + "y": 13 + }, + "hiddenSeries": false, + "id": 16, + "legend": { + "alignAsTable": true, + "avg": false, + "current": true, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": true, + "pluginVersion": 
"8.0.2", + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": "host", + "repeatDirection": "v", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "node_memory_MemFree_bytes{instance=~\"$host\"} > 0", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "legendFormat": "Free", + "refId": "A" + }, + { + "expr": "node_memory_MemTotal_bytes{instance=~\"$host\"} > 0", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "legendFormat": "Total", + "refId": "B" + }, + { + "expr": "node_memory_MemAvailable_bytes{instance=~\"$host\"} > 0", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "Available", + "refId": "C" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Memory | $job", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "decimals": 0, + "format": "bytes", + "label": "", + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "collapsed": false, + "datasource": null, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 19 + }, + "id": 28, + "panels": [], + "repeat": null, + "title": "Network Details", + "type": "row" + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "fill": 2, + "fillGradient": 0, + "gridPos": { + "h": 6, + "w": 14, + "x": 0, + "y": 20 + }, + "hiddenSeries": false, + "id": 18, + "legend": { + "alignAsTable": true, + "avg": false, + "current": false, + "max": true, + "min": true, + "rightSide": true, + "show": true, + "sort": "min", + 
"sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "8.0.2", + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": "host", + "repeatDirection": "v", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "irate(node_network_receive_bytes_total{instance=~\"$host\",device=~\"(?i)^(ens|eth).+$\"}[5m]) > 0", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "legendFormat": "IN ({{device}})", + "refId": "A" + }, + { + "expr": "- irate(node_network_transmit_bytes_total{instance=~\"$host\",device=~\"(?i)^(ens|eth).+$\"}[5m]) < 0", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "legendFormat": "OUT ({{device}})", + "refId": "B" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Network Traffic | $job", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "decimals": null, + "format": "decbytes", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "decimals": null, + "format": "short", + "label": "", + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "fill": 2, + "fillGradient": 0, + "gridPos": { + "h": 6, + "w": 10, + "x": 14, + "y": 20 + }, + "hiddenSeries": false, + "id": 43, + "legend": { + "alignAsTable": true, + "avg": false, + "current": false, + "max": true, + "min": true, + "rightSide": true, + "show": true, + "sort": "min", + 
"sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "8.0.2", + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": "host", + "repeatDirection": "v", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "irate(node_network_receive_errs_total{instance=~\"$host\",device=~\"(?i)^(ens|eth).+$\"}[5m]) + irate(node_network_receive_drop_total{instance=~\"$host\",device=~\"(?i)^(ens|eth).+$\"}[5m])", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "legendFormat": "Err/Drop IN ({{device}})", + "refId": "A" + }, + { + "expr": "- (irate(node_network_transmit_errs_total{instance=~\"$host\",device=~\"(?i)^(ens|eth).+$\"}[5m]) + irate(node_network_transmit_drop_total{instance=~\"$host\",device=~\"(?i)^(ens|eth).+$\"}[5m]))", + "format": "time_series", + "hide": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "Err/Drop OUT ({{device}})", + "refId": "B" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Network Errors/Drops | $job", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "decimals": 1, + "format": "pps", + "label": "", + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "decimals": null, + "format": "short", + "label": "", + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "collapsed": false, + "datasource": null, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 26 + }, + "id": 22, + "panels": [], + "repeat": null, + "title": "Disk Details | $job", + "type": 
"row" + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 9, + "x": 0, + "y": 27 + }, + "hiddenSeries": false, + "id": 40, + "legend": { + "alignAsTable": true, + "avg": false, + "current": true, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "8.0.2", + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": "host", + "repeatDirection": "v", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "node_filesystem_free_bytes{instance=~\"$host\",fstype!~\"(tmpfs|rootfs)\"}", + "format": "time_series", + "instant": false, + "intervalFactor": 1, + "legendFormat": "{{mountpoint}}", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Disk (Free) | $job", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "decbytes", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 8, + "x": 9, + "y": 27 + }, + "hiddenSeries": false, + "id": 30, + "legend": { + "alignAsTable": true, + "avg": false, + "current": false, + "max": true, + "min": true, 
+ "rightSide": true, + "show": true, + "sort": "min", + "sortDesc": false, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "8.0.2", + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": "host", + "repeatDirection": "v", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "irate(node_disk_read_bytes_total{instance=~\"$host\"}[5m]) > 0", + "format": "time_series", + "hide": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "Read ({{device}})", + "refId": "A" + }, + { + "expr": "- irate(node_disk_written_bytes_total{instance=~\"$host\"}[5m]) < 0", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "legendFormat": "Write ({{device}})", + "refId": "B" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Disk Activity | $job", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "decbytes", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 7, + "x": 17, + "y": 27 + }, + "hiddenSeries": false, + "id": 32, + "legend": { + "alignAsTable": true, + "avg": false, + "current": false, + "max": true, + "min": true, + "rightSide": true, + "show": true, + "total": false, + "values": true + }, + "lines": true, + 
"linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "8.0.2", + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": "host", + "repeatDirection": "v", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "irate(node_disk_read_time_seconds_total{instance=~\"$host\"}[5m]) > 0", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "Read ({{device}})", + "refId": "A" + }, + { + "expr": "- irate(node_disk_write_time_seconds_total{instance=~\"$host\"}[5m]) < 0", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "Write ({{device}})", + "refId": "B" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Disk IO | $job", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "decimals": null, + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + } + ], + "refresh": false, + "schemaVersion": 30, + "style": "dark", + "tags": [ + "linux", + "node-exporter", + "ops" + ], + "templating": { + "list": [ + { + "allValue": null, + "current": { + "selected": false, + "text": "nodeexporter:9100", + "value": "nodeexporter:9100" + }, + "datasource": "Prometheus", + "definition": "label_values(node_time_seconds{job=\"$job\"},instance)", + "description": null, + "error": null, + "hide": 2, + "includeAll": false, + "label": "Host", + "multi": true, + "name": "host", + "options": [], + "query": { + "query": "label_values(node_time_seconds{job=\"$job\"},instance)", + 
"refId": "StandardVariableQuery" + }, + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "tagValuesQuery": "", + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": null, + "current": { + "selected": true, + "text": "bigbrother", + "value": "bigbrother" + }, + "datasource": "Prometheus", + "definition": "label_values(node_boot_time_seconds,job)", + "description": null, + "error": null, + "hide": 0, + "includeAll": false, + "label": "Job", + "multi": false, + "name": "job", + "options": [], + "query": { + "query": "label_values(node_boot_time_seconds,job)", + "refId": "Prometheus-job-Variable-Query" + }, + "refresh": 1, + "regex": "/(.*)/", + "skipUrlSync": false, + "sort": 0, + "tagValuesQuery": "", + "tagsQuery": "", + "type": "query", + "useTags": false + } + ] + }, + "time": { + "from": "now-6h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "", + "title": "Linux Hosts Metrics | Base", + "uid": "5vGDQ1gSk", + "version": 17 + } \ No newline at end of file diff --git a/prometheus/prometheus.yml b/prometheus/prometheus.yml index 7906963..ebda15e 100644 --- a/prometheus/prometheus.yml +++ b/prometheus/prometheus.yml @@ -12,6 +12,8 @@ rule_files: - "alert.rules" # A scrape configuration containing exactly one endpoint to scrape. 
+ +# LOCAL SERVER scrape_configs: - job_name: 'nodeexporter' scrape_interval: 5s @@ -34,6 +36,21 @@ scrape_configs: static_configs: - targets: ['pushgateway:9091'] +# REMOTE SERVER (WITH NODE EXPORTER) - example job, uncomment and adapt to enable + # - job_name: 'serveur-distant' + # scheme: https + # basic_auth: + # username: 'prometheus' + # password: 'plaintext-password' # the clear-text password, not the bcrypt hash + # tls_config: + # ca_file: certif.crt + # insecure_skip_verify: true # NOTE(review): true disables certificate verification, so ca_file has no effect - confirm this is intended + # scrape_interval: 10s + # honor_labels: true + # static_configs: + # - targets: ['monitoring.mondomaine.tld:9100'] + # labels: + # instance: serveur-distant alerting: alertmanagers: