maj provisioning pour alerts via mail
This commit is contained in:
parent
43441f012e
commit
379d491b39
78
README.md
78
README.md
@ -1,19 +1,28 @@
|
|||||||
# PROMETHEUS & GRAFANA
|
# PROMETHEUS & GRAFANA :bar_chart:
|
||||||
|
|
||||||
Ce projet vise à monitorer des serveurs via prometheus pour centraliser le scraping, node exporter pour les host metrics, cadvisor pour les metrics des conteneurs docker et grafana pour afficher les metrics sous forme de dashboards
|
Ce projet vise à monitorer des serveurs via prometheus pour centraliser le scraping, node exporter pour les host metrics, cadvisor pour les metrics des conteneurs docker et grafana pour afficher les metrics sous forme de dashboards
|
||||||
|
|
||||||

|

|
||||||
|
|
||||||
## CONFIGURATION
|
## PREREQUIS :memo:
|
||||||
|
|
||||||
|
- [docker + compose plugin](https://docs.docker.com/compose/install/) :whale:
|
||||||
|
|
||||||
|
## CONFIGURATION :wrench:
|
||||||
|
|
||||||
|
### ALERTES VIA EMAIL :mail:
|
||||||
|
|
||||||
|
#### VIA ALERTMANAGER
|
||||||
|
|
||||||
- Configuration de la boîte mail pour les alertes:
|
- Configuration de la boîte mail pour les alertes:
|
||||||
```bash
|
```bash
|
||||||
nano alertmanager/alertmanager.yml
|
nano alertmanager/alertmanager.yml
|
||||||
```
|
```
|
||||||
|
|
||||||
- Configuration des alertes:
|
> Renseigner les champs:
|
||||||
```bash
|
|
||||||
nano alertmanager/alert.rules
|
```yml
|
||||||
|
|
||||||
```
|
```
|
||||||
|
|
||||||
- Configuration des alertes:
|
- Configuration des alertes:
|
||||||
@ -21,14 +30,42 @@ nano alertmanager/alert.rules
|
|||||||
nano alertmanager/alert.rules
|
nano alertmanager/alert.rules
|
||||||
```
|
```
|
||||||
|
|
||||||
> Grafana est accessible via l'adresse: http://<IP-SERVER>:3000
|
> Renseigner les champs:
|
||||||
|
|
||||||
### NODE EXPORTER TLS
|
```
|
||||||
|
|
||||||
Pour déployer un node exporter sur un serveur distant:
|
```
|
||||||
> voir branche [node-exporter](https://git.legaragenumerique.fr/GARAGENUM/prometheus-monitoring/src/branch/node-exporter)
|
|
||||||
|
|
||||||
### GRAFANA SSO KEYCLOAK
|
#### VIA GRAFANA
|
||||||
|
|
||||||
|
- Configuration de la boîte mail pour les alertes:
|
||||||
|
```bash
|
||||||
|
nano grafana/config/grafana.ini
|
||||||
|
```
|
||||||
|
|
||||||
|
> Renseigner les champs:
|
||||||
|
|
||||||
|
```
|
||||||
|
|
||||||
|
```
|
||||||
|
|
||||||
|
- Configuration du notifier (email) par défaut:
|
||||||
|
```bash
|
||||||
|
nano grafana/provisioning/notifiers/email.yaml
|
||||||
|
```
|
||||||
|
|
||||||
|
> Renseigner les champs:
|
||||||
|
|
||||||
|
```yml
|
||||||
|
|
||||||
|
```
|
||||||
|
|
||||||
|
- Commenter alertmanager dans le docker-compose.yml:
|
||||||
|
```bash
|
||||||
|
sed -i "" docker-compose.yml
|
||||||
|
```
|
||||||
|
|
||||||
|
### GRAFANA SSO KEYCLOAK :key:
|
||||||
|
|
||||||
- Créer un client sur keycloak en confidential pour obtenir le client-secret
|
- Créer un client sur keycloak en confidential pour obtenir le client-secret
|
||||||
- Entrer le nom de domaine de votre instance grafana
|
- Entrer le nom de domaine de votre instance grafana
|
||||||
@ -58,11 +95,28 @@ api_url = https://votre-keycloak/auth/realms/votre-royaume/protocol/openid-conne
|
|||||||
#disable_login_form = true
|
#disable_login_form = true
|
||||||
```
|
```
|
||||||
|
|
||||||
## TO DO
|
## UTILISATION :checkered_flag:
|
||||||
|
|
||||||
|
- Démarrer la stack:
|
||||||
|
```bash
|
||||||
|
docker compose up -d
|
||||||
|
```
|
||||||
|
|
||||||
|
> Grafana est accessible via l'adresse: http://<IP-SERVER>:3000
|
||||||
|
|
||||||
|
### NODE EXPORTER TLS
|
||||||
|
|
||||||
|
Pour déployer un node exporter sur un serveur distant:
|
||||||
|
> voir branche [node-exporter](https://git.legaragenumerique.fr/GARAGENUM/prometheus-monitoring/src/branch/node-exporter)
|
||||||
|
|
||||||
|
|
||||||
|
## TO DO :bookmark_tabs:
|
||||||
|
|
||||||
- [x] node exporter
|
- [x] node exporter
|
||||||
- [x] node exporter -> prometheus via https
|
- [x] node exporter -> prometheus via https
|
||||||
- [ ] alert manager config
|
- [x] alert manager config / grafana alert via mail config
|
||||||
|
- [x] provision dashboard / notifier par défaut
|
||||||
- [ ] dashboard for Grafana amd64:
|
- [ ] dashboard for Grafana amd64:
|
||||||
- [x] host metrics
|
- [x] host metrics
|
||||||
- [ ] cadvisor for docker
|
- [ ] cadvisor for docker
|
||||||
|
- [ ] config alertes sous grafana + images
|
||||||
|
|||||||
@ -1,172 +0,0 @@
|
|||||||
groups:
|
|
||||||
- name: targets
|
|
||||||
rules:
|
|
||||||
- alert: monitor_service_down
|
|
||||||
expr: up == 0
|
|
||||||
for: 30s
|
|
||||||
labels:
|
|
||||||
severity: critical
|
|
||||||
annotations:
|
|
||||||
summary: "Monitor service non-operational"
|
|
||||||
description: "Service {{ $labels.instance }} is down."
|
|
||||||
|
|
||||||
## FOR HOST ##################################################################
|
|
||||||
|
|
||||||
- name: host
|
|
||||||
rules:
|
|
||||||
- alert: HostHighCpuLoad
|
|
||||||
expr: 100 - (avg by(instance) (rate(node_cpu_seconds_total{mode="idle"}[2m])) * 100) > 80
|
|
||||||
for: 0m
|
|
||||||
labels:
|
|
||||||
severity: warning
|
|
||||||
annotations:
|
|
||||||
summary: Host high CPU load (instance {{ $labels.instance }})
|
|
||||||
description: "CPU load is > 80%\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"
|
|
||||||
|
|
||||||
- alert: high_memory_load
|
|
||||||
expr: (sum(node_memory_MemTotal_bytes) - sum(node_memory_MemFree_bytes + node_memory_Buffers_bytes + node_memory_Cached_bytes) ) / sum(node_memory_MemTotal_bytes) * 100 > 85
|
|
||||||
for: 30s
|
|
||||||
labels:
|
|
||||||
severity: warning
|
|
||||||
annotations:
|
|
||||||
summary: "Server memory is almost full"
|
|
||||||
description: "Docker host memory usage is {{ humanize $value}}%. Reported by instance {{ $labels.instance }} of job {{ $labels.job }}."
|
|
||||||
|
|
||||||
- alert: HostPhysicalComponentTooHot
|
|
||||||
expr: node_hwmon_temp_celsius > 75
|
|
||||||
for: 5m
|
|
||||||
labels:
|
|
||||||
severity: warning
|
|
||||||
annotations:
|
|
||||||
summary: Host physical component too hot (instance {{ $labels.instance }})
|
|
||||||
description: "Physical hardware component too hot\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"
|
|
||||||
|
|
||||||
- alert: HostSwapIsFillingUp
|
|
||||||
expr: (1 - (node_memory_SwapFree_bytes / node_memory_SwapTotal_bytes)) * 100 > 80
|
|
||||||
for: 2m
|
|
||||||
labels:
|
|
||||||
severity: warning
|
|
||||||
annotations:
|
|
||||||
summary: Host swap is filling up (instance {{ $labels.instance }})
|
|
||||||
description: "Swap is filling up (>80%)\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"
|
|
||||||
|
|
||||||
- alert: high_storage_load
|
|
||||||
expr: (node_filesystem_size_bytes{fstype="aufs"} - node_filesystem_free_bytes{fstype="aufs"}) / node_filesystem_size_bytes{fstype="aufs"} * 100 > 85
|
|
||||||
for: 30s
|
|
||||||
labels:
|
|
||||||
severity: warning
|
|
||||||
annotations:
|
|
||||||
summary: "Server storage is almost full"
|
|
||||||
description: "Docker host storage usage is {{ humanize $value}}%. Reported by instance {{ $labels.instance }} of job {{ $labels.job }}."
|
|
||||||
|
|
||||||
- alert: HostOutOfMemory
|
|
||||||
expr: node_memory_MemAvailable_bytes / node_memory_MemTotal_bytes * 100 < 10
|
|
||||||
for: 2m
|
|
||||||
labels:
|
|
||||||
severity: warning
|
|
||||||
annotations:
|
|
||||||
summary: Host out of memory (instance {{ $labels.instance }})
|
|
||||||
description: "Node memory is filling up (< 10% left)\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"
|
|
||||||
|
|
||||||
- alert: HostUnusualNetworkThroughputIn
|
|
||||||
expr: sum by (instance) (rate(node_network_receive_bytes_total[2m])) / 1024 / 1024 > 100
|
|
||||||
for: 5m
|
|
||||||
labels:
|
|
||||||
severity: warning
|
|
||||||
annotations:
|
|
||||||
summary: Host unusual network throughput in (instance {{ $labels.instance }})
|
|
||||||
description: "Host network interfaces are probably receiving too much data (> 100 MB/s)\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"
|
|
||||||
|
|
||||||
- alert: HostUnusualNetworkThroughputOut
|
|
||||||
expr: sum by (instance) (rate(node_network_transmit_bytes_total[2m])) / 1024 / 1024 > 100
|
|
||||||
for: 5m
|
|
||||||
labels:
|
|
||||||
severity: warning
|
|
||||||
annotations:
|
|
||||||
summary: Host unusual network throughput out (instance {{ $labels.instance }})
|
|
||||||
description: "Host network interfaces are probably sending too much data (> 100 MB/s)\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"
|
|
||||||
|
|
||||||
- alert: HostOutOfDiskSpace
|
|
||||||
expr: (node_filesystem_avail_bytes * 100) / node_filesystem_size_bytes < 10 and ON (instance, device, mountpoint) node_filesystem_readonly == 0
|
|
||||||
for: 2m
|
|
||||||
labels:
|
|
||||||
severity: warning
|
|
||||||
annotations:
|
|
||||||
summary: Host out of disk space (instance {{ $labels.instance }})
|
|
||||||
description: "Disk is almost full (< 10% left)\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"
|
|
||||||
|
|
||||||
## FOR RAID ##########################################################
|
|
||||||
|
|
||||||
- alert: HostRaidArrayGotInactive
|
|
||||||
expr: node_md_state{state="inactive"} > 0
|
|
||||||
for: 0m
|
|
||||||
labels:
|
|
||||||
severity: critical
|
|
||||||
annotations:
|
|
||||||
summary: Host RAID array got inactive (instance {{ $labels.instance }})
|
|
||||||
description: "RAID array {{ $labels.device }} is in degraded state due to one or more disks failures. Number of spare drives is insufficient to fix issue automatically.\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"
|
|
||||||
|
|
||||||
- alert: HostRaidDiskFailure
|
|
||||||
expr: node_md_disks{state="failed"} > 0
|
|
||||||
for: 2m
|
|
||||||
labels:
|
|
||||||
severity: warning
|
|
||||||
annotations:
|
|
||||||
summary: Host RAID disk failure (instance {{ $labels.instance }})
|
|
||||||
description: "At least one device in RAID array on {{ $labels.instance }} failed. Array {{ $labels.md_device }} needs attention and possibly a disk swap\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"
|
|
||||||
|
|
||||||
|
|
||||||
## FOR CONTAINERS #####################################################
|
|
||||||
|
|
||||||
- name: containers
|
|
||||||
rules:
|
|
||||||
- alert: nextcloud_down
|
|
||||||
expr: absent(container_memory_usage_bytes{name="jenkins"})
|
|
||||||
for: 30s
|
|
||||||
labels:
|
|
||||||
severity: critical
|
|
||||||
annotations:
|
|
||||||
summary: "Nextcloud down"
|
|
||||||
description: "Nextcloud container is down for more than 30 seconds."
|
|
||||||
|
|
||||||
- alert: ContainerCpuUsage
|
|
||||||
expr: (sum(rate(container_cpu_usage_seconds_total{name!=""}[3m])) BY (instance, name) * 100) > 80
|
|
||||||
for: 2m
|
|
||||||
labels:
|
|
||||||
severity: warning
|
|
||||||
annotations:
|
|
||||||
summary: Container CPU usage (instance {{ $labels.instance }})
|
|
||||||
description: "Container CPU usage is above 80%\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"
|
|
||||||
|
|
||||||
- alert: ContainerMemoryUsage
|
|
||||||
expr: (sum(container_memory_working_set_bytes{name!=""}) BY (instance, name) / sum(container_spec_memory_limit_bytes > 0) BY (instance, name) * 100) > 80
|
|
||||||
for: 2m
|
|
||||||
labels:
|
|
||||||
severity: warning
|
|
||||||
annotations:
|
|
||||||
summary: Container Memory usage (instance {{ $labels.instance }})
|
|
||||||
description: "Container Memory usage is above 80%\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"
|
|
||||||
|
|
||||||
|
|
||||||
## FOR NGINX ##########################################################
|
|
||||||
|
|
||||||
- name: nginx
|
|
||||||
rules:
|
|
||||||
- alert: NginxHighHttp4xxErrorRate
|
|
||||||
expr: sum(rate(nginx_http_requests_total{status=~"^4.."}[1m])) / sum(rate(nginx_http_requests_total[1m])) * 100 > 5
|
|
||||||
for: 1m
|
|
||||||
labels:
|
|
||||||
severity: critical
|
|
||||||
annotations:
|
|
||||||
summary: Nginx high HTTP 4xx error rate (instance {{ $labels.instance }})
|
|
||||||
description: "Too many HTTP requests with status 4xx (> 5%)\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"
|
|
||||||
|
|
||||||
- alert: NginxHighHttp5xxErrorRate
|
|
||||||
expr: sum(rate(nginx_http_requests_total{status=~"^5.."}[1m])) / sum(rate(nginx_http_requests_total[1m])) * 100 > 5
|
|
||||||
for: 1m
|
|
||||||
labels:
|
|
||||||
severity: critical
|
|
||||||
annotations:
|
|
||||||
summary: Nginx high HTTP 5xx error rate (instance {{ $labels.instance }})
|
|
||||||
description: "Too many HTTP requests with status 5xx (> 5%)\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"
|
|
||||||
|
|
||||||
@ -11,12 +11,11 @@ route:
|
|||||||
receivers:
|
receivers:
|
||||||
- name: 'email'
|
- name: 'email'
|
||||||
email_configs:
|
email_configs:
|
||||||
- to: 'mail1@mail.com, mail2@mail.com'
|
- to: 'mail-1@mail.com, mail-2@mail.com'
|
||||||
from: ''
|
from: 'mail@mail.com'
|
||||||
smarthost: ''
|
smarthost: 'smtp.mail-provider.net:port'
|
||||||
auth_username: ''
|
auth_username: 'mail@mail.com'
|
||||||
auth_identity: ''
|
auth_password: 'password'
|
||||||
auth_password: ''
|
|
||||||
require_tls: yes
|
require_tls: yes
|
||||||
send_resolved: true
|
send_resolved: true
|
||||||
|
|
||||||
|
|||||||
@ -1,11 +0,0 @@
|
|||||||
route:
|
|
||||||
receiver: 'slack'
|
|
||||||
|
|
||||||
receivers:
|
|
||||||
- name: 'slack'
|
|
||||||
slack_configs:
|
|
||||||
- send_resolved: true
|
|
||||||
text: "{{ .CommonAnnotations.description }}"
|
|
||||||
username: 'Prometheus'
|
|
||||||
channel: '#prometheus'
|
|
||||||
api_url: 'https://hooks.slack.com/services/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'
|
|
||||||
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@ -1,398 +0,0 @@
|
|||||||
{
|
|
||||||
"id": null,
|
|
||||||
"title": "Nginx",
|
|
||||||
"description": "Nginx exporter metrics",
|
|
||||||
"tags": [
|
|
||||||
"nginx"
|
|
||||||
],
|
|
||||||
"style": "dark",
|
|
||||||
"timezone": "browser",
|
|
||||||
"editable": true,
|
|
||||||
"hideControls": false,
|
|
||||||
"sharedCrosshair": true,
|
|
||||||
"rows": [
|
|
||||||
{
|
|
||||||
"collapse": false,
|
|
||||||
"editable": true,
|
|
||||||
"height": "250px",
|
|
||||||
"panels": [
|
|
||||||
{
|
|
||||||
"aliasColors": {},
|
|
||||||
"bars": false,
|
|
||||||
"datasource": "Prometheus",
|
|
||||||
"decimals": 2,
|
|
||||||
"editable": true,
|
|
||||||
"error": false,
|
|
||||||
"fill": 1,
|
|
||||||
"grid": {
|
|
||||||
"threshold1": null,
|
|
||||||
"threshold1Color": "rgba(216, 200, 27, 0.27)",
|
|
||||||
"threshold2": null,
|
|
||||||
"threshold2Color": "rgba(234, 112, 112, 0.22)"
|
|
||||||
},
|
|
||||||
"id": 3,
|
|
||||||
"isNew": true,
|
|
||||||
"legend": {
|
|
||||||
"alignAsTable": true,
|
|
||||||
"avg": true,
|
|
||||||
"current": true,
|
|
||||||
"max": true,
|
|
||||||
"min": true,
|
|
||||||
"rightSide": true,
|
|
||||||
"show": true,
|
|
||||||
"total": false,
|
|
||||||
"values": true
|
|
||||||
},
|
|
||||||
"lines": true,
|
|
||||||
"linewidth": 2,
|
|
||||||
"links": [],
|
|
||||||
"nullPointMode": "connected",
|
|
||||||
"percentage": false,
|
|
||||||
"pointradius": 5,
|
|
||||||
"points": false,
|
|
||||||
"renderer": "flot",
|
|
||||||
"seriesOverrides": [],
|
|
||||||
"span": 12,
|
|
||||||
"stack": false,
|
|
||||||
"steppedLine": false,
|
|
||||||
"targets": [
|
|
||||||
{
|
|
||||||
"expr": "sum(irate(nginx_connections_processed_total{stage=\"any\"}[5m])) by (stage)",
|
|
||||||
"hide": false,
|
|
||||||
"interval": "",
|
|
||||||
"intervalFactor": 10,
|
|
||||||
"legendFormat": "requests",
|
|
||||||
"metric": "",
|
|
||||||
"refId": "B",
|
|
||||||
"step": 10
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"timeFrom": null,
|
|
||||||
"timeShift": null,
|
|
||||||
"title": "Requests/sec",
|
|
||||||
"tooltip": {
|
|
||||||
"msResolution": false,
|
|
||||||
"shared": true,
|
|
||||||
"sort": 0,
|
|
||||||
"value_type": "cumulative"
|
|
||||||
},
|
|
||||||
"type": "graph",
|
|
||||||
"xaxis": {
|
|
||||||
"show": true
|
|
||||||
},
|
|
||||||
"yaxes": [
|
|
||||||
{
|
|
||||||
"format": "short",
|
|
||||||
"label": null,
|
|
||||||
"logBase": 1,
|
|
||||||
"max": null,
|
|
||||||
"min": 0,
|
|
||||||
"show": true
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"format": "short",
|
|
||||||
"label": null,
|
|
||||||
"logBase": 1,
|
|
||||||
"max": null,
|
|
||||||
"min": null,
|
|
||||||
"show": true
|
|
||||||
}
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"aliasColors": {},
|
|
||||||
"bars": false,
|
|
||||||
"datasource": "Prometheus",
|
|
||||||
"decimals": 2,
|
|
||||||
"editable": true,
|
|
||||||
"error": false,
|
|
||||||
"fill": 1,
|
|
||||||
"grid": {
|
|
||||||
"threshold1": null,
|
|
||||||
"threshold1Color": "rgba(216, 200, 27, 0.27)",
|
|
||||||
"threshold2": null,
|
|
||||||
"threshold2Color": "rgba(234, 112, 112, 0.22)"
|
|
||||||
},
|
|
||||||
"id": 2,
|
|
||||||
"isNew": true,
|
|
||||||
"legend": {
|
|
||||||
"alignAsTable": true,
|
|
||||||
"avg": true,
|
|
||||||
"current": true,
|
|
||||||
"max": true,
|
|
||||||
"min": true,
|
|
||||||
"rightSide": true,
|
|
||||||
"show": true,
|
|
||||||
"total": false,
|
|
||||||
"values": true
|
|
||||||
},
|
|
||||||
"lines": true,
|
|
||||||
"linewidth": 2,
|
|
||||||
"links": [],
|
|
||||||
"nullPointMode": "connected",
|
|
||||||
"percentage": false,
|
|
||||||
"pointradius": 5,
|
|
||||||
"points": false,
|
|
||||||
"renderer": "flot",
|
|
||||||
"seriesOverrides": [],
|
|
||||||
"span": 12,
|
|
||||||
"stack": false,
|
|
||||||
"steppedLine": false,
|
|
||||||
"targets": [
|
|
||||||
{
|
|
||||||
"expr": "sum(nginx_connections_current) by (state)",
|
|
||||||
"interval": "",
|
|
||||||
"intervalFactor": 2,
|
|
||||||
"legendFormat": "{{state}}",
|
|
||||||
"metric": "",
|
|
||||||
"refId": "A",
|
|
||||||
"step": 2
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"timeFrom": null,
|
|
||||||
"timeShift": null,
|
|
||||||
"title": "Connections",
|
|
||||||
"tooltip": {
|
|
||||||
"msResolution": false,
|
|
||||||
"shared": true,
|
|
||||||
"sort": 0,
|
|
||||||
"value_type": "cumulative"
|
|
||||||
},
|
|
||||||
"type": "graph",
|
|
||||||
"xaxis": {
|
|
||||||
"show": true
|
|
||||||
},
|
|
||||||
"yaxes": [
|
|
||||||
{
|
|
||||||
"format": "short",
|
|
||||||
"label": null,
|
|
||||||
"logBase": 1,
|
|
||||||
"max": null,
|
|
||||||
"min": 0,
|
|
||||||
"show": true
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"format": "short",
|
|
||||||
"label": null,
|
|
||||||
"logBase": 1,
|
|
||||||
"max": null,
|
|
||||||
"min": null,
|
|
||||||
"show": true
|
|
||||||
}
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"aliasColors": {},
|
|
||||||
"bars": false,
|
|
||||||
"datasource": "Prometheus",
|
|
||||||
"decimals": 2,
|
|
||||||
"editable": true,
|
|
||||||
"error": false,
|
|
||||||
"fill": 1,
|
|
||||||
"grid": {
|
|
||||||
"threshold1": null,
|
|
||||||
"threshold1Color": "rgba(216, 200, 27, 0.27)",
|
|
||||||
"threshold2": null,
|
|
||||||
"threshold2Color": "rgba(234, 112, 112, 0.22)"
|
|
||||||
},
|
|
||||||
"id": 1,
|
|
||||||
"isNew": true,
|
|
||||||
"legend": {
|
|
||||||
"alignAsTable": true,
|
|
||||||
"avg": true,
|
|
||||||
"current": true,
|
|
||||||
"max": true,
|
|
||||||
"min": true,
|
|
||||||
"rightSide": true,
|
|
||||||
"show": true,
|
|
||||||
"total": false,
|
|
||||||
"values": true
|
|
||||||
},
|
|
||||||
"lines": true,
|
|
||||||
"linewidth": 2,
|
|
||||||
"links": [],
|
|
||||||
"nullPointMode": "connected",
|
|
||||||
"percentage": false,
|
|
||||||
"pointradius": 5,
|
|
||||||
"points": false,
|
|
||||||
"renderer": "flot",
|
|
||||||
"seriesOverrides": [],
|
|
||||||
"span": 12,
|
|
||||||
"stack": false,
|
|
||||||
"steppedLine": false,
|
|
||||||
"targets": [
|
|
||||||
{
|
|
||||||
"expr": "sum(irate(nginx_connections_processed_total{stage!=\"any\"}[5m])) by (stage)",
|
|
||||||
"hide": false,
|
|
||||||
"interval": "",
|
|
||||||
"intervalFactor": 10,
|
|
||||||
"legendFormat": "{{stage}}",
|
|
||||||
"metric": "",
|
|
||||||
"refId": "B",
|
|
||||||
"step": 10
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"timeFrom": null,
|
|
||||||
"timeShift": null,
|
|
||||||
"title": "Connections rate",
|
|
||||||
"tooltip": {
|
|
||||||
"msResolution": false,
|
|
||||||
"shared": true,
|
|
||||||
"sort": 0,
|
|
||||||
"value_type": "cumulative"
|
|
||||||
},
|
|
||||||
"type": "graph",
|
|
||||||
"xaxis": {
|
|
||||||
"show": true
|
|
||||||
},
|
|
||||||
"yaxes": [
|
|
||||||
{
|
|
||||||
"format": "short",
|
|
||||||
"label": null,
|
|
||||||
"logBase": 1,
|
|
||||||
"max": null,
|
|
||||||
"min": 0,
|
|
||||||
"show": true
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"format": "short",
|
|
||||||
"label": null,
|
|
||||||
"logBase": 1,
|
|
||||||
"max": null,
|
|
||||||
"min": null,
|
|
||||||
"show": true
|
|
||||||
}
|
|
||||||
]
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"title": "Nginx exporter metrics"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"collapse": false,
|
|
||||||
"editable": true,
|
|
||||||
"height": "250px",
|
|
||||||
"panels": [
|
|
||||||
{
|
|
||||||
"aliasColors": {},
|
|
||||||
"bars": false,
|
|
||||||
"datasource": null,
|
|
||||||
"editable": true,
|
|
||||||
"error": false,
|
|
||||||
"fill": 1,
|
|
||||||
"grid": {
|
|
||||||
"threshold1": null,
|
|
||||||
"threshold1Color": "rgba(216, 200, 27, 0.27)",
|
|
||||||
"threshold2": null,
|
|
||||||
"threshold2Color": "rgba(234, 112, 112, 0.22)"
|
|
||||||
},
|
|
||||||
"id": 4,
|
|
||||||
"isNew": true,
|
|
||||||
"legend": {
|
|
||||||
"alignAsTable": true,
|
|
||||||
"avg": true,
|
|
||||||
"current": true,
|
|
||||||
"max": true,
|
|
||||||
"min": true,
|
|
||||||
"rightSide": true,
|
|
||||||
"show": true,
|
|
||||||
"total": false,
|
|
||||||
"values": true
|
|
||||||
},
|
|
||||||
"lines": true,
|
|
||||||
"linewidth": 2,
|
|
||||||
"links": [],
|
|
||||||
"nullPointMode": "connected",
|
|
||||||
"percentage": false,
|
|
||||||
"pointradius": 5,
|
|
||||||
"points": false,
|
|
||||||
"renderer": "flot",
|
|
||||||
"seriesOverrides": [],
|
|
||||||
"span": 12,
|
|
||||||
"stack": false,
|
|
||||||
"steppedLine": false,
|
|
||||||
"targets": [
|
|
||||||
{
|
|
||||||
"expr": "sum(rate(container_cpu_usage_seconds_total{name=~\"nginx\"}[5m])) / count(node_cpu_seconds_total{mode=\"system\"}) * 100",
|
|
||||||
"intervalFactor": 2,
|
|
||||||
"legendFormat": "nginx",
|
|
||||||
"refId": "A",
|
|
||||||
"step": 2
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"timeFrom": null,
|
|
||||||
"timeShift": null,
|
|
||||||
"title": "CPU usage",
|
|
||||||
"tooltip": {
|
|
||||||
"msResolution": false,
|
|
||||||
"shared": true,
|
|
||||||
"sort": 0,
|
|
||||||
"value_type": "cumulative"
|
|
||||||
},
|
|
||||||
"type": "graph",
|
|
||||||
"xaxis": {
|
|
||||||
"show": true
|
|
||||||
},
|
|
||||||
"yaxes": [
|
|
||||||
{
|
|
||||||
"format": "short",
|
|
||||||
"label": null,
|
|
||||||
"logBase": 1,
|
|
||||||
"max": null,
|
|
||||||
"min": null,
|
|
||||||
"show": true
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"format": "short",
|
|
||||||
"label": null,
|
|
||||||
"logBase": 1,
|
|
||||||
"max": null,
|
|
||||||
"min": null,
|
|
||||||
"show": true
|
|
||||||
}
|
|
||||||
]
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"title": "Nginx container metrics"
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"time": {
|
|
||||||
"from": "now-15m",
|
|
||||||
"to": "now"
|
|
||||||
},
|
|
||||||
"timepicker": {
|
|
||||||
"refresh_intervals": [
|
|
||||||
"5s",
|
|
||||||
"10s",
|
|
||||||
"30s",
|
|
||||||
"1m",
|
|
||||||
"5m",
|
|
||||||
"15m",
|
|
||||||
"30m",
|
|
||||||
"1h",
|
|
||||||
"2h",
|
|
||||||
"1d"
|
|
||||||
],
|
|
||||||
"time_options": [
|
|
||||||
"5m",
|
|
||||||
"15m",
|
|
||||||
"1h",
|
|
||||||
"6h",
|
|
||||||
"12h",
|
|
||||||
"24h",
|
|
||||||
"2d",
|
|
||||||
"7d",
|
|
||||||
"30d"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
"templating": {
|
|
||||||
"list": []
|
|
||||||
},
|
|
||||||
"annotations": {
|
|
||||||
"list": []
|
|
||||||
},
|
|
||||||
"refresh": "10s",
|
|
||||||
"schemaVersion": 12,
|
|
||||||
"version": 9,
|
|
||||||
"links": [],
|
|
||||||
"gnetId": null
|
|
||||||
}
|
|
||||||
11
grafana/provisioning/notifiers/email.yaml
Normal file
11
grafana/provisioning/notifiers/email.yaml
Normal file
@ -0,0 +1,11 @@
|
|||||||
|
apiVersion: 1
|
||||||
|
|
||||||
|
notifiers:
|
||||||
|
- name: garagenum
|
||||||
|
type: email
|
||||||
|
uid: 1
|
||||||
|
isDefault: true
|
||||||
|
sendReminder: true
|
||||||
|
disableResolveMessage: false
|
||||||
|
settings:
|
||||||
|
addresses: email-1@mail.com;email-2@mail.com
|
||||||
@ -52,6 +52,7 @@ scrape_configs:
|
|||||||
# labels:
|
# labels:
|
||||||
# instance: serveur-distant
|
# instance: serveur-distant
|
||||||
|
|
||||||
|
# SI ALERT VIA ALERTMANAGER SINON COMMENTER !
|
||||||
alerting:
|
alerting:
|
||||||
alertmanagers:
|
alertmanagers:
|
||||||
- scheme: http
|
- scheme: http
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user