Compare commits

...

2 Commits
main ... health

View File

@ -6,12 +6,11 @@ networks:
- subnet: 192.168.100.0/24 - subnet: 192.168.100.0/24
volumes: volumes:
prometheus_data: {} prometheus_data: {}
grafana_data: {} grafana_data: {}
services: services:
# FOR DASHBOARD DISPLAY
grafana: grafana:
image: grafana/grafana:11.2.0 image: grafana/grafana:11.2.0
container_name: grafana container_name: grafana
@ -30,8 +29,12 @@ services:
ipv4_address: 192.168.100.10 ipv4_address: 192.168.100.10
labels: labels:
org.label-schema.group: "monitoring" org.label-schema.group: "monitoring"
healthcheck:
  # Probe Grafana's HTTP health endpoint on port 3000 from inside the
  # container; `curl -f` exits non-zero on an HTTP error status.
  test:
    - "CMD"
    - "curl"
    - "-f"
    - "http://localhost:3000/api/health"
  # Probe every 30s, allow 5s per probe, and mark the container unhealthy
  # after 3 consecutive failures.
  interval: 30s
  timeout: 5s
  retries: 3
# METRICS GATHERER
prometheus: prometheus:
image: prom/prometheus:v2.54.1 image: prom/prometheus:v2.54.1
container_name: prometheus container_name: prometheus
@ -52,8 +55,13 @@ services:
- grafana-network - grafana-network
labels: labels:
org.label-schema.group: "monitoring" org.label-schema.group: "monitoring"
healthcheck:
  # Probe Prometheus' dedicated health endpoint instead of the UI root.
  # `-O /dev/null` discards the response body: without an explicit output
  # target, wget saves the page as a file in the working directory and
  # BusyBox wget then fails on later probes because the file already exists.
  test: ["CMD", "wget", "-q", "-O", "/dev/null", "http://localhost:9090/-/healthy"]
  # Probe every 10s with up to 15s per probe; 10 consecutive failures mark
  # the container unhealthy. Failures during the first 40s are not counted.
  interval: 10s
  timeout: 15s
  retries: 10
  start_period: 40s
# FOR HOST METRICS
nodeexporter: nodeexporter:
image: prom/node-exporter:v1.8.2 image: prom/node-exporter:v1.8.2
container_name: nodeexporter container_name: nodeexporter
@ -73,8 +81,8 @@ services:
- grafana-network - grafana-network
labels: labels:
org.label-schema.group: "monitoring" org.label-schema.group: "monitoring"
# FOR DOCKER CONTAINERS
cadvisor: cadvisor:
image: gcr.io/cadvisor/cadvisor image: gcr.io/cadvisor/cadvisor
container_name: cadvisor container_name: cadvisor
@ -91,8 +99,8 @@ services:
- grafana-network - grafana-network
labels: labels:
org.label-schema.group: "monitoring" org.label-schema.group: "monitoring"
# LOKI FOR LOGS
loki: loki:
image: grafana/loki:latest image: grafana/loki:latest
container_name: loki container_name: loki
@ -103,19 +111,22 @@ services:
- ./loki/cert:/etc/loki/cert - ./loki/cert:/etc/loki/cert
- /etc/localtime:/etc/localtime:ro - /etc/localtime:/etc/localtime:ro
- /etc/timezone:/etc/timezone:ro - /etc/timezone:/etc/timezone:ro
# environment:
command: -config.file=/etc/loki/config.yml -config.expand-env=true command: -config.file=/etc/loki/config.yml -config.expand-env=true
networks: networks:
- grafana-network - grafana-network
healthcheck:
  # Run through the container's shell (CMD-SHELL is what a plain string
  # `test` expands to): fetch /ready on port 3100 and succeed only when the
  # response contains the whole word "ready".
  test:
    - CMD-SHELL
    - 'wget --quiet --tries=1 --output-document=- http://localhost:3100/ready | grep -q -w ready || exit 1'
  # Failures during the first 20s after start are not counted.
  start_period: 20s
  interval: 10s
  timeout: 1s
  # 12 retries at a 10s interval: keep trying for about 2 minutes.
  retries: 12
# FOR ALERTS
alertmanager: alertmanager:
image: prom/alertmanager:v0.20.0 image: prom/alertmanager:v0.20.0
container_name: alertmanager container_name: alertmanager
volumes: volumes:
- ./alertmanager:/etc/alertmanager - ./alertmanager:/etc/alertmanager
command: command:
#- '--config.file=/etc/alertmanager/config.yml'
- '--config.file=/etc/alertmanager/alertmanager.yml' - '--config.file=/etc/alertmanager/alertmanager.yml'
- '--storage.path=/alertmanager' - '--storage.path=/alertmanager'
restart: unless-stopped restart: unless-stopped
@ -125,6 +136,11 @@ services:
- grafana-network - grafana-network
labels: labels:
org.label-schema.group: "monitoring" org.label-schema.group: "monitoring"
healthcheck:
  # Probe Alertmanager's readiness endpoint on port 9093.
  # NOTE: the official prom/alertmanager image is BusyBox-based and does
  # not ship curl, so a curl probe can never succeed; use the bundled wget
  # and discard the response body.
  test: ["CMD", "wget", "-q", "-O", "/dev/null", "http://localhost:9093/-/ready"]
  # Probe every 30s, allow 5s per probe, and mark the container unhealthy
  # after 3 consecutive failures.
  interval: 30s
  timeout: 5s
  retries: 3
pushgateway: pushgateway:
image: prom/pushgateway:v1.2.0 image: prom/pushgateway:v1.2.0
@ -136,3 +152,8 @@ services:
- grafana-network - grafana-network
labels: labels:
org.label-schema.group: "monitoring" org.label-schema.group: "monitoring"
healthcheck:
  # Probe the Pushgateway HTTP server on port 9091 via its metrics page.
  # NOTE: the official prom/pushgateway image is BusyBox-based and does not
  # ship curl, so a curl probe can never succeed; use the bundled wget and
  # discard the response body.
  test: ["CMD", "wget", "-q", "-O", "/dev/null", "http://localhost:9091/metrics"]
  # Probe every 30s, allow 5s per probe, and mark the container unhealthy
  # after 3 consecutive failures.
  interval: 30s
  timeout: 5s
  retries: 3