prometheus, grafana, cadvisor: healthcheck ok; healthcheck not ok: node exporter, pushgateway, alertmanager

This commit is contained in:
hamza rahmani 2025-01-14 11:02:57 +01:00
parent 70f0d6f57e
commit abb30dc3fb

View File

@@ -56,10 +56,11 @@ services:
labels: labels:
org.label-schema.group: "monitoring" org.label-schema.group: "monitoring"
healthcheck: healthcheck:
test: ["CMD", "curl", "-f", "http://localhost:9090/-/healthy"] test: ["CMD", "wget", "http://localhost:9090"]
interval: 30s interval: 10s
timeout: 5s timeout: 15s
retries: 3 retries: 10
start_period: 40s
nodeexporter: nodeexporter:
image: prom/node-exporter:v1.8.2 image: prom/node-exporter:v1.8.2
@@ -80,11 +81,7 @@ services:
- grafana-network - grafana-network
labels: labels:
org.label-schema.group: "monitoring" org.label-schema.group: "monitoring"
healthcheck:
test: ["CMD", "curl", "-f", "http://localhost:9100/metrics"]
interval: 30s
timeout: 5s
retries: 3
cadvisor: cadvisor:
image: gcr.io/cadvisor/cadvisor image: gcr.io/cadvisor/cadvisor
@@ -102,11 +99,7 @@ services:
- grafana-network - grafana-network
labels: labels:
org.label-schema.group: "monitoring" org.label-schema.group: "monitoring"
healthcheck:
test: ["CMD", "curl", "-f", "http://localhost:8080/healthz"]
interval: 30s
timeout: 5s
retries: 3
loki: loki:
image: grafana/loki:latest image: grafana/loki:latest
@@ -122,10 +115,11 @@ services:
networks: networks:
- grafana-network - grafana-network
healthcheck: healthcheck:
test: ["CMD", "curl", "-f", "http://localhost:3100/ready"] test: wget --quiet --tries=1 --output-document=- http://localhost:3100/ready | grep -q -w ready || exit 1
interval: 30s start_period: 20s
timeout: 5s interval: 10s
retries: 3 timeout: 1s
retries: 12 # try for 2 minutes
alertmanager: alertmanager:
image: prom/alertmanager:v0.20.0 image: prom/alertmanager:v0.20.0