# Mirror of https://github.com/makayabou/asg-server.git
# Synced 2026-05-02 17:43:36 +02:00
# 39 lines, 1.4 KiB, YAML
# Prometheus alerting rules for the "online" module metrics (sms_online_*).
groups:
  - name: online_metrics_alerts
    rules:
      # Fires when the summed 5-minute increase of status-set operations
      # labelled status="error" stays above 10 for 5 minutes.
      - alert: OnlineStatusErrors
        expr: sum(increase(sms_online_status_set_total{status="error"}[5m])) > 10
        for: 5m
        labels:
          severity: warning
        annotations:
          summary: "High number of online status errors"
          description: "The number of online status errors has exceeded 10 in the last 5 minutes."

      # Evaluated per `operation` label value, so one alert instance is raised
      # for each operation whose 5-minute error increase stays above 5.
      - alert: CacheOperationErrors
        expr: sum by (operation) (increase(sms_online_cache_operations_total{status="error"}[5m])) > 5
        for: 5m
        labels:
          severity: warning
        annotations:
          summary: "High number of cache operation errors"
          description: "Cache errors for operation={{ $labels.operation }} exceeded 5 in 5m."

      # Any non-zero 5-minute increase of the persistence error counter.
      # NOTE(review): the increase window (5m) equals the `for` hold period
      # (5m), so a short burst of errors may drop out of the window before the
      # alert leaves the pending state - confirm this is intended for a
      # critical alert.
      - alert: PersistenceErrors
        expr: sum(increase(sms_online_persistence_errors_total[5m])) > 0
        for: 5m
        labels:
          severity: critical
        annotations:
          summary: "Persistence errors detected"
          description: "Persistence errors have been detected in the online module."

      # 95th percentile of persistence latency, computed from the histogram
      # buckets aggregated by `le` across all series; threshold is 0.5 seconds.
      - alert: HighPersistenceLatency
        expr: histogram_quantile(0.95, sum by (le) (rate(sms_online_persistence_latency_seconds_bucket[5m]))) > 0.5
        for: 5m
        labels:
          severity: warning
        annotations:
          summary: "High persistence latency"
          description: "The 95th percentile persistence latency has exceeded 0.5 seconds."