diff --git a/grafana/provisioning/alerting/cpu-alert-rules.yml b/grafana/provisioning/alerting/cpu-alert-rules.yml new file mode 100644 index 0000000..a15550a --- /dev/null +++ b/grafana/provisioning/alerting/cpu-alert-rules.yml @@ -0,0 +1,420 @@ +apiVersion: 1 + +groups: + - orgId: 1 + name: alertes + folder: rules + interval: 1m + rules: + - uid: cpu-gnprod + title: CPU High - GNPROD + condition: H + data: + - refId: A + relativeTimeRange: + from: 300 + to: 0 + datasourceUid: PBFA97CFB590B2093 + model: + datasource: + type: prometheus + uid: PBFA97CFB590B2093 + expr: 100 - (avg(rate(node_cpu_seconds_total{instance=~"gnprod", mode="idle"}[1m])) * 100) + format: time_series + instant: false + interval: 15s + refId: A + + - refId: F + datasourceUid: __expr__ + model: + expression: A + type: reduce + reducer: last + refId: F + + - refId: H + datasourceUid: __expr__ + model: + expression: F + type: threshold + refId: H + conditions: + - evaluator: + type: gt + params: [80] + operator: + type: and + query: + params: [H] + reducer: + type: last + params: [] + type: query + + dashboardUid: EbtmMBZSk + panelId: 27 + noDataState: NoData + execErrState: Error + for: 1m + annotations: + __dashboardUid__: EbtmMBZSk + __panelId__: "27" + labels: {} + isPaused: false + notification_settings: + receiver: garagenum-email + + - uid: cpu-adventure + title: CPU High - ADVENTURE + condition: H + data: + - refId: A + relativeTimeRange: + from: 300 + to: 0 + datasourceUid: PBFA97CFB590B2093 + model: + datasource: + type: prometheus + uid: PBFA97CFB590B2093 + expr: 100 - (avg(rate(node_cpu_seconds_total{instance=~"adventure", mode="idle"}[1m])) * 100) + format: time_series + instant: false + interval: 15s + refId: A + + - refId: F + datasourceUid: __expr__ + model: + expression: A + type: reduce + reducer: last + refId: F + + - refId: H + datasourceUid: __expr__ + model: + expression: F + type: threshold + refId: H + conditions: + - evaluator: + type: gt + params: [80] + operator: + 
type: and + query: + params: [H] + reducer: + type: last + params: [] + type: query + + dashboardUid: EbtmMBZSk + panelId: 27 + noDataState: NoData + execErrState: Error + for: 1m + annotations: + __dashboardUid__: EbtmMBZSk + __panelId__: "27" + labels: {} + isPaused: false + notification_settings: + receiver: garagenum-email + + - uid: cpu-webhosting + title: CPU High - WEBHOSTING + condition: H + data: + - refId: A + relativeTimeRange: + from: 300 + to: 0 + datasourceUid: PBFA97CFB590B2093 + model: + datasource: + type: prometheus + uid: PBFA97CFB590B2093 + expr: 100 - (avg(rate(node_cpu_seconds_total{instance=~"cicd-server", mode="idle"}[1m])) * 100) + format: time_series + instant: false + interval: 15s + refId: A + + - refId: F + datasourceUid: __expr__ + model: + expression: A + type: reduce + reducer: last + refId: F + + - refId: H + datasourceUid: __expr__ + model: + expression: F + type: threshold + refId: H + conditions: + - evaluator: + type: gt + params: [80] + operator: + type: and + query: + params: [H] + reducer: + type: last + params: [] + type: query + + dashboardUid: EbtmMBZSk + panelId: 27 + noDataState: NoData + execErrState: Error + for: 1m + annotations: + __dashboardUid__: EbtmMBZSk + __panelId__: "27" + labels: {} + isPaused: false + notification_settings: + receiver: garagenum-email + + - uid: cpu-backup-prod + title: CPU High - PRODBACKUP + condition: H + data: + - refId: A + relativeTimeRange: + from: 300 + to: 0 + datasourceUid: PBFA97CFB590B2093 + model: + datasource: + type: prometheus + uid: PBFA97CFB590B2093 + expr: 100 - (avg(rate(node_cpu_seconds_total{instance=~"prod-backup", mode="idle"}[1m])) * 100) + format: time_series + instant: false + interval: 15s + refId: A + + - refId: F + datasourceUid: __expr__ + model: + expression: A + type: reduce + reducer: last + refId: F + + - refId: H + datasourceUid: __expr__ + model: + expression: F + type: threshold + refId: H + conditions: + - evaluator: + type: gt + params: [80] + 
operator: + type: and + query: + params: [H] + reducer: + type: last + params: [] + type: query + + dashboardUid: EbtmMBZSk + panelId: 27 + noDataState: NoData + execErrState: Error + for: 1m + annotations: + __dashboardUid__: EbtmMBZSk + __panelId__: "27" + labels: {} + isPaused: false + notification_settings: + receiver: garagenum-email + + - uid: cpu-prod-2 + title: CPU High - PROD-2 + condition: H + data: + - refId: A + relativeTimeRange: + from: 300 + to: 0 + datasourceUid: PBFA97CFB590B2093 + model: + datasource: + type: prometheus + uid: PBFA97CFB590B2093 + expr: 100 - (avg(rate(node_cpu_seconds_total{instance=~"prod-2", mode="idle"}[1m])) * 100) + format: time_series + instant: false + interval: 15s + refId: A + + - refId: F + datasourceUid: __expr__ + model: + expression: A + type: reduce + reducer: last + refId: F + + - refId: H + datasourceUid: __expr__ + model: + expression: F + type: threshold + refId: H + conditions: + - evaluator: + type: gt + params: [80] + operator: + type: and + query: + params: [H] + reducer: + type: last + params: [] + type: query + + dashboardUid: EbtmMBZSk + panelId: 27 + noDataState: NoData + execErrState: Error + for: 1m + annotations: + __dashboardUid__: EbtmMBZSk + __panelId__: "27" + labels: {} + isPaused: false + notification_settings: + receiver: garagenum-email + + - uid: cpu-prod-3 + title: CPU High - PROD-3 / ODOO + condition: H + data: + - refId: A + relativeTimeRange: + from: 300 + to: 0 + datasourceUid: PBFA97CFB590B2093 + model: + datasource: + type: prometheus + uid: PBFA97CFB590B2093 + expr: 100 - (avg(rate(node_cpu_seconds_total{instance=~"prod-3", mode="idle"}[1m])) * 100) + format: time_series + instant: false + interval: 15s + refId: A + + - refId: F + datasourceUid: __expr__ + model: + expression: A + type: reduce + reducer: last + refId: F + + - refId: H + datasourceUid: __expr__ + model: + expression: F + type: threshold + refId: H + conditions: + - evaluator: + type: gt + params: [80] + operator: + 
type: and + query: + params: [H] + reducer: + type: last + params: [] + type: query + + dashboardUid: EbtmMBZSk + panelId: 27 + noDataState: NoData + execErrState: Error + for: 1m + annotations: + __dashboardUid__: EbtmMBZSk + __panelId__: "27" + labels: {} + isPaused: false + notification_settings: + receiver: garagenum-email + + - uid: cpu-garage-ai + title: CPU High - GARAGE-AI + condition: H + data: + - refId: A + relativeTimeRange: + from: 300 + to: 0 + datasourceUid: PBFA97CFB590B2093 + model: + datasource: + type: prometheus + uid: PBFA97CFB590B2093 + expr: 100 - (avg(rate(node_cpu_seconds_total{instance=~"garage-ai", mode="idle"}[1m])) * 100) + format: time_series + instant: false + interval: 15s + refId: A + + - refId: F + datasourceUid: __expr__ + model: + expression: A + type: reduce + reducer: last + refId: F + + - refId: H + datasourceUid: __expr__ + model: + expression: F + type: threshold + refId: H + conditions: + - evaluator: + type: gt + params: [80] + operator: + type: and + query: + params: [H] + reducer: + type: last + params: [] + type: query + + dashboardUid: EbtmMBZSk + panelId: 27 + noDataState: NoData + execErrState: Error + for: 1m + annotations: + __dashboardUid__: EbtmMBZSk + __panelId__: "27" + labels: {} + isPaused: false + notification_settings: + receiver: garagenum-email diff --git a/grafana/provisioning/alerting/disk-alert-rules.yml b/grafana/provisioning/alerting/disk-alert-rules.yml new file mode 100644 index 0000000..8f71409 --- /dev/null +++ b/grafana/provisioning/alerting/disk-alert-rules.yml @@ -0,0 +1,700 @@ +apiVersion: 1 +groups: + - orgId: 1 + name: DISK + folder: rules + interval: 1m + rules: + +# GNPROD + - uid: disk-used-gnprod + title: DISK used - GNPROD + condition: G + data: + - refId: A + queryType: randomWalk + relativeTimeRange: + from: 86400 + to: 0 + datasourceUid: PBFA97CFB590B2093 + model: + datasource: + type: prometheus + uid: PBFA97CFB590B2093 + editorMode: code + exemplar: true + expr: 
(node_filesystem_size_bytes{instance=~'gnprod',fstype=~"ext.*|xfs",mountpoint !~".*pod.*"}-node_filesystem_free_bytes{instance=~'gnprod',fstype=~"ext.*|xfs",mountpoint !~".*pod.*"}) *100/(node_filesystem_avail_bytes {instance=~'gnprod',fstype=~"ext.*|xfs",mountpoint !~".*pod.*"}+(node_filesystem_size_bytes{instance=~'gnprod',fstype=~"ext.*|xfs",mountpoint !~".*pod.*"}-node_filesystem_free_bytes{instance=~'gnprod',fstype=~"ext.*|xfs",mountpoint !~".*pod.*"})) + instant: false + interval: 5s + intervalMs: 5000 + legendFormat: gnprod + maxDataPoints: 43200 + queryType: randomWalk + range: false + refId: A + - refId: D + relativeTimeRange: + from: 86400 + to: 0 + datasourceUid: __expr__ + model: + conditions: + - evaluator: + params: [] + type: gt + operator: + type: and + query: + params: + - D + reducer: + params: [] + type: last + type: query + datasource: + type: __expr__ + uid: __expr__ + expression: A + intervalMs: 1000 + maxDataPoints: 43200 + reducer: last + refId: D + type: reduce + - refId: G + relativeTimeRange: + from: 86400 + to: 0 + datasourceUid: __expr__ + model: + conditions: + - evaluator: + params: + - 80 + type: gt + operator: + type: and + query: + params: + - G + reducer: + params: [] + type: last + type: query + datasource: + type: __expr__ + uid: __expr__ + expression: D + intervalMs: 1000 + maxDataPoints: 43200 + refId: G + type: threshold + dashboardUid: EbtmMBZSk + panelId: 31 + noDataState: NoData + execErrState: Error + for: 1m + annotations: + __dashboardUid__: EbtmMBZSk + __panelId__: "31" + description: "" + runbook_url: "" + summary: "" + labels: + "": "" + isPaused: false + notification_settings: + receiver: garagenum-email + +# ADVENTURE + - uid: disk-used-adventure + title: DISK used - ADVENTURE + condition: G + data: + - refId: A + queryType: randomWalk + relativeTimeRange: + from: 86400 + to: 0 + datasourceUid: PBFA97CFB590B2093 + model: + datasource: + type: prometheus + uid: PBFA97CFB590B2093 + editorMode: code + exemplar: true + 
expr: (node_filesystem_size_bytes{instance=~'adventure',fstype=~"ext.*|xfs",mountpoint !~".*pod.*"}-node_filesystem_free_bytes{instance=~'adventure',fstype=~"ext.*|xfs",mountpoint !~".*pod.*"}) *100/(node_filesystem_avail_bytes {instance=~'adventure',fstype=~"ext.*|xfs",mountpoint !~".*pod.*"}+(node_filesystem_size_bytes{instance=~'adventure',fstype=~"ext.*|xfs",mountpoint !~".*pod.*"}-node_filesystem_free_bytes{instance=~'adventure',fstype=~"ext.*|xfs",mountpoint !~".*pod.*"})) + instant: false + interval: 5s + intervalMs: 5000 + legendFormat: adventure + maxDataPoints: 43200 + queryType: randomWalk + range: false + refId: A + - refId: D + relativeTimeRange: + from: 86400 + to: 0 + datasourceUid: __expr__ + model: + conditions: + - evaluator: + params: [] + type: gt + operator: + type: and + query: + params: + - D + reducer: + params: [] + type: last + type: query + datasource: + type: __expr__ + uid: __expr__ + expression: A + intervalMs: 1000 + maxDataPoints: 43200 + reducer: last + refId: D + type: reduce + - refId: G + relativeTimeRange: + from: 86400 + to: 0 + datasourceUid: __expr__ + model: + conditions: + - evaluator: + params: + - 80 + type: gt + operator: + type: and + query: + params: + - G + reducer: + params: [] + type: last + type: query + datasource: + type: __expr__ + uid: __expr__ + expression: D + intervalMs: 1000 + maxDataPoints: 43200 + refId: G + type: threshold + dashboardUid: EbtmMBZSk + panelId: 31 + noDataState: NoData + execErrState: Error + for: 1m + annotations: + __dashboardUid__: EbtmMBZSk + __panelId__: "31" + description: "" + runbook_url: "" + summary: "" + labels: + "": "" + isPaused: false + notification_settings: + receiver: garagenum-email + +# PROD-BACKUP + - uid: disk-used-prodbackup + title: DISK used - PROD-BACKUP + condition: G + data: + - refId: A + queryType: randomWalk + relativeTimeRange: + from: 86400 + to: 0 + datasourceUid: PBFA97CFB590B2093 + model: + datasource: + type: prometheus + uid: PBFA97CFB590B2093 + 
editorMode: code + exemplar: true + expr: ((1 - (node_filesystem_free_bytes{instance=~"prod-backup", fstype=~"ext.*|xfs", mountpoint="/home/garage/BACKUP"} / node_filesystem_size_bytes{instance=~"prod-backup", fstype=~"ext.*|xfs", mountpoint="/home/garage/BACKUP"})) * 100) + instant: false + interval: 5s + intervalMs: 5000 + legendFormat: prod-backup + maxDataPoints: 43200 + queryType: randomWalk + range: false + refId: A + - refId: D + relativeTimeRange: + from: 86400 + to: 0 + datasourceUid: __expr__ + model: + conditions: + - evaluator: + params: [] + type: gt + operator: + type: and + query: + params: + - D + reducer: + params: [] + type: last + type: query + datasource: + type: __expr__ + uid: __expr__ + expression: A + intervalMs: 1000 + maxDataPoints: 43200 + reducer: last + refId: D + type: reduce + - refId: G + relativeTimeRange: + from: 86400 + to: 0 + datasourceUid: __expr__ + model: + conditions: + - evaluator: + params: + - 80 + type: gt + operator: + type: and + query: + params: + - G + reducer: + params: [] + type: last + type: query + datasource: + type: __expr__ + uid: __expr__ + expression: D + intervalMs: 1000 + maxDataPoints: 43200 + refId: G + type: threshold + dashboardUid: EbtmMBZSk + panelId: 31 + noDataState: NoData + execErrState: Error + for: 1m + annotations: + __dashboardUid__: EbtmMBZSk + __panelId__: "31" + description: "" + runbook_url: "" + summary: "" + labels: + "": "" + isPaused: false + notification_settings: + receiver: garagenum-email + +# WEBHOSTING + - uid: disk-used-webhosting + title: DISK used - WEBHOSTING + condition: G + data: + - refId: A + queryType: randomWalk + relativeTimeRange: + from: 86400 + to: 0 + datasourceUid: PBFA97CFB590B2093 + model: + datasource: + type: prometheus + uid: PBFA97CFB590B2093 + editorMode: code + exemplar: true + expr: (node_filesystem_size_bytes{instance=~'cicd-server',fstype=~"ext.*|xfs",mountpoint !~".*pod.*"}-node_filesystem_free_bytes{instance=~'cicd-server',fstype=~"ext.*|xfs",mountpoint 
!~".*pod.*"}) *100/(node_filesystem_avail_bytes {instance=~'cicd-server',fstype=~"ext.*|xfs",mountpoint !~".*pod.*"}+(node_filesystem_size_bytes{instance=~'cicd-server',fstype=~"ext.*|xfs",mountpoint !~".*pod.*"}-node_filesystem_free_bytes{instance=~'cicd-server',fstype=~"ext.*|xfs",mountpoint !~".*pod.*"})) + instant: false + interval: 5s + intervalMs: 5000 + legendFormat: cicd-server + maxDataPoints: 43200 + queryType: randomWalk + range: false + refId: A + - refId: D + relativeTimeRange: + from: 86400 + to: 0 + datasourceUid: __expr__ + model: + conditions: + - evaluator: + params: [] + type: gt + operator: + type: and + query: + params: + - D + reducer: + params: [] + type: last + type: query + datasource: + type: __expr__ + uid: __expr__ + expression: A + intervalMs: 1000 + maxDataPoints: 43200 + reducer: last + refId: D + type: reduce + - refId: G + relativeTimeRange: + from: 86400 + to: 0 + datasourceUid: __expr__ + model: + conditions: + - evaluator: + params: + - 80 + type: gt + operator: + type: and + query: + params: + - G + reducer: + params: [] + type: last + type: query + datasource: + type: __expr__ + uid: __expr__ + expression: D + intervalMs: 1000 + maxDataPoints: 43200 + refId: G + type: threshold + dashboardUid: EbtmMBZSk + panelId: 31 + noDataState: NoData + execErrState: Error + for: 1m + annotations: + __dashboardUid__: EbtmMBZSk + __panelId__: "31" + description: "" + runbook_url: "" + summary: "" + labels: + "": "" + isPaused: false + notification_settings: + receiver: garagenum-email + +# PROD-2 + - uid: disk-used-prod-2 + title: DISK used - PROD-2 + condition: G + data: + - refId: A + queryType: randomWalk + relativeTimeRange: + from: 86400 + to: 0 + datasourceUid: PBFA97CFB590B2093 + model: + datasource: + type: prometheus + uid: PBFA97CFB590B2093 + editorMode: code + exemplar: true + expr: (node_filesystem_size_bytes{instance=~'prod-2',fstype=~"ext.*|xfs",mountpoint 
!~".*pod.*"}-node_filesystem_free_bytes{instance=~'prod-2',fstype=~"ext.*|xfs",mountpoint !~".*pod.*"}) *100/(node_filesystem_avail_bytes {instance=~'prod-2',fstype=~"ext.*|xfs",mountpoint !~".*pod.*"}+(node_filesystem_size_bytes{instance=~'prod-2',fstype=~"ext.*|xfs",mountpoint !~".*pod.*"}-node_filesystem_free_bytes{instance=~'prod-2',fstype=~"ext.*|xfs",mountpoint !~".*pod.*"})) + instant: false + interval: 5s + intervalMs: 5000 + legendFormat: prod-2 + maxDataPoints: 43200 + queryType: randomWalk + range: false + refId: A + - refId: D + relativeTimeRange: + from: 86400 + to: 0 + datasourceUid: __expr__ + model: + conditions: + - evaluator: + params: [] + type: gt + operator: + type: and + query: + params: + - D + reducer: + params: [] + type: last + type: query + datasource: + type: __expr__ + uid: __expr__ + expression: A + intervalMs: 1000 + maxDataPoints: 43200 + reducer: last + refId: D + type: reduce + - refId: G + relativeTimeRange: + from: 86400 + to: 0 + datasourceUid: __expr__ + model: + conditions: + - evaluator: + params: + - 80 + type: gt + operator: + type: and + query: + params: + - G + reducer: + params: [] + type: last + type: query + datasource: + type: __expr__ + uid: __expr__ + expression: D + intervalMs: 1000 + maxDataPoints: 43200 + refId: G + type: threshold + dashboardUid: EbtmMBZSk + panelId: 31 + noDataState: NoData + execErrState: Error + for: 1m + annotations: + __dashboardUid__: EbtmMBZSk + __panelId__: "31" + description: "" + runbook_url: "" + summary: "" + labels: + "": "" + isPaused: false + notification_settings: + receiver: garagenum-email + +# PROD-3 / ODOO + - uid: disk-used-prod-3 + title: DISK used - PROD-3 + condition: G + data: + - refId: A + queryType: randomWalk + relativeTimeRange: + from: 86400 + to: 0 + datasourceUid: PBFA97CFB590B2093 + model: + datasource: + type: prometheus + uid: PBFA97CFB590B2093 + editorMode: code + exemplar: true + expr: 
(node_filesystem_size_bytes{instance=~'prod-3',fstype=~"ext.*|xfs",mountpoint !~".*pod.*"}-node_filesystem_free_bytes{instance=~'prod-3',fstype=~"ext.*|xfs",mountpoint !~".*pod.*"}) *100/(node_filesystem_avail_bytes {instance=~'prod-3',fstype=~"ext.*|xfs",mountpoint !~".*pod.*"}+(node_filesystem_size_bytes{instance=~'prod-3',fstype=~"ext.*|xfs",mountpoint !~".*pod.*"}-node_filesystem_free_bytes{instance=~'prod-3',fstype=~"ext.*|xfs",mountpoint !~".*pod.*"})) + instant: false + interval: 5s + intervalMs: 5000 + legendFormat: prod-3 + maxDataPoints: 43200 + queryType: randomWalk + range: false + refId: A + - refId: D + relativeTimeRange: + from: 86400 + to: 0 + datasourceUid: __expr__ + model: + conditions: + - evaluator: + params: [] + type: gt + operator: + type: and + query: + params: + - D + reducer: + params: [] + type: last + type: query + datasource: + type: __expr__ + uid: __expr__ + expression: A + intervalMs: 1000 + maxDataPoints: 43200 + reducer: last + refId: D + type: reduce + - refId: G + relativeTimeRange: + from: 86400 + to: 0 + datasourceUid: __expr__ + model: + conditions: + - evaluator: + params: + - 80 + type: gt + operator: + type: and + query: + params: + - G + reducer: + params: [] + type: last + type: query + datasource: + type: __expr__ + uid: __expr__ + expression: D + intervalMs: 1000 + maxDataPoints: 43200 + refId: G + type: threshold + dashboardUid: EbtmMBZSk + panelId: 31 + noDataState: NoData + execErrState: Error + for: 1m + annotations: + __dashboardUid__: EbtmMBZSk + __panelId__: "31" + description: "" + runbook_url: "" + summary: "" + labels: + "": "" + isPaused: false + notification_settings: + receiver: garagenum-email + +# GARAGE-AI + - uid: disk-used-garage-ai + title: DISK used - GARAGE-AI + condition: G + data: + - refId: A + queryType: randomWalk + relativeTimeRange: + from: 86400 + to: 0 + datasourceUid: PBFA97CFB590B2093 + model: + datasource: + type: prometheus + uid: PBFA97CFB590B2093 + editorMode: code + exemplar: true + 
expr: (node_filesystem_size_bytes{instance=~'garage-ai',fstype=~"ext.*|xfs",mountpoint !~".*pod.*"}-node_filesystem_free_bytes{instance=~'garage-ai',fstype=~"ext.*|xfs",mountpoint !~".*pod.*"}) *100/(node_filesystem_avail_bytes {instance=~'garage-ai',fstype=~"ext.*|xfs",mountpoint !~".*pod.*"}+(node_filesystem_size_bytes{instance=~'garage-ai',fstype=~"ext.*|xfs",mountpoint !~".*pod.*"}-node_filesystem_free_bytes{instance=~'garage-ai',fstype=~"ext.*|xfs",mountpoint !~".*pod.*"})) + instant: false + interval: 5s + intervalMs: 5000 + legendFormat: garage-ai + maxDataPoints: 43200 + queryType: randomWalk + range: false + refId: A + - refId: D + relativeTimeRange: + from: 86400 + to: 0 + datasourceUid: __expr__ + model: + conditions: + - evaluator: + params: [] + type: gt + operator: + type: and + query: + params: + - D + reducer: + params: [] + type: last + type: query + datasource: + type: __expr__ + uid: __expr__ + expression: A + intervalMs: 1000 + maxDataPoints: 43200 + reducer: last + refId: D + type: reduce + - refId: G + relativeTimeRange: + from: 86400 + to: 0 + datasourceUid: __expr__ + model: + conditions: + - evaluator: + params: + - 80 + type: gt + operator: + type: and + query: + params: + - G + reducer: + params: [] + type: last + type: query + datasource: + type: __expr__ + uid: __expr__ + expression: D + intervalMs: 1000 + maxDataPoints: 43200 + refId: G + type: threshold + dashboardUid: EbtmMBZSk + panelId: 31 + noDataState: NoData + execErrState: Error + for: 1m + annotations: + __dashboardUid__: EbtmMBZSk + __panelId__: "31" + description: "" + runbook_url: "" + summary: "" + labels: + "": "" + isPaused: false + notification_settings: + receiver: garagenum-email diff --git a/grafana/provisioning/alerting/mail-alert.yml b/grafana/provisioning/alerting/mail-alert.yml new file mode 100644 index 0000000..f596c1c --- /dev/null +++ b/grafana/provisioning/alerting/mail-alert.yml @@ -0,0 +1,12 @@ +apiVersion: 1 + +contactPoints: + - orgId: 1 + name: 
garagenum-email + receivers: + - uid: aesp1bndu8xz4e + type: email + settings: + addresses: contact@legaragenumerique.fr,greg.lebreton@hotmail.com + singleEmail: false + disableResolveMessage: false \ No newline at end of file diff --git a/grafana/provisioning/alerting/ram-alert-rules.yml b/grafana/provisioning/alerting/ram-alert-rules.yml new file mode 100644 index 0000000..93c79c9 --- /dev/null +++ b/grafana/provisioning/alerting/ram-alert-rules.yml @@ -0,0 +1,644 @@ +apiVersion: 1 +groups: + - orgId: 1 + name: RAM + folder: rules + interval: 1m + rules: + +# GNPROD + - uid: ram-used-gnprod + title: RAM used - GNPROD + condition: F + data: + - refId: A + relativeTimeRange: + from: 86400 + to: 0 + datasourceUid: PBFA97CFB590B2093 + model: + datasource: + type: prometheus + uid: PBFA97CFB590B2093 + exemplar: true + expr: (1 - (node_memory_MemAvailable_bytes{instance="gnprod"} / (node_memory_MemTotal_bytes{instance="gnprod"})))* 100 + format: time_series + instant: false + interval: "1" + intervalFactor: 1 + intervalMs: 5000 + legendFormat: '{{instance}}' + maxDataPoints: 43200 + refId: A + - refId: C + datasourceUid: __expr__ + model: + conditions: + - evaluator: + params: [] + type: gt + operator: + type: and + query: + params: + - C + reducer: + params: [] + type: last + type: query + datasource: + type: __expr__ + uid: __expr__ + expression: A + intervalMs: 1000 + maxDataPoints: 43200 + reducer: last + refId: C + type: reduce + - refId: F + datasourceUid: __expr__ + model: + conditions: + - evaluator: + params: + - 80 + type: gt + operator: + type: and + query: + params: + - F + reducer: + params: [] + type: last + type: query + datasource: + type: __expr__ + uid: __expr__ + expression: C + intervalMs: 1000 + maxDataPoints: 43200 + refId: F + type: threshold + dashboardUid: EbtmMBZSk + panelId: 29 + noDataState: NoData + execErrState: Error + for: 1m + annotations: + __dashboardUid__: EbtmMBZSk + __panelId__: "29" + description: "" + runbook_url: "" + summary: 
"" + labels: + "": "" + isPaused: false + notification_settings: + receiver: garagenum-email + +# ADVENTURE + - uid: ram-used-adventure + title: RAM used - ADVENTURE + condition: F + data: + - refId: A + relativeTimeRange: + from: 86400 + to: 0 + datasourceUid: PBFA97CFB590B2093 + model: + datasource: + type: prometheus + uid: PBFA97CFB590B2093 + exemplar: true + expr: (1 - (node_memory_MemAvailable_bytes{instance="adventure"} / (node_memory_MemTotal_bytes{instance="adventure"})))* 100 + format: time_series + instant: false + interval: "1" + intervalFactor: 1 + intervalMs: 5000 + legendFormat: '{{instance}}' + maxDataPoints: 43200 + refId: A + - refId: C + datasourceUid: __expr__ + model: + conditions: + - evaluator: + params: [] + type: gt + operator: + type: and + query: + params: + - C + reducer: + params: [] + type: last + type: query + datasource: + type: __expr__ + uid: __expr__ + expression: A + intervalMs: 1000 + maxDataPoints: 43200 + reducer: last + refId: C + type: reduce + - refId: F + datasourceUid: __expr__ + model: + conditions: + - evaluator: + params: + - 80 + type: gt + operator: + type: and + query: + params: + - F + reducer: + params: [] + type: last + type: query + datasource: + type: __expr__ + uid: __expr__ + expression: C + intervalMs: 1000 + maxDataPoints: 43200 + refId: F + type: threshold + dashboardUid: EbtmMBZSk + panelId: 29 + noDataState: NoData + execErrState: Error + for: 1m + annotations: + __dashboardUid__: EbtmMBZSk + __panelId__: "29" + description: "" + runbook_url: "" + summary: "" + labels: + "": "" + isPaused: false + notification_settings: + receiver: garagenum-email + +# PROD BACKUP + - uid: ram-used-prodbackup + title: RAM used - PROD-BACKUP + condition: F + data: + - refId: A + relativeTimeRange: + from: 86400 + to: 0 + datasourceUid: PBFA97CFB590B2093 + model: + datasource: + type: prometheus + uid: PBFA97CFB590B2093 + exemplar: true + expr: (1 - (node_memory_MemAvailable_bytes{instance="prod-backup"} / 
(node_memory_MemTotal_bytes{instance="prod-backup"})))* 100 + format: time_series + instant: false + interval: "1" + intervalFactor: 1 + intervalMs: 5000 + legendFormat: '{{instance}}' + maxDataPoints: 43200 + refId: A + - refId: C + datasourceUid: __expr__ + model: + conditions: + - evaluator: + params: [] + type: gt + operator: + type: and + query: + params: + - C + reducer: + params: [] + type: last + type: query + datasource: + type: __expr__ + uid: __expr__ + expression: A + intervalMs: 1000 + maxDataPoints: 43200 + reducer: last + refId: C + type: reduce + - refId: F + datasourceUid: __expr__ + model: + conditions: + - evaluator: + params: + - 80 + type: gt + operator: + type: and + query: + params: + - F + reducer: + params: [] + type: last + type: query + datasource: + type: __expr__ + uid: __expr__ + expression: C + intervalMs: 1000 + maxDataPoints: 43200 + refId: F + type: threshold + dashboardUid: EbtmMBZSk + panelId: 29 + noDataState: NoData + execErrState: Error + for: 1m + annotations: + __dashboardUid__: EbtmMBZSk + __panelId__: "29" + description: "" + runbook_url: "" + summary: "" + labels: + "": "" + isPaused: false + notification_settings: + receiver: garagenum-email + +# WEBHOSTING + - uid: ram-used-webhosting + title: RAM used - WEBHOSTING + condition: F + data: + - refId: A + relativeTimeRange: + from: 86400 + to: 0 + datasourceUid: PBFA97CFB590B2093 + model: + datasource: + type: prometheus + uid: PBFA97CFB590B2093 + exemplar: true + expr: (1 - (node_memory_MemAvailable_bytes{instance="cicd-server"} / (node_memory_MemTotal_bytes{instance="cicd-server"})))* 100 + format: time_series + instant: false + interval: "1" + intervalFactor: 1 + intervalMs: 5000 + legendFormat: '{{instance}}' + maxDataPoints: 43200 + refId: A + - refId: C + datasourceUid: __expr__ + model: + conditions: + - evaluator: + params: [] + type: gt + operator: + type: and + query: + params: + - C + reducer: + params: [] + type: last + type: query + datasource: + type: 
__expr__ + uid: __expr__ + expression: A + intervalMs: 1000 + maxDataPoints: 43200 + reducer: last + refId: C + type: reduce + - refId: F + datasourceUid: __expr__ + model: + conditions: + - evaluator: + params: + - 80 + type: gt + operator: + type: and + query: + params: + - F + reducer: + params: [] + type: last + type: query + datasource: + type: __expr__ + uid: __expr__ + expression: C + intervalMs: 1000 + maxDataPoints: 43200 + refId: F + type: threshold + dashboardUid: EbtmMBZSk + panelId: 29 + noDataState: NoData + execErrState: Error + for: 1m + annotations: + __dashboardUid__: EbtmMBZSk + __panelId__: "29" + description: "" + runbook_url: "" + summary: "" + labels: + "": "" + isPaused: false + notification_settings: + receiver: garagenum-email + +# PROD-2 + - uid: ram-used-prod2 + title: RAM used - PROD-2 + condition: F + data: + - refId: A + relativeTimeRange: + from: 86400 + to: 0 + datasourceUid: PBFA97CFB590B2093 + model: + datasource: + type: prometheus + uid: PBFA97CFB590B2093 + exemplar: true + expr: (1 - (node_memory_MemAvailable_bytes{instance="prod-2"} / (node_memory_MemTotal_bytes{instance="prod-2"})))* 100 + format: time_series + instant: false + interval: "1" + intervalFactor: 1 + intervalMs: 5000 + legendFormat: '{{instance}}' + maxDataPoints: 43200 + refId: A + - refId: C + datasourceUid: __expr__ + model: + conditions: + - evaluator: + params: [] + type: gt + operator: + type: and + query: + params: + - C + reducer: + params: [] + type: last + type: query + datasource: + type: __expr__ + uid: __expr__ + expression: A + intervalMs: 1000 + maxDataPoints: 43200 + reducer: last + refId: C + type: reduce + - refId: F + datasourceUid: __expr__ + model: + conditions: + - evaluator: + params: + - 80 + type: gt + operator: + type: and + query: + params: + - F + reducer: + params: [] + type: last + type: query + datasource: + type: __expr__ + uid: __expr__ + expression: C + intervalMs: 1000 + maxDataPoints: 43200 + refId: F + type: threshold + 
dashboardUid: EbtmMBZSk + panelId: 29 + noDataState: NoData + execErrState: Error + for: 1m + annotations: + __dashboardUid__: EbtmMBZSk + __panelId__: "29" + description: "" + runbook_url: "" + summary: "" + labels: + "": "" + isPaused: false + notification_settings: + receiver: garagenum-email + +# PROD-3 + - uid: ram-used-prod-3 + title: RAM used - PROD-3 / ODOO + condition: F + data: + - refId: A + relativeTimeRange: + from: 86400 + to: 0 + datasourceUid: PBFA97CFB590B2093 + model: + datasource: + type: prometheus + uid: PBFA97CFB590B2093 + exemplar: true + expr: (1 - (node_memory_MemAvailable_bytes{instance="prod-3"} / (node_memory_MemTotal_bytes{instance="prod-3"})))* 100 + format: time_series + instant: false + interval: "1" + intervalFactor: 1 + intervalMs: 5000 + legendFormat: '{{instance}}' + maxDataPoints: 43200 + refId: A + - refId: C + datasourceUid: __expr__ + model: + conditions: + - evaluator: + params: [] + type: gt + operator: + type: and + query: + params: + - C + reducer: + params: [] + type: last + type: query + datasource: + type: __expr__ + uid: __expr__ + expression: A + intervalMs: 1000 + maxDataPoints: 43200 + reducer: last + refId: C + type: reduce + - refId: F + datasourceUid: __expr__ + model: + conditions: + - evaluator: + params: + - 80 + type: gt + operator: + type: and + query: + params: + - F + reducer: + params: [] + type: last + type: query + datasource: + type: __expr__ + uid: __expr__ + expression: C + intervalMs: 1000 + maxDataPoints: 43200 + refId: F + type: threshold + dashboardUid: EbtmMBZSk + panelId: 29 + noDataState: NoData + execErrState: Error + for: 1m + annotations: + __dashboardUid__: EbtmMBZSk + __panelId__: "29" + description: "" + runbook_url: "" + summary: "" + labels: + "": "" + isPaused: false + notification_settings: + receiver: garagenum-email + +# GARAGE-AI + - uid: ram-used-garage-ai + title: RAM used - GARAGE-AI + condition: F + data: + - refId: A + relativeTimeRange: + from: 86400 + to: 0 + 
datasourceUid: PBFA97CFB590B2093 + model: + datasource: + type: prometheus + uid: PBFA97CFB590B2093 + exemplar: true + expr: (1 - (node_memory_MemAvailable_bytes{instance="garage-ai"} / (node_memory_MemTotal_bytes{instance="garage-ai"})))* 100 + format: time_series + instant: false + interval: "1" + intervalFactor: 1 + intervalMs: 5000 + legendFormat: '{{instance}}' + maxDataPoints: 43200 + refId: A + - refId: C + datasourceUid: __expr__ + model: + conditions: + - evaluator: + params: [] + type: gt + operator: + type: and + query: + params: + - C + reducer: + params: [] + type: last + type: query + datasource: + type: __expr__ + uid: __expr__ + expression: A + intervalMs: 1000 + maxDataPoints: 43200 + reducer: last + refId: C + type: reduce + - refId: F + datasourceUid: __expr__ + model: + conditions: + - evaluator: + params: + - 80 + type: gt + operator: + type: and + query: + params: + - F + reducer: + params: [] + type: last + type: query + datasource: + type: __expr__ + uid: __expr__ + expression: C + intervalMs: 1000 + maxDataPoints: 43200 + refId: F + type: threshold + dashboardUid: EbtmMBZSk + panelId: 29 + noDataState: NoData + execErrState: Error + for: 1m + annotations: + __dashboardUid__: EbtmMBZSk + __panelId__: "29" + description: "" + runbook_url: "" + summary: "" + labels: + "": "" + isPaused: false + notification_settings: + receiver: garagenum-email \ No newline at end of file diff --git a/grafana/provisioning/notifiers/email.yaml b/grafana/provisioning/notifiers.depracated/email.yaml similarity index 100% rename from grafana/provisioning/notifiers/email.yaml rename to grafana/provisioning/notifiers.depracated/email.yaml