Deploy AlertManager configuration

This commit is contained in:
Radon Rosborough 2022-02-12 18:19:04 -08:00
parent 817aa3e0fc
commit 7ab99e4530
5 changed files with 40 additions and 1 deletions

1
.gitignore vendored
View File

@ -1,4 +1,5 @@
*.log
*.out
*.pem
.env
.lsp-repl-history

View File

@ -290,7 +290,10 @@ fmt: fmt-c fmt-go fmt-python fmt-terraform fmt-web # Format all code
# Runs tools/packer-build.bash once the supervisor prerequisite is up to date.
packer: supervisor # Build and publish a new webserver AMI
tools/packer-build.bash
# Renders grafana/alertmanager.yaml with the PagerDuty key from the
# environment, then uploads the alert rules and the rendered Alertmanager
# configuration with cortextool. Needs PAGERDUTY_INTEGRATION_KEY,
# GRAFANA_PROMETHEUS_USERNAME, GRAFANA_ALERTMANAGER_USERNAME and
# GRAFANA_API_KEY to be set. The key check runs first so that a missing
# PagerDuty key aborts the target instead of uploading an empty routing_key.
# envsubst is limited to that single variable so any other dollar-sign text
# in the template is left untouched.
deploy-alerts: # Deploy alerting configuration to Grafana Cloud
	@test -n "$$PAGERDUTY_INTEGRATION_KEY" || { echo "PAGERDUTY_INTEGRATION_KEY must be set" >&2; exit 1; }
	envsubst '$$PAGERDUTY_INTEGRATION_KEY' < grafana/alertmanager.yaml > grafana/alertmanager.yaml.out
	cortextool rules load grafana/alerts.yaml --address=https://prometheus-blocks-prod-us-central1.grafana.net --id=$(GRAFANA_PROMETHEUS_USERNAME) --key=$(GRAFANA_API_KEY)
	cortextool alertmanager load grafana/alertmanager.yaml.out --address=https://alertmanager-us-central1.grafana.net --id=$(GRAFANA_ALERTMANAGER_USERNAME) --key=$(GRAFANA_API_KEY)
### Miscellaneous

View File

@ -44,6 +44,7 @@ dctrl-tools
docker-ce-cli
file
g++
gettext
git
golang
htop

View File

@ -0,0 +1,6 @@
# Alertmanager configuration template. The $PAGERDUTY_INTEGRATION_KEY
# placeholder is expected to be filled in by an environment-substitution
# step before this file is uploaded.
route:
  # Single top-level route, so every alert goes to the pagerduty receiver.
  receiver: pagerduty
receivers:
  - name: pagerduty
    pagerduty_configs:
      - routing_key: "$PAGERDUTY_INTEGRATION_KEY"

28
grafana/alerts.yaml Normal file
View File

@ -0,0 +1,28 @@
# Alerting rules for the "riju" rule namespace.
# Each expression yields a percentage per node and must stay at or above its
# threshold for 30 minutes before the alert fires.
namespace: riju
groups:
  - name: riju
    rules:
      # Non-idle CPU rate summed per node, divided by the number of CPUs
      # counted on that node.
      - alert: NodeCPUHigh
        annotations:
          message: "Instance {{ $labels.node }} is running close to max CPU"
        expr: |
          sum(1 - rate(node_cpu_seconds_total{mode="idle"}[1m])) by (node) / count(sum(node_cpu_seconds_total{mode="idle"}) by (node, cpu)) by (node) * 100 >= 80
        for: 30m
      # Share of memory that is not available, per node.
      - alert: NodeMemoryHigh
        annotations:
          message: "Instance {{ $labels.node }} is running close to max memory"
        expr: |
          sum(1 - node_memory_MemAvailable_bytes / node_memory_MemTotal_bytes) by (node) * 100 >= 80
        for: 30m
      # Used share of the filesystem mounted at "/".
      # The ">= 80" comparison is required: without it the expression returns
      # a sample for every node and the alert is active regardless of usage.
      # The 80% threshold mirrors the CPU and memory rules above.
      - alert: RootVolumeFilling
        annotations:
          message: "Root volume on instance {{ $labels.node }} is close to full"
        expr: |
          (1 - sum (node_filesystem_free_bytes{mountpoint="/"}) by (node) / sum (node_filesystem_size_bytes{mountpoint="/"}) by (node)) * 100 >= 80
        for: 30m
      # Used share of the filesystem mounted at "/mnt/riju"; thresholded the
      # same way as the root volume rule so it only fires when nearly full.
      - alert: DataVolumeFilling
        annotations:
          message: "Data volume on instance {{ $labels.node }} is close to full"
        expr: |
          (1 - sum (node_filesystem_free_bytes{mountpoint="/mnt/riju"}) by (node) / sum (node_filesystem_size_bytes{mountpoint="/mnt/riju"}) by (node)) * 100 >= 80
        for: 30m