From 7ab99e45309cc0f05854f53ea3b8e5b2e03385ec Mon Sep 17 00:00:00 2001
From: Radon Rosborough
Date: Sat, 12 Feb 2022 18:19:04 -0800
Subject: [PATCH] Deploy AlertManager configuration

Add a `make deploy-alerts` target that renders
grafana/alertmanager.yaml with envsubst and uploads it, together with
the alerting rules in grafana/alerts.yaml, to Grafana Cloud using
cortextool.

The target reads GRAFANA_PROMETHEUS_USERNAME,
GRAFANA_ALERTMANAGER_USERNAME and GRAFANA_API_KEY, and envsubst reads
PAGERDUTY_INTEGRATION_KEY from the environment. The rendered
alertmanager.yaml.out contains the substituted PagerDuty key, so *.out
is gitignored. gettext is installed in the admin image because it
ships envsubst.

Every rule compares a usage percentage against an 80% threshold and
fires once the condition has held for 30m. Without the comparison an
expression returns a sample for every node and the alert would fire
unconditionally, so the two volume rules carry the same `>= 80` as the
CPU and memory rules.
---
 .gitignore                |  1 +
 Makefile                  |  5 ++++-
 docker/admin/install.bash |  1 +
 grafana/alertmanager.yaml |  6 ++++++
 grafana/alerts.yaml       | 28 ++++++++++++++++++++++++++++
 5 files changed, 40 insertions(+), 1 deletion(-)
 create mode 100644 grafana/alertmanager.yaml
 create mode 100644 grafana/alerts.yaml

diff --git a/.gitignore b/.gitignore
index 9bc9590..8aeedf7 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,4 +1,5 @@
 *.log
+*.out
 *.pem
 .env
 .lsp-repl-history
diff --git a/Makefile b/Makefile
index c165b03..0c82a3b 100644
--- a/Makefile
+++ b/Makefile
@@ -290,7 +290,10 @@ fmt: fmt-c fmt-go fmt-python fmt-terraform fmt-web # Format all code
 packer: supervisor # Build and publish a new webserver AMI
 	tools/packer-build.bash
 
-
+deploy-alerts: # Deploy alerting configuration to Grafana Cloud
+	envsubst < grafana/alertmanager.yaml > grafana/alertmanager.yaml.out
+	cortextool rules load grafana/alerts.yaml --address=https://prometheus-blocks-prod-us-central1.grafana.net --id=$(GRAFANA_PROMETHEUS_USERNAME) --key=$(GRAFANA_API_KEY)
+	cortextool alertmanager load grafana/alertmanager.yaml.out --address=https://alertmanager-us-central1.grafana.net --id=$(GRAFANA_ALERTMANAGER_USERNAME) --key=$(GRAFANA_API_KEY)
 
 ### Miscellaneous
 
diff --git a/docker/admin/install.bash b/docker/admin/install.bash
index 3b7b394..9cc3ceb 100755
--- a/docker/admin/install.bash
+++ b/docker/admin/install.bash
@@ -44,6 +44,7 @@ dctrl-tools
 docker-ce-cli
 file
 g++
+gettext
 git
 golang
 htop
diff --git a/grafana/alertmanager.yaml b/grafana/alertmanager.yaml
new file mode 100644
index 0000000..a106b1b
--- /dev/null
+++ b/grafana/alertmanager.yaml
@@ -0,0 +1,6 @@
+receivers:
+  - name: pagerduty
+    pagerduty_configs:
+      - routing_key: "$PAGERDUTY_INTEGRATION_KEY"
+route:
+  receiver: pagerduty
diff --git a/grafana/alerts.yaml b/grafana/alerts.yaml
new file mode 100644
index 0000000..87b0b67
--- /dev/null
+++ b/grafana/alerts.yaml
@@ -0,0 +1,28 @@
+namespace: riju
+groups:
+  - name: riju
+    rules:
+      - alert: NodeCPUHigh
+        annotations:
+          message: "Instance {{ $labels.node }} is running close to max CPU"
+        expr: |
+          sum(1 - rate(node_cpu_seconds_total{mode="idle"}[1m])) by (node) / count(sum(node_cpu_seconds_total{mode="idle"}) by (node, cpu)) by (node) * 100 >= 80
+        for: 30m
+      - alert: NodeMemoryHigh
+        annotations:
+          message: "Instance {{ $labels.node }} is running close to max memory"
+        expr: |
+          sum(1 - node_memory_MemAvailable_bytes / node_memory_MemTotal_bytes) by (node) * 100 >= 80
+        for: 30m
+      - alert: RootVolumeFilling
+        annotations:
+          message: "Root volume on instance {{ $labels.node }} is close to full"
+        expr: |
+          (1 - sum (node_filesystem_free_bytes{mountpoint="/"}) by (node) / sum (node_filesystem_size_bytes{mountpoint="/"}) by (node)) * 100 >= 80
+        for: 30m
+      - alert: DataVolumeFilling
+        annotations:
+          message: "Data volume on instance {{ $labels.node }} is close to full"
+        expr: |
+          (1 - sum (node_filesystem_free_bytes{mountpoint="/mnt/riju"}) by (node) / sum (node_filesystem_size_bytes{mountpoint="/mnt/riju"}) by (node)) * 100 >= 80
+        for: 30m