Deploy AlertManager configuration
This commit is contained in:
parent
817aa3e0fc
commit
7ab99e4530
|
@ -1,4 +1,5 @@
|
||||||
*.log
|
*.log
|
||||||
|
*.out
|
||||||
*.pem
|
*.pem
|
||||||
.env
|
.env
|
||||||
.lsp-repl-history
|
.lsp-repl-history
|
||||||
|
|
5
Makefile
5
Makefile
|
@ -290,7 +290,10 @@ fmt: fmt-c fmt-go fmt-python fmt-terraform fmt-web # Format all code
|
||||||
packer: supervisor # Build and publish a new webserver AMI
|
packer: supervisor # Build and publish a new webserver AMI
|
||||||
tools/packer-build.bash
|
tools/packer-build.bash
|
||||||
|
|
||||||
|
deploy-alerts: # Deploy alerting configuration to Grafana Cloud
# Refuse to continue when the PagerDuty key is missing from the environment:
# envsubst would otherwise render an empty routing_key and the broken
# AlertManager config would be uploaded without any error.
	@test -n "$$PAGERDUTY_INTEGRATION_KEY" || { echo "deploy-alerts: PAGERDUTY_INTEGRATION_KEY is not set" >&2; exit 1; }
# Render the AlertManager template. The SHELL-FORMAT argument limits
# substitution to the one variable the template is meant to receive, so any
# other dollar-prefixed text in the file is left untouched.
	envsubst '$$PAGERDUTY_INTEGRATION_KEY' < grafana/alertmanager.yaml > grafana/alertmanager.yaml.out
# Upload the Prometheus alerting rules.
	cortextool rules load grafana/alerts.yaml --address=https://prometheus-blocks-prod-us-central1.grafana.net --id=$(GRAFANA_PROMETHEUS_USERNAME) --key=$(GRAFANA_API_KEY)
# Upload the rendered AlertManager configuration.
	cortextool alertmanager load grafana/alertmanager.yaml.out --address=https://alertmanager-us-central1.grafana.net --id=$(GRAFANA_ALERTMANAGER_USERNAME) --key=$(GRAFANA_API_KEY)
|
||||||
|
|
||||||
### Miscellaneous
|
### Miscellaneous
|
||||||
|
|
||||||
|
|
|
@ -44,6 +44,7 @@ dctrl-tools
|
||||||
docker-ce-cli
|
docker-ce-cli
|
||||||
file
|
file
|
||||||
g++
|
g++
|
||||||
|
gettext
|
||||||
git
|
git
|
||||||
golang
|
golang
|
||||||
htop
|
htop
|
||||||
|
|
|
@ -0,0 +1,6 @@
|
||||||
|
# AlertManager configuration template.
# This file is passed through envsubst before upload; the routing key below
# is filled in from the PAGERDUTY_INTEGRATION_KEY environment variable.

# Every alert is routed to the single PagerDuty receiver.
route:
  receiver: pagerduty

receivers:
  - name: pagerduty
    pagerduty_configs:
      - routing_key: "$PAGERDUTY_INTEGRATION_KEY"
|
|
@ -0,0 +1,28 @@
|
||||||
|
namespace: riju
|
||||||
|
groups:
|
||||||
|
- name: riju
|
||||||
|
rules:
|
||||||
|
# Fires when a node's average non-idle CPU share, taken across all of its
# CPUs over a 1m rate window, has been at or above 80% for 30 minutes.
- alert: NodeCPUHigh
  for: 30m
  # Sum of per-CPU busy fractions divided by the node's CPU count, as a percentage.
  expr: |
    sum(1 - rate(node_cpu_seconds_total{mode="idle"}[1m])) by (node) / count(sum(node_cpu_seconds_total{mode="idle"}) by (node, cpu)) by (node) * 100 >= 80
  annotations:
    message: "Instance {{ $labels.node }} is running close to max CPU"
|
||||||
|
# Fires when the share of memory that is not available on a node has been
# at or above 80% for 30 minutes.
- alert: NodeMemoryHigh
  for: 30m
  # (1 - available / total) per node, expressed as a percentage.
  expr: |
    sum(1 - node_memory_MemAvailable_bytes / node_memory_MemTotal_bytes) by (node) * 100 >= 80
  annotations:
    message: "Instance {{ $labels.node }} is running close to max memory"
|
||||||
|
# Fires when the root filesystem ("/") of a node has been at least 80% used
# for 30 minutes.
- alert: RootVolumeFilling
  annotations:
    message: "Root volume on instance {{ $labels.node }} is close to full"
  # Used percentage = (1 - free / size) * 100. The ">= 80" comparison is
  # required: without it the expression returns a sample for every node at
  # all times, so the alert would fire permanently regardless of disk usage.
  expr: |
    (1 - sum (node_filesystem_free_bytes{mountpoint="/"}) by (node) / sum (node_filesystem_size_bytes{mountpoint="/"}) by (node)) * 100 >= 80
  for: 30m
|
||||||
|
# Fires when the data filesystem ("/mnt/riju") of a node has been at least
# 80% used for 30 minutes.
- alert: DataVolumeFilling
  annotations:
    message: "Data volume on instance {{ $labels.node }} is close to full"
  # Used percentage = (1 - free / size) * 100. The ">= 80" comparison is
  # required: without it the expression returns a sample for every node at
  # all times, so the alert would fire permanently regardless of disk usage.
  expr: |
    (1 - sum (node_filesystem_free_bytes{mountpoint="/mnt/riju"}) by (node) / sum (node_filesystem_size_bytes{mountpoint="/mnt/riju"}) by (node)) * 100 >= 80
  for: 30m
|
Loading…
Reference in New Issue