Deploy AlertManager configuration
This commit is contained in:
parent
817aa3e0fc
commit
7ab99e4530
|
@ -1,4 +1,5 @@
|
|||
*.log
|
||||
*.out
|
||||
*.pem
|
||||
.env
|
||||
.lsp-repl-history
|
||||
|
|
5
Makefile
5
Makefile
|
@ -290,7 +290,10 @@ fmt: fmt-c fmt-go fmt-python fmt-terraform fmt-web # Format all code
|
|||
packer: supervisor # Build and publish a new webserver AMI
|
||||
tools/packer-build.bash
|
||||
|
||||
|
||||
# Render the Alertmanager template with envsubst, then upload the alert rules
# and the rendered Alertmanager config with cortextool.
deploy-alerts: # Deploy alerting configuration to Grafana Cloud
	envsubst < grafana/alertmanager.yaml > grafana/alertmanager.yaml.out
	cortextool rules load grafana/alerts.yaml \
		--address=https://prometheus-blocks-prod-us-central1.grafana.net \
		--id=$(GRAFANA_PROMETHEUS_USERNAME) \
		--key=$(GRAFANA_API_KEY)
	cortextool alertmanager load grafana/alertmanager.yaml.out \
		--address=https://alertmanager-us-central1.grafana.net \
		--id=$(GRAFANA_ALERTMANAGER_USERNAME) \
		--key=$(GRAFANA_API_KEY)
|
||||
|
||||
### Miscellaneous
|
||||
|
||||
|
|
|
@ -44,6 +44,7 @@ dctrl-tools
|
|||
docker-ce-cli
|
||||
file
|
||||
g++
|
||||
gettext
|
||||
git
|
||||
golang
|
||||
htop
|
||||
|
|
|
@ -0,0 +1,6 @@
|
|||
# Alertmanager configuration template. "$PAGERDUTY_INTEGRATION_KEY" is a
# placeholder that `make deploy-alerts` fills in with envsubst before upload.

# Send every alert to the single receiver defined below.
route:
  receiver: pagerduty

receivers:
  - name: pagerduty
    pagerduty_configs:
      - routing_key: "$PAGERDUTY_INTEGRATION_KEY"
|
|
@ -0,0 +1,28 @@
|
|||
# Alert rules loaded into the "riju" rule namespace by `cortextool rules load`.
# Every rule must hold continuously for 30 minutes before it fires.
namespace: riju
groups:
  - name: riju
    rules:
      # Per-node CPU utilisation (percentage of non-idle time, averaged over
      # all CPUs of the node) at or above 80%.
      - alert: NodeCPUHigh
        annotations:
          message: "Instance {{ $labels.node }} is running close to max CPU"
        expr: |
          sum(1 - rate(node_cpu_seconds_total{mode="idle"}[1m])) by (node) / count(sum(node_cpu_seconds_total{mode="idle"}) by (node, cpu)) by (node) * 100 >= 80
        for: 30m

      # Per-node share of memory that is not available, at or above 80%.
      - alert: NodeMemoryHigh
        annotations:
          message: "Instance {{ $labels.node }} is running close to max memory"
        expr: |
          sum(1 - node_memory_MemAvailable_bytes / node_memory_MemTotal_bytes) by (node) * 100 >= 80
        for: 30m

      # Percentage of the root filesystem in use. Without a comparison the
      # expression yields a sample for every node and the alert would fire
      # unconditionally, so a threshold is required.
      # NOTE(review): 80 mirrors the CPU/memory alerts above; confirm the
      # intended disk threshold.
      - alert: RootVolumeFilling
        annotations:
          message: "Root volume on instance {{ $labels.node }} is close to full"
        expr: |
          (1 - sum (node_filesystem_free_bytes{mountpoint="/"}) by (node) / sum (node_filesystem_size_bytes{mountpoint="/"}) by (node)) * 100 >= 80
        for: 30m

      # Percentage of the /mnt/riju filesystem in use; a threshold is
      # required here for the same reason as RootVolumeFilling.
      # NOTE(review): 80 mirrors the CPU/memory alerts above; confirm the
      # intended disk threshold.
      - alert: DataVolumeFilling
        annotations:
          message: "Data volume on instance {{ $labels.node }} is close to full"
        expr: |
          (1 - sum (node_filesystem_free_bytes{mountpoint="/mnt/riju"}) by (node) / sum (node_filesystem_size_bytes{mountpoint="/mnt/riju"}) by (node)) * 100 >= 80
        for: 30m
|
Loading…
Reference in New Issue