Deploy AlertManager configuration
This commit is contained in:
		
							parent
							
								
									817aa3e0fc
								
							
						
					
					
						commit
						7ab99e4530
					
				|  | @ -1,4 +1,5 @@ | |||
| *.log | ||||
| *.out | ||||
| *.pem | ||||
| .env | ||||
| .lsp-repl-history | ||||
|  |  | |||
							
								
								
									
										5
									
								
								Makefile
								
								
								
								
							
							
						
						
									
										5
									
								
								Makefile
								
								
								
								
							|  | @ -290,7 +290,10 @@ fmt: fmt-c fmt-go fmt-python fmt-terraform fmt-web # Format all code | |||
| packer: supervisor # Build and publish a new webserver AMI
 | ||||
| 	tools/packer-build.bash | ||||
| 
 | ||||
| 
 | ||||
| # deploy-alerts runs three steps in order: | ||||
| #   1. envsubst renders grafana/alertmanager.yaml into | ||||
| #      grafana/alertmanager.yaml.out, replacing environment variable | ||||
| #      references with their values. | ||||
| #   2. cortextool uploads the rules in grafana/alerts.yaml to the | ||||
| #      Prometheus endpoint, authenticating with GRAFANA_PROMETHEUS_USERNAME | ||||
| #      and GRAFANA_API_KEY. | ||||
| #   3. cortextool uploads the rendered Alertmanager config to the | ||||
| #      Alertmanager endpoint, authenticating with | ||||
| #      GRAFANA_ALERTMANAGER_USERNAME and GRAFANA_API_KEY. | ||||
| # NOTE(review): make echoes each recipe line after variable expansion, so | ||||
| # the API key is printed unless make is run silently - confirm this is | ||||
| # acceptable wherever the target is invoked. | ||||
| deploy-alerts: # Deploy alerting configuration to Grafana Cloud
 | ||||
| 	envsubst < grafana/alertmanager.yaml > grafana/alertmanager.yaml.out | ||||
| 	cortextool rules load grafana/alerts.yaml --address=https://prometheus-blocks-prod-us-central1.grafana.net --id=$(GRAFANA_PROMETHEUS_USERNAME) --key=$(GRAFANA_API_KEY) | ||||
| 	cortextool alertmanager load grafana/alertmanager.yaml.out --address=https://alertmanager-us-central1.grafana.net --id=$(GRAFANA_ALERTMANAGER_USERNAME) --key=$(GRAFANA_API_KEY) | ||||
| 
 | ||||
| ### Miscellaneous
 | ||||
| 
 | ||||
|  |  | |||
|  | @ -44,6 +44,7 @@ dctrl-tools | |||
| docker-ce-cli | ||||
| file | ||||
| g++ | ||||
| gettext | ||||
| git | ||||
| golang | ||||
| htop | ||||
|  |  | |||
|  | @ -0,0 +1,6 @@ | |||
| # Alertmanager configuration: one PagerDuty receiver, used as the | ||||
| # top-level route, so every alert is delivered to it. | ||||
| # The routing key below is a placeholder, not a literal value; presumably | ||||
| # it is filled in by envsubst before upload - confirm against the | ||||
| # deploy-alerts Makefile target. Keep dollar-prefixed names out of these | ||||
| # comments, since such a tool would expand them here too. | ||||
| receivers: | ||||
|   - name: pagerduty | ||||
|     pagerduty_configs: | ||||
|       - routing_key: "$PAGERDUTY_INTEGRATION_KEY" | ||||
| route: | ||||
|   receiver: pagerduty | ||||
|  | @ -0,0 +1,28 @@ | |||
| # Alerting rules for the "riju" namespace, held in a single rule group | ||||
| # that is also named "riju". | ||||
| namespace: riju | ||||
| groups: | ||||
|   - name: riju | ||||
|     rules: | ||||
|       # Mean non-idle CPU share across each node's cores, as a percentage. | ||||
|       # Fires once it has stayed at or above 80 for 30 minutes. | ||||
|       - alert: NodeCPUHigh | ||||
|         expr: | | ||||
|           sum(1 - rate(node_cpu_seconds_total{mode="idle"}[1m])) by (node) / count(sum(node_cpu_seconds_total{mode="idle"}) by (node, cpu)) by (node) * 100 >= 80 | ||||
|         for: 30m | ||||
|         annotations: | ||||
|           message: 'Instance {{ $labels.node }} is running close to max CPU' | ||||
|       # Share of memory in use per node (1 - available / total), as a | ||||
|       # percentage. Fires once it has stayed at or above 80 for 30 minutes. | ||||
|       - alert: NodeMemoryHigh | ||||
|         expr: | | ||||
|           sum(1 - node_memory_MemAvailable_bytes / node_memory_MemTotal_bytes) by (node) * 100 >= 80 | ||||
|         for: 30m | ||||
|         annotations: | ||||
|           message: 'Instance {{ $labels.node }} is running close to max memory' | ||||
|       # Percentage of the root filesystem in use per node (1 - free / size). | ||||
|       # The comparison is required: an alert expression without one returns | ||||
|       # a sample for every node, so the alert would fire permanently after | ||||
|       # the 30 minute hold. The 80 threshold mirrors the CPU and memory | ||||
|       # alerts in this group - TODO confirm it is the intended limit. | ||||
|       - alert: RootVolumeFilling | ||||
|         annotations: | ||||
|           message: "Root volume on instance {{ $labels.node }} is close to full" | ||||
|         expr: | | ||||
|           (1 - sum (node_filesystem_free_bytes{mountpoint="/"}) by (node) / sum (node_filesystem_size_bytes{mountpoint="/"}) by (node)) * 100 >= 80 | ||||
|         for: 30m | ||||
|       # Percentage of the /mnt/riju filesystem in use per node | ||||
|       # (1 - free / size). The comparison is required: an alert expression | ||||
|       # without one returns a sample for every node, so the alert would fire | ||||
|       # permanently after the 30 minute hold. The 80 threshold mirrors the | ||||
|       # CPU and memory alerts in this group - TODO confirm it is the | ||||
|       # intended limit. | ||||
|       - alert: DataVolumeFilling | ||||
|         annotations: | ||||
|           message: "Data volume on instance {{ $labels.node }} is close to full" | ||||
|         expr: | | ||||
|           (1 - sum (node_filesystem_free_bytes{mountpoint="/mnt/riju"}) by (node) / sum (node_filesystem_size_bytes{mountpoint="/mnt/riju"}) by (node)) * 100 >= 80 | ||||
|         for: 30m | ||||
		Loading…
	
		Reference in New Issue
	
	 Radon Rosborough
						Radon Rosborough