Many things fixed

This commit is contained in:
Radon Rosborough 2021-08-01 12:42:01 -07:00
parent 71619fb249
commit f5b7536235
14 changed files with 237 additions and 22 deletions

View File

@ -224,8 +224,8 @@ download: # L=<lang> T=<type> : Download last published .deb from S3
aws s3 cp $(S3_DEB) $(BUILD)/$(DEB)
undeploy: # Pull latest deployment config from S3
mkdir -p $(BUILD)
aws s3 cp $(S3_CONFIG) $(BUILD)/config.json
mkdir -p build
aws s3 cp $(S3_CONFIG) build/config.json
### Publish artifacts to registries
@ -251,7 +251,7 @@ deploy-config: # Generate deployment config file
node tools/generate-deploy-config.js
deploy-latest: # Upload deployment config to S3 and update ASG instances
aws s3 cp $(BUILD)/config.json $(S3_CONFIG)
aws s3 cp build/config.json $(S3_CONFIG)
deploy: deploy-config deploy-latest # Shorthand for deploy-config followed by deploy-latest

View File

@ -22,7 +22,7 @@
"disk": {
"measurement": ["used_percent"],
"metrics_collection_interval": 60,
"resources": ["*"]
"resources": ["/", "/mnt/riju"]
},
"mem": {
"measurement": ["mem_used_percent"],

3
packer/docker.json Normal file
View File

@ -0,0 +1,3 @@
{
"exec-opts": ["native.cgroupdriver=systemd"]
}

13
packer/promtail.service Normal file
View File

@ -0,0 +1,13 @@
# systemd unit that runs the Promtail log shipper as a long-lived service,
# using the configuration installed at /etc/promtail/config.yaml.
[Unit]
Description=Promtail
# Start rate limiting: at most 5 starts within a 300 second window.
StartLimitBurst=5
StartLimitIntervalSec=300

[Service]
Type=exec
# Use the absolute path the provisioning script installs the binary to, so the
# unit does not depend on systemd's built-in executable search path (and works
# on systemd versions that only accept absolute paths here).
ExecStart=/usr/local/bin/promtail -config.file /etc/promtail/config.yaml
# Restart whenever the process exits, waiting 5 seconds between attempts.
Restart=always
RestartSec=5

[Install]
WantedBy=multi-user.target

63
packer/promtail.yaml Normal file
View File

@ -0,0 +1,63 @@
# Promtail configuration: ships kernel logs, selected systemd journal entries,
# and the application containers' Docker logs to a remote push endpoint.

server:
  http_listen_address: 0.0.0.0
  http_listen_port: 9080
  grpc_listen_port: 0

positions:
  # NOTE(review): /tmp is commonly cleared on reboot, in which case the saved
  # read positions would be lost - confirm this is acceptable.
  filename: /tmp/positions.yaml

client:
  # $GRAFANA_API_KEY is a placeholder; the image provisioning script replaces
  # it with the real key via sed when it installs this file.
  url: https://72217:$GRAFANA_API_KEY@logs-prod-us-central1.grafana.net/api/prom/push

scrape_configs:
  # Kernel messages from the syslog-style kernel log file.
  - job_name: kernel
    static_configs:
      - labels:
          source: kernel
          __path__: /var/log/kern.log

  # Journal entries, restricted to docker.service and riju.service.
  - job_name: systemd
    journal:
      labels:
        source: systemd
    relabel_configs:
      # Keep only entries from the two units of interest.
      - source_labels:
          - __journal__systemd_unit
        regex: "(docker|riju)\\.service"
        action: keep
      # Rewrite the "source" label according to the originating unit.
      - source_labels:
          - __journal__systemd_unit
        regex: "docker\\.service"
        target_label: source
        replacement: "dockerd"
      - source_labels:
          - __journal__systemd_unit
        regex: "riju\\.service"
        target_label: source
        replacement: "supervisor"
      # Safety net: drop anything still carrying the default "systemd" source.
      - source_labels:
          - source
        regex: "systemd"
        action: drop

  # Docker container log files (one JSON object per line).
  - job_name: server
    static_configs:
      - labels:
          source: server
          __path__: /mnt/riju/docker/containers/*/*.log
    pipeline_stages:
      # Pull the fields of interest out of each JSON log record.
      - json:
          expressions:
            log: log
            stream: stream
            tag: attrs.tag
            time: time
      # Ship only the raw log text as the line content.
      - output:
          source: log
      # Use the timestamp recorded in the log record.
      - timestamp:
          source: time
          format: RFC3339Nano
      - labels:
          container: tag
          stream: stream
      # Discard logs from every container except the blue/green app containers.
      - match:
          selector: '{container!~"riju-app-(blue|green)"}'
          action: drop

View File

@ -7,6 +7,10 @@ set -euo pipefail
: ${S3_BUCKET}
: ${SUPERVISOR_ACCESS_TOKEN}
# Print the tag name of the latest published GitHub release of a repository.
#
# Arguments:
#   $1 - repository in "owner/name" form (e.g. grafana/loki)
# Output:
#   the release tag name on stdout
# Returns:
#   non-zero if the HTTP request fails or the response carries no tag_name,
#   so callers never receive the literal string "null" as a version.
latest_release() {
  # -f makes curl fail on HTTP errors (e.g. API rate limiting) instead of
  # passing the error document on to jq; -e makes jq exit non-zero when
  # .tag_name is null or missing.
  curl -fsSL "https://api.github.com/repos/$1/releases/latest" | jq -er .tag_name
}
# I think there is a race condition related to Ubuntu wanting to do an
# automated system upgrade at boot, which causes 'apt-get update' to
# sometimes fail with an obscure error message.
@ -31,7 +35,7 @@ deb [arch=amd64] https://download.docker.com/linux/ubuntu ${ubuntu_name} stable
EOF
sudo -E apt-get update
sudo -E apt-get install -y docker-ce docker-ce-cli containerd.io unzip whois
sudo -E apt-get install -y docker-ce docker-ce-cli containerd.io jq unzip whois
wget -nv https://awscli.amazonaws.com/awscli-exe-linux-x86_64.zip -O awscli.zip
unzip -q awscli.zip
@ -42,10 +46,14 @@ wget -nv https://s3.us-west-1.amazonaws.com/amazon-ssm-us-west-1/latest/debian_a
wget -nv https://s3.amazonaws.com/amazoncloudwatch-agent/ubuntu/amd64/latest/amazon-cloudwatch-agent.deb
sudo apt-get install -y ./amazon-cloudwatch-agent.deb
sudo chown root:root /tmp/cloudwatch.json /tmp/riju-init-volume /tmp/riju-supervisor /tmp/riju.service
sudo mv /tmp/riju-init-volume /tmp/riju-supervisor /usr/local/bin/
sudo mv /tmp/riju.service /etc/systemd/system/
sudo chown root:root \
/tmp/cloudwatch.json /tmp/docker.json /tmp/riju.service \
/tmp/riju.slice /tmp/riju-init-volume /tmp/riju-supervisor
sudo mv /tmp/docker.json /etc/docker/daemon.json
sudo mv /tmp/riju.service /tmp/riju.slice /etc/systemd/system/
sudo mv /tmp/cloudwatch.json /opt/aws/amazon-cloudwatch-agent/bin/config.json
sudo mv /tmp/riju-init-volume /tmp/riju-supervisor /usr/local/bin/
sudo sed -Ei 's/^#?PermitRootLogin .*/PermitRootLogin no/' /etc/ssh/sshd_config
sudo sed -Ei 's/^#?PasswordAuthentication .*/PasswordAuthentication no/' /etc/ssh/sshd_config
@ -61,6 +69,25 @@ sudo useradd admin -g admin -G sudo -s /usr/bin/bash -p "$(echo "${ADMIN_PASSWOR
sudo amazon-cloudwatch-agent-ctl -a fetch-config -m ec2 -s -c file:/opt/aws/amazon-cloudwatch-agent/bin/config.json
sudo systemctl enable riju
# Optional log shipping: install and enable Promtail only when a Grafana API
# key was supplied for this build; otherwise discard the staged Promtail files.
if [[ -n "${GRAFANA_API_KEY:-}" ]]; then
  ver="$(latest_release grafana/loki)"

  # Fetch the release binary and install it where promtail.service expects it.
  wget -nv "https://github.com/grafana/loki/releases/download/${ver}/promtail-linux-amd64.zip"
  unzip -q promtail-linux-amd64.zip
  sudo cp promtail-linux-amd64 /usr/local/bin/promtail

  # Move the staged config and unit file into place.
  sudo chown root:root /tmp/promtail.service /tmp/promtail.yaml
  sudo mkdir -p /etc/promtail
  sudo mv /tmp/promtail.yaml /etc/promtail/config.yaml
  sudo mv /tmp/promtail.service /etc/systemd/system/

  # Substitute the $GRAFANA_API_KEY placeholder in the config. The key is
  # escaped first so that characters special in a sed replacement (backslash,
  # '&', and the '|' delimiter) are inserted literally instead of breaking or
  # corrupting the substitution.
  # NOTE(review): the placeholder sits inside a URL in the config file; a key
  # containing URL-reserved characters would presumably also need
  # percent-encoding - confirm against the key format in use.
  escaped_key="$(printf '%s' "${GRAFANA_API_KEY}" | sed -e 's/[\\&|]/\\&/g')"
  sudo sed -Ei "s|\\\$GRAFANA_API_KEY|${escaped_key}|" /etc/promtail/config.yaml

  sudo systemctl enable promtail
else
  sudo rm /tmp/promtail.yaml /tmp/promtail.service
fi
sudo userdel ubuntu -f
popd

14
packer/riju.slice Normal file
View File

@ -0,0 +1,14 @@
# systemd slice that groups Riju user containers under one set of resource
# limits. The session launcher starts containers with
# "--cgroup-parent riju.slice", which places them under this slice.
[Unit]
Description=Resource limits for Riju user containers
Before=slices.target
[Slice]
# CPU accounting and quota for the slice.
CPUAccounting=true
CPUQuota=100%
# Memory and swap ceilings for the slice.
MemoryAccounting=true
MemoryMax=1G
MemorySwapMax=8G
# Upper bound on the number of tasks in the slice.
TasksAccounting=true
TasksMax=2048
# Network accounting, plus a deny rule for one link-local address.
# NOTE(review): 169.254.169.254 is presumably the cloud instance metadata
# endpoint, blocked so user code cannot query it - confirm.
IPAccounting=true
IPAddressDeny=169.254.169.254

View File

@ -13,6 +13,11 @@ variable "fathom_site_id" {
default = "${env("FATHOM_SITE_ID")}"
}
# Grafana API key, defaulting to the GRAFANA_API_KEY environment variable of
# the process running the build. It is passed on to the shell provisioner as
# GRAFANA_API_KEY.
# NOTE(review): this is a credential; consider whether it should be marked
# sensitive so it is kept out of build output - confirm tool version support.
variable "grafana_api_key" {
type = string
default = "${env("GRAFANA_API_KEY")}"
}
variable "s3_bucket" {
type = string
default = "${env("S3_BUCKET")}"
@ -67,6 +72,31 @@ build {
source = "cloudwatch.json"
}
# Stage configuration and unit files under /tmp on the build instance. The
# shell provisioning script later fixes their ownership and moves them to
# their final locations.

# Docker daemon configuration.
provisioner "file" {
destination = "/tmp/docker.json"
source = "docker.json"
}
# Promtail systemd unit (only installed when a Grafana API key is provided).
provisioner "file" {
destination = "/tmp/promtail.service"
source = "promtail.service"
}
# Promtail configuration (only installed when a Grafana API key is provided).
provisioner "file" {
destination = "/tmp/promtail.yaml"
source = "promtail.yaml"
}
# Riju systemd service unit.
provisioner "file" {
destination = "/tmp/riju.service"
source = "riju.service"
}
# systemd slice holding resource limits for user containers.
provisioner "file" {
destination = "/tmp/riju.slice"
source = "riju.slice"
}
provisioner "file" {
destination = "/tmp/riju-init-volume"
source = "riju-init-volume"
@ -77,16 +107,12 @@ build {
source = "../supervisor/out/riju-supervisor"
}
provisioner "file" {
destination = "/tmp/riju.service"
source = "riju.service"
}
provisioner "shell" {
environment_vars = [
"ADMIN_PASSWORD=${var.admin_password}",
"AWS_REGION=${var.aws_region}",
"FATHOM_SITE_ID=${var.fathom_site_id}",
"GRAFANA_API_KEY=${var.grafana_api_key}",
"S3_BUCKET=${var.s3_bucket}",
"SUPERVISOR_ACCESS_TOKEN=${var.supervisor_access_token}",
]

View File

@ -352,8 +352,7 @@ func (sv *supervisor) reload() error {
"--label", fmt.Sprintf("riju.deploy-config-hash=%s", deployCfgHash),
"--name", name,
"--restart", "unless-stopped",
"--oom-kill-disable",
"--cpu-shares", "2048",
"--log-opt", "tag={{.Name}}",
fmt.Sprintf("riju:%s", deployCfg.AppImageTag),
)
dockerRun.Stdout = os.Stdout

View File

@ -83,6 +83,8 @@ void session(char *uuid, char *lang, char *imageHash)
die("asprintf failed");
if (mknod(fifo, 0700 | S_IFIFO, 0) < 0)
die("mknod failed");
char sentinel[] = "cat /var/run/riju/sentinel/fifo | ( sleep 10; while "
"read -t2; do :; done; pkill -g0 )";
pid_t pid = fork();
if (pid < 0)
die("fork failed");
@ -128,14 +130,15 @@ void session(char *uuid, char *lang, char *imageHash)
"--memory",
"1g",
"--memory-swap",
"3g",
"8g",
"--pids-limit",
"512",
"2048",
"--cgroup-parent",
"riju.slice",
image,
"bash",
"-c",
"cat /var/run/riju/sentinel/fifo | ( sleep 10; while read -t2; do :; "
"done; pkill -g0 )",
sentinel,
NULL,
};
execvp(argv[0], argv);

View File

@ -80,13 +80,18 @@ resource "aws_autoscaling_group" "server" {
availability_zones = [local.primary_az]
desired_capacity = 1
min_size = 1
min_size = 0
max_size = 3
launch_template {
id = aws_launch_template.server.id
}
termination_policies = [
"OldestLaunchTemplate",
"OldestInstance",
]
tags = concat(
[
{
@ -98,6 +103,10 @@ resource "aws_autoscaling_group" "server" {
)
lifecycle {
ignore_changes = [target_group_arns]
ignore_changes = [
desired_capacity,
target_group_arns,
]
}
}

View File

@ -247,3 +247,52 @@ resource "aws_iam_role_policy_attachment" "backup_restores" {
role = aws_iam_role.backup.name
policy_arn = data.aws_iam_policy.backup_restores.arn
}
# Policy document listing the read/query actions granted to Grafana: CloudWatch
# alarms and metrics, CloudWatch Logs queries, EC2 describe calls, and
# resource-tag lookups. The statement applies to all resources ("*").
data "aws_iam_policy_document" "grafana_cloudwatch" {
statement {
actions = [
# CloudWatch alarms and metrics
"cloudwatch:DescribeAlarmsForMetric",
"cloudwatch:DescribeAlarmHistory",
"cloudwatch:DescribeAlarms",
"cloudwatch:ListMetrics",
"cloudwatch:GetMetricStatistics",
"cloudwatch:GetMetricData",
# CloudWatch Logs
"logs:DescribeLogGroups",
"logs:GetLogGroupFields",
"logs:StartQuery",
"logs:StopQuery",
"logs:GetQueryResults",
"logs:GetLogEvents",
# EC2 metadata lookups
"ec2:DescribeTags",
"ec2:DescribeInstances",
"ec2:DescribeRegions",
# Resource tagging lookups
"tag:GetResources",
]
resources = [
"*",
]
}
}
# Dedicated IAM user for Grafana.
resource "aws_iam_user" "grafana" {
name = "riju-grafana"
}
# Managed policy built from the grafana_cloudwatch policy document.
resource "aws_iam_policy" "grafana_cloudwatch" {
name = "riju-grafana-cloudwatch"
description = "Policy granting Grafana access to CloudWatch metrics and logs"
policy = data.aws_iam_policy_document.grafana_cloudwatch.json
}
# Attach the CloudWatch policy to the Grafana user.
resource "aws_iam_user_policy_attachment" "grafana_cloudwatch" {
user = aws_iam_user.grafana.name
policy_arn = aws_iam_policy.grafana_cloudwatch.arn
}
# Access key for the Grafana user; its id and secret are exposed as outputs.
resource "aws_iam_access_key" "grafana" {
user = aws_iam_user.grafana.name
}

View File

@ -10,3 +10,12 @@ output "deploy_aws_secret_access_key" {
value = aws_iam_access_key.deploy.secret
sensitive = true
}
# Access key id of the Grafana IAM user.
output "grafana_aws_access_key_id" {
value = aws_iam_access_key.grafana.id
}
# Secret access key of the Grafana IAM user; marked sensitive, matching the
# deploy secret output.
output "grafana_aws_secret_access_key" {
value = aws_iam_access_key.grafana.secret
sensitive = true
}

View File

@ -545,7 +545,7 @@ async function executeDepGraph({
continue;
}
if (artifacts[target].publishTarget) {
if (statuses[dep] === "publishToRegistry") {
if (statuses[dep] === "publishToRegistry" && publish) {
plan.push({
artifact: dep,
action: "publishToRegistry",