Debugging

This commit is contained in:
Radon Rosborough 2021-07-11 06:48:01 +00:00
parent 25e3bdf6d8
commit ada1b64fc1
10 changed files with 180 additions and 11 deletions

View File

@ -64,14 +64,15 @@ export class Session {
this.container = {
pty: containerPty,
};
containerPty.on("close", (code, signal) =>
containerPty.on("close", async (code, signal) => {
this.send({
event: "serviceFailed",
service: "container",
error: `Exited with status ${signal || code}`,
code: signal || code,
})
);
});
await this.teardown();
});
containerPty.on("error", (err) =>
this.send({
event: "serviceFailed",

View File

@ -1,5 +1,3 @@
# Disabled due to https://github.com/facebook/hhvm/issues/8796
id: "hack"
aliases:
- "hhvm"

35
packer/cloudwatch.json Normal file
View File

@ -0,0 +1,35 @@
{
"agent": {
"metrics_collection_interval": 60,
"run_as_user": "root"
},
"metrics": {
"append_dimensions": {
"AutoScalingGroupName": "${aws:AutoScalingGroupName}",
"ImageId": "${aws:ImageId}",
"InstanceId": "${aws:InstanceId}",
"InstanceType": "${aws:InstanceType}"
},
"aggregation_dimensions": [
["AutoScalingGroupName"],
["AutoScalingGroupName", "path"]
],
"metrics_collected": {
"disk": {
"measurement": [
"used_percent"
],
"metrics_collection_interval": 60,
"resources": [
"*"
]
},
"mem": {
"measurement": [
"mem_used_percent"
],
"metrics_collection_interval": 60
}
}
}
}

View File

@ -42,6 +42,11 @@ source "amazon-ebs" "ubuntu" {
build {
sources = ["source.amazon-ebs.ubuntu"]
# Copy the local cloudwatch.json onto the build instance at
# /tmp/cloudwatch.json.
provisioner "file" {
  source      = "cloudwatch.json"
  destination = "/tmp/cloudwatch.json"
}
provisioner "file" {
destination = "/tmp/riju-init-volume"
source = "riju-init-volume"

View File

@ -1,6 +1,6 @@
#!/usr/bin/env bash
set -euo pipefail
set -euxo pipefail
: ${ADMIN_PASSWORD}
: ${AWS_REGION}
@ -37,9 +37,15 @@ wget -nv https://awscli.amazonaws.com/awscli-exe-linux-x86_64.zip -O awscli.zip
unzip -q awscli.zip
sudo ./aws/install
sudo chown root:root /tmp/riju-init-volume /tmp/riju-supervisor /tmp/riju.service
wget -nv https://s3.us-west-1.amazonaws.com/amazon-ssm-us-west-1/latest/debian_amd64/amazon-ssm-agent.deb
wget -nv https://s3.amazonaws.com/amazoncloudwatch-agent/ubuntu/amd64/latest/amazon-cloudwatch-agent.deb
sudo apt-get install -y ./amazon-cloudwatch-agent.deb
sudo chown root:root /tmp/cloudwatch.json /tmp/riju-init-volume /tmp/riju-supervisor /tmp/riju.service
sudo mv /tmp/riju-init-volume /tmp/riju-supervisor /usr/local/bin/
sudo mv /tmp/riju.service /etc/systemd/system/
sudo mv /tmp/cloudwatch.json /opt/aws/amazon-cloudwatch-agent/bin/config.json
sudo sed -Ei 's/^#?PermitRootLogin .*/PermitRootLogin no/' /etc/ssh/sshd_config
sudo sed -Ei 's/^#?PasswordAuthentication .*/PasswordAuthentication no/' /etc/ssh/sshd_config
@ -51,8 +57,7 @@ sudo sed -Ei "s/\\\$SUPERVISOR_ACCESS_TOKEN/${SUPERVISOR_ACCESS_TOKEN}/" /etc/sy
sudo passwd -l root
sudo useradd admin -g admin -G sudo -s /usr/bin/bash -p "$(echo "${ADMIN_PASSWORD}" | mkpasswd -s)" -m
sudo hostnamectl set-hostname riju
sudo amazon-cloudwatch-agent-ctl -a fetch-config -m ec2 -s -c file:/opt/aws/amazon-cloudwatch-agent/bin/config.json
sudo systemctl enable riju
sudo userdel ubuntu -f

View File

@ -208,6 +208,7 @@ func (sv *supervisor) reloadWithScheduling() {
}
var rijuImageRegexp = regexp.MustCompile(`(?:^|/)riju:([^<>]+)$`)
var rijuImageTagRegexp = regexp.MustCompile(`^([^|]+)\|([^|]+)$`)
func (sv *supervisor) reload() error {
sv.status("getting access token from ECR")
@ -377,13 +378,54 @@ func (sv *supervisor) reload() error {
sv.isGreen = !sv.isGreen
sv.status("stopping old container")
dockerRm := exec.Command("docker", "rm", "-f", oldName)
dockerRm.Stdout = dockerRm.Stdout
dockerRm.Stderr = dockerRm.Stderr
dockerRm.Stdout = os.Stdout
dockerRm.Stderr = os.Stderr
if err := dockerRm.Run(); err != nil {
return err
}
sv.status("saving updated config hash")
sv.deployConfigHash = deployCfgHash
// Final reload step: remove Docker images that are no longer referenced by
// neededTags, then let Docker reclaim any remaining unused data.
sv.status("pruning unneeded Docker images")
dockerImageLs = exec.Command(
	"docker", "image", "ls", "--format",
	"{{ .ID }}|{{ .Tag }}",
)
dockerImageLs.Stderr = os.Stderr
out, err = dockerImageLs.Output()
if err != nil {
	return err
}
neededTagsSet := map[string]bool{}
for _, tag := range neededTags {
	neededTagsSet[tag] = true
}
// "docker image ls" prints one line per (ID, tag) pair, so a single image ID
// can appear under several tags. Removing by ID with -f drops every tag of
// that image, so an ID is only removable when none of its tags is needed.
keepImageIDs := map[string]bool{}
staleImageIDs := map[string]bool{}
for _, line := range strings.Split(string(out), "\n") {
	match := rijuImageTagRegexp.FindStringSubmatch(line)
	if match == nil {
		// Blank trailing line or output that is not in "ID|tag" form.
		continue
	}
	id, tag := match[1], match[2]
	if neededTagsSet[tag] {
		keepImageIDs[id] = true
	} else {
		staleImageIDs[id] = true
	}
}
removableImageIDs := []string{}
for id := range staleImageIDs {
	if !keepImageIDs[id] {
		removableImageIDs = append(removableImageIDs, id)
	}
}
// "docker image rm" requires at least one image argument; invoking it with
// none would make an otherwise successful reload return an error.
if len(removableImageIDs) > 0 {
	dockerImageRmArgs := append([]string{"image", "rm", "-f"}, removableImageIDs...)
	dockerImageRm := exec.Command("docker", dockerImageRmArgs...)
	dockerImageRm.Stdout = os.Stdout
	dockerImageRm.Stderr = os.Stderr
	if err := dockerImageRm.Run(); err != nil {
		return err
	}
}
// -f skips the interactive confirmation prompt; this command runs without a
// terminal on stdin, so without it the prune cannot be confirmed.
dockerPrune := exec.Command("docker", "system", "prune", "-f")
dockerPrune.Stdout = os.Stdout
dockerPrune.Stderr = os.Stderr
if err := dockerPrune.Run(); err != nil {
	return err
}
sv.status("reload complete")
return nil
}

61
tf/cloudwatch.tf Normal file
View File

@ -0,0 +1,61 @@
# Memory alarm for the Riju server, based on the mem_used_percent metric in
# the CWAgent namespace and scoped to the server autoscaling group.
# Only created when local.ami_available is true.
resource "aws_cloudwatch_metric_alarm" "server_memory" {
  count = local.ami_available ? 1 : 0

  alarm_name        = "riju-server-memory-high"
  alarm_description = "Memory usage on Riju server is above 80%"

  # Which metric to watch.
  namespace   = "CWAgent"
  metric_name = "mem_used_percent"
  dimensions = {
    AutoScalingGroupName = aws_autoscaling_group.server[count.index].name
  }

  # Breach condition: Maximum >= 80, evaluated over 5 periods of 60 each.
  statistic           = "Maximum"
  comparison_operator = "GreaterThanOrEqualToThreshold"
  threshold           = "80"
  period              = "60"
  evaluation_periods  = "5"

  # All three alarm states notify the same SNS topic.
  ok_actions                = [aws_sns_topic.riju.arn]
  alarm_actions             = [aws_sns_topic.riju.arn]
  insufficient_data_actions = [aws_sns_topic.riju.arn]
}
# Disk usage alarm for the path /mnt/riju/data on the Riju server, based on
# the disk_used_percent metric in the CWAgent namespace.
# Only created when local.ami_available is true.
resource "aws_cloudwatch_metric_alarm" "server_data_volume_disk_space" {
  count = local.ami_available ? 1 : 0

  alarm_name        = "riju-server-data-volume-disk-usage-high"
  alarm_description = "Disk space usage for data volume on Riju server is above 80%"

  # Which metric to watch: the autoscaling group plus a specific mount path.
  namespace   = "CWAgent"
  metric_name = "disk_used_percent"
  dimensions = {
    AutoScalingGroupName = aws_autoscaling_group.server[count.index].name
    path                 = "/mnt/riju/data"
  }

  # Breach condition: Minimum >= 80, evaluated over 5 periods of 60 each.
  # NOTE(review): the statistic here is Minimum, not Maximum -- confirm that
  # alarming only when the lowest reading reaches the threshold is intended.
  statistic           = "Minimum"
  comparison_operator = "GreaterThanOrEqualToThreshold"
  threshold           = "80"
  period              = "60"
  evaluation_periods  = "5"

  # All three alarm states notify the same SNS topic.
  ok_actions                = [aws_sns_topic.riju.arn]
  alarm_actions             = [aws_sns_topic.riju.arn]
  insufficient_data_actions = [aws_sns_topic.riju.arn]
}
# Disk usage alarm for the root path "/" on the Riju server, based on the
# disk_used_percent metric in the CWAgent namespace.
# Only created when local.ami_available is true.
resource "aws_cloudwatch_metric_alarm" "server_root_volume_disk_space" {
  count = local.ami_available ? 1 : 0

  alarm_name        = "riju-server-root-volume-disk-usage-high"
  alarm_description = "Disk space usage for root volume on Riju server is above 80%"

  # Which metric to watch: the autoscaling group plus a specific mount path.
  namespace   = "CWAgent"
  metric_name = "disk_used_percent"
  dimensions = {
    AutoScalingGroupName = aws_autoscaling_group.server[count.index].name
    path                 = "/"
  }

  # Breach condition: Minimum >= 80, evaluated over 5 periods of 60 each.
  # NOTE(review): the statistic here is Minimum, not Maximum -- confirm that
  # alarming only when the lowest reading reaches the threshold is intended.
  statistic           = "Minimum"
  comparison_operator = "GreaterThanOrEqualToThreshold"
  threshold           = "80"
  period              = "60"
  evaluation_periods  = "5"

  # All three alarm states notify the same SNS topic.
  ok_actions                = [aws_sns_topic.riju.arn]
  alarm_actions             = [aws_sns_topic.riju.arn]
  insufficient_data_actions = [aws_sns_topic.riju.arn]
}

View File

@ -1,3 +1,11 @@
# Look up the existing IAM policy named CloudWatchAgentServerPolicy so its
# ARN can be referenced elsewhere in the configuration.
data "aws_iam_policy" "cloudwatch" {
  name = "CloudWatchAgentServerPolicy"
}
# Look up the existing IAM policy named AmazonSSMManagedInstanceCore so its
# ARN can be referenced elsewhere in the configuration.
data "aws_iam_policy" "ssm" {
  name = "AmazonSSMManagedInstanceCore"
}
resource "aws_iam_user" "deploy" {
name = "riju-deploy"
}
@ -171,6 +179,16 @@ resource "aws_iam_role_policy_attachment" "server" {
policy_arn = aws_iam_policy.server.arn
}
# Attach the looked-up CloudWatch agent policy to the server IAM role.
resource "aws_iam_role_policy_attachment" "server_cloudwatch" {
  policy_arn = data.aws_iam_policy.cloudwatch.arn
  role       = aws_iam_role.server.name
}
# Attach the looked-up SSM managed-instance policy to the server IAM role.
resource "aws_iam_role_policy_attachment" "server_ssm" {
  policy_arn = data.aws_iam_policy.ssm.arn
  role       = aws_iam_role.server.name
}
resource "aws_iam_instance_profile" "server" {
name = "riju-server"
role = aws_iam_role.server.name

3
tf/sns.tf Normal file
View File

@ -0,0 +1,3 @@
# SNS topic named "Riju"; its ARN is what other resources reference as
# aws_sns_topic.riju.arn.
resource "aws_sns_topic" "riju" {
  name = "Riju"
}

View File

@ -32,6 +32,7 @@ async function main() {
program.parse(process.argv);
await fs.mkdir("build", { recursive: true });
await fs.writeFile("build/config.json", JSON.stringify(await getDeployConfig(), null, 2) + "\n");
console.log("wrote build/config.json");
process.exit(0);
}