diff --git a/.gitignore b/.gitignore index 382d670..d0df151 100644 --- a/.gitignore +++ b/.gitignore @@ -4,8 +4,6 @@ .lsp-repl-history .terraform build -# Separate directory for things that are ignored by Git but not by -# Docker. -build-docker node_modules out +sentinel.h diff --git a/Makefile b/Makefile index 280c8ca..96abf0f 100644 --- a/Makefile +++ b/Makefile @@ -160,7 +160,7 @@ system: # Compile setuid binary for production ./system/compile.bash system-dev: # Compile and watch setuid binary for development - watchexec -w system/src -n -- ./system/compile.bash + watchexec -w system/res -w system/src -n -- ./system/compile.bash supervisor: # Compile supervisor binary for production ./supervisor/compile.bash diff --git a/system/compile.bash b/system/compile.bash index 24bee70..e2d926e 100755 --- a/system/compile.bash +++ b/system/compile.bash @@ -8,16 +8,21 @@ if [[ ! -d system/src ]]; then fi function verbosely { - echo "$@" + echo >&2 "$@" "$@" } mkdir -p system/out rm -f system/out/* + +pushd system/res >/dev/null +verbosely xxd -i sentinel.bash > ../src/sentinel.h +popd >/dev/null + for src in system/src/*.c; do out="${src/src/out}" out="${out/.c}" - verbosely clang -Wall -Wextra -Werror -std=c11 "${src}" -o "${out}" + verbosely clang -Isystem/res -Wall -Wextra -Werror -std=c11 "${src}" -o "${out}" if [[ "${out}" == *-privileged && -z "${UNPRIVILEGED:-}" ]]; then verbosely sudo chown root:riju "${out}" verbosely sudo chmod a=,g=rx,u=rwxs "${out}" diff --git a/system/res/docker-exec.py b/system/res/docker-exec.py deleted file mode 100755 index 3507730..0000000 --- a/system/res/docker-exec.py +++ /dev/null @@ -1,93 +0,0 @@ -#!/usr/bin/env python3 - -import argparse -import signal -import subprocess -import sys -import uuid - - -class Parser(argparse.ArgumentParser): - def format_help(self): - return """ -Usage: docker-exec.bash [OPTIONS] CONTAINER COMMAND [ARG...] - -Run a command in a running container - -Options: - -i, --interactive Keep STDIN open even if not attached - -t, --tty Allocate a pseudo-TTY - -u, --user string Username or UID (format: :[]) -""" - - -parser = Parser() -parser.add_argument("-i", "--interactive", action="store_true") -parser.add_argument("-t", "--tty", action="store_true") -parser.add_argument("-u", "--user", type=str) -parser.add_argument("container", type=str) -parser.add_argument("arg", type=str, nargs="*") - -args = parser.parse_args() - -pidfiles = "/var/run/riju/pidfiles" -pidfile = pidfiles + "/" + str(uuid.uuid4()).replace("-", "") - -# We have to use 'kill -9' here, otherwise runuser intercepts the -# signal and takes its sweet time cleaning up. -def cleanup(*ignored_args): - subprocess.run( - [ - "docker", - "exec", - args.container, - "bash", - "-c", - f""" -set -euo pipefail -if [[ -f '{pidfile}' ]]; then - kill -9 -$(< '{pidfile}') 2>/dev/null || true - rm -f '{pidfile}' -fi - """, - ] - ) - - -signal.signal(signal.SIGINT, cleanup) -signal.signal(signal.SIGTERM, cleanup) - -exec_args = [] - -if args.interactive: - exec_args.append("-i") -if args.tty: - exec_args.append("-t") - -runuser_args = [] - -if args.user: - runuser_args = ["runuser", "-u", args.user, "--"] - -sys.exit( - subprocess.run( - [ - "docker", - "exec", - *exec_args, - args.container, - "bash", - "-c", - f""" -set -euo pipefail -umask 077 -mkdir -p '{pidfiles}' -echo "$$" > '{pidfile}' -exec "$@" - """, - "--", - *runuser_args, - *args.arg, - ] - ).returncode -) diff --git a/system/res/sentinel.bash b/system/res/sentinel.bash new file mode 100755 index 0000000..39623f6 --- /dev/null +++ b/system/res/sentinel.bash @@ -0,0 +1,27 @@ +#!/usr/bin/env bash + +set -euo pipefail + +while read -t2 -a cmd; do + if (( "${#cmd[@]}" > 0 )); then + case "${cmd[0]}" in + ping) ;; + exec|pty) + if (( "${#cmd[@]}" < 3 )); then + echo >&2 "usage: (exec|pty) UUID ARG..." + else + uuid="${cmd[1]}" + args=("${cmd[@]:2}") + echo >&2 "${cmd[0]} ${args[0]} with UUID ${uuid}" + input="/var/run/riju/share/cmd-${uuid}-input" + output="/var/run/riju/share/cmd-${uuid}-output" + mkfifo "${input}" "${output}" + runuser -u riju -- bash -c 'exec "$@"' sentinel "${args[@]}" < "${input}" &> "${output}" & + fi + ;; + *) + echo >&2 "unrecognized command: ${cmd[0]}" + ;; + esac + fi +done < /var/run/riju/share/control diff --git a/system/src/riju-system-privileged.c b/system/src/riju-system-privileged.c index d742370..c395645 100644 --- a/system/src/riju-system-privileged.c +++ b/system/src/riju-system-privileged.c @@ -7,18 +7,23 @@ #include #include #include +#include #include #include #include #include #include +#include "sentinel.h" + void __attribute__((noreturn)) die(char *msg) { fprintf(stderr, "%s\n", msg); exit(1); } +void init() { sentinel_bash[sentinel_bash_len - 1] = '\0'; } + void die_with_usage() { die("usage:\n" @@ -27,6 +32,70 @@ void die_with_usage() " riju-system-privileged pty UUID CMDLINE..."); } +char *quoteArgs(int argc, char **cmdline) +{ + char **printfArgs = malloc(sizeof(char *) * (argc + 3)); + printfArgs[0] = "printf"; + printfArgs[1] = "%q "; + memcpy(printfArgs + 2, cmdline, sizeof(char *) * argc); + printfArgs[argc + 2] = NULL; + int fd[2]; + if (pipe(fd) < 0) + die("pipe failed"); + pid_t pid = fork(); + if (pid < 0) + die("fork failed"); + else if (pid == 0) { + if (dup2(fd[1], STDOUT_FILENO) < 0) + die("dup2 failed"); + if (close(fd[0]) < 0 || close(fd[1]) < 0) + die("close failed"); + execvp(printfArgs[0], printfArgs); + die("execvp failed"); + } + if (close(fd[1]) < 0) + die("close failed"); + char *buf = malloc(1024); + if (buf == NULL) + die("malloc failed"); + ssize_t len_allocated = 2048; + ssize_t len_total = 0; + ssize_t len_read; + while ((len_read = read(fd[0], buf + len_total, 1024)) > 0) { + len_total += len_read; + if (len_allocated - len_total < 1024) { + char *new_buf = malloc(len_allocated + 1024); + if (new_buf == NULL) + die("malloc failed"); + memcpy(new_buf, buf, len_total); + free(buf); + buf = new_buf; + } + } + if (len_read < 0) + die("read failed"); + buf[len_total] = '\0'; + return buf; +} + +char *getUUID() +{ + char *buf = malloc(16); + if (buf == NULL) + die("malloc failed"); + if (getrandom(buf, 16, 0) != 16) + die("getrandom failed"); + char *uuid; + if (asprintf(&uuid, + "%02hhx%02hhx%02hhx%02hhx%02hhx%02hhx%02hhx%02hhx%02hhx%02hhx%" + "02hhx%02hhx%02hhx%02hhx%02hhx%02hhx", + buf[0], buf[1], buf[2], buf[3], buf[4], buf[5], buf[6], buf[7], + buf[8], buf[9], buf[10], buf[11], buf[12], buf[13], buf[14], + buf[15]) < 0) + die("asprintf failed"); + return uuid; +} + char *parseUUID(char *uuid) { if (strnlen(uuid, 33) != 32) @@ -55,15 +124,17 @@ char *parseImageHash(char *imageHash) return imageHash; } +char *timeout_msg; + void wait_alarm(int signum) { (void)signum; - die("container did not come up within 10 seconds"); + die(timeout_msg); } void session(char *uuid, char *lang, char *imageHash) { - char *image, *container, *hostname, *volume, *fifo; + char *image, *container, *hostname, *share, *volume, *fifo; if ((imageHash != NULL ? asprintf(&image, "riju:lang-%s-%s", lang, imageHash) : asprintf(&image, "riju:lang-%s", lang)) < 0) die("asprintf failed"); @@ -71,20 +142,20 @@ void session(char *uuid, char *lang, char *imageHash) die("asprintf failed"); if (asprintf(&hostname, "HOSTNAME=%s", lang) < 0) die("asprintf failed"); - int rv = mkdir("/var/run/riju/sentinels", 0700); + if (asprintf(&share, "/var/run/riju/shares/%s", uuid) < 0) + die("asprintf failed"); + int rv = mkdir("/var/run/riju/shares", 0700); if (rv < 0 && errno != EEXIST) die("mkdir failed"); - char tmpdir[] = "/var/run/riju/sentinels/XXXXXX"; - if (mkdtemp(tmpdir) == NULL) - die("mkdtemp failed"); - if (asprintf(&volume, "%s:/var/run/riju/sentinel", tmpdir) < 0) + rv = mkdir(share, 0700); + if (rv < 0 && errno != EEXIST) + die("mkdir failed"); + if (asprintf(&volume, "%s:/var/run/riju/share", share) < 0) die("asprintf failed"); - if (asprintf(&fifo, "%s/fifo", tmpdir) < 0) + if (asprintf(&fifo, "%s/control", share) < 0) die("asprintf failed"); if (mknod(fifo, 0700 | S_IFIFO, 0) < 0) die("mknod failed"); - char sentinel[] = "cat /var/run/riju/sentinel/fifo | ( sleep 10; while " - "read -t2; do :; done; pkill -g0 )"; pid_t pid = fork(); if (pid < 0) die("fork failed"); @@ -138,15 +209,16 @@ void session(char *uuid, char *lang, char *imageHash) image, "bash", "-c", - sentinel, + (char *)sentinel_bash, NULL, }; execvp(argv[0], argv); die("execvp failed"); } - struct timespec ts_10ms; // 10ms + struct timespec ts_10ms; ts_10ms.tv_sec = 0; ts_10ms.tv_nsec = 1000 * 1000 * 10; + timeout_msg = "container did not come up within 10 seconds"; signal(SIGALRM, wait_alarm); alarm(10); int fd; @@ -161,20 +233,16 @@ void session(char *uuid, char *lang, char *imageHash) die("nanosleep failed"); } signal(SIGALRM, SIG_IGN); - if (unlink(fifo) < 0) - die("unlink failed"); - if (rmdir(tmpdir) < 0) - die("rmdir failed"); pid = fork(); if (pid < 0) die("fork failed"); else if (pid == 0) { - struct timespec ts_1s; // 10ms + struct timespec ts_1s; ts_1s.tv_sec = 1; ts_1s.tv_nsec = 0; while (1) { - static const char ok[] = "ok\n"; - if (write(fd, ok, sizeof(ok) / sizeof(char)) < 0) + static const char ok[] = "ping\n"; + if (write(fd, ok, sizeof(ok) / sizeof(char)) != sizeof(ok) / sizeof(char)) die("write failed"); int rv = nanosleep(&ts_1s, NULL); if (rv != 0 && errno != EINTR) @@ -190,29 +258,89 @@ void session(char *uuid, char *lang, char *imageHash) void exec(char *uuid, int argc, char **cmdline, bool pty) { - char *container; - if (asprintf(&container, "riju-session-%s", uuid) < 0) + char *share, *ctlFIFO, *inputFIFO, *outputFIFO, *ctlCmd, *dataFIFO; + if (asprintf(&share, "/var/run/riju/shares/%s", uuid) < 0) die("asprintf failed"); - char *argvPrefix[] = { - "./system/res/docker-exec.py", - "--user", - "riju", - pty ? "-it" : "-i", - container, - "--", - }; - char **argv = malloc(sizeof(argvPrefix) + (argc + 1) * sizeof(char *)); - if (argv == NULL) - die("malloc failed"); - memcpy(argv, argvPrefix, sizeof(argvPrefix)); - memcpy((void *)argv + sizeof(argvPrefix), cmdline, argc * sizeof(char *)); - argv[sizeof(argvPrefix) + argc * sizeof(char *)] = NULL; - execvp(argv[0], argv); - die("execvp failed"); + if (asprintf(&ctlFIFO, "%s/control", share) < 0) + die("asprintf failed"); + char *procUUID = getUUID(); + if (asprintf(&inputFIFO, "%s/cmd-%s-input", share, procUUID) < 0) + die("asprintf failed"); + if (asprintf(&outputFIFO, "%s/cmd-%s-output", share, procUUID) < 0) + die("asprintf failed"); + int fd = open(ctlFIFO, O_WRONLY); + if (fd < 0) + die("open failed"); + char *quotedArgs = quoteArgs(argc, cmdline); + int len = asprintf(&ctlCmd, "%s %s %s\n", pty ? "pty" : "exec", procUUID, + quotedArgs); + if (len < 0) + die("asprintf failed"); + int len_written; + while ((len_written = write(fd, ctlCmd, len)) > 0) { + ctlCmd += len_written; + len -= len_written; + } + if (len_written < 0) + die("write failed"); + close(fd); + struct timespec ts_10ms; + ts_10ms.tv_sec = 0; + ts_10ms.tv_nsec = 1000 * 1000 * 10; + int mode; + pid_t pid = fork(); + if (pid < 0) + die("fork failed"); + else if (pid == 0) { + dataFIFO = inputFIFO; + timeout_msg = "sentinel did not set up input FIFO within 1 second"; + mode = O_WRONLY; + } else { + dataFIFO = outputFIFO; + timeout_msg = "sentinel did not set up output FIFO within 1 second"; + mode = O_RDONLY; + } + signal(SIGALRM, wait_alarm); + alarm(1); + while (1) { + fd = open(dataFIFO, mode); + if (fd >= 0) + break; + if (errno != ENOENT) + die("open failed"); + int rv = nanosleep(&ts_10ms, NULL); + if (rv != 0 && errno != EINTR) + die("nanosleep failed"); + } + signal(SIGALRM, SIG_IGN); + char buf[1024]; + if (pid == 0) { + while ((len = read(STDIN_FILENO, buf, 1024)) > 0) { + char *ptr = buf; + while (len > 0) { + len_written = write(fd, ptr, len); + if (len_written < 0) + die("write failed"); + len -= len_written; + ptr += len_written; + } + } + } else { + while ((len = read(fd, buf, 1024)) > 0) { + fwrite(buf, 1, len, stdout); + if (ferror(stdout)) + die("fwrite failed"); + if (feof(stdout)) + break; + } + } + if (len < 0) + die("read failed"); } int main(int argc, char **argv) { + init(); if (seteuid(0) != 0) die("seteuid failed"); if (argc < 2)