Proof of concept for pid1 sentinel executor
This commit is contained in:
parent
43d2a20971
commit
389d8d0ad6
|
@ -4,8 +4,6 @@
|
|||
.lsp-repl-history
|
||||
.terraform
|
||||
build
|
||||
# Separate directory for things that are ignored by Git but not by
|
||||
# Docker.
|
||||
build-docker
|
||||
node_modules
|
||||
out
|
||||
sentinel.h
|
||||
|
|
2
Makefile
2
Makefile
|
@ -160,7 +160,7 @@ system: # Compile setuid binary for production
|
|||
./system/compile.bash
|
||||
|
||||
system-dev: # Compile and watch setuid binary for development
|
||||
watchexec -w system/src -n -- ./system/compile.bash
|
||||
watchexec -w system/res -w system/src -n -- ./system/compile.bash
|
||||
|
||||
supervisor: # Compile supervisor binary for production
|
||||
./supervisor/compile.bash
|
||||
|
|
|
@ -8,16 +8,21 @@ if [[ ! -d system/src ]]; then
|
|||
fi
|
||||
|
||||
function verbosely {
|
||||
echo "$@"
|
||||
echo >&2 "$@"
|
||||
"$@"
|
||||
}
|
||||
|
||||
mkdir -p system/out
|
||||
rm -f system/out/*
|
||||
|
||||
pushd system/res >/dev/null
|
||||
verbosely xxd -i sentinel.bash > ../src/sentinel.h
|
||||
popd >/dev/null
|
||||
|
||||
for src in system/src/*.c; do
|
||||
out="${src/src/out}"
|
||||
out="${out/.c}"
|
||||
verbosely clang -Wall -Wextra -Werror -std=c11 "${src}" -o "${out}"
|
||||
verbosely clang -Isystem/res -Wall -Wextra -Werror -std=c11 "${src}" -o "${out}"
|
||||
if [[ "${out}" == *-privileged && -z "${UNPRIVILEGED:-}" ]]; then
|
||||
verbosely sudo chown root:riju "${out}"
|
||||
verbosely sudo chmod a=,g=rx,u=rwxs "${out}"
|
||||
|
|
|
@ -1,93 +0,0 @@
|
|||
#!/usr/bin/env python3
|
||||
|
||||
import argparse
|
||||
import signal
|
||||
import subprocess
|
||||
import sys
|
||||
import uuid
|
||||
|
||||
|
||||
class Parser(argparse.ArgumentParser):
|
||||
def format_help(self):
|
||||
return """
|
||||
Usage: docker-exec.bash [OPTIONS] CONTAINER COMMAND [ARG...]
|
||||
|
||||
Run a command in a running container
|
||||
|
||||
Options:
|
||||
-i, --interactive Keep STDIN open even if not attached
|
||||
-t, --tty Allocate a pseudo-TTY
|
||||
-u, --user string Username or UID (format: <name|uid>:[<group|gid>])
|
||||
"""
|
||||
|
||||
|
||||
parser = Parser()
|
||||
parser.add_argument("-i", "--interactive", action="store_true")
|
||||
parser.add_argument("-t", "--tty", action="store_true")
|
||||
parser.add_argument("-u", "--user", type=str)
|
||||
parser.add_argument("container", type=str)
|
||||
parser.add_argument("arg", type=str, nargs="*")
|
||||
|
||||
args = parser.parse_args()
|
||||
|
||||
pidfiles = "/var/run/riju/pidfiles"
|
||||
pidfile = pidfiles + "/" + str(uuid.uuid4()).replace("-", "")
|
||||
|
||||
# We have to use 'kill -9' here, otherwise runuser intercepts the
|
||||
# signal and takes its sweet time cleaning up.
|
||||
def cleanup(*ignored_args):
|
||||
subprocess.run(
|
||||
[
|
||||
"docker",
|
||||
"exec",
|
||||
args.container,
|
||||
"bash",
|
||||
"-c",
|
||||
f"""
|
||||
set -euo pipefail
|
||||
if [[ -f '{pidfile}' ]]; then
|
||||
kill -9 -$(< '{pidfile}') 2>/dev/null || true
|
||||
rm -f '{pidfile}'
|
||||
fi
|
||||
""",
|
||||
]
|
||||
)
|
||||
|
||||
|
||||
signal.signal(signal.SIGINT, cleanup)
|
||||
signal.signal(signal.SIGTERM, cleanup)
|
||||
|
||||
exec_args = []
|
||||
|
||||
if args.interactive:
|
||||
exec_args.append("-i")
|
||||
if args.tty:
|
||||
exec_args.append("-t")
|
||||
|
||||
runuser_args = []
|
||||
|
||||
if args.user:
|
||||
runuser_args = ["runuser", "-u", args.user, "--"]
|
||||
|
||||
sys.exit(
|
||||
subprocess.run(
|
||||
[
|
||||
"docker",
|
||||
"exec",
|
||||
*exec_args,
|
||||
args.container,
|
||||
"bash",
|
||||
"-c",
|
||||
f"""
|
||||
set -euo pipefail
|
||||
umask 077
|
||||
mkdir -p '{pidfiles}'
|
||||
echo "$$" > '{pidfile}'
|
||||
exec "$@"
|
||||
""",
|
||||
"--",
|
||||
*runuser_args,
|
||||
*args.arg,
|
||||
]
|
||||
).returncode
|
||||
)
|
|
@ -0,0 +1,27 @@
|
|||
#!/usr/bin/env bash

# Sentinel command loop. Reads one command per line from the control
# FIFO and stops as soon as no line arrives within two seconds (or the
# FIFO reaches end-of-file), which ends the script.
#
# Recognized lines:
#   ping                    - no action; it only keeps the loop alive
#   exec UUID ARG...        - run ARG... as user riju
#   pty UUID ARG...         - currently handled exactly like exec
#
# For exec/pty, two FIFOs named cmd-UUID-input and cmd-UUID-output are
# created in the share directory and wired to the command's stdin and
# stdout+stderr respectively.

set -euo pipefail

# NOTE(review): read is used without -r, so backslash escapes in the
# incoming line are interpreted while it is split into words --
# presumably this is meant to undo the quoting applied by the writer;
# confirm it round-trips every quoting form the writer can emit.
while read -t2 -a cmd; do
    # A blank line carries no command.
    if (( "${#cmd[@]}" == 0 )); then
        continue
    fi

    case "${cmd[0]}" in
        ping)
            ;;
        exec|pty)
            if (( "${#cmd[@]}" < 3 )); then
                echo >&2 "usage: (exec|pty) UUID ARG..."
                continue
            fi

            uuid="${cmd[1]}"
            args=("${cmd[@]:2}")
            echo >&2 "${cmd[0]} ${args[0]} with UUID ${uuid}"

            input="/var/run/riju/share/cmd-${uuid}-input"
            output="/var/run/riju/share/cmd-${uuid}-output"
            mkfifo "${input}" "${output}"

            # Run in the background so the loop keeps servicing the
            # control FIFO while the command is alive.
            runuser -u riju -- bash -c 'exec "$@"' sentinel "${args[@]}" < "${input}" &> "${output}" &
            ;;
        *)
            echo >&2 "unrecognized command: ${cmd[0]}"
            ;;
    esac
done < /var/run/riju/share/control
|
|
@ -7,18 +7,23 @@
|
|||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <sys/random.h>
|
||||
#include <sys/stat.h>
|
||||
#include <sys/types.h>
|
||||
#include <sys/wait.h>
|
||||
#include <time.h>
|
||||
#include <unistd.h>
|
||||
|
||||
#include "sentinel.h"
|
||||
|
||||
/*
 * Report a fatal error and terminate.
 *
 * Writes msg followed by a newline to stderr, then exits the process with
 * status 1. Never returns.
 */
void __attribute__((noreturn)) die(char *msg)
{
  fputs(msg, stderr);
  fputc('\n', stderr);
  exit(1);
}
|
||||
|
||||
/*
 * One-time setup for the embedded sentinel script.
 *
 * sentinel.h is generated by `xxd -i sentinel.bash`, so sentinel_bash is a raw
 * byte array with no terminator. Overwrite its final byte with NUL so the
 * array can later be passed as a C string.
 * NOTE(review): this assumes the last byte of sentinel.bash is expendable
 * (presumably a trailing newline) -- confirm.
 */
void init()
{
  size_t last = sentinel_bash_len - 1;
  sentinel_bash[last] = '\0';
}
|
||||
|
||||
void die_with_usage()
|
||||
{
|
||||
die("usage:\n"
|
||||
|
@ -27,6 +32,70 @@ void die_with_usage()
|
|||
" riju-system-privileged pty UUID CMDLINE...");
|
||||
}
|
||||
|
||||
/*
 * Shell-quote an argument vector into a single string.
 *
 * Runs the external `printf` program with the format "%q " and the given
 * arguments, and captures its standard output through a pipe.
 *
 * argc:    number of entries in cmdline
 * cmdline: the arguments to quote (need not be NULL-terminated)
 *
 * Returns a heap-allocated, NUL-terminated string that the caller owns and
 * must free. Because the format is "%q ", every quoted argument is followed
 * by one space, including the last. Any failure (allocation, pipe, fork,
 * read, or a non-zero exit from printf) terminates the process via die().
 *
 * NOTE(review): printf is located through PATH by execvp. If this runs with
 * elevated privileges and a caller-controlled environment, consider using an
 * absolute path instead.
 */
char *quoteArgs(int argc, char **cmdline)
{
  /* argv for printf: "printf", "%q ", the arguments, then a NULL terminator. */
  char **printfArgs = malloc(sizeof(char *) * (argc + 3));
  if (printfArgs == NULL)
    die("malloc failed");
  printfArgs[0] = "printf";
  printfArgs[1] = "%q ";
  memcpy(printfArgs + 2, cmdline, sizeof(char *) * argc);
  printfArgs[argc + 2] = NULL;

  int fd[2];
  if (pipe(fd) < 0)
    die("pipe failed");

  pid_t pid = fork();
  if (pid < 0)
    die("fork failed");
  else if (pid == 0) {
    /* Child: route stdout into the pipe and become printf. */
    if (dup2(fd[1], STDOUT_FILENO) < 0)
      die("dup2 failed");
    if (close(fd[0]) < 0 || close(fd[1]) < 0)
      die("close failed");
    execvp(printfArgs[0], printfArgs);
    die("execvp failed");
  }

  /* Parent: the argv copy was only needed by the child. */
  free(printfArgs);

  /* Close the write end so read() sees end-of-file once the child exits. */
  if (close(fd[1]) < 0)
    die("close failed");

  size_t capacity = 1024;
  size_t len_total = 0;
  char *buf = malloc(capacity);
  if (buf == NULL)
    die("malloc failed");

  while (1) {
    /* Grow before reading so there is always room for at least one byte of
       data plus the terminating NUL. */
    if (capacity - len_total < 2) {
      char *new_buf = realloc(buf, capacity * 2);
      if (new_buf == NULL)
        die("malloc failed");
      buf = new_buf;
      capacity *= 2;
    }
    ssize_t len_read = read(fd[0], buf + len_total, capacity - len_total - 1);
    if (len_read < 0) {
      if (errno == EINTR)
        continue;
      die("read failed");
    }
    if (len_read == 0)
      break;
    len_total += (size_t)len_read;
  }
  buf[len_total] = '\0';

  if (close(fd[0]) < 0)
    die("close failed");

  /* Reap the child so it does not linger as a zombie, and refuse to return
     partial output if printf did not succeed. */
  int status = 0;
  pid_t reaped;
  do {
    reaped = waitpid(pid, &status, 0);
  } while (reaped < 0 && errno == EINTR);
  if (reaped < 0)
    die("waitpid failed");
  if (!WIFEXITED(status) || WEXITSTATUS(status) != 0)
    die("printf failed");

  return buf;
}
|
||||
|
||||
/*
 * Generate a random identifier.
 *
 * Draws 16 bytes from getrandom() and encodes them as 32 lowercase
 * hexadecimal characters.
 *
 * Returns a heap-allocated, NUL-terminated string that the caller owns and
 * must free. Terminates the process via die() if the random bytes cannot be
 * obtained or the allocation fails.
 */
char *getUUID()
{
  enum { UUID_BYTES = 16 };
  static const char hex_digits[] = "0123456789abcdef";

  /* Stack scratch space: nothing to free, and unsigned so that shifting and
     masking below are well-defined for every byte value. */
  unsigned char bytes[UUID_BYTES];
  if (getrandom(bytes, sizeof(bytes), 0) != (ssize_t)sizeof(bytes))
    die("getrandom failed");

  char *uuid = malloc(2 * UUID_BYTES + 1);
  if (uuid == NULL)
    die("malloc failed");

  for (int i = 0; i < UUID_BYTES; i++) {
    uuid[2 * i] = hex_digits[bytes[i] >> 4];
    uuid[2 * i + 1] = hex_digits[bytes[i] & 0x0f];
  }
  uuid[2 * UUID_BYTES] = '\0';
  return uuid;
}
|
||||
|
||||
char *parseUUID(char *uuid)
|
||||
{
|
||||
if (strnlen(uuid, 33) != 32)
|
||||
|
@ -55,15 +124,17 @@ char *parseImageHash(char *imageHash)
|
|||
return imageHash;
|
||||
}
|
||||
|
||||
char *timeout_msg;
|
||||
|
||||
void wait_alarm(int signum)
|
||||
{
|
||||
(void)signum;
|
||||
die("container did not come up within 10 seconds");
|
||||
die(timeout_msg);
|
||||
}
|
||||
|
||||
void session(char *uuid, char *lang, char *imageHash)
|
||||
{
|
||||
char *image, *container, *hostname, *volume, *fifo;
|
||||
char *image, *container, *hostname, *share, *volume, *fifo;
|
||||
if ((imageHash != NULL ? asprintf(&image, "riju:lang-%s-%s", lang, imageHash)
|
||||
: asprintf(&image, "riju:lang-%s", lang)) < 0)
|
||||
die("asprintf failed");
|
||||
|
@ -71,20 +142,20 @@ void session(char *uuid, char *lang, char *imageHash)
|
|||
die("asprintf failed");
|
||||
if (asprintf(&hostname, "HOSTNAME=%s", lang) < 0)
|
||||
die("asprintf failed");
|
||||
int rv = mkdir("/var/run/riju/sentinels", 0700);
|
||||
if (asprintf(&share, "/var/run/riju/shares/%s", uuid) < 0)
|
||||
die("asprintf failed");
|
||||
int rv = mkdir("/var/run/riju/shares", 0700);
|
||||
if (rv < 0 && errno != EEXIST)
|
||||
die("mkdir failed");
|
||||
char tmpdir[] = "/var/run/riju/sentinels/XXXXXX";
|
||||
if (mkdtemp(tmpdir) == NULL)
|
||||
die("mkdtemp failed");
|
||||
if (asprintf(&volume, "%s:/var/run/riju/sentinel", tmpdir) < 0)
|
||||
rv = mkdir(share, 0700);
|
||||
if (rv < 0 && errno != EEXIST)
|
||||
die("mkdir failed");
|
||||
if (asprintf(&volume, "%s:/var/run/riju/share", share) < 0)
|
||||
die("asprintf failed");
|
||||
if (asprintf(&fifo, "%s/fifo", tmpdir) < 0)
|
||||
if (asprintf(&fifo, "%s/control", share) < 0)
|
||||
die("asprintf failed");
|
||||
if (mknod(fifo, 0700 | S_IFIFO, 0) < 0)
|
||||
die("mknod failed");
|
||||
char sentinel[] = "cat /var/run/riju/sentinel/fifo | ( sleep 10; while "
|
||||
"read -t2; do :; done; pkill -g0 )";
|
||||
pid_t pid = fork();
|
||||
if (pid < 0)
|
||||
die("fork failed");
|
||||
|
@ -138,15 +209,16 @@ void session(char *uuid, char *lang, char *imageHash)
|
|||
image,
|
||||
"bash",
|
||||
"-c",
|
||||
sentinel,
|
||||
(char *)sentinel_bash,
|
||||
NULL,
|
||||
};
|
||||
execvp(argv[0], argv);
|
||||
die("execvp failed");
|
||||
}
|
||||
struct timespec ts_10ms; // 10ms
|
||||
struct timespec ts_10ms;
|
||||
ts_10ms.tv_sec = 0;
|
||||
ts_10ms.tv_nsec = 1000 * 1000 * 10;
|
||||
timeout_msg = "container did not come up within 10 seconds";
|
||||
signal(SIGALRM, wait_alarm);
|
||||
alarm(10);
|
||||
int fd;
|
||||
|
@ -161,20 +233,16 @@ void session(char *uuid, char *lang, char *imageHash)
|
|||
die("nanosleep failed");
|
||||
}
|
||||
signal(SIGALRM, SIG_IGN);
|
||||
if (unlink(fifo) < 0)
|
||||
die("unlink failed");
|
||||
if (rmdir(tmpdir) < 0)
|
||||
die("rmdir failed");
|
||||
pid = fork();
|
||||
if (pid < 0)
|
||||
die("fork failed");
|
||||
else if (pid == 0) {
|
||||
struct timespec ts_1s; // 10ms
|
||||
struct timespec ts_1s;
|
||||
ts_1s.tv_sec = 1;
|
||||
ts_1s.tv_nsec = 0;
|
||||
while (1) {
|
||||
static const char ok[] = "ok\n";
|
||||
if (write(fd, ok, sizeof(ok) / sizeof(char)) < 0)
|
||||
static const char ok[] = "ping\n";
|
||||
if (write(fd, ok, sizeof(ok) / sizeof(char)) != sizeof(ok) / sizeof(char))
|
||||
die("write failed");
|
||||
int rv = nanosleep(&ts_1s, NULL);
|
||||
if (rv != 0 && errno != EINTR)
|
||||
|
@ -190,29 +258,89 @@ void session(char *uuid, char *lang, char *imageHash)
|
|||
|
||||
void exec(char *uuid, int argc, char **cmdline, bool pty)
|
||||
{
|
||||
char *container;
|
||||
if (asprintf(&container, "riju-session-%s", uuid) < 0)
|
||||
char *share, *ctlFIFO, *inputFIFO, *outputFIFO, *ctlCmd, *dataFIFO;
|
||||
if (asprintf(&share, "/var/run/riju/shares/%s", uuid) < 0)
|
||||
die("asprintf failed");
|
||||
char *argvPrefix[] = {
|
||||
"./system/res/docker-exec.py",
|
||||
"--user",
|
||||
"riju",
|
||||
pty ? "-it" : "-i",
|
||||
container,
|
||||
"--",
|
||||
};
|
||||
char **argv = malloc(sizeof(argvPrefix) + (argc + 1) * sizeof(char *));
|
||||
if (argv == NULL)
|
||||
die("malloc failed");
|
||||
memcpy(argv, argvPrefix, sizeof(argvPrefix));
|
||||
memcpy((void *)argv + sizeof(argvPrefix), cmdline, argc * sizeof(char *));
|
||||
argv[sizeof(argvPrefix) + argc * sizeof(char *)] = NULL;
|
||||
execvp(argv[0], argv);
|
||||
die("execvp failed");
|
||||
if (asprintf(&ctlFIFO, "%s/control", share) < 0)
|
||||
die("asprintf failed");
|
||||
char *procUUID = getUUID();
|
||||
if (asprintf(&inputFIFO, "%s/cmd-%s-input", share, procUUID) < 0)
|
||||
die("asprintf failed");
|
||||
if (asprintf(&outputFIFO, "%s/cmd-%s-output", share, procUUID) < 0)
|
||||
die("asprintf failed");
|
||||
int fd = open(ctlFIFO, O_WRONLY);
|
||||
if (fd < 0)
|
||||
die("open failed");
|
||||
char *quotedArgs = quoteArgs(argc, cmdline);
|
||||
int len = asprintf(&ctlCmd, "%s %s %s\n", pty ? "pty" : "exec", procUUID,
|
||||
quotedArgs);
|
||||
if (len < 0)
|
||||
die("asprintf failed");
|
||||
int len_written;
|
||||
while ((len_written = write(fd, ctlCmd, len)) > 0) {
|
||||
ctlCmd += len_written;
|
||||
len -= len_written;
|
||||
}
|
||||
if (len_written < 0)
|
||||
die("write failed");
|
||||
close(fd);
|
||||
struct timespec ts_10ms;
|
||||
ts_10ms.tv_sec = 0;
|
||||
ts_10ms.tv_nsec = 1000 * 1000 * 10;
|
||||
int mode;
|
||||
pid_t pid = fork();
|
||||
if (pid < 0)
|
||||
die("fork failed");
|
||||
else if (pid == 0) {
|
||||
dataFIFO = inputFIFO;
|
||||
timeout_msg = "sentinel did not set up input FIFO within 1 second";
|
||||
mode = O_WRONLY;
|
||||
} else {
|
||||
dataFIFO = outputFIFO;
|
||||
timeout_msg = "sentinel did not set up output FIFO within 1 second";
|
||||
mode = O_RDONLY;
|
||||
}
|
||||
signal(SIGALRM, wait_alarm);
|
||||
alarm(1);
|
||||
while (1) {
|
||||
fd = open(dataFIFO, mode);
|
||||
if (fd >= 0)
|
||||
break;
|
||||
if (errno != ENOENT)
|
||||
die("open failed");
|
||||
int rv = nanosleep(&ts_10ms, NULL);
|
||||
if (rv != 0 && errno != EINTR)
|
||||
die("nanosleep failed");
|
||||
}
|
||||
signal(SIGALRM, SIG_IGN);
|
||||
char buf[1024];
|
||||
if (pid == 0) {
|
||||
while ((len = read(STDIN_FILENO, buf, 1024)) > 0) {
|
||||
char *ptr = buf;
|
||||
while (len > 0) {
|
||||
len_written = write(fd, ptr, len);
|
||||
if (len_written < 0)
|
||||
die("write failed");
|
||||
len -= len_written;
|
||||
ptr += len_written;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
while ((len = read(fd, buf, 1024)) > 0) {
|
||||
fwrite(buf, 1, len, stdout);
|
||||
if (ferror(stdout))
|
||||
die("fwrite failed");
|
||||
if (feof(stdout))
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (len < 0)
|
||||
die("read failed");
|
||||
}
|
||||
|
||||
int main(int argc, char **argv)
|
||||
{
|
||||
init();
|
||||
if (seteuid(0) != 0)
|
||||
die("seteuid failed");
|
||||
if (argc < 2)
|
||||
|
|
Loading…
Reference in New Issue