X-Git-Url: http://mj.ucw.cz/gitweb/?a=blobdiff_plain;f=isolate%2Fisolate.c;h=ea14aa9998ba4f6e79dd6a838104ad92b558af7b;hb=28c3aa2bd73958557fc5e5441bcb7a2a2ca1c375;hp=72c2149e5afb03202044e84480a059d00e5ee014;hpb=d2a9e0bc5964ef37cc2035cae8bcab26ff279d5c;p=moe.git diff --git a/isolate/isolate.c b/isolate/isolate.c index 72c2149..ea14aa9 100644 --- a/isolate/isolate.c +++ b/isolate/isolate.c @@ -1,14 +1,14 @@ /* - * A Process Isolator based in Linux Containers + * A Process Isolator based on Linux Containers * * (c) 2012 Martin Mares + * (c) 2012 Bernard Blackham */ #define _GNU_SOURCE #include "autoconf.h" -// FIXME: prune #include #include #include @@ -22,11 +22,8 @@ #include #include #include -#include #include -#include #include -#include #include #include #include @@ -35,10 +32,9 @@ #define UNUSED __attribute__((unused)) #define ARRAY_SIZE(a) (int)(sizeof(a)/sizeof(a[0])) -// FIXME: Make configurable, probably in compile time -#define BOX_DIR "/tmp/box" -#define BOX_UID 60000 -#define BOX_GID 60000 +#define BOX_DIR CONFIG_ISOLATE_BOX_DIR +#define BOX_UID CONFIG_ISOLATE_BOX_UID +#define BOX_GID CONFIG_ISOLATE_BOX_GID static int timeout; /* milliseconds */ static int wall_timeout; @@ -59,18 +55,22 @@ static uid_t orig_uid; static gid_t orig_gid; static pid_t box_pid; -static volatile sig_atomic_t timer_tick; -static struct timeval start_time; -static int ticks_per_sec; static int partial_line; static char cleanup_cmd[256]; +static struct timeval start_time; +static int ticks_per_sec; static int total_ms, wall_ms; +static volatile sig_atomic_t timer_tick; + +static int error_pipes[2]; +static int write_errors_to_fd; +static int read_errors_from_fd; static void die(char *msg, ...) NONRET; static void cg_stats(void); static int get_wall_time_ms(void); -static int get_run_time_ms(void); +static int get_run_time_ms(struct rusage *rus); /*** Meta-files ***/ @@ -111,7 +111,7 @@ meta_printf(const char *fmt, ...) 
static void final_stats(struct rusage *rus) { - total_ms = get_run_time_ms(); + total_ms = get_run_time_ms(rus); wall_ms = get_wall_time_ms(); meta_printf("time:%d.%03d\n", total_ms/1000, total_ms%1000); @@ -176,9 +176,19 @@ die(char *msg, ...) { va_list args; va_start(args, msg); - flush_line(); char buf[1024]; - vsnprintf(buf, sizeof(buf), msg, args); + int n = vsnprintf(buf, sizeof(buf), msg, args); + + if (write_errors_to_fd) + { + // We are inside the box, have to use error pipe for error reporting. + // We hope that the whole error message fits in PIPE_BUF bytes. + write(write_errors_to_fd, buf, (n < 0) ? 0 : (n < (int) sizeof(buf)) ? n : (int) sizeof(buf) - 1); /* vsnprintf() returns the untruncated length (or a negative value), so clamp it to what buf really holds */ + exit(2); + } + + // Otherwise, we are in the box keeper process, so we report errors normally + flush_line(); meta_printf("status:XX\nmessage:%s\n", buf); fputs(buf, stderr); fputc('\n', stderr); @@ -222,6 +232,8 @@ msg(char *msg, ...) va_end(args); } +/*** Utility functions ***/ + static void * xmalloc(size_t size) { @@ -231,6 +243,21 @@ xmalloc(size_t size) return p; } +static char * +xstrdup(char *str) +{ + char *p = strdup(str); + if (!p) + die("Out of memory"); + return p; +} + +static int dir_exists(char *path) +{ + struct stat st; + return (stat(path, &st) >= 0 && S_ISDIR(st.st_mode)); +} + /*** Environment rules ***/ struct env_rule { @@ -366,6 +393,180 @@ setup_environment(void) return env; } +/*** Mount rules ***/ + +struct dir_rule { + char *inside; // A relative path + char *outside; // This can be an absolute path or a relative path starting with "./" + unsigned int flags; // DIR_FLAG_xxx + struct dir_rule *next; +}; + +enum dir_rule_flags { + DIR_FLAG_RW = 1, + DIR_FLAG_NOEXEC = 2, + DIR_FLAG_FS = 4, + DIR_FLAG_MAYBE = 8, +}; + +static struct dir_rule *first_dir_rule; +static struct dir_rule **last_dir_rule = &first_dir_rule; + +static int add_dir_rule(char *in, char *out, unsigned int flags) +{ + // Make sure that "in" is relative + while (in[0] == '/') + in++; + if (!*in) + return 0; + + // Check "out" + if (flags & DIR_FLAG_FS) + { + if (!out || 
out[0] == '/') + return 0; + } + else + { + if (out && out[0] != '/' && strncmp(out, "./", 2)) + return 0; + } + + // Override an existing rule + struct dir_rule *r; + for (r = first_dir_rule; r; r=r->next) + if (!strcmp(r->inside, in)) + break; + + // Add a new rule + if (!r) + { + r = xmalloc(sizeof(*r)); /* assign the outer r; redeclaring it here would shadow it and leave the outer r NULL for the stores below */ + r->inside = in; + *last_dir_rule = r; + last_dir_rule = &r->next; + r->next = NULL; + } + r->outside = out; + r->flags = flags; + return 1; +} + +static unsigned int parse_dir_option(char *opt) +{ + if (!strcmp(opt, "rw")) + return DIR_FLAG_RW; + if (!strcmp(opt, "noexec")) + return DIR_FLAG_NOEXEC; + if (!strcmp(opt, "fs")) + return DIR_FLAG_FS; + if (!strcmp(opt, "maybe")) + return DIR_FLAG_MAYBE; + die("Unknown directory option %s", opt); +} + +static int set_dir_action(char *arg) +{ + arg = xstrdup(arg); + + char *colon = strchr(arg, ':'); + unsigned int flags = 0; + while (colon) + { + *colon++ = 0; /* terminate the text before this option, so the option list is cut off the path before the '=' split below */ + char *next = strchr(colon, ':'); + if (next) + *next = 0; + flags |= parse_dir_option(colon); + colon = next; + } + + char *eq = strchr(arg, '='); + if (eq) + { + *eq++ = 0; + return add_dir_rule(arg, (*eq ? 
eq : NULL), flags); + } + else + { + char *out = xmalloc(1 + strlen(arg) + 1); + sprintf(out, "/%s", arg); + return add_dir_rule(arg, out, flags); + } +} + +static void init_dir_rules(void) +{ + set_dir_action("box=./box:rw"); + set_dir_action("bin"); + set_dir_action("dev"); + set_dir_action("lib"); + set_dir_action("lib64:maybe"); + set_dir_action("proc=proc:fs"); + set_dir_action("usr"); +} + +static void make_dir(char *path) +{ + char *sep = path; + for (;;) + { + sep = strchr(sep, '/'); + if (sep) + *sep = 0; + + if (!dir_exists(path) && mkdir(path, 0777) < 0) + die("Cannot create directory %s: %m", path); /* no trailing newline: die() appends one itself */ + + if (!sep) + return; + *sep++ = '/'; + } +} + +static void apply_dir_rules(void) +{ + for (struct dir_rule *r = first_dir_rule; r; r=r->next) + { + char *in = r->inside; + char *out = r->outside; + if (!out) + { + msg("Not binding anything on %s\n", r->inside); + continue; + } + + if ((r->flags & DIR_FLAG_MAYBE) && !dir_exists(out)) + { + msg("Not binding %s on %s (does not exist)\n", out, r->inside); + continue; + } + + char root_in[1024]; + snprintf(root_in, sizeof(root_in), "root/%s", in); + make_dir(root_in); + + unsigned long mount_flags = 0; + if (!(r->flags & DIR_FLAG_RW)) + mount_flags |= MS_RDONLY; /* NOTE(review): per mount(2) the initial MS_BIND mount ignores the other mountflags; a read-only bind presumably needs a second MS_BIND | MS_REMOUNT pass - confirm */ + if (r->flags & DIR_FLAG_NOEXEC) + mount_flags |= MS_NOEXEC; + + if (r->flags & DIR_FLAG_FS) + { + msg("Mounting %s on %s\n", out, in); + if (mount("none", root_in, out, mount_flags, "") < 0) + die("Cannot mount %s on %s: %m", out, in); + } + else + { + msg("Binding %s on %s\n", out, in); + if (mount(out, root_in, "none", MS_BIND | MS_NOSUID | MS_NODEV | mount_flags, "") < 0) + die("Cannot bind %s on %s: %m", out, in); + } + } +} + /*** Control groups ***/ static char cg_path[256]; @@ -446,8 +647,7 @@ cg_init(void) if (!cg_enable) return; - struct stat st; - if (stat(cg_root, &st) < 0 || !S_ISDIR(st.st_mode)) + if (!dir_exists(cg_root)) die("Control group filesystem at %s not mounted", cg_root); snprintf(cg_path, sizeof(cg_path), "%s/box-%d", 
cg_root, BOX_UID); @@ -550,9 +750,6 @@ cg_remove(void) if (buf[0]) die("Some tasks left in control group %s, failed to remove it", cg_path); - // FIXME: Is this needed? - // cg_write("memory.force_empty", "0\n"); - if (rmdir(cg_path) < 0) die("Cannot remove control group %s: %m", cg_path); } @@ -606,11 +803,18 @@ get_wall_time_ms(void) } static int -get_run_time_ms(void) +get_run_time_ms(struct rusage *rus) { if (cg_timing) return cg_get_run_time_ms(); + if (rus) + { + struct timeval total; + timeradd(&rus->ru_utime, &rus->ru_stime, &total); + return total.tv_sec*1000 + total.tv_usec/1000; + } + char buf[PROC_BUF_SIZE], *x; int utime, stime; static int proc_stat_fd; @@ -646,7 +850,7 @@ check_timeout(void) } if (timeout) { - int ms = get_run_time_ms(); + int ms = get_run_time_ms(NULL); if (verbose > 1) fprintf(stderr, "[time check: %d msec]\n", ms); if (ms > timeout && ms > extra_timeout) @@ -657,8 +861,10 @@ check_timeout(void) static void box_keeper(void) { - struct sigaction sa; + read_errors_from_fd = error_pipes[0]; + close(error_pipes[1]); + struct sigaction sa; bzero(&sa, sizeof(sa)); sa.sa_handler = signal_int; sigaction(SIGINT, &sa, NULL); @@ -694,13 +900,22 @@ box_keeper(void) } if (p != box_pid) die("wait4: unknown pid %d exited!", p); + box_pid = 0; + + // Check error pipe if there is an internal error passed from inside the box + char interr[1024]; + int n = read(read_errors_from_fd, interr, sizeof(interr) - 1); + if (n > 0) + { + interr[n] = 0; + die("%s", interr); + } + if (WIFEXITED(stat)) { - box_pid = 0; final_stats(&rus); if (WEXITSTATUS(stat)) { - // FIXME: Recognize internal errors during setup meta_printf("exitcode:%d\n", WEXITSTATUS(stat)); err("RE: Exited with error status %d", WEXITSTATUS(stat)); } @@ -714,16 +929,14 @@ box_keeper(void) wall_ms/1000, wall_ms%1000); box_exit(0); } - if (WIFSIGNALED(stat)) + else if (WIFSIGNALED(stat)) { - box_pid = 0; meta_printf("exitsig:%d\n", WTERMSIG(stat)); final_stats(&rus); err("SG: Caught fatal signal 
%d", WTERMSIG(stat)); } - if (WIFSTOPPED(stat)) + else if (WIFSTOPPED(stat)) { - box_pid = 0; meta_printf("exitsig:%d\n", WSTOPSIG(stat)); final_stats(&rus); err("SG: Stopped by signal %d", WSTOPSIG(stat)); @@ -744,27 +957,7 @@ setup_root(void) if (mount("none", "root", "tmpfs", 0, "mode=755") < 0) die("Cannot mount root ramdisk: %m"); - // FIXME: Make the list of bind-mounts configurable - // FIXME: Virtual /dev? - // FIXME: Read-only mounts? - - static const char * const dirs[] = { "box", "/bin", "/lib", "/usr", "/dev" }; - for (int i=0; i < ARRAY_SIZE(dirs); i++) - { - const char *d = dirs[i]; - char buf[1024]; // FIXME - sprintf(buf, "root/%s", (d[0] == '/' ? d+1 : d)); - msg("Binding %s on %s\n", d, buf); - if (mkdir(buf, 0755) < 0) - die("mkdir(%s): %m", buf); - if (mount(d, buf, "none", MS_BIND | MS_NOSUID | MS_NODEV, "") < 0) - die("Cannot bind %s on %s: %m", d, buf); - } - - if (mkdir("root/proc", 0755) < 0) - die("Cannot create proc: %m"); - if (mount("none", "root/proc", "proc", 0, "") < 0) - die("Cannot mount proc: %m"); + apply_dir_rules(); if (chroot("root") < 0) die("Chroot failed: %m"); @@ -773,30 +966,21 @@ setup_root(void) die("Cannot change current directory: %m"); } -static int -box_inside(void *arg) +static void +setup_credentials(void) { - char **argv = arg; - int argc = 0; - while (argv[argc]) - argc++; - - struct rlimit rl; - char *args[argc+1]; - - memcpy(args, argv, argc * sizeof(char *)); - args[argc] = NULL; - - cg_enter(); - setup_root(); - if (setresgid(BOX_GID, BOX_GID, BOX_GID) < 0) die("setresgid: %m"); if (setgroups(0, NULL) < 0) die("setgroups: %m"); if (setresuid(BOX_UID, BOX_UID, BOX_UID) < 0) die("setresuid: %m"); + setpgrp(); +} +static void +setup_fds(void) +{ if (redir_stdin) { close(0); @@ -817,39 +1001,54 @@ box_inside(void *arg) } else dup2(1, 2); - setpgrp(); +} - if (memory_limit) - { - rl.rlim_cur = rl.rlim_max = memory_limit * 1024; - if (setrlimit(RLIMIT_AS, &rl) < 0) - die("setrlimit(RLIMIT_AS): %m"); - } +static 
void +setup_rlim(const char *res_name, int res, rlim_t limit) +{ + struct rlimit rl = { .rlim_cur = limit, .rlim_max = limit }; + if (setrlimit(res, &rl) < 0) + die("setrlimit(%s, %jd): %m", res_name, (intmax_t) limit); /* keep the errno text that the replaced per-limit die() calls reported */ +} - rl.rlim_cur = rl.rlim_max = (stack_limit ? (rlim_t)stack_limit * 1024 : RLIM_INFINITY); - if (setrlimit(RLIMIT_STACK, &rl) < 0) - die("setrlimit(RLIMIT_STACK): %m"); +static void +setup_rlimits(void) +{ +#define RLIM(res, val) setup_rlim("RLIMIT_" #res, RLIMIT_##res, val) + + if (memory_limit) + RLIM(AS, (rlim_t)memory_limit * 1024); /* widen before multiplying, as the STACK limit below does, so large limits cannot overflow a narrower type */ - rl.rlim_cur = rl.rlim_max = 64; - if (setrlimit(RLIMIT_NOFILE, &rl) < 0) - die("setrlimit(RLIMIT_NOFILE): %m"); + RLIM(STACK, (stack_limit ? (rlim_t)stack_limit * 1024 : RLIM_INFINITY)); + RLIM(NOFILE, 64); + RLIM(MEMLOCK, 0); if (max_processes) - { - rl.rlim_cur = rl.rlim_max = max_processes; - if (setrlimit(RLIMIT_NPROC, &rl) < 0) - die("setrlimit(RLIMIT_NPROC): %m"); - } + RLIM(NPROC, max_processes); - rl.rlim_cur = rl.rlim_max = 0; - if (setrlimit(RLIMIT_MEMLOCK, &rl) < 0) - die("setrlimit(RLIMIT_MEMLOCK): %m"); +#undef RLIM +} +static int +box_inside(void *arg) +{ + char **args = arg; + write_errors_to_fd = error_pipes[1]; + close(error_pipes[0]); + + cg_enter(); + setup_root(); + setup_credentials(); + setup_fds(); + setup_rlimits(); char **env = setup_environment(); + execve(args[0], args, env); die("execve(\"%s\"): %m", args[0]); } +/*** Commands ***/ + static void init(void) { @@ -866,8 +1065,7 @@ init(void) static void cleanup(void) { - struct stat st; - if (stat("box", &st) < 0 || !S_ISDIR(st.st_mode)) + if (!dir_exists("box")) die("Box directory not found, there isn't anything to clean up"); msg("Deleting sandbox directory\n"); @@ -878,8 +1076,7 @@ cleanup(void) static void run(char **argv) { - struct stat st; - if (stat("box", &st) < 0 || !S_ISDIR(st.st_mode)) + if (!dir_exists("box")) die("Box directory not found, did you run `isolate --init'?"); char cmd[256]; @@ -887,6 +1084,13 @@ run(char **argv) 
xsystem(cmd); snprintf(cleanup_cmd, sizeof(cleanup_cmd), "chown -R %d.%d box", orig_uid, orig_gid); + if (pipe(error_pipes) < 0) + die("pipe: %m"); + for (int i=0; i<2; i++) + if (fcntl(error_pipes[i], F_SETFD, fcntl(error_pipes[i], F_GETFD) | FD_CLOEXEC) < 0 || + fcntl(error_pipes[i], F_SETFL, fcntl(error_pipes[i], F_GETFL) | O_NONBLOCK) < 0) + die("fcntl on pipe: %m"); + box_pid = clone( box_inside, // Function to execute as the body of the new process argv, // Pass our stack @@ -902,13 +1106,15 @@ run(char **argv) static void show_version(void) { - // FIXME - printf("Process isolator 0.0\n"); - printf("(c) 2012 Martin Mares \n\n"); + printf("Process isolator 1.0\n"); + printf("(c) 2012 Martin Mares and Bernard Blackham\n"); + printf("\nCompile-time configuration:\n"); printf("Sandbox directory: %s\n", BOX_DIR); printf("Sandbox credentials: uid=%u gid=%u\n", BOX_UID, BOX_GID); } +/*** Options ***/ + static void usage(void) { @@ -920,7 +1126,11 @@ Options:\n\ -c, --cg[=]\tPut process in a control group (optionally a sub-group of )\n\ --cg-mem=\tLimit memory usage of the control group to KB\n\ --cg-timing\t\tTime limits affects total run time of the control group\n\ --E, --env=\tInherit the environment variable from the parent process\n\ +-d, --dir=\t\tMake a directory visible inside the sandbox\n\ + --dir==\tMake a directory outside visible as inside\n\ + --dir==\t\tDelete a previously defined directory rule (even a default one)\n\ + --dir=...:\tSpecify options for a rule: rw, noexec, fs, maybe\n\ +-E, --env=\t\tInherit the environment variable from the parent process\n\ -E, --env==\tSet the environment variable to ; unset it if is empty\n\ -x, --extra-time=