X-Git-Url: http://mj.ucw.cz/gitweb/?a=blobdiff_plain;f=isolate%2Fisolate.c;h=c383664619d0698336702e3e2b755b58f97dfed0;hb=4af13f1bd0c9c39b756071754b513a4797c88f46;hp=fff31f4bb4235bb142d0957b2b5a5260a624b7fa;hpb=543b6e4dce5b96c825be448f83667f8c6500b94f;p=moe.git diff --git a/isolate/isolate.c b/isolate/isolate.c index fff31f4..c383664 100644 --- a/isolate/isolate.c +++ b/isolate/isolate.c @@ -21,21 +21,21 @@ #include #include #include +#include +#include #include #include #include #include #include #include +#include +#include #define NONRET __attribute__((noreturn)) #define UNUSED __attribute__((unused)) #define ARRAY_SIZE(a) (int)(sizeof(a)/sizeof(a[0])) -#define BOX_DIR CONFIG_ISOLATE_BOX_DIR -#define BOX_UID CONFIG_ISOLATE_BOX_UID -#define BOX_GID CONFIG_ISOLATE_BOX_GID - static int timeout; /* milliseconds */ static int wall_timeout; static int extra_timeout; @@ -43,18 +43,24 @@ static int pass_environ; static int verbose; static int memory_limit; static int stack_limit; +static int block_quota; +static int inode_quota; static int max_processes = 1; static char *redir_stdin, *redir_stdout, *redir_stderr; static int cg_enable; static int cg_memory_limit; static int cg_timing; -static char *cg_root = "/sys/fs/cgroup"; +static int box_id; +static char box_dir[1024]; +static pid_t box_pid; + +static uid_t box_uid; +static gid_t box_gid; static uid_t orig_uid; static gid_t orig_gid; -static pid_t box_pid; static int partial_line; static char cleanup_cmd[256]; @@ -70,7 +76,7 @@ static int read_errors_from_fd; static void die(char *msg, ...) NONRET; static void cg_stats(void); static int get_wall_time_ms(void); -static int get_run_time_ms(void); +static int get_run_time_ms(struct rusage *rus); /*** Meta-files ***/ @@ -111,7 +117,7 @@ meta_printf(const char *fmt, ...) static void final_stats(struct rusage *rus) { - total_ms = get_run_time_ms(); + total_ms = get_run_time_ms(rus); wall_ms = get_wall_time_ms(); meta_printf("time:%d.%03d\n", total_ms/1000, total_ms%1000); @@ -232,6 +238,8 @@ msg(char *msg, ...) va_end(args); } +/*** Utility functions ***/ + static void * xmalloc(size_t size) { @@ -241,6 +249,21 @@ xmalloc(size_t size) return p; } +static char * +xstrdup(char *str) +{ + char *p = strdup(str); + if (!p) + die("Out of memory"); + return p; +} + +static int dir_exists(char *path) +{ + struct stat st; + return (stat(path, &st) >= 0 && S_ISDIR(st.st_mode)); +} + /*** Environment rules ***/ struct env_rule { @@ -376,6 +399,184 @@ setup_environment(void) return env; } +/*** Directory rules ***/ + +struct dir_rule { + char *inside; // A relative path + char *outside; // This can be an absolute path or a relative path starting with "./" + unsigned int flags; // DIR_FLAG_xxx + struct dir_rule *next; +}; + +enum dir_rule_flags { + DIR_FLAG_RW = 1, + DIR_FLAG_NOEXEC = 2, + DIR_FLAG_FS = 4, + DIR_FLAG_MAYBE = 8, + DIR_FLAG_DEV = 16, +}; + +static const char * const dir_flag_names[] = { "rw", "noexec", "fs", "maybe", "dev" }; + +static struct dir_rule *first_dir_rule; +static struct dir_rule **last_dir_rule = &first_dir_rule; + +static int add_dir_rule(char *in, char *out, unsigned int flags) +{ + // Make sure that "in" is relative + while (in[0] == '/') + in++; + if (!*in) + return 0; + + // Check "out" + if (flags & DIR_FLAG_FS) + { + if (!out || out[0] == '/') + return 0; + } + else + { + if (out && out[0] != '/' && strncmp(out, "./", 2)) + return 0; + } + + // Override an existing rule + struct dir_rule *r; + for (r = first_dir_rule; r; r = r->next) + if (!strcmp(r->inside, in)) + break; + + // Add a new rule + if (!r) + { + r = xmalloc(sizeof(*r)); + r->inside = in; + *last_dir_rule = r; + last_dir_rule = &r->next; + r->next = NULL; + } + r->outside = out; + r->flags = flags; + return 1; +} + +static unsigned int parse_dir_option(char *opt) +{ + for (unsigned int i = 0; i < ARRAY_SIZE(dir_flag_names); i++) + if (!strcmp(opt, dir_flag_names[i])) + return 1U << i; + die("Unknown directory option %s", opt); +} + +static int set_dir_action(char *arg) +{ + arg = xstrdup(arg); + + char *colon = strchr(arg, ':'); + unsigned int flags = 0; + while (colon) + { + *colon++ = 0; + char *next = strchr(colon, ':'); + if (next) + *next = 0; + flags |= parse_dir_option(colon); + colon = next; + } + + char *eq = strchr(arg, '='); + if (eq) + { + *eq++ = 0; + return add_dir_rule(arg, (*eq ? eq : NULL), flags); + } + else + { + char *out = xmalloc(1 + strlen(arg) + 1); + sprintf(out, "/%s", arg); + return add_dir_rule(arg, out, flags); + } +} + +static void init_dir_rules(void) +{ + set_dir_action("box=./box:rw"); + set_dir_action("bin"); + set_dir_action("dev:dev"); + set_dir_action("lib"); + set_dir_action("lib64:maybe"); + set_dir_action("proc=proc:fs"); + set_dir_action("usr"); +} + +static void make_dir(char *path) +{ + char *sep = (path[0] == '/' ? path+1 : path); + + for (;;) + { + sep = strchr(sep, '/'); + if (sep) + *sep = 0; + + if (!dir_exists(path) && mkdir(path, 0777) < 0) + die("Cannot create directory %s: %m\n", path); + + if (!sep) + return; + *sep++ = '/'; + } +} + +static void apply_dir_rules(void) +{ + for (struct dir_rule *r = first_dir_rule; r; r=r->next) + { + char *in = r->inside; + char *out = r->outside; + if (!out) + { + msg("Not binding anything on %s\n", r->inside); + continue; + } + + if ((r->flags & DIR_FLAG_MAYBE) && !dir_exists(out)) + { + msg("Not binding %s on %s (does not exist)\n", out, r->inside); + continue; + } + + char root_in[1024]; + snprintf(root_in, sizeof(root_in), "root/%s", in); + make_dir(root_in); + + unsigned long mount_flags = 0; + if (!(r->flags & DIR_FLAG_RW)) + mount_flags |= MS_RDONLY; + if (r->flags & DIR_FLAG_NOEXEC) + mount_flags |= MS_NOEXEC; + if (!(r->flags & DIR_FLAG_DEV)) + mount_flags |= MS_NODEV; + + if (r->flags & DIR_FLAG_FS) + { + msg("Mounting %s on %s (flags %lx)\n", out, in, mount_flags); + if (mount("none", root_in, out, mount_flags, "") < 0) + die("Cannot mount %s on %s: %m", out, in); + } + else + { + mount_flags |= MS_BIND | MS_NOSUID; + msg("Binding %s on %s (flags %lx)\n", out, in, mount_flags); + // Most mount flags need remount to work + if (mount(out, root_in, "none", mount_flags, "") < 0 || + mount(out, root_in, "none", MS_REMOUNT | mount_flags, "") < 0) + die("Cannot mount %s on %s: %m", out, in); + } + } +} + /*** Control groups ***/ static char cg_path[256]; @@ -456,11 +657,11 @@ cg_init(void) if (!cg_enable) return; - struct stat st; - if (stat(cg_root, &st) < 0 || !S_ISDIR(st.st_mode)) + char *cg_root = CONFIG_ISOLATE_CGROUP_ROOT; + if (!dir_exists(cg_root)) die("Control group filesystem at %s not mounted", cg_root); - snprintf(cg_path, sizeof(cg_path), "%s/box-%d", cg_root, BOX_UID); + snprintf(cg_path, sizeof(cg_path), "%s/box-%d", cg_root, box_id); msg("Using control group %s\n", cg_path); } @@ -564,6 +765,86 @@ cg_remove(void) die("Cannot remove control group %s: %m", cg_path); } +/*** Disk quotas ***/ + +static int +path_begins_with(char *path, char *with) +{ + while (*with) + if (*path++ != *with++) + return 0; + return (!*with || *with == '/'); +} + +static char * +find_device(char *path) +{ + FILE *f = setmntent("/proc/mounts", "r"); + if (!f) + die("Cannot open /proc/mounts: %m"); + + struct mntent *me; + int best_len = 0; + char *best_dev = NULL; + while (me = getmntent(f)) + { + if (!path_begins_with(me->mnt_fsname, "/dev")) + continue; + if (path_begins_with(path, me->mnt_dir)) + { + int len = strlen(me->mnt_dir); + if (len > best_len) + { + best_len = len; + free(best_dev); + best_dev = xstrdup(me->mnt_fsname); + } + } + } + endmntent(f); + return best_dev; +} + +static void +set_quota(void) +{ + if (!block_quota) + return; + + char cwd[PATH_MAX]; + if (!getcwd(cwd, sizeof(cwd))) + die("getcwd: %m"); + + char *dev = find_device(cwd); + if (!dev) + die("Cannot identify filesystem which contains %s", cwd); + msg("Quota: Mapped path %s to a filesystem on %s\n", cwd, dev); + + // Sanity check + struct stat dev_st, cwd_st; + if (stat(dev, &dev_st) < 0) + die("Cannot identify block device %s: %m", dev); + if (!S_ISBLK(dev_st.st_mode)) + die("Expected that %s is a block device", dev); + if (stat(".", &cwd_st) < 0) + die("Cannot stat cwd: %m"); + if (cwd_st.st_dev != dev_st.st_rdev) + die("Identified %s as a filesystem on %s, but it is obviously false", cwd, dev); + + struct dqblk dq = { + .dqb_bhardlimit = block_quota, + .dqb_bsoftlimit = block_quota, + .dqb_ihardlimit = inode_quota, + .dqb_isoftlimit = inode_quota, + .dqb_valid = QIF_LIMITS, + }; + if (quotactl(QCMD(Q_SETQUOTA, USRQUOTA), dev, box_uid, (caddr_t) &dq) < 0) + die("Cannot set disk quota: %m"); + msg("Quota: Set block quota %d and inode quota %d\n", block_quota, inode_quota); + + free(dev); +} + /*** The keeper process ***/ static void @@ -613,11 +894,18 @@ get_wall_time_ms(void) } static int -get_run_time_ms(void) +get_run_time_ms(struct rusage *rus) { if (cg_timing) return cg_get_run_time_ms(); + if (rus) + { + struct timeval total; + timeradd(&rus->ru_utime, &rus->ru_stime, &total); + return total.tv_sec*1000 + total.tv_usec/1000; + } + char buf[PROC_BUF_SIZE], *x; int utime, stime; static int proc_stat_fd; @@ -653,7 +941,7 @@ check_timeout(void) } if (timeout) { - int ms = get_run_time_ms(); + int ms = get_run_time_ms(NULL); if (verbose > 1) fprintf(stderr, "[time check: %d msec]\n", ms); if (ms > timeout && ms > extra_timeout) @@ -760,23 +1048,7 @@ setup_root(void) if (mount("none", "root", "tmpfs", 0, "mode=755") < 0) die("Cannot mount root ramdisk: %m"); - static const char * const dirs[] = { "box", "/bin", "/lib", "/usr", "/dev" }; - for (int i=0; i < ARRAY_SIZE(dirs); i++) - { - const char *d = dirs[i]; - char buf[1024]; - snprintf(buf, sizeof(buf), "root/%s", (d[0] == '/' ? d+1 : d)); - msg("Binding %s on %s\n", d, buf); - if (mkdir(buf, 0755) < 0) - die("mkdir(%s): %m", buf); - if (mount(d, buf, "none", MS_BIND | MS_NOSUID | MS_NODEV, "") < 0) - die("Cannot bind %s on %s: %m", d, buf); - } - - if (mkdir("root/proc", 0755) < 0) - die("Cannot create proc: %m"); - if (mount("none", "root/proc", "proc", 0, "") < 0) - die("Cannot mount proc: %m"); + apply_dir_rules(); if (chroot("root") < 0) die("Chroot failed: %m"); @@ -788,11 +1060,11 @@ setup_root(void) static void setup_credentials(void) { - if (setresgid(BOX_GID, BOX_GID, BOX_GID) < 0) + if (setresgid(box_gid, box_gid, box_gid) < 0) die("setresgid: %m"); if (setgroups(0, NULL) < 0) die("setgroups: %m"); - if (setresuid(BOX_UID, BOX_UID, BOX_UID) < 0) + if (setresuid(box_uid, box_uid, box_uid) < 0) die("setresuid: %m"); setpgrp(); } @@ -866,6 +1138,20 @@ box_inside(void *arg) die("execve(\"%s\"): %m", args[0]); } +static void +box_init(void) +{ + if (box_id < 0 || box_id >= CONFIG_ISOLATE_NUM_BOXES) + die("Sandbox ID out of range (allowed: 0-%d)", CONFIG_ISOLATE_NUM_BOXES-1); + box_uid = CONFIG_ISOLATE_FIRST_UID + box_id; + box_gid = CONFIG_ISOLATE_FIRST_GID + box_id; + + snprintf(box_dir, sizeof(box_dir), "%s/%d", CONFIG_ISOLATE_BOX_DIR, box_id); + make_dir(box_dir); + if (chdir(box_dir) < 0) + die("chdir(%s): %m", box_dir); +} + /*** Commands ***/ static void @@ -879,29 +1165,32 @@ init(void) die("Cannot chown box: %m"); cg_prepare(); + set_quota(); + + puts(box_dir); } static void cleanup(void) { - struct stat st; - if (stat("box", &st) < 0 || !S_ISDIR(st.st_mode)) + if (!dir_exists("box")) die("Box directory not found, there isn't anything to clean up"); msg("Deleting sandbox directory\n"); - xsystem("rm -rf box"); + xsystem("rm -rf *"); + if (rmdir(box_dir) < 0) + die("Cannot remove %s: %m", box_dir); cg_remove(); } static void run(char **argv) { - struct stat st; - if (stat("box", &st) < 0 || !S_ISDIR(st.st_mode)) + if (!dir_exists("box")) die("Box directory not found, did you run `isolate --init'?"); char cmd[256]; - snprintf(cmd, sizeof(cmd), "chown -R %d.%d box", BOX_UID, BOX_GID); + snprintf(cmd, sizeof(cmd), "chown -R %d.%d box", box_uid, box_gid); xsystem(cmd); snprintf(cleanup_cmd, sizeof(cleanup_cmd), "chown -R %d.%d box", orig_uid, orig_gid); @@ -930,8 +1219,12 @@ show_version(void) printf("Process isolator 1.0\n"); printf("(c) 2012 Martin Mares and Bernard Blackham\n"); printf("\nCompile-time configuration:\n"); - printf("Sandbox directory: %s\n", BOX_DIR); - printf("Sandbox credentials: uid=%u gid=%u\n", BOX_UID, BOX_GID); + printf("Sandbox directory: %s\n", CONFIG_ISOLATE_BOX_DIR); + printf("Sandbox credentials: uid=%u-%u gid=%u-%u\n", + CONFIG_ISOLATE_FIRST_UID, + CONFIG_ISOLATE_FIRST_UID + CONFIG_ISOLATE_NUM_BOXES - 1, + CONFIG_ISOLATE_FIRST_GID, + CONFIG_ISOLATE_FIRST_GID + CONFIG_ISOLATE_NUM_BOXES - 1); } /*** Options ***/ @@ -944,16 +1237,27 @@ usage(void) Usage: isolate [] \n\ \n\ Options:\n\ +-b, --box-id=\tWhen multiple sandboxes are used in parallel, each must get a unique ID\n\ -c, --cg[=]\tPut process in a control group (optionally a sub-group of )\n\ --cg-mem=\tLimit memory usage of the control group to KB\n\ --cg-timing\t\tTime limits affects total run time of the control group\n\ --E, --env=\tInherit the environment variable from the parent process\n\ +-d, --dir=\t\tMake a directory visible inside the sandbox\n\ + --dir==\tMake a directory outside visible as inside\n\ + --dir==\t\tDelete a previously defined directory rule (even a default one)\n\ + --dir=...:\tSpecify options for a rule:\n\ +\t\t\t\tdev\tAllow access to special files\n\ +\t\t\t\tfs\tMount a filesystem (e.g., --dir=/proc:proc:fs)\n\ +\t\t\t\tmaybe\tSkip the rule if does not exist\n\ +\t\t\t\tnoexec\tDo not allow execution of binaries\n\ +\t\t\t\trw\tAllow read-write access\n\ +-E, --env=\t\tInherit the environment variable from the parent process\n\ -E, --env==\tSet the environment variable to ; unset it if is empty\n\ -x, --extra-time=