From e3e1bb8d1834d5a8472a13da6ecbc10fc454b640 Mon Sep 17 00:00:00 2001 From: Martin Mares Date: Mon, 26 Mar 2012 19:27:36 +0200 Subject: [PATCH 1/1] Isolate: Some bits shamelessly stolen from box.c --- Makefile | 1 + isolate/Makefile | 7 + isolate/isolate.c | 696 ++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 704 insertions(+) create mode 100644 isolate/Makefile create mode 100644 isolate/isolate.c diff --git a/Makefile b/Makefile index 4b4794c..9480ae5 100644 --- a/Makefile +++ b/Makefile @@ -25,6 +25,7 @@ TESTS= endif include $(s)/box/Makefile +include $(s)/isolate/Makefile include $(s)/utils/Makefile include $(s)/eval/Makefile include $(s)/judge/Makefile diff --git a/isolate/Makefile b/isolate/Makefile new file mode 100644 index 0000000..a4aeff7 --- /dev/null +++ b/isolate/Makefile @@ -0,0 +1,7 @@ +# Makefile for MO-Eval isolator +# (c) 2012 Martin Mares + +DIRS+=isolate +PROGS+=$(o)/isolate/isolate + +$(o)/isolate/isolate: $(o)/isolate/isolate.o diff --git a/isolate/isolate.c b/isolate/isolate.c new file mode 100644 index 0000000..9ad969d --- /dev/null +++ b/isolate/isolate.c @@ -0,0 +1,696 @@ +/* + * A Process Isolator based in Linux Containers + * + * (c) 2012 Martin Mares + */ + +#define _GNU_SOURCE + +#include "autoconf.h" + +// FIXME: prune +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define NONRET __attribute__((noreturn)) +#define UNUSED __attribute__((unused)) +#define ARRAY_SIZE(a) (int)(sizeof(a)/sizeof(a[0])) + +static int timeout; /* milliseconds */ +static int wall_timeout; +static int extra_timeout; +static int pass_environ; +static int verbose; +static int memory_limit; +static int stack_limit; +static char *redir_stdin, *redir_stdout, *redir_stderr; +static char *set_cwd; + +static pid_t box_pid; +static volatile int timer_tick; +static struct timeval start_time; +static int ticks_per_sec; +static int partial_line; + +static int mem_peak_kb; +static int total_ms, wall_ms; + +static void die(char *msg, ...) NONRET; +static void sample_mem_peak(void); + +/*** Meta-files ***/ + +static FILE *metafile; + +static void +meta_open(const char *name) +{ + if (!strcmp(name, "-")) + { + metafile = stdout; + return; + } + metafile = fopen(name, "w"); + if (!metafile) + die("Failed to open metafile '%s'",name); +} + +static void +meta_close(void) +{ + if (metafile && metafile != stdout) + fclose(metafile); +} + +static void __attribute__((format(printf,1,2))) +meta_printf(const char *fmt, ...) +{ + if (!metafile) + return; + + va_list args; + va_start(args, fmt); + vfprintf(metafile, fmt, args); + va_end(args); +} + +static void +final_stats(struct rusage *rus) +{ + struct timeval total, now, wall; + timeradd(&rus->ru_utime, &rus->ru_stime, &total); + total_ms = total.tv_sec*1000 + total.tv_usec/1000; + gettimeofday(&now, NULL); + timersub(&now, &start_time, &wall); + wall_ms = wall.tv_sec*1000 + wall.tv_usec/1000; + + meta_printf("time:%d.%03d\n", total_ms/1000, total_ms%1000); + meta_printf("time-wall:%d.%03d\n", wall_ms/1000, wall_ms%1000); + meta_printf("mem:%llu\n", (unsigned long long) mem_peak_kb * 1024); +} + +/*** Messages and exits ***/ + +static void NONRET +box_exit(int rc) +{ + if (box_pid > 0) + { + sample_mem_peak(); + kill(-box_pid, SIGKILL); + kill(box_pid, SIGKILL); + meta_printf("killed:1\n"); + + struct rusage rus; + int p, stat; + do + p = wait4(box_pid, &stat, 0, &rus); + while (p < 0 && errno == EINTR); + if (p < 0) + fprintf(stderr, "UGH: Lost track of the process (%m)\n"); + else + final_stats(&rus); + } + meta_close(); + exit(rc); +} + +static void +flush_line(void) +{ + if (partial_line) + fputc('\n', stderr); + partial_line = 0; +} + +/* Report an error of the sandbox itself */ +static void NONRET __attribute__((format(printf,1,2))) +die(char *msg, ...) +{ + va_list args; + va_start(args, msg); + flush_line(); + char buf[1024]; + vsnprintf(buf, sizeof(buf), msg, args); + meta_printf("status:XX\nmessage:%s\n", buf); + fputs(buf, stderr); + fputc('\n', stderr); + box_exit(2); +} + +/* Report an error of the program inside the sandbox */ +static void NONRET __attribute__((format(printf,1,2))) +err(char *msg, ...) +{ + va_list args; + va_start(args, msg); + flush_line(); + if (msg[0] && msg[1] && msg[2] == ':' && msg[3] == ' ') + { + meta_printf("status:%c%c\n", msg[0], msg[1]); + msg += 4; + } + char buf[1024]; + vsnprintf(buf, sizeof(buf), msg, args); + meta_printf("message:%s\n", buf); + fputs(buf, stderr); + fputc('\n', stderr); + box_exit(1); +} + +/* Write a message, but only if in verbose mode */ +static void __attribute__((format(printf,1,2))) +msg(char *msg, ...) +{ + va_list args; + va_start(args, msg); + if (verbose) + { + int len = strlen(msg); + if (len > 0) + partial_line = (msg[len-1] != '\n'); + vfprintf(stderr, msg, args); + fflush(stderr); + } + va_end(args); +} + +static void * +xmalloc(size_t size) +{ + void *p = malloc(size); + if (!p) + die("Out of memory"); + return p; +} + +/*** Environment rules ***/ + +struct env_rule { + char *var; // Variable to match + char *val; // ""=clear, NULL=inherit + int var_len; + struct env_rule *next; +}; + +static struct env_rule *first_env_rule; +static struct env_rule **last_env_rule = &first_env_rule; + +static struct env_rule default_env_rules[] = { + { "LIBC_FATAL_STDERR_", "1" } +}; + +static int +set_env_action(char *a0) +{ + struct env_rule *r = xmalloc(sizeof(*r) + strlen(a0) + 1); + char *a = (char *)(r+1); + strcpy(a, a0); + + char *sep = strchr(a, '='); + if (sep == a) + return 0; + r->var = a; + if (sep) + { + *sep++ = 0; + r->val = sep; + } + else + r->val = NULL; + *last_env_rule = r; + last_env_rule = &r->next; + r->next = NULL; + return 1; +} + +static int +match_env_var(char *env_entry, struct env_rule *r) +{ + if (strncmp(env_entry, r->var, r->var_len)) + return 0; + return (env_entry[r->var_len] == '='); +} + +static void +apply_env_rule(char **env, int *env_sizep, struct env_rule *r) +{ + // First remove the variable if already set + int pos = 0; + while (pos < *env_sizep && !match_env_var(env[pos], r)) + pos++; + if (pos < *env_sizep) + { + (*env_sizep)--; + env[pos] = env[*env_sizep]; + env[*env_sizep] = NULL; + } + + // What is the new value? + char *new; + if (r->val) + { + if (!r->val[0]) + return; + new = xmalloc(r->var_len + 1 + strlen(r->val) + 1); + sprintf(new, "%s=%s", r->var, r->val); + } + else + { + pos = 0; + while (environ[pos] && !match_env_var(environ[pos], r)) + pos++; + if (!(new = environ[pos])) + return; + } + + // Add it at the end of the array + env[(*env_sizep)++] = new; + env[*env_sizep] = NULL; +} + +static char ** +setup_environment(void) +{ + // Link built-in rules with user rules + for (int i=ARRAY_SIZE(default_env_rules)-1; i >= 0; i--) + { + default_env_rules[i].next = first_env_rule; + first_env_rule = &default_env_rules[i]; + } + + // Scan the original environment + char **orig_env = environ; + int orig_size = 0; + while (orig_env[orig_size]) + orig_size++; + + // For each rule, reserve one more slot and calculate length + int num_rules = 0; + for (struct env_rule *r = first_env_rule; r; r=r->next) + { + num_rules++; + r->var_len = strlen(r->var); + } + + // Create a new environment + char **env = xmalloc((orig_size + num_rules + 1) * sizeof(char *)); + int size; + if (pass_environ) + { + memcpy(env, environ, orig_size * sizeof(char *)); + size = orig_size; + } + else + size = 0; + env[size] = NULL; + + // Apply the rules one by one + for (struct env_rule *r = first_env_rule; r; r=r->next) + apply_env_rule(env, &size, r); + + // Return the new env and pass some gossip + if (verbose > 1) + { + fprintf(stderr, "Passing environment:\n"); + for (int i=0; env[i]; i++) + fprintf(stderr, "\t%s\n", env[i]); + } + return env; +} + +/*** FIXME ***/ + +static void +signal_alarm(int unused UNUSED) +{ + /* Time limit checks are synchronous, so we only schedule them there. */ + timer_tick = 1; + alarm(1); +} + +static void +signal_int(int unused UNUSED) +{ + /* Interrupts are fatal, so no synchronization requirements. */ + meta_printf("exitsig:%d\n", SIGINT); + err("SG: Interrupted"); +} + +#define PROC_BUF_SIZE 4096 +static void +read_proc_file(char *buf, char *name, int *fdp) +{ + int c; + + if (!*fdp) + { + sprintf(buf, "/proc/%d/%s", (int) box_pid, name); + *fdp = open(buf, O_RDONLY); + if (*fdp < 0) + die("open(%s): %m", buf); + } + lseek(*fdp, 0, SEEK_SET); + if ((c = read(*fdp, buf, PROC_BUF_SIZE-1)) < 0) + die("read on /proc/$pid/%s: %m", name); + if (c >= PROC_BUF_SIZE-1) + die("/proc/$pid/%s too long", name); + buf[c] = 0; +} + +static void +check_timeout(void) +{ + if (wall_timeout) + { + struct timeval now, wall; + int wall_ms; + gettimeofday(&now, NULL); + timersub(&now, &start_time, &wall); + wall_ms = wall.tv_sec*1000 + wall.tv_usec/1000; + if (wall_ms > wall_timeout) + err("TO: Time limit exceeded (wall clock)"); + if (verbose > 1) + fprintf(stderr, "[wall time check: %d msec]\n", wall_ms); + } + if (timeout) + { + char buf[PROC_BUF_SIZE], *x; + int utime, stime, ms; + static int proc_stat_fd; + read_proc_file(buf, "stat", &proc_stat_fd); + x = buf; + while (*x && *x != ' ') + x++; + while (*x == ' ') + x++; + if (*x++ != '(') + die("proc stat syntax error 1"); + while (*x && (*x != ')' || x[1] != ' ')) + x++; + while (*x == ')' || *x == ' ') + x++; + if (sscanf(x, "%*c %*d %*d %*d %*d %*d %*d %*d %*d %*d %*d %d %d", &utime, &stime) != 2) + die("proc stat syntax error 2"); + ms = (utime + stime) * 1000 / ticks_per_sec; + if (verbose > 1) + fprintf(stderr, "[time check: %d msec]\n", ms); + if (ms > timeout && ms > extra_timeout) + err("TO: Time limit exceeded"); + } +} + +static void +sample_mem_peak(void) +{ + /* + * We want to find out the peak memory usage of the process, which is + * maintained by the kernel, but unforunately it gets lost when the + * process exits (it is not reported in struct rusage). Therefore we + * have to sample it whenever we suspect that the process is about + * to exit. + */ + char buf[PROC_BUF_SIZE], *x; + static int proc_status_fd; + read_proc_file(buf, "status", &proc_status_fd); + + x = buf; + while (*x) + { + char *key = x; + while (*x && *x != ':' && *x != '\n') + x++; + if (!*x || *x == '\n') + break; + *x++ = 0; + while (*x == ' ' || *x == '\t') + x++; + + char *val = x; + while (*x && *x != '\n') + x++; + if (!*x) + break; + *x++ = 0; + + if (!strcmp(key, "VmPeak")) + { + int peak = atoi(val); + if (peak > mem_peak_kb) + mem_peak_kb = peak; + } + } + + if (verbose > 1) + msg("[mem-peak: %u KB]\n", mem_peak_kb); +} + +static void +boxkeeper(void) +{ + struct sigaction sa; + + bzero(&sa, sizeof(sa)); + sa.sa_handler = signal_int; + sigaction(SIGINT, &sa, NULL); + + gettimeofday(&start_time, NULL); + ticks_per_sec = sysconf(_SC_CLK_TCK); + if (ticks_per_sec <= 0) + die("Invalid ticks_per_sec!"); + + if (timeout || wall_timeout) + { + sa.sa_handler = signal_alarm; + sigaction(SIGALRM, &sa, NULL); + alarm(1); + } + + for(;;) + { + struct rusage rus; + int stat; + pid_t p; + if (timer_tick) + { + check_timeout(); + timer_tick = 0; + } + p = wait4(box_pid, &stat, WUNTRACED, &rus); + if (p < 0) + { + if (errno == EINTR) + continue; + die("wait4: %m"); + } + if (p != box_pid) + die("wait4: unknown pid %d exited!", p); + if (WIFEXITED(stat)) + { + box_pid = 0; + final_stats(&rus); + if (WEXITSTATUS(stat)) + { + // FIXME: Recognize internal errors during setup + meta_printf("exitcode:%d\n", WEXITSTATUS(stat)); + err("RE: Exited with error status %d", WEXITSTATUS(stat)); + } + if (timeout && total_ms > timeout) + err("TO: Time limit exceeded"); + if (wall_timeout && wall_ms > wall_timeout) + err("TO: Time limit exceeded (wall clock)"); + flush_line(); + fprintf(stderr, "OK (%d.%03d sec real, %d.%03d sec wall, %d MB)\n", + total_ms/1000, total_ms%1000, + wall_ms/1000, wall_ms%1000, + (mem_peak_kb + 1023) / 1024); + box_exit(0); + } + if (WIFSIGNALED(stat)) + { + box_pid = 0; + meta_printf("exitsig:%d\n", WTERMSIG(stat)); + final_stats(&rus); + err("SG: Caught fatal signal %d", WTERMSIG(stat)); + } + if (WIFSTOPPED(stat)) + { + box_pid = 0; + meta_printf("exitsig:%d\n", WSTOPSIG(stat)); + final_stats(&rus); + err("SG: Stopped by signal %d", WSTOPSIG(stat)); + } + else + die("wait4: unknown status %x, giving up!", stat); + } +} + +static void +box_inside(int argc, char **argv) +{ + struct rlimit rl; + char *args[argc+1]; + + memcpy(args, argv, argc * sizeof(char *)); + args[argc] = NULL; + if (set_cwd && chdir(set_cwd)) + die("chdir: %m"); + if (redir_stdin) + { + close(0); + if (open(redir_stdin, O_RDONLY) != 0) + die("open(\"%s\"): %m", redir_stdin); + } + if (redir_stdout) + { + close(1); + if (open(redir_stdout, O_WRONLY | O_CREAT | O_TRUNC, 0666) != 1) + die("open(\"%s\"): %m", redir_stdout); + } + if (redir_stderr) + { + close(2); + if (open(redir_stderr, O_WRONLY | O_CREAT | O_TRUNC, 0666) != 2) + die("open(\"%s\"): %m", redir_stderr); + } + else + dup2(1, 2); + setpgrp(); + + if (memory_limit) + { + rl.rlim_cur = rl.rlim_max = memory_limit * 1024; + if (setrlimit(RLIMIT_AS, &rl) < 0) + die("setrlimit(RLIMIT_AS): %m"); + } + + rl.rlim_cur = rl.rlim_max = (stack_limit ? (rlim_t)stack_limit * 1024 : RLIM_INFINITY); + if (setrlimit(RLIMIT_STACK, &rl) < 0) + die("setrlimit(RLIMIT_STACK): %m"); + + rl.rlim_cur = rl.rlim_max = 64; + if (setrlimit(RLIMIT_NOFILE, &rl) < 0) + die("setrlimit(RLIMIT_NOFILE): %m"); + + char **env = setup_environment(); + execve(args[0], args, env); + die("execve(\"%s\"): %m", args[0]); +} + +// FIXME: Prune (and also the getopt string) +static void +usage(void) +{ + fprintf(stderr, "Invalid arguments!\n"); + printf("\ +Usage: box [] -- \n\ +\n\ +Options:\n\ +-a \tSet file access level (0=none, 1=cwd, 2=/etc,/lib,..., 3=whole fs, 9=no checks; needs -f)\n\ +-c \tChange directory to first\n\ +-e\t\tInherit full environment of the parent process\n\ +-E \tInherit the environment variable from the parent process\n\ +-E =\tSet the environment variable to ; unset it if is empty\n\ +-f\t\tFilter system calls (-ff=very restricted)\n\ +-i \tRedirect stdin from \n\ +-k \tLimit stack size to KB (default: 0=unlimited)\n\ +-m \tLimit address space to KB\n\ +-M \tOutput process information to (name:value)\n\ +-o \tRedirect stdout to \n\ +-p \tPermit access to the specified path (or subtree if it ends with a `/')\n\ +-p =\tDefine action for the specified path (=yes/no)\n\ +-r \tRedirect stderr to \n\ +-s \tPermit the specified syscall (be careful)\n\ +-s =\tDefine action for the specified syscall (=yes/no/file)\n\ +-t