#include <stdint.h>
#include <unistd.h>
#include <getopt.h>
+#include <sched.h>
#include <time.h>
+#include <grp.h>
#include <sys/wait.h>
#include <sys/user.h>
#include <sys/time.h>
#include <sys/signal.h>
#include <sys/sysinfo.h>
#include <sys/resource.h>
+#include <sys/mount.h>
+#include <sys/stat.h>
#define NONRET __attribute__((noreturn))
#define UNUSED __attribute__((unused))
#define ARRAY_SIZE(a) (int)(sizeof(a)/sizeof(a[0]))
+// FIXME: Make configurable, probably at compile time
+#define BOX_DIR "/tmp/box"
+#define BOX_UID 60000
+#define BOX_GID 60000
+
static int timeout; /* milliseconds */
static int wall_timeout;
static int extra_timeout;
static int memory_limit;
static int stack_limit;
static char *redir_stdin, *redir_stdout, *redir_stderr;
-static char *set_cwd;
+
+static uid_t orig_uid; /* real UID of the caller, read with getuid() in main() */
+static gid_t orig_gid; /* real GID of the caller, read with getgid() in main() */
static pid_t box_pid; /* PID of the sandboxed child as returned by clone(); box_exit() kills it while > 0 */
-static volatile int timer_tick;
+static volatile sig_atomic_t timer_tick; /* cleared by box_keeper() after check_timeout(); presumably set by signal_alarm() -- confirm */
static struct timeval start_time;
static int ticks_per_sec;
static int partial_line;
+static char cleanup_cmd[256]; /* shell command box_exit() runs when rc < 2; empty string means none */
-static int mem_peak_kb;
static int total_ms, wall_ms;
static void die(char *msg, ...) NONRET;
-static void sample_mem_peak(void);
/*** Meta-files ***/
meta_printf("time:%d.%03d\n", total_ms/1000, total_ms%1000);
meta_printf("time-wall:%d.%03d\n", wall_ms/1000, wall_ms%1000);
- meta_printf("mem:%llu\n", (unsigned long long) mem_peak_kb * 1024);
}
/*** Messages and exits ***/
+/*
+ * Run a shell command through system() and abort via die() unless the
+ * command ran to completion with exit status 0.
+ *
+ * cmd: command line handed to the shell; it is also quoted in the error text.
+ */
+static void
+xsystem(const char *cmd)
+{
+  int ret = system(cmd);
+  if (ret < 0)
+    die("system(\"%s\"): %m", cmd);
+
+  // ret is a wait status, so decode it instead of printing the raw value
+  if (WIFSIGNALED(ret))
+    die("system(\"%s\"): Killed by signal %d", cmd, WTERMSIG(ret));
+  if (!WIFEXITED(ret))
+    die("system(\"%s\"): Terminated abnormally (wait status 0x%x)", cmd, (unsigned) ret);
+  if (WEXITSTATUS(ret))
+    die("system(\"%s\"): Exited with status %d", cmd, WEXITSTATUS(ret));
+}
+
static void NONRET
box_exit(int rc)
{
if (box_pid > 0)
{
- sample_mem_peak();
kill(-box_pid, SIGKILL);
kill(box_pid, SIGKILL);
meta_printf("killed:1\n");
else
final_stats(&rus);
}
+
+ if (rc < 2 && cleanup_cmd[0])
+ xsystem(cleanup_cmd);
+
meta_close();
exit(rc);
}
return env;
}
-/*** FIXME ***/
+/*** The keeper process ***/
static void
signal_alarm(int unused UNUSED)
}
static void
-sample_mem_peak(void)
-{
- /*
- * We want to find out the peak memory usage of the process, which is
- * maintained by the kernel, but unforunately it gets lost when the
- * process exits (it is not reported in struct rusage). Therefore we
- * have to sample it whenever we suspect that the process is about
- * to exit.
- */
- char buf[PROC_BUF_SIZE], *x;
- static int proc_status_fd;
- read_proc_file(buf, "status", &proc_status_fd);
-
- x = buf;
- while (*x)
- {
- char *key = x;
- while (*x && *x != ':' && *x != '\n')
- x++;
- if (!*x || *x == '\n')
- break;
- *x++ = 0;
- while (*x == ' ' || *x == '\t')
- x++;
-
- char *val = x;
- while (*x && *x != '\n')
- x++;
- if (!*x)
- break;
- *x++ = 0;
-
- if (!strcmp(key, "VmPeak"))
- {
- int peak = atoi(val);
- if (peak > mem_peak_kb)
- mem_peak_kb = peak;
- }
- }
-
- if (verbose > 1)
- msg("[mem-peak: %u KB]\n", mem_peak_kb);
-}
-
-static void
-boxkeeper(void)
+box_keeper(void)
{
struct sigaction sa;
check_timeout();
timer_tick = 0;
}
- p = wait4(box_pid, &stat, WUNTRACED, &rus);
+ p = wait4(box_pid, &stat, 0, &rus);
if (p < 0)
{
if (errno == EINTR)
if (wall_timeout && wall_ms > wall_timeout)
err("TO: Time limit exceeded (wall clock)");
flush_line();
- fprintf(stderr, "OK (%d.%03d sec real, %d.%03d sec wall, %d MB)\n",
+ fprintf(stderr, "OK (%d.%03d sec real, %d.%03d sec wall)\n",
total_ms/1000, total_ms%1000,
- wall_ms/1000, wall_ms%1000,
- (mem_peak_kb + 1023) / 1024);
+ wall_ms/1000, wall_ms%1000);
box_exit(0);
}
if (WIFSIGNALED(stat))
}
}
+/*** The process running inside the box ***/
+
static void
-box_inside(int argc, char **argv)
+setup_root(void)
{
+ umask(0027);
+
+ if (mkdir("root", 0777) < 0 && errno != EEXIST)
+ die("mkdir('root'): %m");
+
+ if (mount("none", "root", "tmpfs", 0, "mode=755") < 0)
+ die("Cannot mount root ramdisk: %m");
+
+ // FIXME: Make the list of bind-mounts configurable
+ // FIXME: Virtual /dev?
+ // FIXME: Read-only mounts?
+
+ static const char * const dirs[] = { "box", "/bin", "/lib", "/usr", "/dev" };
+ for (int i=0; i < ARRAY_SIZE(dirs); i++)
+ {
+ const char *d = dirs[i];
+ char buf[1024]; // FIXME
+ sprintf(buf, "root/%s", (d[0] == '/' ? d+1 : d));
+ msg("Binding %s on %s\n", d, buf);
+ if (mkdir(buf, 0777) < 0)
+ die("mkdir(%s): %m", buf);
+ if (mount(d, buf, "none", MS_BIND | MS_NOSUID | MS_NODEV, "") < 0)
+ die("Cannot bind %s on %s: %m", d, buf);
+ }
+
+ if (mkdir("root/proc", 0777) < 0)
+ die("Cannot create proc: %m");
+ if (mount("none", "root/proc", "proc", 0, "") < 0)
+ die("Cannot mount proc: %m");
+
+ if (chroot("root") < 0)
+ die("Chroot failed: %m");
+
+ if (chdir("root/box") < 0)
+ die("Cannot change current directory: %m");
+}
+
+static int
+box_inside(void *arg)
+{
+ char **argv = arg;
+ int argc = 0;
+ while (argv[argc])
+ argc++;
+
struct rlimit rl;
char *args[argc+1];
memcpy(args, argv, argc * sizeof(char *));
args[argc] = NULL;
- if (set_cwd && chdir(set_cwd))
- die("chdir: %m");
+
+ setup_root();
+
+ if (setresgid(BOX_GID, BOX_GID, BOX_GID) < 0)
+ die("setresgid: %m");
+ if (setgroups(0, NULL) < 0)
+ die("setgroups: %m");
+ if (setresuid(BOX_UID, BOX_UID, BOX_UID) < 0)
+ die("setresuid: %m");
+
if (redir_stdin)
{
close(0);
if (setrlimit(RLIMIT_NOFILE, &rl) < 0)
die("setrlimit(RLIMIT_NOFILE): %m");
+ // FIXME: Create multi-process mode
+ rl.rlim_cur = rl.rlim_max = 1;
+ if (setrlimit(RLIMIT_NPROC, &rl) < 0)
+ die("setrlimit(RLIMIT_NPROC): %m");
+
+ rl.rlim_cur = rl.rlim_max = 0;
+ if (setrlimit(RLIMIT_MEMLOCK, &rl) < 0)
+ die("setrlimit(RLIMIT_MEMLOCK): %m");
+
char **env = setup_environment();
execve(args[0], args, env);
die("execve(\"%s\"): %m", args[0]);
}
-// FIXME: Prune (and also the getopt string)
+/*
+ * Create a fresh, empty "box" directory in the current directory and hand
+ * it over to the invoking user (orig_uid/orig_gid).  Whatever "box" held
+ * before is removed first.  Failures are reported through die().
+ */
+static void
+prepare(void)
+{
+  static const char box[] = "box";
+
+  msg("Preparing sandbox directory\n");
+  xsystem("rm -rf box");
+
+  if (mkdir(box, 0700) != 0)
+    die("Cannot create box: %m");
+
+  if (chown(box, orig_uid, orig_gid) != 0)
+    die("Cannot chown box: %m");
+}
+
+/* Remove the "box" directory in the current directory with all its contents. */
+static void
+cleanup(void)
+{
+  static const char remove_cmd[] = "rm -rf box";
+
+  msg("Deleting sandbox directory\n");
+  xsystem(remove_cmd);
+}
+
+/*
+ * Run a command inside the sandbox and supervise it.
+ *
+ * argv: NULL-terminated argument vector; argv[0] is the program that
+ *       box_inside() will execve().
+ *
+ * Requires an existing "box" directory in the current directory.  Its
+ * ownership is given to BOX_UID/BOX_GID for the run, and cleanup_cmd is
+ * set so that box_exit() can give it back to the invoking user.  The child
+ * is created by clone() in new IPC, network, mount and PID namespaces and
+ * runs box_inside(); this process continues in box_keeper().
+ */
+static void
+run(char **argv)
+{
+  struct stat st;
+  if (stat("box", &st) < 0 || !S_ISDIR(st.st_mode))
+    die("Box directory not found, did you run `isolate --prepare'?");
+
+  // uid_t and gid_t are unsigned on Linux: format them with %u so that
+  // large IDs do not come out negative and get parsed by chown as options.
+  // Owner and group are separated by ':', the form specified by POSIX.
+  char cmd[256];
+  snprintf(cmd, sizeof(cmd), "chown -R %u:%u box", (unsigned) BOX_UID, (unsigned) BOX_GID);
+  xsystem(cmd);
+  snprintf(cleanup_cmd, sizeof(cleanup_cmd), "chown -R %u:%u box", (unsigned) orig_uid, (unsigned) orig_gid);
+
+  // argv doubles as the child's initial stack pointer.  CLONE_VM is not
+  // set, so the child works on its own copy of the address space and
+  // cannot clobber the stack of this process.
+  box_pid = clone(
+    box_inside,			// Function to execute as the body of the new process
+    argv,			// Pass our stack
+    SIGCHLD | CLONE_NEWIPC | CLONE_NEWNET | CLONE_NEWNS | CLONE_NEWPID,
+    argv);			// Pass the arguments
+  if (box_pid < 0)
+    die("clone: %m");
+  if (!box_pid)
+    die("clone returned 0");
+  box_keeper();
+}
+
+/* Print the program version and the compiled-in sandbox settings to stdout. */
+static void
+show_version(void)
+{
+  // FIXME
+  fputs("Process isolator 0.0\n", stdout);
+  fputs("(c) 2012 Martin Mares <mj@ucw.cz>\n\n", stdout);
+  printf("Sandbox directory: %s\n", BOX_DIR);
+  printf("Sandbox credentials: uid=%u gid=%u\n", BOX_UID, BOX_GID);
+}
+
/*
 * Report invalid arguments on stderr, print the usage text on stdout
 * and terminate the program with exit status 2.
 */
static void
usage(void)
{
fprintf(stderr, "Invalid arguments!\n");
printf("\
-Usage: box [<options>] -- <command> <arguments>\n\
+Usage: isolate [<options>] <command>\n\
\n\
Options:\n\
--a <level>\tSet file access level (0=none, 1=cwd, 2=/etc,/lib,..., 3=whole fs, 9=no checks; needs -f)\n\
--c <dir>\tChange directory to <dir> first\n\
--e\t\tInherit full environment of the parent process\n\
--E <var>\tInherit the environment variable <var> from the parent process\n\
--E <var>=<val>\tSet the environment variable <var> to <val>; unset it if <var> is empty\n\
--f\t\tFilter system calls (-ff=very restricted)\n\
--i <file>\tRedirect stdin from <file>\n\
--k <size>\tLimit stack size to <size> KB (default: 0=unlimited)\n\
--m <size>\tLimit address space to <size> KB\n\
--M <file>\tOutput process information to <file> (name:value)\n\
--o <file>\tRedirect stdout to <file>\n\
--p <path>\tPermit access to the specified path (or subtree if it ends with a `/')\n\
--p <path>=<act>\tDefine action for the specified path (<act>=yes/no)\n\
--r <file>\tRedirect stderr to <file>\n\
--s <sys>\tPermit the specified syscall (be careful)\n\
--s <sys>=<act>\tDefine action for the specified syscall (<act>=yes/no/file)\n\
--t <time>\tSet run time limit (seconds, fractions allowed)\n\
--T\t\tAllow syscalls for measuring run time\n\
--v\t\tBe verbose (use multiple times for even more verbosity)\n\
--w <time>\tSet wall clock time limit (seconds, fractions allowed)\n\
--x <time>\tSet extra timeout, before which a timing-out program is not yet killed,\n\
-\t\tso that its real execution time is reported (seconds, fractions allowed)\n\
+-e, --full-env\t\tInherit full environment of the parent process\n\
+-E, --env=<var>\tInherit the environment variable <var> from the parent process\n\
+-E, --env=<var>=<val>\tSet the environment variable <var> to <val>; unset it if <var> is empty\n\
+-i, --stdin=<file>\tRedirect stdin from <file>\n\
+-k, --stack=<size>\tLimit stack size to <size> KB (default: 0=unlimited)\n\
+-m, --mem=<size>\tLimit address space to <size> KB\n\
+-M, --meta=<file>\tOutput process information to <file> (name:value)\n\
+-o, --stdout=<file>\tRedirect stdout to <file>\n\
+-r, --stderr=<file>\tRedirect stderr to <file>\n\
+-t, --time=<time>\tSet run time limit (seconds, fractions allowed)\n\
+-v, --verbose\t\tBe verbose (use multiple times for even more verbosity)\n\
+-w, --wall-time=<time>\tSet wall clock time limit (seconds, fractions allowed)\n\
+-x, --extra-time=<time>\tSet extra timeout, before which a timing-out program is not yet killed,\n\
+\t\t\tso that its real execution time is reported (seconds, fractions allowed)\n\
+\n\
+Commands:\n\
+ --prepare\t\tInitialize sandbox\n\
+ --run -- <cmd> ...\tRun given command within sandbox\n\
+ --cleanup\t\tClean up sandbox\n\
+ --version\t\tDisplay program version and configuration\n\
");
exit(2);
}
+/*
+ * Codes returned by getopt_long() for the long-only command options.
+ * They start at 256, above any single-byte short option letter, so the
+ * two sets of codes cannot clash.
+ */
+enum opt_code {
+  OPT_PREPARE = 256,
+  OPT_RUN     = 257,
+  OPT_CLEANUP = 258,
+  OPT_VERSION = 259,
+};
+
+/* Short options for getopt_long(); a ':' after a letter means it takes an argument. */
+static const char short_opts[] = "eE:i:k:m:M:o:r:t:vw:x:";
+
+/*
+ * Long options for getopt_long().  Entries that mirror a short option
+ * return its letter; the command options return their opt_code value.
+ */
+static const struct option long_opts[] = {
+  { "full-env",   no_argument,       NULL, 'e' },
+  { "env",        required_argument, NULL, 'E' },
+  { "stdin",      required_argument, NULL, 'i' },
+  { "stack",      required_argument, NULL, 'k' },
+  { "mem",        required_argument, NULL, 'm' },
+  { "meta",       required_argument, NULL, 'M' },
+  { "stdout",     required_argument, NULL, 'o' },
+  { "stderr",     required_argument, NULL, 'r' },
+  { "time",       required_argument, NULL, 't' },
+  { "verbose",    no_argument,       NULL, 'v' },
+  { "wall-time",  required_argument, NULL, 'w' },
+  { "extra-time", required_argument, NULL, 'x' },
+  { "prepare",    no_argument,       NULL, OPT_PREPARE },
+  { "run",        no_argument,       NULL, OPT_RUN },
+  { "cleanup",    no_argument,       NULL, OPT_CLEANUP },
+  { "version",    no_argument,       NULL, OPT_VERSION },
+  { NULL,         0,                 NULL, 0 }
+};
+
int
main(int argc, char **argv)
{
int c;
- uid_t uid;
+ enum opt_code mode = 0;
- while ((c = getopt(argc, argv, "a:c:eE:fi:k:m:M:o:p:r:s:t:Tvw:x:")) >= 0)
+ while ((c = getopt_long(argc, argv, short_opts, long_opts, NULL)) >= 0)
switch (c)
{
- case 'c':
- set_cwd = optarg;
- break;
case 'e':
pass_environ = 1;
break;
verbose++;
break;
case 'w':
- wall_timeout = 1000*atof(optarg);
+ wall_timeout = 1000*atof(optarg);
break;
case 'x':
extra_timeout = 1000*atof(optarg);
break;
+ case OPT_PREPARE:
+ case OPT_RUN:
+ case OPT_CLEANUP:
+ case OPT_VERSION:
+ mode = c;
+ break;
default:
usage();
}
- if (optind >= argc)
+
+ if (!mode)
usage();
+ if (mode == OPT_VERSION)
+ {
+ show_version();
+ return 0;
+ }
- uid = geteuid();
- if (setreuid(uid, uid) < 0)
- die("setreuid: %m");
- box_pid = fork();
- if (box_pid < 0)
- die("fork: %m");
- if (!box_pid)
- box_inside(argc-optind, argv+optind);
- else
- boxkeeper();
- die("Internal error: fell over edge of the world");
+ if (geteuid())
+ die("Must be started as root");
+ orig_uid = getuid();
+ orig_gid = getgid();
+
+ if (chdir(BOX_DIR) < 0)
+ die("chdir(%s): %m", BOX_DIR);
+
+ switch (mode)
+ {
+ case OPT_PREPARE:
+ if (optind < argc)
+ usage();
+ prepare();
+ break;
+ case OPT_RUN:
+ if (optind >= argc)
+ usage();
+ run(argv+optind);
+ break;
+ case OPT_CLEANUP:
+ if (optind < argc)
+ usage();
+ cleanup();
+ break;
+ default:
+ die("Internal error: mode mismatch");
+ }
+ exit(0);
}