#include <sys/sysinfo.h>
#include <sys/syscall.h>
#include <sys/resource.h>
+#include <linux/ptrace.h>
#define NONRET __attribute__((noreturn))
#define UNUSED __attribute__((unused))
static int filter_syscalls; /* 0=off, 1=liberal, 2=totalitarian */
static int timeout; /* milliseconds */
static int wall_timeout;
+static int extra_timeout;
static int pass_environ;
static int file_access;
static int verbose;
static int memory_limit;
+static int stack_limit;
static char *redir_stdin, *redir_stdout, *redir_stderr;
static char *set_cwd;
static int exec_seen;
static int partial_line;
+static int mem_peak_kb;
+static int total_ms, wall_ms;
+
static void die(char *msg, ...) NONRET;
+static void sample_mem_peak(void);
/*** Meta-files ***/
va_end(args);
}
-static int total_ms, wall_ms;
-
static void
final_stats(struct rusage *rus)
{
meta_printf("time:%d.%03d\n", total_ms/1000, total_ms%1000);
meta_printf("time-wall:%d.%03d\n", wall_ms/1000, wall_ms%1000);
+ meta_printf("mem:%llu\n", (unsigned long long) mem_peak_kb * 1024);
}
/*** Messages and exits ***/
{
if (box_pid > 0)
{
+ sample_mem_peak();
if (is_ptraced)
ptrace(PTRACE_KILL, box_pid);
kill(-box_pid, SIGKILL);
kill(box_pid, SIGKILL);
+ meta_printf("killed:1\n");
struct rusage rus;
- int stat;
- int p = wait4(box_pid, &stat, 0, &rus);
+ int p, stat;
+ do
+ p = wait4(box_pid, &stat, 0, &rus);
+ while (p < 0 && errno == EINTR);
if (p < 0)
- fprintf(stderr, "UGH: Lost track of the process\n");
+ fprintf(stderr, "UGH: Lost track of the process (%m)\n");
else
final_stats(&rus);
}
A_NO, // Always forbid
A_YES, // Always permit
A_FILENAME, // Permit if arg1 is a known filename
+ A_ACTION_MASK = 15,
+ A_NO_RETVAL = 32, // Does not return a value
+ A_SAMPLE_MEM = 64, // Sample memory usage before the syscall
A_LIBERAL = 128, // Valid only in liberal mode
+ // Must fit in an unsigned char
};
static unsigned char syscall_action[NUM_ACTIONS] = {
S(readlink) = A_FILENAME,
// Syscalls permitted always
- S(exit) = A_YES,
+ S(exit) = A_YES | A_SAMPLE_MEM,
S(read) = A_YES,
S(write) = A_YES,
S(close) = A_YES,
S(fcntl) = A_YES,
S(fcntl64) = A_YES,
S(mmap) = A_YES,
- S(mmap) = A_YES,
+ S(mmap2) = A_YES,
S(munmap) = A_YES,
S(ioctl) = A_YES,
S(uname) = A_YES,
S(set_thread_area) = A_YES,
S(get_thread_area) = A_YES,
S(set_tid_address) = A_YES,
- S(exit_group) = A_YES,
+ S(exit_group) = A_YES | A_SAMPLE_MEM,
// Syscalls permitted only in liberal mode
S(time) = A_YES | A_LIBERAL,
S(readdir) = A_YES | A_LIBERAL,
S(setitimer) = A_YES | A_LIBERAL,
S(getitimer) = A_YES | A_LIBERAL,
- S(sigreturn) = A_YES | A_LIBERAL,
+ S(sigreturn) = A_YES | A_LIBERAL | A_NO_RETVAL,
S(mprotect) = A_YES | A_LIBERAL,
S(sigprocmask) = A_YES | A_LIBERAL,
S(getdents) = A_YES | A_LIBERAL,
S(poll) = A_YES | A_LIBERAL,
S(getcwd) = A_YES | A_LIBERAL,
S(nanosleep) = A_YES | A_LIBERAL,
- S(rt_sigreturn) = A_YES | A_LIBERAL,
+ S(rt_sigreturn) = A_YES | A_LIBERAL | A_NO_RETVAL,
S(rt_sigaction) = A_YES | A_LIBERAL,
S(rt_sigprocmask) = A_YES | A_LIBERAL,
S(rt_sigpending) = A_YES | A_LIBERAL,
err("FA: Forbidden access to file `%s'", namebuf);
}
+// Check syscall. If invalid, return -1, otherwise return the action mask.
static int
valid_syscall(struct user *u)
{
unsigned int sys = u->regs.orig_eax;
- enum action act = (sys < NUM_ACTIONS) ? syscall_action[sys] : A_DEFAULT;
+ unsigned int act = (sys < NUM_ACTIONS) ? syscall_action[sys] : A_DEFAULT;
if (act & A_LIBERAL)
{
- if (filter_syscalls == 1)
- act &= ~A_LIBERAL;
- else
+ if (filter_syscalls != 1)
act = A_DEFAULT;
}
- switch (act)
+
+ switch (act & A_ACTION_MASK)
{
case A_YES:
- return 1;
+ return act;
case A_NO:
- return 0;
+ return -1;
case A_FILENAME:
valid_filename(u->regs.ebx);
- return 1;
+ return act;
default: ;
}
meta_printf("exitsig:%d\n", (int)u->regs.ecx);
err("SG: Committed suicide by signal %d", (int)u->regs.ecx);
}
- return 0;
+ return -1;
case __NR_tgkill:
if (u->regs.ebx == box_pid && u->regs.ecx == box_pid)
{
meta_printf("exitsig:%d\n", (int)u->regs.edx);
err("SG: Committed suicide by signal %d", (int)u->regs.edx);
}
- return 0;
+ return -1;
default:
- return 0;
+ return -1;
}
}
err("SG: Interrupted");
}
+#define PROC_BUF_SIZE 4096
+
+/*
+ * Read /proc/<box_pid>/<name> into buf and NUL-terminate it.
+ *
+ * buf  -- destination; the caller must supply at least PROC_BUF_SIZE bytes.
+ *         It is also used as scratch space for building the path.
+ * name -- file name below /proc/<box_pid>/.
+ * fdp  -- cached descriptor; zero means "not opened yet", in which case the
+ *         file is opened and the descriptor is stored back for later calls.
+ *
+ * Every failure (open, seek, read, or contents that fill the whole buffer)
+ * ends in die(), which does not return.
+ */
+static void
+read_proc_file(char *buf, char *name, int *fdp)
+{
+  int c;
+
+  if (!*fdp)
+    {
+      /* Bound the write explicitly instead of trusting the path length */
+      snprintf(buf, PROC_BUF_SIZE, "/proc/%d/%s", (int) box_pid, name);
+      *fdp = open(buf, O_RDONLY);
+      if (*fdp < 0)
+        die("open(%s): %m", buf);
+    }
+
+  /* The descriptor is reused across calls, so start reading from offset 0 again */
+  if (lseek(*fdp, 0, SEEK_SET) < 0)
+    die("lseek on /proc/$pid/%s: %m", name);
+  if ((c = read(*fdp, buf, PROC_BUF_SIZE-1)) < 0)
+    die("read on /proc/$pid/%s: %m", name);
+  /* A completely filled buffer may mean truncated contents; refuse to parse them */
+  if (c >= PROC_BUF_SIZE-1)
+    die("/proc/$pid/%s too long", name);
+  buf[c] = 0;
+}
+
static void
check_timeout(void)
{
}
if (timeout)
{
- char buf[4096], *x;
- int c, utime, stime, ms;
- static int proc_status_fd;
- if (!proc_status_fd)
- {
- sprintf(buf, "/proc/%d/stat", (int) box_pid);
- proc_status_fd = open(buf, O_RDONLY);
- if (proc_status_fd < 0)
- die("open(%s): %m", buf);
- }
- lseek(proc_status_fd, 0, SEEK_SET);
- if ((c = read(proc_status_fd, buf, sizeof(buf)-1)) < 0)
- die("read on /proc/$pid/stat: %m");
- if (c >= (int) sizeof(buf) - 1)
- die("/proc/$pid/stat too long");
- buf[c] = 0;
+ char buf[PROC_BUF_SIZE], *x;
+ int utime, stime, ms;
+ static int proc_stat_fd;
+ read_proc_file(buf, "stat", &proc_stat_fd);
x = buf;
while (*x && *x != ' ')
x++;
while (*x == ' ')
x++;
if (*x++ != '(')
- die("proc syntax error 1");
+ die("proc stat syntax error 1");
while (*x && (*x != ')' || x[1] != ' '))
x++;
while (*x == ')' || *x == ' ')
x++;
if (sscanf(x, "%*c %*d %*d %*d %*d %*d %*d %*d %*d %*d %*d %d %d", &utime, &stime) != 2)
- die("proc syntax error 2");
+ die("proc stat syntax error 2");
ms = (utime + stime) * 1000 / ticks_per_sec;
if (verbose > 1)
fprintf(stderr, "[time check: %d msec]\n", ms);
- if (ms > timeout)
+ if (ms > timeout && ms > extra_timeout)
err("TO: Time limit exceeded");
}
}
+static void
+sample_mem_peak(void)
+{
+  /*
+   * Raise mem_peak_kb to the VmPeak value found in /proc/<box_pid>/status.
+   *
+   * We want to find out the peak memory usage of the process, which is
+   * maintained by the kernel, but unfortunately it gets lost when the
+   * process exits (it is not reported in struct rusage). Therefore we
+   * have to sample it whenever we suspect that the process is about
+   * to exit.
+   */
+  char buf[PROC_BUF_SIZE], *x;
+  static int proc_status_fd;	/* cached by read_proc_file() between calls */
+  read_proc_file(buf, "status", &proc_status_fd);
+
+  /* Walk the buffer line by line, cutting each "key:<blanks>value" pair in place */
+  x = buf;
+  while (*x)
+    {
+      char *key = x;
+      while (*x && *x != ':' && *x != '\n')
+        x++;
+      /* A line without a colon (or a truncated tail) ends the scan */
+      if (!*x || *x == '\n')
+        break;
+      *x++ = 0;
+      while (*x == ' ' || *x == '\t')
+        x++;
+
+      char *val = x;
+      while (*x && *x != '\n')
+        x++;
+      /* The value must be terminated by a newline, otherwise it is incomplete */
+      if (!*x)
+        break;
+      *x++ = 0;
+
+      if (!strcmp(key, "VmPeak"))
+        {
+          /* atoi() stops at the first non-digit, so any trailing unit text is ignored */
+          int peak = atoi(val);
+          /* Only ever raise the recorded peak */
+          if (peak > mem_peak_kb)
+            mem_peak_kb = peak;
+        }
+    }
+
+  /* mem_peak_kb is a signed int, so print it with %d */
+  if (verbose > 1)
+    msg("[mem-peak: %d KB]\n", mem_peak_kb);
+}
+
static void
boxkeeper(void)
{
- int syscall_count = 0;
+ int syscall_count = (filter_syscalls ? 0 : 1);
struct sigaction sa;
is_ptraced = 1;
+
bzero(&sa, sizeof(sa));
sa.sa_handler = signal_int;
sigaction(SIGINT, &sa, NULL);
+
gettimeofday(&start_time, NULL);
ticks_per_sec = sysconf(_SC_CLK_TCK);
if (ticks_per_sec <= 0)
die("Invalid ticks_per_sec!");
+
if (timeout || wall_timeout)
{
sa.sa_handler = signal_alarm;
sigaction(SIGALRM, &sa, NULL);
alarm(1);
}
+
for(;;)
{
struct rusage rus;
if (wall_timeout && wall_ms > wall_timeout)
err("TO: Time limit exceeded (wall clock)");
flush_line();
- fprintf(stderr, "OK (%d.%03d sec real, %d.%03d sec wall, %d syscalls)\n",
+ fprintf(stderr, "OK (%d.%03d sec real, %d.%03d sec wall, %d MB, %d syscalls)\n",
total_ms/1000, total_ms%1000,
wall_ms/1000, wall_ms%1000,
+ (mem_peak_kb + 1023) / 1024,
syscall_count);
box_exit(0);
}
{
box_pid = 0;
meta_printf("exitsig:%d\n", WTERMSIG(stat));
+ final_stats(&rus);
err("SG: Caught fatal signal %d%s", WTERMSIG(stat), (syscall_count ? "" : " during startup"));
}
if (WIFSTOPPED(stat))
int sig = WSTOPSIG(stat);
if (sig == SIGTRAP)
{
+ if (verbose > 2)
+ msg("[ptrace status %08x] ", stat);
+ static int stop_count;
+ if (!stop_count++) /* Traceme request */
+ msg(">> Traceme request caught\n");
+ else
+ err("SG: Breakpoint");
+ ptrace(PTRACE_SYSCALL, box_pid, 0, 0);
+ }
+ else if (sig == (SIGTRAP | 0x80))
+ {
+ if (verbose > 2)
+ msg("[ptrace status %08x] ", stat);
struct user u;
- static int stop_count = -1;
+ static unsigned int sys_tick, last_sys, last_act;
if (ptrace(PTRACE_GETREGS, box_pid, NULL, &u) < 0)
die("ptrace(PTRACE_GETREGS): %m");
- stop_count++;
- if (!stop_count) /* Traceme request */
- msg(">> Traceme request caught\n");
- else if (stop_count & 1) /* Syscall entry */
+ unsigned int sys = u.regs.orig_eax;
+ if (++sys_tick & 1) /* Syscall entry */
{
char namebuf[32];
- msg(">> Syscall %-12s (%08lx,%08lx,%08lx) ", syscall_name(u.regs.orig_eax, namebuf), u.regs.ebx, u.regs.ecx, u.regs.edx);
+ int act;
+ msg(">> Syscall %-12s (%08lx,%08lx,%08lx) ", syscall_name(sys, namebuf), u.regs.ebx, u.regs.ecx, u.regs.edx);
if (!exec_seen)
{
msg("[master] ");
- if (u.regs.orig_eax == __NR_execve)
+ if (sys == __NR_execve)
exec_seen = 1;
}
- else if (valid_syscall(&u))
- syscall_count++;
+ else if ((act = valid_syscall(&u)) >= 0)
+ {
+ last_act = act;
+ syscall_count++;
+ if (act & A_SAMPLE_MEM)
+ sample_mem_peak();
+ }
else
{
/*
* so we have to change it to something harmless (e.g., an undefined
* syscall) and make the program continue.
*/
- unsigned int sys = u.regs.orig_eax;
u.regs.orig_eax = 0xffffffff;
if (ptrace(PTRACE_SETREGS, box_pid, NULL, &u) < 0)
die("ptrace(PTRACE_SETREGS): %m");
err("FO: Forbidden syscall %s", syscall_name(sys, namebuf));
}
+ last_sys = sys;
}
else /* Syscall return */
- msg("= %ld\n", u.regs.eax);
+ {
+ if (sys == 0xffffffff)
+ {
+ /* Some syscalls (sigreturn et al.) do not return a value */
+ if (!(last_act & A_NO_RETVAL))
+ err("XX: Syscall does not return, but it should");
+ }
+ else
+ {
+ if (sys != last_sys)
+ err("XX: Mismatched syscall entry/exit");
+ }
+ if (last_act & A_NO_RETVAL)
+ msg("= ?\n");
+ else
+ msg("= %ld\n", u.regs.eax);
+ }
+ ptrace(PTRACE_SYSCALL, box_pid, 0, 0);
+ }
+ else if (sig == SIGSTOP)
+ {
+ msg(">> SIGSTOP\n");
+ if (ptrace(PTRACE_SETOPTIONS, box_pid, NULL, (void *) PTRACE_O_TRACESYSGOOD) < 0)
+ die("ptrace(PTRACE_SETOPTIONS): %m");
ptrace(PTRACE_SYSCALL, box_pid, 0, 0);
}
- else if (sig != SIGSTOP && sig != SIGXCPU && sig != SIGXFSZ)
+ else if (sig != SIGXCPU && sig != SIGXFSZ)
{
msg(">> Signal %d\n", sig);
+ sample_mem_peak(); /* Signal might be fatal, so update mem-peak */
ptrace(PTRACE_SYSCALL, box_pid, 0, sig);
}
else
else
dup2(1, 2);
setpgrp();
+
if (memory_limit)
{
rl.rlim_cur = rl.rlim_max = memory_limit * 1024;
if (setrlimit(RLIMIT_AS, &rl) < 0)
- die("setrlimit: %m");
+ die("setrlimit(RLIMIT_AS): %m");
}
+
+ rl.rlim_cur = rl.rlim_max = (stack_limit ? (rlim_t)stack_limit * 1024 : RLIM_INFINITY);
+ if (setrlimit(RLIMIT_STACK, &rl) < 0)
+ die("setrlimit(RLIMIT_STACK): %m");
+
rl.rlim_cur = rl.rlim_max = 64;
if (setrlimit(RLIMIT_NOFILE, &rl) < 0)
- die("setrlimit: %m");
+ die("setrlimit(RLIMIT_NOFILE): %m");
+
+ char **env = setup_environment();
if (filter_syscalls)
{
if (ptrace(PTRACE_TRACEME) < 0)
die("ptrace(PTRACE_TRACEME): %m");
/* Trick: Make sure that we are stopped until the boxkeeper wakes up. */
- signal(SIGCHLD, SIG_IGN);
- raise(SIGCHLD);
+ raise(SIGSTOP);
}
- execve(args[0], args, setup_environment());
+ execve(args[0], args, env);
die("execve(\"%s\"): %m", args[0]);
}
-E <var>=<val>\tSet the environment variable <var> to <val>; unset it if <var> is empty\n\
-f\t\tFilter system calls (-ff=very restricted)\n\
-i <file>\tRedirect stdin from <file>\n\
+-k <size>\tLimit stack size to <size> KB (default: 0=unlimited)\n\
-m <size>\tLimit address space to <size> KB\n\
-M <file>\tOutput process information to <file> (name:value)\n\
-o <file>\tRedirect stdout to <file>\n\
-T\t\tAllow syscalls for measuring run time\n\
-v\t\tBe verbose (use multiple times for even more verbosity)\n\
-w <time>\tSet wall clock time limit (seconds, fractions allowed)\n\
+-x <time>\tSet extra timeout, before which a timing-out program is not yet killed,\n\
+\t\tso that its real execution time is reported (seconds, fractions allowed)\n\
");
exit(2);
}
int c;
uid_t uid;
- while ((c = getopt(argc, argv, "a:c:eE:fi:m:M:o:p:r:s:t:Tvw:")) >= 0)
+ while ((c = getopt(argc, argv, "a:c:eE:fi:k:m:M:o:p:r:s:t:Tvw:x:")) >= 0)
switch (c)
{
case 'a':
case 'f':
filter_syscalls++;
break;
+ case 'k':
+ stack_limit = atol(optarg);
+ break;
case 'i':
redir_stdin = optarg;
break;
case 'w':
wall_timeout = 1000*atof(optarg);
break;
+ case 'x':
+ extra_timeout = 1000*atof(optarg);
+ break;
default:
usage();
}