+/*** Control groups ***/
+
+/* Path of this box's control group; cg_init() fills it in as "<cg_root>/box-<BOX_UID>". */
+static char cg_path[256];
+
+/* Size in bytes of the buffers holding a single attribute value in cg_read() and cg_write(). */
+#define CG_BUFSIZE 1024
+
+/*
+ * Read one control group attribute into buf.
+ *
+ * attr is a file name relative to cg_path. A leading '?' marks the attribute
+ * as optional: if its file cannot be opened, 0 is returned and buf is left
+ * untouched. Without the '?', failure to open the file is fatal.
+ *
+ * buf must provide room for CG_BUFSIZE bytes. On success it holds the value
+ * as a NUL-terminated string with one trailing newline stripped, and 1 is
+ * returned. Read errors, over-long values and over-long paths end in die().
+ */
+static int
+cg_read(char *attr, char *buf)
+{
+  int maybe = 0;
+  if (attr[0] == '?')
+    {
+      attr++;
+      maybe = 1;
+    }
+
+  char path[256];
+  int len = snprintf(path, sizeof(path), "%s/%s", cg_path, attr);
+  // A truncated path could name a different file, so refuse it outright
+  if (len < 0 || (size_t) len >= sizeof(path))
+    die("Path of control group attribute %s is too long", attr);
+
+  int fd = open(path, O_RDONLY);
+  if (fd < 0)
+    {
+      if (maybe)
+        return 0;
+      die("Cannot read %s: %m", path);
+    }
+
+  int n = read(fd, buf, CG_BUFSIZE);
+  if (n < 0)
+    die("Cannot read %s: %m", path);
+  // Closed only after the errno-based report above, so %m is not clobbered
+  close(fd);
+
+  // Keeps room for the terminating NUL and rejects values that may have been cut short
+  if (n >= CG_BUFSIZE - 1)
+    die("Attribute %s too long", path);
+  if (n > 0 && buf[n-1] == '\n')
+    n--;
+  buf[n] = 0;
+
+  if (verbose > 1)
+    msg("CG: Read %s = %s\n", attr, buf);
+
+  return 1;
+}
+
+/*
+ * Write a printf-style formatted value to one control group attribute.
+ *
+ * attr is a file name relative to cg_path; fmt and the following arguments
+ * produce the value, which must fit into CG_BUFSIZE bytes including the
+ * terminating NUL. The attribute file is opened for writing with O_TRUNC and
+ * the value is written with a single write() call.
+ *
+ * Every failure (formatting error, over-long value or path, open or write
+ * error, short write) is reported through die().
+ */
+static void __attribute__((format(printf,2,3)))
+cg_write(char *attr, char *fmt, ...)
+{
+  char buf[CG_BUFSIZE];
+
+  va_list args;
+  va_start(args, fmt);
+  int n = vsnprintf(buf, sizeof(buf), fmt, args);
+  // The argument list is not needed past this point, so release it immediately
+  va_end(args);
+
+  // A negative result would otherwise be passed to write() as a huge length
+  if (n < 0)
+    die("cg_write: Cannot format value for attribute %s", attr);
+  if (n >= CG_BUFSIZE)
+    die("cg_write: Value for attribute %s is too long", attr);
+
+  if (verbose > 1)
+    msg("CG: Write %s = %s", attr, buf);
+
+  char path[256];
+  int len = snprintf(path, sizeof(path), "%s/%s", cg_path, attr);
+  // A truncated path could name a different file, so refuse it outright
+  if (len < 0 || (size_t) len >= sizeof(path))
+    die("Path of control group attribute %s is too long", attr);
+
+  int fd = open(path, O_WRONLY | O_TRUNC);
+  if (fd < 0)
+    die("Cannot write %s: %m", path);
+
+  int written = write(fd, buf, n);
+  if (written < 0)
+    die("Cannot set %s to %s: %m", path, buf);
+  if (written != n)
+    die("Short write to %s (%d out of %d bytes)", path, written, n);
+
+  close(fd);
+}
+
+/*
+ * When control groups are enabled, check that cg_root is an existing
+ * directory and set cg_path to the "box-<BOX_UID>" group underneath it.
+ * Dies if cg_root cannot be stat'ed or is not a directory.
+ */
+static void
+cg_init(void)
+{
+  if (cg_enable)
+    {
+      struct stat root_info;
+      int root_is_dir = stat(cg_root, &root_info) >= 0 && S_ISDIR(root_info.st_mode);
+
+      if (!root_is_dir)
+        die("Control group filesystem at %s not mounted", cg_root);
+
+      snprintf(cg_path, sizeof(cg_path), "%s/box-%d", cg_root, BOX_UID);
+      msg("Using control group %s\n", cg_path);
+    }
+}
+
+/*
+ * When control groups are enabled, (re)create the group directory at cg_path.
+ *
+ * Unless stat() reports ENOENT for cg_path, the existing directory is removed
+ * first. After mkdir(), the parent's cpuset.cpus and cpuset.mems values are
+ * copied into the new group if the parent has them. Dies when the directory
+ * cannot be removed or created.
+ */
+static void
+cg_prepare(void)
+{
+  // Each row: optional attribute read from the parent group, attribute written in this group
+  static char *inherited[][2] = {
+    { "?../cpuset.cpus", "cpuset.cpus" },
+    { "?../cpuset.mems", "cpuset.mems" },
+  };
+
+  if (!cg_enable)
+    return;
+
+  struct stat info;
+  int absent = stat(cg_path, &info) < 0 && errno == ENOENT;
+  if (!absent)
+    {
+      msg("Control group %s already exists, trying to empty it.\n", cg_path);
+      if (rmdir(cg_path) < 0)
+        die("Failed to reset control group %s: %m", cg_path);
+    }
+
+  if (mkdir(cg_path, 0777) < 0)
+    die("Failed to create control group %s: %m", cg_path);
+
+  // If cpuset module is enabled, copy allowed cpus and memory nodes from parent group
+  char value[CG_BUFSIZE];
+  for (unsigned int i = 0; i < sizeof(inherited) / sizeof(inherited[0]); i++)
+    {
+      if (cg_read(inherited[i][0], value))
+        cg_write(inherited[i][1], "%s", value);
+    }
+}
+
+/*
+ * When control groups are enabled, configure the group at cg_path and move
+ * the calling process into it.
+ *
+ * Dies if cg_path cannot be stat'ed. If cg_memory_limit is non-zero, it is
+ * multiplied by 1024 and written to both memory limit attributes. If
+ * cg_timing is set, cpuacct.usage is reset to 0. Finally the caller's PID is
+ * written to "tasks".
+ */
+static void
+cg_enter(void)
+{
+  if (!cg_enable)
+    return;
+
+  msg("Entering control group %s\n", cg_path);
+
+  struct stat info;
+  int rc = stat(cg_path, &info);
+  if (rc < 0)
+    die("Control group %s does not exist: %m", cg_path);
+
+  if (cg_memory_limit)
+    {
+      // Both attributes take bytes; cg_memory_limit is scaled by 1024 (presumably it is in KB)
+      long long limit_bytes = (long long) cg_memory_limit << 10;
+      cg_write("memory.limit_in_bytes", "%lld\n", limit_bytes);
+      cg_write("memory.memsw.limit_in_bytes", "%lld\n", limit_bytes);
+    }
+
+  if (cg_timing)
+    cg_write("cpuacct.usage", "0\n");
+
+  int self = (int) getpid();
+  cg_write("tasks", "%d\n", self);
+}
+
+/*
+ * Return the group's cpuacct.usage value converted from nanoseconds to
+ * milliseconds, or 0 when control groups are disabled.
+ */
+static int
+cg_get_run_time_ms(void)
+{
+  char usage[CG_BUFSIZE];
+  unsigned long long nanoseconds;
+
+  if (!cg_enable)
+    return 0;
+
+  cg_read("cpuacct.usage", usage);
+  nanoseconds = atoll(usage);
+
+  // 1 ms = 1000000 ns
+  return nanoseconds / 1000000;
+}
+
+/*
+ * When control groups are enabled, report peak memory usage of the group.
+ *
+ * Reads the optional attributes memory.max_usage_in_bytes and
+ * memory.memsw.max_usage_in_bytes, takes the larger of the two, and if it is
+ * non-zero emits it divided by 1024 as "cg-mem:<value>" through meta_printf().
+ */
+static void
+cg_stats(void)
+{
+  if (!cg_enable)
+    return;
+
+  char buf[CG_BUFSIZE];
+
+  // Memory usage statistics
+  unsigned long long mem=0, memsw=0;
+  if (cg_read("?memory.max_usage_in_bytes", buf))
+    mem = atoll(buf);
+  if (cg_read("?memory.memsw.max_usage_in_bytes", buf))
+    {
+      memsw = atoll(buf);
+      if (memsw > mem)
+        mem = memsw;
+    }
+
+  // The argument is unsigned long long, so the conversion must be %llu rather than %lld
+  if (mem)
+    meta_printf("cg-mem:%llu\n", mem >> 10);
+}
+
+/*
+ * When control groups are enabled, remove the group directory at cg_path.
+ * Dies if the group's "tasks" attribute is non-empty or if rmdir() fails.
+ */
+static void
+cg_remove(void)
+{
+  if (!cg_enable)
+    return;
+
+  char tasks[CG_BUFSIZE];
+  cg_read("tasks", tasks);
+  if (tasks[0] != '\0')
+    die("Some tasks left in control group %s, failed to remove it", cg_path);
+
+  // FIXME: Is this needed?
+  // cg_write("memory.force_empty", "0\n");
+
+  int rc = rmdir(cg_path);
+  if (rc < 0)
+    die("Cannot remove control group %s: %m", cg_path);
+}
+