+/*
+ * Duplicate Mail Checker
+ *
+ * (c) 2006 Martin Mares <mj@ucw.cz>
+ */
+
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+#include <getopt.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <pwd.h>
+#include <time.h>
+#include <sys/file.h>
+#include <sys/stat.h>
+#include <sys/mman.h>
+
+#include "util.h"
+
+static char *db_name;
+static uns max_age = 86400;
+static uns now;
+static char *local_user;
+static char *local_domain;
+
+struct item {
+ unsigned char digest[20];
+ uns timestamp;
+};
+
+static int db_fd;
+static uns db_items;
+static struct item *db_map;
+static uns db_map_size;
+
+static void
+db_open(void)
+{
+ if (!db_name)
+ {
+ struct passwd *pw = getpwuid(getuid());
+ if (!pw)
+ die("Sorry, but you don't exist!");
+ db_name = xmalloc(strlen(pw->pw_dir) + 10);
+ sprintf(db_name, "%s/.mdup.db", pw->pw_dir);
+ }
+ verb(2, "Opening database %s", db_name);
+ db_fd = open(db_name, O_RDWR | O_CREAT, 0600);
+ if (db_fd < 0)
+ die("Cannot open database %s: %m", db_name);
+ if (flock(db_fd, LOCK_EX) < 0)
+ die("Cannot flock %s: %m", db_name);
+
+ struct stat st;
+ if (fstat(db_fd, &st) < 0)
+ die("Cannot stat %s: %m", db_name);
+ if (st.st_size % sizeof(struct item))
+ die("Database %s is inconsistent: Size is not an integer number of records");
+ db_items = st.st_size / sizeof(struct item);
+ verb(2, "Mapping %d items", db_items);
+
+ db_map_size = sizeof(struct item) * (db_items+16);
+ db_map = mmap(NULL, db_map_size, PROT_READ | PROT_WRITE, MAP_SHARED, db_fd, 0);
+ if (db_map == MAP_FAILED)
+ die("Mmap on %s failed: %m", db_name);
+}
+
+static void
+db_close(void)
+{
+ munmap(db_map, db_map_size);
+ flock(db_fd, LOCK_UN);
+ close(db_fd);
+}
+
+static int
+db_lookup(struct item *new)
+{
+ struct item *free = NULL;
+
+ for (uns i=0; i<db_items; i++)
+ {
+ struct item *t = &db_map[i];
+ if (t->timestamp <= now && now - t->timestamp > max_age)
+ {
+ if (!free)
+ free = t;
+ }
+ else if (!memcmp(t->digest, new->digest, 20))
+ {
+ verb(2, "Found at item %d, age %d sec", i, now - t->timestamp);
+ t->timestamp = now;
+ return 1;
+ }
+ }
+
+ if (!free)
+ {
+ if (sizeof(struct item) * db_items >= db_map_size)
+ die("Internal error: map window too small");
+ free = &db_map[db_items++];
+ if (ftruncate(db_fd, sizeof(struct item) * db_items) < 0)
+ die("Cannot enlarge %s: %m", db_name);
+ }
+ verb(2, "Creating new entry");
+ *free = *new;
+ free->timestamp = now;
+ return 0;
+}
+
/*
 * Capacity of the buffer used for one unfolded header line.  Lines that
 * reach this many characters are logged as too long and not parsed.
 */
#define MAX_HDR_LEN 1024
+
/*
 * Skip blanks and parenthesised comments at the start of a string.
 *
 * Comments may nest, and inside a comment a backslash hides the character
 * after it.  Returns a pointer to the first character that is neither a
 * blank nor part of a comment, or to the terminating NUL when the string
 * runs out first (including inside an unterminated comment).
 */
static char *
skip_cfws(char *c)
{
  int depth = 0;

  while (*c)
    {
      switch (*c)
        {
        case ' ':
          break;
        case '(':
          depth++;
          break;
        default:
          if (!depth)
            return c;		/* ordinary character outside of any comment */
          if (*c == ')')
            depth--;
          else if (*c == '\\' && c[1])
            c++;		/* step over the backslash; the escaped char is skipped below */
          break;
        }
      c++;
    }
  return c;
}
+
+static void
+parse_header_line(char *line, uns cnt, struct item *item)
+{
+ if (!cnt)
+ return;
+ if (cnt >= MAX_HDR_LEN)
+ {
+ verb(2, "HDR: <too long>");
+ return;
+ }
+
+ line[cnt] = 0;
+ verb(2, "HDR: %s", line);
+ if (strncasecmp(line, "Message-ID: ", 12))
+ return;
+ char *c = skip_cfws(line+12);
+ if (*c++ != '<')
+ return;
+
+ char lhs[MAX_HDR_LEN], *l = lhs;
+ if (*c == '\"') // LHS is no-fold-quote
+ {
+ c++;
+ while (*c != '\"')
+ {
+ if (!*c)
+ return;
+ else if (*c == '\\' && c[1])
+ {
+ *l++ = c[1];
+ c += 2;
+ }
+ else
+ *l++ = *c++;
+ }
+ c++;
+ }
+ else // LHS is dot-atom-text
+ {
+ while (*c && *c != '@')
+ *l++ = *c++;
+ }
+ *l++ = 0;
+
+ if (*c++ != '@') // "@" is mandatory
+ return;
+
+ char rhs[MAX_HDR_LEN], *r = rhs;
+ if (*c == '[') // RHS is no-fold-literal
+ {
+ while (*c != ']')
+ {
+ if (!*c)
+ return;
+ else if (*c == '\\' && c[1])
+ {
+ *r++ = c[1];
+ c += 2;
+ }
+ else
+ *r++ = *c++;
+ }
+ c++;
+ }
+ else // RHS is dot-atom-text
+ {
+ while (*c && *c != '>')
+ *r++ = *c++;
+ }
+ *r++ = 0;
+
+ if (*c != '>')
+ return;
+
+ *c = 0;
+ verb(1, "Parsed Message-ID <%s@%s>", lhs, rhs);
+
+ if (local_domain && local_user)
+ {
+ uns lul = strlen(local_user);
+ if (!strcasecmp(rhs, local_domain) &&
+ !strncasecmp(lhs, local_user, lul) &&
+ !strncasecmp(lhs+lul, "+md+", 4))
+ {
+ verb(1, "Detected local Message-ID");
+ item->timestamp = 2;
+ return;
+ }
+ }
+
+ struct sha1_ctx ctx;
+ sha1_init(&ctx);
+ sha1_update(&ctx, lhs, l-lhs);
+ sha1_update(&ctx, rhs, r-rhs);
+ sha1_final(&ctx, item->digest);
+ item->timestamp = 1;
+ if (verbose >= 1)
+ {
+ fprintf(stderr, "Digest: ");
+ for (uns i=0; i<20; i++)
+ fprintf(stderr, "%02x", item->digest[i]);
+ fprintf(stderr, "\n");
+ }
+}
+
+static int
+parse_header(struct item *item)
+{
+ char buf[MAX_HDR_LEN+1];
+ uns cnt = 0;
+ uns last_nl = 0;
+
+ item->timestamp = 0;
+ for (;;)
+ {
+ int c = getchar();
+ if (c < 0)
+ {
+ verb(1, "Incomplete header");
+ return -1;
+ }
+ if (c == '\r')
+ ;
+ else if (c == '\n')
+ {
+ if (cnt == last_nl) // End of header
+ {
+ parse_header_line(buf, cnt, item);
+ return item->timestamp;
+ }
+ last_nl = cnt;
+ }
+ else if (c == ' ' || c == '\t' || !c)
+ {
+ if (!cnt)
+ {
+ verb(1, "Misplaced whitespace at the beginning of header");
+ return -1;
+ }
+ if (cnt < MAX_HDR_LEN)
+ buf[cnt++] = ' ';
+ }
+ else
+ {
+ if (cnt == last_nl)
+ {
+ parse_header_line(buf, cnt, item);
+ cnt = last_nl = 0;
+ }
+ if (cnt < MAX_HDR_LEN)
+ buf[cnt++] = c;
+ }
+ }
+}
+
+static void NONRET
+usage(void)
+{
+ fprintf(stderr, "Usage: mdup [<options>]\n\
+\n\
+Options:\n\
+-a <age>\t\tRecords older than <age> hours are ignored (default: 24)\n\
+-d <db>\t\t\tUse <db> as a Message-ID database (default: ~/.mdup.db; beware of NFS)\n\
+-l <user>@<domain>\tDetect looped back messages by their Message-ID\n\
+-v\t\t\tIncrease verbosity\n\
+\n\
+MailDups " STR(VERSION) ", (c) " STR(YEAR) " Martin Mares <mj@ucw.cz>\n\
+It can be freely distributed and used according to the GNU GPL v2.\n\
+");
+ exit(1);
+}
+
+int
+main(int argc, char **argv)
+{
+ int c;
+ while ((c = getopt(argc, argv, "a:d:l:v")) >= 0)
+ switch (c)
+ {
+ case 'a':
+ max_age = atol(optarg) * 3600;
+ break;
+ case 'd':
+ db_name = optarg;
+ break;
+ case 'l':
+ {
+ char *c = strchr(optarg, '@');
+ if (!c)
+ usage();
+ *c++ = 0;
+ local_user = optarg;
+ local_domain = c;
+ break;
+ }
+ case 'v':
+ verbose++;
+ break;
+ default:
+ usage();
+ }
+ if (optind < argc)
+ usage();
+
+ now = time(NULL);
+
+ struct item msg;
+ int ok = parse_header(&msg);
+ switch (ok)
+ {
+ case 0:
+ puts("NO ID");
+ break;
+ case 1:
+ db_open();
+ int ret = db_lookup(&msg);
+ db_close();
+ puts(ret ? "DUP" : "OK");
+ break;
+ case 2:
+ puts("LOCAL");
+ break;
+ default:
+ puts("ERROR");
+ }
+
+ return 0;
+}