--- /dev/null
+#DEBUG=-ggdb
+CFLAGS=-O2 -Wall -W -Wno-parentheses -Wstrict-prototypes -Wmissing-prototypes -Winline $(DEBUG) -std=gnu99 -DVERSION=$(VERSION) -DYEAR=$(YEAR)
+
+VERSION=0.1
+YEAR=2006
+
+all: mdup
+
+mdup: mdup.o util.o sha1.o
+
+mdup.o: mdup.c util.h
+util.o: util.c util.h
+sha1.o: sha1.c util.h
+
+clean:
+ rm -f `find . -name "*~" -or -name "*.[oa]" -or -name "\#*\#" -or -name TAGS -or -name core`
+ rm -f mdup
+ rm -rf maint/dist
+
+distclean: clean
+
+.PHONY: all clean distclean
--- /dev/null
+/*
+ * Duplicate Mail Checker
+ *
+ * (c) 2006 Martin Mares <mj@ucw.cz>
+ */
+
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+#include <getopt.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <pwd.h>
+#include <time.h>
+#include <sys/file.h>
+#include <sys/stat.h>
+#include <sys/mman.h>
+
+#include "util.h"
+
+static char *db_name;
+static uns max_age = 86400;
+static uns now;
+static char *local_user;
+static char *local_domain;
+
+struct item {
+ unsigned char digest[20];
+ uns timestamp;
+};
+
+static int db_fd;
+static uns db_items;
+static struct item *db_map;
+static uns db_map_size;
+
+static void
+db_open(void)
+{
+ if (!db_name)
+ {
+ struct passwd *pw = getpwuid(getuid());
+ if (!pw)
+ die("Sorry, but you don't exist!");
+ db_name = xmalloc(strlen(pw->pw_dir) + 10);
+ sprintf(db_name, "%s/.mdup.db", pw->pw_dir);
+ }
+ verb(2, "Opening database %s", db_name);
+ db_fd = open(db_name, O_RDWR | O_CREAT, 0600);
+ if (db_fd < 0)
+ die("Cannot open database %s: %m", db_name);
+ if (flock(db_fd, LOCK_EX) < 0)
+ die("Cannot flock %s: %m", db_name);
+
+ struct stat st;
+ if (fstat(db_fd, &st) < 0)
+ die("Cannot stat %s: %m", db_name);
+ if (st.st_size % sizeof(struct item))
+ die("Database %s is inconsistent: Size is not an integer number of records");
+ db_items = st.st_size / sizeof(struct item);
+ verb(2, "Mapping %d items", db_items);
+
+ db_map_size = sizeof(struct item) * (db_items+16);
+ db_map = mmap(NULL, db_map_size, PROT_READ | PROT_WRITE, MAP_SHARED, db_fd, 0);
+ if (db_map == MAP_FAILED)
+ die("Mmap on %s failed: %m", db_name);
+}
+
+static void
+db_close(void)
+{
+ munmap(db_map, db_map_size);
+ flock(db_fd, LOCK_UN);
+ close(db_fd);
+}
+
+static int
+db_lookup(struct item *new)
+{
+ struct item *free = NULL;
+
+ for (uns i=0; i<db_items; i++)
+ {
+ struct item *t = &db_map[i];
+ if (t->timestamp <= now && now - t->timestamp > max_age)
+ {
+ if (!free)
+ free = t;
+ }
+ else if (!memcmp(t->digest, new->digest, 20))
+ {
+ verb(2, "Found at item %d, age %d sec", i, now - t->timestamp);
+ t->timestamp = now;
+ return 1;
+ }
+ }
+
+ if (!free)
+ {
+ if (sizeof(struct item) * db_items >= db_map_size)
+ die("Internal error: map window too small");
+ free = &db_map[db_items++];
+ if (ftruncate(db_fd, sizeof(struct item) * db_items) < 0)
+ die("Cannot enlarge %s: %m", db_name);
+ }
+ verb(2, "Creating new entry");
+ *free = *new;
+ free->timestamp = now;
+ return 0;
+}
+
+#define MAX_HDR_LEN 1024
+
+static char *
+skip_cfws(char *c)
+{
+ int nest = 0;
+
+ for (;;)
+ {
+ if (!*c)
+ return c;
+ else if (*c == ' ')
+ ;
+ else if (*c == '(')
+ nest++;
+ else if (!nest)
+ return c;
+ else if (*c == ')')
+ nest--;
+ else if (*c == '\\' && c[1])
+ c++;
+ c++;
+ }
+}
+
+static void
+parse_header_line(char *line, uns cnt, struct item *item)
+{
+ if (!cnt)
+ return;
+ if (cnt >= MAX_HDR_LEN)
+ {
+ verb(2, "HDR: <too long>");
+ return;
+ }
+
+ line[cnt] = 0;
+ verb(2, "HDR: %s", line);
+ if (strncasecmp(line, "Message-ID: ", 12))
+ return;
+ char *c = skip_cfws(line+12);
+ if (*c++ != '<')
+ return;
+
+ char lhs[MAX_HDR_LEN], *l = lhs;
+ if (*c == '\"') // LHS is no-fold-quote
+ {
+ c++;
+ while (*c != '\"')
+ {
+ if (!*c)
+ return;
+ else if (*c == '\\' && c[1])
+ {
+ *l++ = c[1];
+ c += 2;
+ }
+ else
+ *l++ = *c++;
+ }
+ c++;
+ }
+ else // LHS is dot-atom-text
+ {
+ while (*c && *c != '@')
+ *l++ = *c++;
+ }
+ *l++ = 0;
+
+ if (*c++ != '@') // "@" is mandatory
+ return;
+
+ char rhs[MAX_HDR_LEN], *r = rhs;
+ if (*c == '[') // RHS is no-fold-literal
+ {
+ while (*c != ']')
+ {
+ if (!*c)
+ return;
+ else if (*c == '\\' && c[1])
+ {
+ *r++ = c[1];
+ c += 2;
+ }
+ else
+ *r++ = *c++;
+ }
+ c++;
+ }
+ else // RHS is dot-atom-text
+ {
+ while (*c && *c != '>')
+ *r++ = *c++;
+ }
+ *r++ = 0;
+
+ if (*c != '>')
+ return;
+
+ *c = 0;
+ verb(1, "Parsed Message-ID <%s@%s>", lhs, rhs);
+
+ if (local_domain && local_user)
+ {
+ uns lul = strlen(local_user);
+ if (!strcasecmp(rhs, local_domain) &&
+ !strncasecmp(lhs, local_user, lul) &&
+ !strncasecmp(lhs+lul, "+md+", 4))
+ {
+ verb(1, "Detected local Message-ID");
+ item->timestamp = 2;
+ return;
+ }
+ }
+
+ struct sha1_ctx ctx;
+ sha1_init(&ctx);
+ sha1_update(&ctx, lhs, l-lhs);
+ sha1_update(&ctx, rhs, r-rhs);
+ sha1_final(&ctx, item->digest);
+ item->timestamp = 1;
+ if (verbose >= 1)
+ {
+ fprintf(stderr, "Digest: ");
+ for (uns i=0; i<20; i++)
+ fprintf(stderr, "%02x", item->digest[i]);
+ fprintf(stderr, "\n");
+ }
+}
+
+static int
+parse_header(struct item *item)
+{
+ char buf[MAX_HDR_LEN+1];
+ uns cnt = 0;
+ uns last_nl = 0;
+
+ item->timestamp = 0;
+ for (;;)
+ {
+ int c = getchar();
+ if (c < 0)
+ {
+ verb(1, "Incomplete header");
+ return -1;
+ }
+ if (c == '\r')
+ ;
+ else if (c == '\n')
+ {
+ if (cnt == last_nl) // End of header
+ {
+ parse_header_line(buf, cnt, item);
+ return item->timestamp;
+ }
+ last_nl = cnt;
+ }
+ else if (c == ' ' || c == '\t' || !c)
+ {
+ if (!cnt)
+ {
+ verb(1, "Misplaced whitespace at the beginning of header");
+ return -1;
+ }
+ if (cnt < MAX_HDR_LEN)
+ buf[cnt++] = ' ';
+ }
+ else
+ {
+ if (cnt == last_nl)
+ {
+ parse_header_line(buf, cnt, item);
+ cnt = last_nl = 0;
+ }
+ if (cnt < MAX_HDR_LEN)
+ buf[cnt++] = c;
+ }
+ }
+}
+
+static void NONRET
+usage(void)
+{
+ fprintf(stderr, "Usage: mdup [<options>]\n\
+\n\
+Options:\n\
+-a <age>\t\tRecords older than <age> hours are ignored (default: 24)\n\
+-d <db>\t\t\tUse <db> as a Message-ID database (default: ~/.mdup.db; beware of NFS)\n\
+-l <user>@<domain>\tDetect looped back messages by their Message-ID\n\
+-v\t\t\tIncrease verbosity\n\
+\n\
+MailDups " STR(VERSION) ", (c) " STR(YEAR) " Martin Mares <mj@ucw.cz>\n\
+It can be freely distributed and used according to the GNU GPL v2.\n\
+");
+ exit(1);
+}
+
+int
+main(int argc, char **argv)
+{
+ int c;
+ while ((c = getopt(argc, argv, "a:d:l:v")) >= 0)
+ switch (c)
+ {
+ case 'a':
+ max_age = atol(optarg) * 3600;
+ break;
+ case 'd':
+ db_name = optarg;
+ break;
+ case 'l':
+ {
+ char *c = strchr(optarg, '@');
+ if (!c)
+ usage();
+ *c++ = 0;
+ local_user = optarg;
+ local_domain = c;
+ break;
+ }
+ case 'v':
+ verbose++;
+ break;
+ default:
+ usage();
+ }
+ if (optind < argc)
+ usage();
+
+ now = time(NULL);
+
+ struct item msg;
+ int ok = parse_header(&msg);
+ switch (ok)
+ {
+ case 0:
+ puts("NO ID");
+ break;
+ case 1:
+ db_open();
+ int ret = db_lookup(&msg);
+ db_close();
+ puts(ret ? "DUP" : "OK");
+ break;
+ case 2:
+ puts("LOCAL");
+ break;
+ default:
+ puts("ERROR");
+ }
+
+ return 0;
+}
--- /dev/null
+/*
+ * SHA1 Secure Hash Algorithm.
+ *
+ * Derived by Martin Mares from Linux Kernel implementation, which is:
+ *
+ * Copyright (c) Alan Smithee.
+ * Copyright (c) Andrew McDonald <andrew@mcdonald.org.uk>
+ * Copyright (c) Jean-Francois Dive <jef@linuxbe.org>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ *
+ * SHA transform algorithm originally taken from code written by
+ * Peter Gutmann, and placed in the public domain.
+ */
+
+#include <stdint.h>
+#include <string.h>
+
+#include "util.h"
+
+static inline u32 rol32(u32 x, uns bits)
+{
+ return (x << bits) | (x >> (32 - bits));
+}
+
+#define SHA_WORKSPACE_WORDS 80
+
+/* The SHA f()-functions. */
+
+#define f1(x,y,z) (z ^ (x & (y ^ z))) /* x ? y : z */
+#define f2(x,y,z) (x ^ y ^ z) /* XOR */
+#define f3(x,y,z) ((x & y) + (z & (x ^ y))) /* majority */
+
+/* The SHA Mysterious Constants */
+
+#define K1 0x5A827999L /* Rounds 0-19: sqrt(2) * 2^30 */
+#define K2 0x6ED9EBA1L /* Rounds 20-39: sqrt(3) * 2^30 */
+#define K3 0x8F1BBCDCL /* Rounds 40-59: sqrt(5) * 2^30 */
+#define K4 0xCA62C1D6L /* Rounds 60-79: sqrt(10) * 2^30 */
+
+/*
+ * sha_transform: single block SHA1 transform
+ *
+ * @digest: 160 bit digest to update
+ * @data: 512 bits of data to hash
+ * @W: 80 words of workspace (see note)
+ *
+ * This function generates a SHA1 digest for a single 512-bit block.
+ * Be warned, it does not handle padding and message digest, do not
+ * confuse it with the full FIPS 180-1 digest algorithm for variable
+ * length messages.
+ *
+ * Note: If the hash is security sensitive, the caller should be sure
+ * to clear the workspace. This is left to the caller to avoid
+ * unnecessary clears between chained hashing operations.
+ */
+static void sha_transform(u32 *digest, const u8 *in, u32 *W)
+{
+ u32 a, b, c, d, e, t, i;
+
+ for (i = 0; i < 16; i++)
+ W[i] = (in[4*i] << 24) | (in[4*i+1] << 16) | (in[4*i+2] << 8) | in[4*i+3];
+
+ for (i = 0; i < 64; i++)
+ W[i+16] = rol32(W[i+13] ^ W[i+8] ^ W[i+2] ^ W[i], 1);
+
+ a = digest[0];
+ b = digest[1];
+ c = digest[2];
+ d = digest[3];
+ e = digest[4];
+
+ for (i = 0; i < 20; i++) {
+ t = f1(b, c, d) + K1 + rol32(a, 5) + e + W[i];
+ e = d; d = c; c = rol32(b, 30); b = a; a = t;
+ }
+
+ for (; i < 40; i ++) {
+ t = f2(b, c, d) + K2 + rol32(a, 5) + e + W[i];
+ e = d; d = c; c = rol32(b, 30); b = a; a = t;
+ }
+
+ for (; i < 60; i ++) {
+ t = f3(b, c, d) + K3 + rol32(a, 5) + e + W[i];
+ e = d; d = c; c = rol32(b, 30); b = a; a = t;
+ }
+
+ for (; i < 80; i ++) {
+ t = f2(b, c, d) + K4 + rol32(a, 5) + e + W[i];
+ e = d; d = c; c = rol32(b, 30); b = a; a = t;
+ }
+
+ digest[0] += a;
+ digest[1] += b;
+ digest[2] += c;
+ digest[3] += d;
+ digest[4] += e;
+}
+
+void sha1_init(struct sha1_ctx *sctx)
+{
+ static const struct sha1_ctx initstate = {
+ 0,
+ { 0x67452301, 0xEFCDAB89, 0x98BADCFE, 0x10325476, 0xC3D2E1F0 },
+ { 0, }
+ };
+
+ *sctx = initstate;
+}
+
+void sha1_update(struct sha1_ctx *sctx, const u8 *data, unsigned int len)
+{
+ unsigned int i, j;
+ u32 temp[SHA_WORKSPACE_WORDS];
+
+ j = (sctx->count >> 3) & 0x3f;
+ sctx->count += len << 3;
+
+ if ((j + len) > 63) {
+ memcpy(&sctx->buffer[j], data, (i = 64-j));
+ sha_transform(sctx->state, sctx->buffer, temp);
+ for ( ; i + 63 < len; i += 64) {
+ sha_transform(sctx->state, &data[i], temp);
+ }
+ j = 0;
+ }
+ else i = 0;
+ memcpy(&sctx->buffer[j], &data[i], len - i);
+}
+
+
+/* Add padding and return the message digest. */
+void sha1_final(struct sha1_ctx *sctx, u8 *out)
+{
+ u32 i, j, index, padlen;
+ u64 t;
+ u8 bits[8] = { 0, };
+ static const u8 padding[64] = { 0x80, };
+
+ t = sctx->count;
+ bits[7] = 0xff & t; t>>=8;
+ bits[6] = 0xff & t; t>>=8;
+ bits[5] = 0xff & t; t>>=8;
+ bits[4] = 0xff & t; t>>=8;
+ bits[3] = 0xff & t; t>>=8;
+ bits[2] = 0xff & t; t>>=8;
+ bits[1] = 0xff & t; t>>=8;
+ bits[0] = 0xff & t;
+
+ /* Pad out to 56 mod 64 */
+ index = (sctx->count >> 3) & 0x3f;
+ padlen = (index < 56) ? (56 - index) : ((64+56) - index);
+ sha1_update(sctx, padding, padlen);
+
+ /* Append length */
+ sha1_update(sctx, bits, sizeof bits);
+
+ /* Store state in digest */
+ for (i = j = 0; i < 5; i++, j += 4) {
+ u32 t2 = sctx->state[i];
+ out[j+3] = t2 & 0xff; t2>>=8;
+ out[j+2] = t2 & 0xff; t2>>=8;
+ out[j+1] = t2 & 0xff; t2>>=8;
+ out[j ] = t2 & 0xff;
+ }
+}
--- /dev/null
+/*
+ * Utility Functions
+ *
+ * (c) 2005--2006 Martin Mares <mj@ucw.cz>
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdarg.h>
+#include <string.h>
+
+#include "util.h"
+
+int verbose;
+
+void NONRET
+die(char *c, ...)
+{
+ va_list args;
+ va_start(args, c);
+ fprintf(stderr, "mdup: ");
+ vfprintf(stderr, c, args);
+ fputc('\n', stderr);
+ va_end(args);
+ exit(1);
+}
+
+void
+verb(int level, char *c, ...)
+{
+ va_list args;
+ va_start(args, c);
+ if (verbose >= level)
+ {
+ vfprintf(stderr, c, args);
+ fputc('\n', stderr);
+ }
+ va_end(args);
+}
+
+void *
+xmalloc(uns size)
+{
+ void *buf = malloc(size);
+ if (!buf)
+ die("Unable to allocate %d bytes of memory", size);
+ return buf;
+}
+
+void *
+xrealloc(void *old, uns size)
+{
+ void *buf = realloc(old, size);
+ if (!buf)
+ die("Unable to allocate %d bytes of memory", size);
+ return buf;
+}
--- /dev/null
+/*
+ * Various Utility Functions (extracted from the UCW Library)
+ *
+ * (c) 2005 Martin Mares <mj@ucw.cz>
+ */
+
+#include <stdint.h>
+
+#ifdef __GNUC__
+#define NONRET __attribute__((noreturn))
+#define UNUSED __attribute__((unused))
+#define likely(x) __builtin_expect((x),1)
+#define unlikely(x) __builtin_expect((x),0)
+#else
+#define NONRET
+#define UNUSED
+#define likely(x) (x)
+#define ulikely(x) (x)
+#endif
+
+#define STR2(c) #c
+#define STR(c) STR2(c)
+
+typedef unsigned int uns;
+typedef uint8_t u8;
+typedef uint16_t u16;
+typedef uint32_t u32;
+typedef uint64_t u64;
+
+/* util.c */
+
+extern int verbose;
+
+void NONRET die(char *x, ...);
+void verb(int level, char *x, ...);
+void *xmalloc(uns size);
+void *xrealloc(void *old, uns size);
+
+/* sha1.c */
+
+struct sha1_ctx {
+ u64 count;
+ u32 state[5];
+ u8 buffer[64];
+};
+
+void sha1_init(struct sha1_ctx *ctx);
+void sha1_update(struct sha1_ctx *sctx, const u8 *data, unsigned int len);
+void sha1_final(struct sha1_ctx *sctx, u8 *out);