From 9484ba4f3f2db1fe20e26d2f0195a0762d3893c1 Mon Sep 17 00:00:00 2001
From: Martin Mares <mj@ucw.cz>
Date: Sat, 7 Jan 2006 23:32:42 +0000
Subject: [PATCH] Initial revision

---
 Makefile |  22 ++++
 mdup.c   | 364 +++++++++++++++++++++++++++++++++++++++++++++++++++++++
 sha1.c   | 169 ++++++++++++++++++++++++++
 util.c   |  57 +++++++++
 util.h   |  49 ++++++++
 5 files changed, 661 insertions(+)
 create mode 100644 Makefile
 create mode 100644 mdup.c
 create mode 100644 sha1.c
 create mode 100644 util.c
 create mode 100644 util.h

diff --git a/Makefile b/Makefile
new file mode 100644
index 0000000..65127cc
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,22 @@
+#DEBUG=-ggdb
+CFLAGS=-O2 -Wall -W -Wno-parentheses -Wstrict-prototypes -Wmissing-prototypes -Winline $(DEBUG) -std=gnu99 -DVERSION=$(VERSION) -DYEAR=$(YEAR)
+
+VERSION=0.1
+YEAR=2006
+
+all: mdup
+
+mdup: mdup.o util.o sha1.o
+
+mdup.o: mdup.c util.h
+util.o: util.c util.h
+sha1.o: sha1.c util.h
+
+clean:
+	rm -f `find . -name "*~" -or -name "*.[oa]" -or -name "\#*\#" -or -name TAGS -or -name core`
+	rm -f mdup
+	rm -rf maint/dist
+
+distclean: clean
+
+.PHONY: all clean distclean
diff --git a/mdup.c b/mdup.c
new file mode 100644
index 0000000..0d35541
--- /dev/null
+++ b/mdup.c
@@ -0,0 +1,364 @@
+/*
+ *	Duplicate Mail Checker
+ *
+ *	(c) 2006 Martin Mares <mj@ucw.cz>
+ */
+
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+#include <getopt.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <pwd.h>
+#include <time.h>
+#include <sys/file.h>
+#include <sys/stat.h>
+#include <sys/mman.h>
+
+#include "util.h"
+
+static char *db_name;
+static uns max_age = 86400;
+static uns now;
+static char *local_user;
+static char *local_domain;
+
+struct item {
+  unsigned char digest[20];
+  uns timestamp;
+};
+
+static int db_fd;
+static uns db_items;
+static struct item *db_map;
+static uns db_map_size;
+
+static void
+db_open(void)
+{
+  if (!db_name)
+    {
+      struct passwd *pw = getpwuid(getuid());
+      if (!pw)
+	die("Sorry, but you don't exist!");
+      db_name = xmalloc(strlen(pw->pw_dir) + 10);
+      sprintf(db_name, "%s/.mdup.db", pw->pw_dir);
+    }
+  verb(2, "Opening database %s", db_name);
+  db_fd = open(db_name, O_RDWR | O_CREAT, 0600);
+  if (db_fd < 0)
+    die("Cannot open database %s: %m", db_name);
+  if (flock(db_fd, LOCK_EX) < 0)
+    die("Cannot flock %s: %m", db_name);
+
+  struct stat st;
+  if (fstat(db_fd, &st) < 0)
+    die("Cannot stat %s: %m", db_name);
+  if (st.st_size % sizeof(struct item))
+    die("Database %s is inconsistent: Size is not an integer number of records");
+  db_items = st.st_size / sizeof(struct item);
+  verb(2, "Mapping %d items", db_items);
+
+  db_map_size = sizeof(struct item) * (db_items+16);
+  db_map = mmap(NULL, db_map_size, PROT_READ | PROT_WRITE, MAP_SHARED, db_fd, 0);
+  if (db_map == MAP_FAILED)
+    die("Mmap on %s failed: %m", db_name);
+}
+
+static void
+db_close(void)
+{
+  munmap(db_map, db_map_size);
+  flock(db_fd, LOCK_UN);
+  close(db_fd);
+}
+
+static int
+db_lookup(struct item *new)
+{
+  struct item *free = NULL;
+
+  for (uns i=0; i<db_items; i++)
+    {
+      struct item *t = &db_map[i];
+      if (t->timestamp <= now && now - t->timestamp > max_age)
+	{
+	  if (!free)
+	    free = t;
+	}
+      else if (!memcmp(t->digest, new->digest, 20))
+	{
+	  verb(2, "Found at item %d, age %d sec", i, now - t->timestamp);
+	  t->timestamp = now;
+	  return 1;
+	}
+    }
+
+  if (!free)
+    {
+      if (sizeof(struct item) * db_items >= db_map_size)
+	die("Internal error: map window too small");
+      free = &db_map[db_items++];
+      if (ftruncate(db_fd, sizeof(struct item) * db_items) < 0)
+	die("Cannot enlarge %s: %m", db_name);
+    }
+  verb(2, "Creating new entry");
+  *free = *new;
+  free->timestamp = now;
+  return 0;
+}
+
+#define MAX_HDR_LEN 1024
+
+static char *
+skip_cfws(char *c)
+{
+  int nest = 0;
+
+  for (;;)
+    {
+      if (!*c)
+	return c;
+      else if (*c == ' ')
+	;
+      else if (*c == '(')
+	nest++;
+      else if (!nest)
+	return c;
+      else if (*c == ')')
+	nest--;
+      else if (*c == '\\' && c[1])
+	c++;
+      c++;
+    }
+}
+
+static void
+parse_header_line(char *line, uns cnt, struct item *item)
+{
+  if (!cnt)
+    return;
+  if (cnt >= MAX_HDR_LEN)
+    {
+      verb(2, "HDR: <too long>");
+      return;
+    }
+
+  line[cnt] = 0;
+  verb(2, "HDR: %s", line);
+  if (strncasecmp(line, "Message-ID: ", 12))
+    return;
+  char *c = skip_cfws(line+12);
+  if (*c++ != '<')
+    return;
+
+  char lhs[MAX_HDR_LEN], *l = lhs;
+  if (*c == '\"')		// LHS is no-fold-quote
+    {
+      c++;
+      while (*c != '\"')
+	{
+	  if (!*c)
+	    return;
+	  else if (*c == '\\' && c[1])
+	    {
+	      *l++ = c[1];
+	      c += 2;
+	    }
+	  else
+	    *l++ = *c++;
+	}
+      c++;
+    }
+  else				// LHS is dot-atom-text
+    {
+      while (*c && *c != '@')
+	*l++ = *c++;
+    }
+  *l++ = 0;
+
+  if (*c++ != '@')		// "@" is mandatory
+    return;
+
+  char rhs[MAX_HDR_LEN], *r = rhs;
+  if (*c == '[')		// RHS is no-fold-literal
+    {
+      while (*c != ']')
+	{
+	  if (!*c)
+	    return;
+	  else if (*c == '\\' && c[1])
+	    {
+	      *r++ = c[1];
+	      c += 2;
+	    }
+	  else
+	    *r++ = *c++;
+	}
+      c++;
+    }
+  else				// RHS is dot-atom-text
+    {
+      while (*c && *c != '>')
+	*r++ = *c++;
+    }
+  *r++ = 0;
+
+  if (*c != '>')
+    return;
+
+  *c = 0;
+  verb(1, "Parsed Message-ID <%s@%s>", lhs, rhs);
+
+  if (local_domain && local_user)
+    {
+      uns lul = strlen(local_user);
+      if (!strcasecmp(rhs, local_domain) &&
+	  !strncasecmp(lhs, local_user, lul) &&
+	  !strncasecmp(lhs+lul, "+md+", 4))
+	{
+	  verb(1, "Detected local Message-ID");
+	  item->timestamp = 2;
+	  return;
+	}
+    }
+
+  struct sha1_ctx ctx;
+  sha1_init(&ctx);
+  sha1_update(&ctx, lhs, l-lhs);
+  sha1_update(&ctx, rhs, r-rhs);
+  sha1_final(&ctx, item->digest);
+  item->timestamp = 1;
+  if (verbose >= 1)
+    {
+      fprintf(stderr, "Digest: ");
+      for (uns i=0; i<20; i++)
+	fprintf(stderr, "%02x", item->digest[i]);
+      fprintf(stderr, "\n");
+    }
+}
+
+static int
+parse_header(struct item *item)
+{
+  char buf[MAX_HDR_LEN+1];
+  uns cnt = 0;
+  uns last_nl = 0;
+
+  item->timestamp = 0;
+  for (;;)
+    {
+      int c = getchar();
+      if (c < 0)
+	{
+	  verb(1, "Incomplete header");
+	  return -1;
+	}
+      if (c == '\r')
+	;
+      else if (c == '\n')
+	{
+	  if (cnt == last_nl)	// End of header
+	    {
+	      parse_header_line(buf, cnt, item);
+	      return item->timestamp;
+	    }
+	  last_nl = cnt;
+	}
+      else if (c == ' ' || c == '\t' || !c)
+	{
+	  if (!cnt)
+	    {
+	      verb(1, "Misplaced whitespace at the beginning of header");
+	      return -1;
+	    }
+	  if (cnt < MAX_HDR_LEN)
+	    buf[cnt++] = ' ';
+	}
+      else
+	{
+	  if (cnt == last_nl)
+	    {
+	      parse_header_line(buf, cnt, item);
+	      cnt = last_nl = 0;
+	    }
+	  if (cnt < MAX_HDR_LEN)
+	    buf[cnt++] = c;
+	}
+    }
+}
+
+static void NONRET
+usage(void)
+{
+  fprintf(stderr, "Usage: mdup [<options>]\n\
+\n\
+Options:\n\
+-a <age>\t\tRecords older than <age> hours are ignored (default: 24)\n\
+-d <db>\t\t\tUse <db> as a Message-ID database (default: ~/.mdup.db; beware of NFS)\n\
+-l <user>@<domain>\tDetect looped back messages by their Message-ID\n\
+-v\t\t\tIncrease verbosity\n\
+\n\
+MailDups " STR(VERSION) ", (c) " STR(YEAR) " Martin Mares <mj@ucw.cz>\n\
+It can be freely distributed and used according to the GNU GPL v2.\n\
+");
+  exit(1);
+}
+
+int
+main(int argc, char **argv)
+{
+  int c;
+  while ((c = getopt(argc, argv, "a:d:l:v")) >= 0)
+    switch (c)
+      {
+      case 'a':
+	max_age = atol(optarg) * 3600;
+	break;
+      case 'd':
+	db_name = optarg;
+	break;
+      case 'l':
+	{
+	  char *c = strchr(optarg, '@');
+	  if (!c)
+	    usage();
+	  *c++ = 0;
+	  local_user = optarg;
+	  local_domain = c;
+	  break;
+	}
+      case 'v':
+	verbose++;
+	break;
+      default:
+	usage();
+      }
+  if (optind < argc)
+    usage();
+
+  now = time(NULL);
+
+  struct item msg;
+  int ok = parse_header(&msg);
+  switch (ok)
+    {
+    case 0:
+      puts("NO ID");
+      break;
+    case 1:
+      db_open();
+      int ret = db_lookup(&msg);
+      db_close();
+      puts(ret ? "DUP" : "OK");
+      break;
+    case 2:
+      puts("LOCAL");
+      break;
+    default:
+      puts("ERROR");
+    }
+
+  return 0;
+}
diff --git a/sha1.c b/sha1.c
new file mode 100644
index 0000000..14d4f72
--- /dev/null
+++ b/sha1.c
@@ -0,0 +1,169 @@
+/*
+ * SHA1 Secure Hash Algorithm.
+ *
+ * Derived by Martin Mares from Linux Kernel implementation, which is:
+ *
+ * Copyright (c) Alan Smithee.
+ * Copyright (c) Andrew McDonald <andrew@mcdonald.org.uk>
+ * Copyright (c) Jean-Francois Dive <jef@linuxbe.org>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ *
+ * SHA transform algorithm originally taken from code written by
+ * Peter Gutmann, and placed in the public domain.
+ */
+
+#include <stdint.h>
+#include <string.h>
+
+#include "util.h"
+
+static inline u32 rol32(u32 x, uns bits)
+{
+	return (x << bits) | (x >> (32 - bits));
+}
+
+#define SHA_WORKSPACE_WORDS 80
+
+/* The SHA f()-functions.  */
+
+#define f1(x,y,z)   (z ^ (x & (y ^ z)))		/* x ? y : z */
+#define f2(x,y,z)   (x ^ y ^ z)			/* XOR */
+#define f3(x,y,z)   ((x & y) + (z & (x ^ y)))	/* majority */
+
+/* The SHA Mysterious Constants */
+
+#define K1  0x5A827999L			/* Rounds  0-19: sqrt(2) * 2^30 */
+#define K2  0x6ED9EBA1L			/* Rounds 20-39: sqrt(3) * 2^30 */
+#define K3  0x8F1BBCDCL			/* Rounds 40-59: sqrt(5) * 2^30 */
+#define K4  0xCA62C1D6L			/* Rounds 60-79: sqrt(10) * 2^30 */
+
+/*
+ * sha_transform: single block SHA1 transform
+ *
+ * @digest: 160 bit digest to update
+ * @data:   512 bits of data to hash
+ * @W:      80 words of workspace (see note)
+ *
+ * This function generates a SHA1 digest for a single 512-bit block.
+ * Be warned, it does not handle padding and message digest, do not
+ * confuse it with the full FIPS 180-1 digest algorithm for variable
+ * length messages.
+ *
+ * Note: If the hash is security sensitive, the caller should be sure
+ * to clear the workspace. This is left to the caller to avoid
+ * unnecessary clears between chained hashing operations.
+ */
+static void sha_transform(u32 *digest, const u8 *in, u32 *W)
+{
+	u32 a, b, c, d, e, t, i;
+
+	for (i = 0; i < 16; i++)
+		W[i] = (in[4*i] << 24) | (in[4*i+1] << 16) | (in[4*i+2] << 8) | in[4*i+3];
+
+	for (i = 0; i < 64; i++)
+		W[i+16] = rol32(W[i+13] ^ W[i+8] ^ W[i+2] ^ W[i], 1);
+
+	a = digest[0];
+	b = digest[1];
+	c = digest[2];
+	d = digest[3];
+	e = digest[4];
+
+	for (i = 0; i < 20; i++) {
+		t = f1(b, c, d) + K1 + rol32(a, 5) + e + W[i];
+		e = d; d = c; c = rol32(b, 30); b = a; a = t;
+	}
+
+	for (; i < 40; i ++) {
+		t = f2(b, c, d) + K2 + rol32(a, 5) + e + W[i];
+		e = d; d = c; c = rol32(b, 30); b = a; a = t;
+	}
+
+	for (; i < 60; i ++) {
+		t = f3(b, c, d) + K3 + rol32(a, 5) + e + W[i];
+		e = d; d = c; c = rol32(b, 30); b = a; a = t;
+	}
+
+	for (; i < 80; i ++) {
+		t = f2(b, c, d) + K4 + rol32(a, 5) + e + W[i];
+		e = d; d = c; c = rol32(b, 30); b = a; a = t;
+	}
+
+	digest[0] += a;
+	digest[1] += b;
+	digest[2] += c;
+	digest[3] += d;
+	digest[4] += e;
+}
+
+void sha1_init(struct sha1_ctx *sctx)
+{
+	static const struct sha1_ctx initstate = {
+	  0,
+	  { 0x67452301, 0xEFCDAB89, 0x98BADCFE, 0x10325476, 0xC3D2E1F0 },
+	  { 0, }
+	};
+
+	*sctx = initstate;
+}
+
+void sha1_update(struct sha1_ctx *sctx, const u8 *data, unsigned int len)
+{
+	unsigned int i, j;
+	u32 temp[SHA_WORKSPACE_WORDS];
+
+	j = (sctx->count >> 3) & 0x3f;
+	sctx->count += len << 3;
+
+	if ((j + len) > 63) {
+		memcpy(&sctx->buffer[j], data, (i = 64-j));
+		sha_transform(sctx->state, sctx->buffer, temp);
+		for ( ; i + 63 < len; i += 64) {
+			sha_transform(sctx->state, &data[i], temp);
+		}
+		j = 0;
+	}
+	else i = 0;
+	memcpy(&sctx->buffer[j], &data[i], len - i);
+}
+
+
+/* Add padding and return the message digest. */
+void sha1_final(struct sha1_ctx *sctx, u8 *out)
+{
+	u32 i, j, index, padlen;
+	u64 t;
+	u8 bits[8] = { 0, };
+	static const u8 padding[64] = { 0x80, };
+
+	t = sctx->count;
+	bits[7] = 0xff & t; t>>=8;
+	bits[6] = 0xff & t; t>>=8;
+	bits[5] = 0xff & t; t>>=8;
+	bits[4] = 0xff & t; t>>=8;
+	bits[3] = 0xff & t; t>>=8;
+	bits[2] = 0xff & t; t>>=8;
+	bits[1] = 0xff & t; t>>=8;
+	bits[0] = 0xff & t;
+
+	/* Pad out to 56 mod 64 */
+	index = (sctx->count >> 3) & 0x3f;
+	padlen = (index < 56) ? (56 - index) : ((64+56) - index);
+	sha1_update(sctx, padding, padlen);
+
+	/* Append length */
+	sha1_update(sctx, bits, sizeof bits); 
+
+	/* Store state in digest */
+	for (i = j = 0; i < 5; i++, j += 4) {
+		u32 t2 = sctx->state[i];
+		out[j+3] = t2 & 0xff; t2>>=8;
+		out[j+2] = t2 & 0xff; t2>>=8;
+		out[j+1] = t2 & 0xff; t2>>=8;
+		out[j  ] = t2 & 0xff;
+	}
+}
diff --git a/util.c b/util.c
new file mode 100644
index 0000000..d3e24a1
--- /dev/null
+++ b/util.c
@@ -0,0 +1,57 @@
+/*
+ *	Utility Functions
+ *
+ *	(c) 2005--2006 Martin Mares <mj@ucw.cz>
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdarg.h>
+#include <string.h>
+
+#include "util.h"
+
+int verbose;
+
+void NONRET
+die(char *c, ...)
+{
+  va_list args;
+  va_start(args, c);
+  fprintf(stderr, "mdup: ");
+  vfprintf(stderr, c, args);
+  fputc('\n', stderr);
+  va_end(args);
+  exit(1);
+}
+
+void
+verb(int level, char *c, ...)
+{
+  va_list args;
+  va_start(args, c);
+  if (verbose >= level)
+    {
+      vfprintf(stderr, c, args);
+      fputc('\n', stderr);
+    }
+  va_end(args);
+}
+
+void *
+xmalloc(uns size)
+{
+  void *buf = malloc(size);
+  if (!buf)
+    die("Unable to allocate %d bytes of memory", size);
+  return buf;
+}
+
+void *
+xrealloc(void *old, uns size)
+{
+  void *buf = realloc(old, size);
+  if (!buf)
+    die("Unable to allocate %d bytes of memory", size);
+  return buf;
+}
diff --git a/util.h b/util.h
new file mode 100644
index 0000000..66c78ce
--- /dev/null
+++ b/util.h
@@ -0,0 +1,49 @@
+/*
+ *	Various Utility Functions (extracted from the UCW Library)
+ *
+ *	(c) 2005 Martin Mares <mj@ucw.cz>
+ */
+
+#include <stdint.h>
+
+#ifdef __GNUC__
+#define NONRET __attribute__((noreturn))
+#define UNUSED __attribute__((unused))
+#define likely(x) __builtin_expect((x),1)
+#define unlikely(x) __builtin_expect((x),0)
+#else
+#define NONRET
+#define UNUSED
+#define likely(x) (x)
+#define ulikely(x) (x)
+#endif
+
+#define STR2(c) #c
+#define STR(c) STR2(c)
+
+typedef unsigned int uns;
+typedef uint8_t u8;
+typedef uint16_t u16;
+typedef uint32_t u32;
+typedef uint64_t u64;
+
+/* util.c */
+
+extern int verbose;
+
+void NONRET die(char *x, ...);
+void verb(int level, char *x, ...);
+void *xmalloc(uns size);
+void *xrealloc(void *old, uns size);
+
+/* sha1.c */
+
+struct sha1_ctx {
+        u64 count;
+        u32 state[5];
+        u8 buffer[64];
+};
+
+void sha1_init(struct sha1_ctx *ctx);
+void sha1_update(struct sha1_ctx *sctx, const u8 *data, unsigned int len);
+void sha1_final(struct sha1_ctx *sctx, u8 *out);
-- 
2.39.2