From 9484ba4f3f2db1fe20e26d2f0195a0762d3893c1 Mon Sep 17 00:00:00 2001 From: Martin Mares Date: Sat, 7 Jan 2006 23:32:42 +0000 Subject: [PATCH] Initial revision --- Makefile | 22 ++++ mdup.c | 364 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ sha1.c | 169 ++++++++++++++++++++++++++ util.c | 57 +++++++++ util.h | 49 ++++++++ 5 files changed, 661 insertions(+) create mode 100644 Makefile create mode 100644 mdup.c create mode 100644 sha1.c create mode 100644 util.c create mode 100644 util.h diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..65127cc --- /dev/null +++ b/Makefile @@ -0,0 +1,22 @@ +#DEBUG=-ggdb +CFLAGS=-O2 -Wall -W -Wno-parentheses -Wstrict-prototypes -Wmissing-prototypes -Winline $(DEBUG) -std=gnu99 -DVERSION=$(VERSION) -DYEAR=$(YEAR) + +VERSION=0.1 +YEAR=2006 + +all: mdup + +mdup: mdup.o util.o sha1.o + +mdup.o: mdup.c util.h +util.o: util.c util.h +sha1.o: sha1.c util.h + +clean: + rm -f `find . -name "*~" -or -name "*.[oa]" -or -name "\#*\#" -or -name TAGS -or -name core` + rm -f mdup + rm -rf maint/dist + +distclean: clean + +.PHONY: all clean distclean diff --git a/mdup.c b/mdup.c new file mode 100644 index 0000000..0d35541 --- /dev/null +++ b/mdup.c @@ -0,0 +1,364 @@ +/* + * Duplicate Mail Checker + * + * (c) 2006 Martin Mares + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "util.h" + +static char *db_name; +static uns max_age = 86400; +static uns now; +static char *local_user; +static char *local_domain; + +struct item { + unsigned char digest[20]; + uns timestamp; +}; + +static int db_fd; +static uns db_items; +static struct item *db_map; +static uns db_map_size; + +static void +db_open(void) +{ + if (!db_name) + { + struct passwd *pw = getpwuid(getuid()); + if (!pw) + die("Sorry, but you don't exist!"); + db_name = xmalloc(strlen(pw->pw_dir) + 10); + sprintf(db_name, "%s/.mdup.db", pw->pw_dir); + } + verb(2, "Opening database %s", db_name); + db_fd = open(db_name, O_RDWR | O_CREAT, 0600); + if (db_fd < 0) + die("Cannot open database %s: %m", db_name); + if (flock(db_fd, LOCK_EX) < 0) + die("Cannot flock %s: %m", db_name); + + struct stat st; + if (fstat(db_fd, &st) < 0) + die("Cannot stat %s: %m", db_name); + if (st.st_size % sizeof(struct item)) + die("Database %s is inconsistent: Size is not an integer number of records"); + db_items = st.st_size / sizeof(struct item); + verb(2, "Mapping %d items", db_items); + + db_map_size = sizeof(struct item) * (db_items+16); + db_map = mmap(NULL, db_map_size, PROT_READ | PROT_WRITE, MAP_SHARED, db_fd, 0); + if (db_map == MAP_FAILED) + die("Mmap on %s failed: %m", db_name); +} + +static void +db_close(void) +{ + munmap(db_map, db_map_size); + flock(db_fd, LOCK_UN); + close(db_fd); +} + +static int +db_lookup(struct item *new) +{ + struct item *free = NULL; + + for (uns i=0; itimestamp <= now && now - t->timestamp > max_age) + { + if (!free) + free = t; + } + else if (!memcmp(t->digest, new->digest, 20)) + { + verb(2, "Found at item %d, age %d sec", i, now - t->timestamp); + t->timestamp = now; + return 1; + } + } + + if (!free) + { + if (sizeof(struct item) * db_items >= db_map_size) + die("Internal error: map window too small"); + free = &db_map[db_items++]; + if (ftruncate(db_fd, sizeof(struct item) * db_items) < 0) + die("Cannot enlarge %s: %m", db_name); + } + verb(2, "Creating new entry"); + *free = *new; + free->timestamp = now; + return 0; +} + +#define MAX_HDR_LEN 1024 + +static char * +skip_cfws(char *c) +{ + int nest = 0; + + for (;;) + { + if (!*c) + return c; + else if (*c == ' ') + ; + else if (*c == '(') + nest++; + else if (!nest) + return c; + else if (*c == ')') + nest--; + else if (*c == '\\' && c[1]) + c++; + c++; + } +} + +static void +parse_header_line(char *line, uns cnt, struct item *item) +{ + if (!cnt) + return; + if (cnt >= MAX_HDR_LEN) + { + verb(2, "HDR: "); + return; + } + + line[cnt] = 0; + verb(2, "HDR: %s", line); + if (strncasecmp(line, "Message-ID: ", 12)) + return; + char *c = skip_cfws(line+12); + if (*c++ != '<') + return; + + char lhs[MAX_HDR_LEN], *l = lhs; + if (*c == '\"') // LHS is no-fold-quote + { + c++; + while (*c != '\"') + { + if (!*c) + return; + else if (*c == '\\' && c[1]) + { + *l++ = c[1]; + c += 2; + } + else + *l++ = *c++; + } + c++; + } + else // LHS is dot-atom-text + { + while (*c && *c != '@') + *l++ = *c++; + } + *l++ = 0; + + if (*c++ != '@') // "@" is mandatory + return; + + char rhs[MAX_HDR_LEN], *r = rhs; + if (*c == '[') // RHS is no-fold-literal + { + while (*c != ']') + { + if (!*c) + return; + else if (*c == '\\' && c[1]) + { + *r++ = c[1]; + c += 2; + } + else + *r++ = *c++; + } + c++; + } + else // RHS is dot-atom-text + { + while (*c && *c != '>') + *r++ = *c++; + } + *r++ = 0; + + if (*c != '>') + return; + + *c = 0; + verb(1, "Parsed Message-ID <%s@%s>", lhs, rhs); + + if (local_domain && local_user) + { + uns lul = strlen(local_user); + if (!strcasecmp(rhs, local_domain) && + !strncasecmp(lhs, local_user, lul) && + !strncasecmp(lhs+lul, "+md+", 4)) + { + verb(1, "Detected local Message-ID"); + item->timestamp = 2; + return; + } + } + + struct sha1_ctx ctx; + sha1_init(&ctx); + sha1_update(&ctx, lhs, l-lhs); + sha1_update(&ctx, rhs, r-rhs); + sha1_final(&ctx, item->digest); + item->timestamp = 1; + if (verbose >= 1) + { + fprintf(stderr, "Digest: "); + for (uns i=0; i<20; i++) + fprintf(stderr, "%02x", item->digest[i]); + fprintf(stderr, "\n"); + } +} + +static int +parse_header(struct item *item) +{ + char buf[MAX_HDR_LEN+1]; + uns cnt = 0; + uns last_nl = 0; + + item->timestamp = 0; + for (;;) + { + int c = getchar(); + if (c < 0) + { + verb(1, "Incomplete header"); + return -1; + } + if (c == '\r') + ; + else if (c == '\n') + { + if (cnt == last_nl) // End of header + { + parse_header_line(buf, cnt, item); + return item->timestamp; + } + last_nl = cnt; + } + else if (c == ' ' || c == '\t' || !c) + { + if (!cnt) + { + verb(1, "Misplaced whitespace at the beginning of header"); + return -1; + } + if (cnt < MAX_HDR_LEN) + buf[cnt++] = ' '; + } + else + { + if (cnt == last_nl) + { + parse_header_line(buf, cnt, item); + cnt = last_nl = 0; + } + if (cnt < MAX_HDR_LEN) + buf[cnt++] = c; + } + } +} + +static void NONRET +usage(void) +{ + fprintf(stderr, "Usage: mdup []\n\ +\n\ +Options:\n\ +-a \t\tRecords older than hours are ignored (default: 24)\n\ +-d \t\t\tUse as a Message-ID database (default: ~/.mdup.db; beware of NFS)\n\ +-l @\tDetect looped back messages by their Message-ID\n\ +-v\t\t\tIncrease verbosity\n\ +\n\ +MailDups " STR(VERSION) ", (c) " STR(YEAR) " Martin Mares \n\ +It can be freely distributed and used according to the GNU GPL v2.\n\ +"); + exit(1); +} + +int +main(int argc, char **argv) +{ + int c; + while ((c = getopt(argc, argv, "a:d:l:v")) >= 0) + switch (c) + { + case 'a': + max_age = atol(optarg) * 3600; + break; + case 'd': + db_name = optarg; + break; + case 'l': + { + char *c = strchr(optarg, '@'); + if (!c) + usage(); + *c++ = 0; + local_user = optarg; + local_domain = c; + break; + } + case 'v': + verbose++; + break; + default: + usage(); + } + if (optind < argc) + usage(); + + now = time(NULL); + + struct item msg; + int ok = parse_header(&msg); + switch (ok) + { + case 0: + puts("NO ID"); + break; + case 1: + db_open(); + int ret = db_lookup(&msg); + db_close(); + puts(ret ? "DUP" : "OK"); + break; + case 2: + puts("LOCAL"); + break; + default: + puts("ERROR"); + } + + return 0; +} diff --git a/sha1.c b/sha1.c new file mode 100644 index 0000000..14d4f72 --- /dev/null +++ b/sha1.c @@ -0,0 +1,169 @@ +/* + * SHA1 Secure Hash Algorithm. + * + * Derived by Martin Mares from Linux Kernel implementation, which is: + * + * Copyright (c) Alan Smithee. + * Copyright (c) Andrew McDonald + * Copyright (c) Jean-Francois Dive + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. + * + * SHA transform algorithm originally taken from code written by + * Peter Gutmann, and placed in the public domain. + */ + +#include +#include + +#include "util.h" + +static inline u32 rol32(u32 x, uns bits) +{ + return (x << bits) | (x >> (32 - bits)); +} + +#define SHA_WORKSPACE_WORDS 80 + +/* The SHA f()-functions. */ + +#define f1(x,y,z) (z ^ (x & (y ^ z))) /* x ? y : z */ +#define f2(x,y,z) (x ^ y ^ z) /* XOR */ +#define f3(x,y,z) ((x & y) + (z & (x ^ y))) /* majority */ + +/* The SHA Mysterious Constants */ + +#define K1 0x5A827999L /* Rounds 0-19: sqrt(2) * 2^30 */ +#define K2 0x6ED9EBA1L /* Rounds 20-39: sqrt(3) * 2^30 */ +#define K3 0x8F1BBCDCL /* Rounds 40-59: sqrt(5) * 2^30 */ +#define K4 0xCA62C1D6L /* Rounds 60-79: sqrt(10) * 2^30 */ + +/* + * sha_transform: single block SHA1 transform + * + * @digest: 160 bit digest to update + * @data: 512 bits of data to hash + * @W: 80 words of workspace (see note) + * + * This function generates a SHA1 digest for a single 512-bit block. + * Be warned, it does not handle padding and message digest, do not + * confuse it with the full FIPS 180-1 digest algorithm for variable + * length messages. + * + * Note: If the hash is security sensitive, the caller should be sure + * to clear the workspace. This is left to the caller to avoid + * unnecessary clears between chained hashing operations. + */ +static void sha_transform(u32 *digest, const u8 *in, u32 *W) +{ + u32 a, b, c, d, e, t, i; + + for (i = 0; i < 16; i++) + W[i] = (in[4*i] << 24) | (in[4*i+1] << 16) | (in[4*i+2] << 8) | in[4*i+3]; + + for (i = 0; i < 64; i++) + W[i+16] = rol32(W[i+13] ^ W[i+8] ^ W[i+2] ^ W[i], 1); + + a = digest[0]; + b = digest[1]; + c = digest[2]; + d = digest[3]; + e = digest[4]; + + for (i = 0; i < 20; i++) { + t = f1(b, c, d) + K1 + rol32(a, 5) + e + W[i]; + e = d; d = c; c = rol32(b, 30); b = a; a = t; + } + + for (; i < 40; i ++) { + t = f2(b, c, d) + K2 + rol32(a, 5) + e + W[i]; + e = d; d = c; c = rol32(b, 30); b = a; a = t; + } + + for (; i < 60; i ++) { + t = f3(b, c, d) + K3 + rol32(a, 5) + e + W[i]; + e = d; d = c; c = rol32(b, 30); b = a; a = t; + } + + for (; i < 80; i ++) { + t = f2(b, c, d) + K4 + rol32(a, 5) + e + W[i]; + e = d; d = c; c = rol32(b, 30); b = a; a = t; + } + + digest[0] += a; + digest[1] += b; + digest[2] += c; + digest[3] += d; + digest[4] += e; +} + +void sha1_init(struct sha1_ctx *sctx) +{ + static const struct sha1_ctx initstate = { + 0, + { 0x67452301, 0xEFCDAB89, 0x98BADCFE, 0x10325476, 0xC3D2E1F0 }, + { 0, } + }; + + *sctx = initstate; +} + +void sha1_update(struct sha1_ctx *sctx, const u8 *data, unsigned int len) +{ + unsigned int i, j; + u32 temp[SHA_WORKSPACE_WORDS]; + + j = (sctx->count >> 3) & 0x3f; + sctx->count += len << 3; + + if ((j + len) > 63) { + memcpy(&sctx->buffer[j], data, (i = 64-j)); + sha_transform(sctx->state, sctx->buffer, temp); + for ( ; i + 63 < len; i += 64) { + sha_transform(sctx->state, &data[i], temp); + } + j = 0; + } + else i = 0; + memcpy(&sctx->buffer[j], &data[i], len - i); +} + + +/* Add padding and return the message digest. */ +void sha1_final(struct sha1_ctx *sctx, u8 *out) +{ + u32 i, j, index, padlen; + u64 t; + u8 bits[8] = { 0, }; + static const u8 padding[64] = { 0x80, }; + + t = sctx->count; + bits[7] = 0xff & t; t>>=8; + bits[6] = 0xff & t; t>>=8; + bits[5] = 0xff & t; t>>=8; + bits[4] = 0xff & t; t>>=8; + bits[3] = 0xff & t; t>>=8; + bits[2] = 0xff & t; t>>=8; + bits[1] = 0xff & t; t>>=8; + bits[0] = 0xff & t; + + /* Pad out to 56 mod 64 */ + index = (sctx->count >> 3) & 0x3f; + padlen = (index < 56) ? (56 - index) : ((64+56) - index); + sha1_update(sctx, padding, padlen); + + /* Append length */ + sha1_update(sctx, bits, sizeof bits); + + /* Store state in digest */ + for (i = j = 0; i < 5; i++, j += 4) { + u32 t2 = sctx->state[i]; + out[j+3] = t2 & 0xff; t2>>=8; + out[j+2] = t2 & 0xff; t2>>=8; + out[j+1] = t2 & 0xff; t2>>=8; + out[j ] = t2 & 0xff; + } +} diff --git a/util.c b/util.c new file mode 100644 index 0000000..d3e24a1 --- /dev/null +++ b/util.c @@ -0,0 +1,57 @@ +/* + * Utility Functions + * + * (c) 2005--2006 Martin Mares + */ + +#include +#include +#include +#include + +#include "util.h" + +int verbose; + +void NONRET +die(char *c, ...) +{ + va_list args; + va_start(args, c); + fprintf(stderr, "mdup: "); + vfprintf(stderr, c, args); + fputc('\n', stderr); + va_end(args); + exit(1); +} + +void +verb(int level, char *c, ...) +{ + va_list args; + va_start(args, c); + if (verbose >= level) + { + vfprintf(stderr, c, args); + fputc('\n', stderr); + } + va_end(args); +} + +void * +xmalloc(uns size) +{ + void *buf = malloc(size); + if (!buf) + die("Unable to allocate %d bytes of memory", size); + return buf; +} + +void * +xrealloc(void *old, uns size) +{ + void *buf = realloc(old, size); + if (!buf) + die("Unable to allocate %d bytes of memory", size); + return buf; +} diff --git a/util.h b/util.h new file mode 100644 index 0000000..66c78ce --- /dev/null +++ b/util.h @@ -0,0 +1,49 @@ +/* + * Various Utility Functions (extracted from the UCW Library) + * + * (c) 2005 Martin Mares + */ + +#include + +#ifdef __GNUC__ +#define NONRET __attribute__((noreturn)) +#define UNUSED __attribute__((unused)) +#define likely(x) __builtin_expect((x),1) +#define unlikely(x) __builtin_expect((x),0) +#else +#define NONRET +#define UNUSED +#define likely(x) (x) +#define ulikely(x) (x) +#endif + +#define STR2(c) #c +#define STR(c) STR2(c) + +typedef unsigned int uns; +typedef uint8_t u8; +typedef uint16_t u16; +typedef uint32_t u32; +typedef uint64_t u64; + +/* util.c */ + +extern int verbose; + +void NONRET die(char *x, ...); +void verb(int level, char *x, ...); +void *xmalloc(uns size); +void *xrealloc(void *old, uns size); + +/* sha1.c */ + +struct sha1_ctx { + u64 count; + u32 state[5]; + u8 buffer[64]; +}; + +void sha1_init(struct sha1_ctx *ctx); +void sha1_update(struct sha1_ctx *sctx, const u8 *data, unsigned int len); +void sha1_final(struct sha1_ctx *sctx, u8 *out); -- 2.39.2