Moved all functions related to charsets to a separate file and

author Martin Mares <mj@ucw.cz>

Mon, 25 Jun 2007 16:43:43 +0000 (18:43 +0200)

committer Martin Mares <mj@ucw.cz>

Mon, 25 Jun 2007 16:43:43 +0000 (18:43 +0200)
author Martin Mares <mj@ucw.cz>
Mon, 25 Jun 2007 16:43:43 +0000 (18:43 +0200)
committer Martin Mares <mj@ucw.cz>
Mon, 25 Jun 2007 16:43:43 +0000 (18:43 +0200)
diff --git a/Makefile b/Makefile

index 9b28ac6de76243f49566f79e913962dd98f356bc..38cf8b1d83b6c224937b7b88dc5fdf56c514ab0e 100644 (file)
--- a/Makefile
+++ b/Makefile
@@ -7,11 +7,11 @@ YEAR=2006
  
  all: cm
  
-cm: cm.o util.o rfc2047.o
+cm: cm.o util.o charset.o
  
  cm.o: cm.c clists.h util.h
  util.o: util.c util.h
-rfc2047.o: rfc2047.c util.h
+charset.o: charset.c util.h charset.h
  
  clean:
         rm -f `find . -name "*~" -or -name "*.[oa]" -or -name "\#*\#" -or -name TAGS -or -name core`
diff --git a/charset.c b/charset.c

new file mode 100644 (file)

index 0000000..606d8dc
--- /dev/null
+++ b/charset.c
@@ -0,0 +1,424 @@
+/*
+ *     Incoming Mail Checker: Charsets
+ *
+ *     (c) 2007 Martin Mares <mj@ucw.cz>
+ *
+ * The code for parsing rfc2047 encoding of headers has been adapted
+ * from the Mutt 1.5.16 MUA. Here is the original copyright message:
+ *
+ * Copyright (C) 1996-2000 Michael R. Elkins <me@mutt.org>
+ * Copyright (C) 2000-2001 Edmund Grimley Evans <edmundo@rano.org>
+ *
+ *     This program is free software; you can redistribute it and/or modify
+ *     it under the terms of the GNU General Public License as published by
+ *     the Free Software Foundation; either version 2 of the License, or
+ *     (at your option) any later version.
+ *
+ *     This program is distributed in the hope that it will be useful,
+ *     but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *     GNU General Public License for more details.
+ *
+ *     You should have received a copy of the GNU General Public License
+ *     along with this program; if not, write to the Free Software
+ *     Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#include "util.h"
+#include "charset.h"
+
+#include <ctype.h>
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <wchar.h>
+#include <wctype.h>
+#include <locale.h>
+#include <langinfo.h>
+#include <iconv.h>
+
+static char *system_charset;
+
+/*
+ * Bounded string copy: copy at most C bytes of B into A and force a NUL
+ * into the last of those C bytes.  Does nothing when C is zero, because
+ * there is no byte that could hold the terminator.
+ * Note that A and C are evaluated more than once.
+ */
+#define strfcpy(A,B,C) ((C) ? (void) (strncpy ((A), (B), (C)), *((A) + (C) - 1) = 0) : (void) 0)
+
+/* Transfer encoding named in the third field of an RFC 2047 encoded word. */
+enum encoding {
+  ENCOTHER,                    /* zero; the value rfc2047_decode_word() starts with */
+  ENCQUOTEDPRINTABLE,          /* selected by 'Q' or 'q' */
+  ENCBASE64,                   /* selected by 'B' or 'b' */
+};
+
+/*
+ * Value of a hexadecimal digit, indexed by 7-bit ASCII code:
+ * '0'-'9' map to 0-9, 'A'-'F' and 'a'-'f' map to 10-15, and every other
+ * code maps to -1.
+ */
+static int Index_hex[128] = {
+    -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
+    -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
+    -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
+     0, 1, 2, 3,  4, 5, 6, 7,  8, 9,-1,-1, -1,-1,-1,-1,
+    -1,10,11,12, 13,14,15,-1, -1,-1,-1,-1, -1,-1,-1,-1,
+    -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
+    -1,10,11,12, 13,14,15,-1, -1,-1,-1,-1, -1,-1,-1,-1,
+    -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1
+};
+
+/*
+ * Value of a base64 digit, indexed by 7-bit ASCII code:
+ * 'A'-'Z' map to 0-25, 'a'-'z' to 26-51, '0'-'9' to 52-61, '+' to 62 and
+ * '/' to 63.  Every other code (including the '=' padding) maps to -1.
+ */
+static int Index_64[128] = {
+    -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
+    -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
+    -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,62, -1,-1,-1,63,
+    52,53,54,55, 56,57,58,59, 60,61,-1,-1, -1,-1,-1,-1,
+    -1, 0, 1, 2,  3, 4, 5, 6,  7, 8, 9,10, 11,12,13,14,
+    15,16,17,18, 19,20,21,22, 23,24,25,-1, -1,-1,-1,-1,
+    -1,26,27,28, 29,30,31,32, 33,34,35,36, 37,38,39,40,
+    41,42,43,44, 45,46,47,48, 49,50,51,-1, -1,-1,-1,-1
+};
+
+/*
+ * Table lookups for a single character.  Both tables have only 128
+ * entries, so callers must first make sure that c has no bits set outside
+ * 0..127 (the decoder tests `c & ~127' before every use).
+ */
+#define hexval(c) Index_hex[(unsigned int)(c)]
+#define base64val(c) Index_64[(unsigned int)(c)]
+
+/* Option identifier passed to option() by rfc2047_decode(). */
+#define OPTIGNORELWS 0
+
+/*
+ * Report whether an option is enabled.  This is a stub: the argument is
+ * ignored and every option is reported as enabled (1).
+ * NOTE(review): presumably a stand-in for the option lookup of the Mutt
+ * code this file was adapted from -- confirm.
+ */
+static int option(int opt UNUSED)
+{
+  return 1;
+}
+
+/*
+ * Convert flen bytes at f from charset `from' to charset `to' with iconv.
+ *
+ * On success, *t receives a newly allocated, NUL-terminated buffer holding
+ * the converted text, *tlen receives its length without the NUL, and the
+ * value returned by iconv() is passed back.  The caller owns *t; the input
+ * buffer f is not freed here.
+ *
+ * On failure, (size_t)(-1) is returned and *t and *tlen are left untouched.
+ * If the conversion itself failed, errno is the value set by iconv().
+ */
+static size_t convert_string (char *f, size_t flen,
+                             const char *from, const char *to,
+                             char **t, size_t *tlen)
+{
+  iconv_t cd;
+  char *buf, *ob;
+  size_t obl, n;
+  int e;
+
+  cd = iconv_open (to, from);
+  if (cd == (iconv_t)(-1))
+    return (size_t)(-1);
+  /* Offer iconv four output bytes per input byte (plus one).  The
+   * terminating NUL gets a byte of its own outside that space, so it
+   * still fits when iconv fills the buffer completely. */
+  obl = 4 * flen + 1;
+  ob = buf = xmalloc (obl + 1);
+  n = iconv (cd, &f, &flen, &ob, &obl);
+  /* The second call (NULL input) flushes any pending shift sequence. */
+  if (n == (size_t)(-1) || iconv (cd, 0, 0, &ob, &obl) == (size_t)(-1))
+  {
+    /* Keep iconv's errno across the cleanup calls. */
+    e = errno;
+    free(buf);
+    iconv_close (cd);
+    errno = e;
+    return (size_t)(-1);
+  }
+  *ob = '\0';
+
+  *tlen = ob - buf;
+
+  /* Shrink the buffer to what was actually produced. */
+  buf = xrealloc (buf, ob - buf + 1);
+  *t = buf;
+  iconv_close (cd);
+
+  return n;
+}
+
+/*
+ * Decode one RFC 2047 encoded word ("=?charset?encoding?text?=") found at
+ * the start of s and store the result in d as a NUL-terminated string.
+ *
+ *   d    destination buffer
+ *   s    NUL-terminated string beginning with the encoded word; whatever
+ *        follows the word is ignored
+ *   len  size of d in bytes; the result is truncated to len - 1 bytes and
+ *        nothing is written when len is zero
+ *
+ * When both the charset named in the word and the system charset are
+ * known, the decoded bytes are converted with convert_string(); if that
+ * fails, the unconverted bytes are used instead.
+ *
+ * Returns 0 on success, or -1 if the encoding is neither Q nor B, in
+ * which case d is set to the empty string.
+ */
+static int rfc2047_decode_word (char *d, const char *s, size_t len)
+{
+  const char *pp, *pp1;
+  char *pd, *d0;
+  const char *t, *t1;
+  int enc = 0, count = 0;
+  char *charset = NULL;
+
+  /* Decoding never produces more bytes than it consumes.  The extra byte
+   * and the initial NUL keep d0 a valid string even when the text field
+   * is never reached. */
+  pd = d0 = xmalloc (strlen (s) + 1);
+  *pd = 0;
+
+  /* The fields are delimited by '?': "=", charset, encoding, text.
+   * Nothing after the fourth delimiter belongs to this word. */
+  for (pp = s; count < 4 && (pp1 = strchr (pp, '?')); pp = pp1 + 1)
+  {
+    count++;
+    switch (count)
+    {
+      case 2:
+        /* ignore language specification a la RFC 2231 */
+        t = pp1;
+        if ((t1 = memchr (pp, '*', t - pp)))
+          t = t1;
+        charset = xmalloc (t - pp + 1);
+        memcpy (charset, pp, t - pp);
+        charset[t-pp] = '\0';
+        break;
+      case 3:
+        if (toupper ((unsigned char) *pp) == 'Q')
+          enc = ENCQUOTEDPRINTABLE;
+        else if (toupper ((unsigned char) *pp) == 'B')
+          enc = ENCBASE64;
+        else
+        {
+          /* Unknown encoding: hand back an empty string, not garbage */
+          if (len > 0)
+            *d = 0;
+          free(charset);
+          free(d0);
+          return (-1);
+        }
+        break;
+      case 4:
+        if (enc == ENCQUOTEDPRINTABLE)
+        {
+          for (; pp < pp1; pp++)
+          {
+            if (*pp == '_')
+              *pd++ = ' ';
+            else if (*pp == '=' &&
+                     (!(pp[1] & ~127) && hexval(pp[1]) != -1) &&
+                     (!(pp[2] & ~127) && hexval(pp[2]) != -1))
+            {
+              *pd++ = (hexval(pp[1]) << 4) | hexval(pp[2]);
+              pp += 2;
+            }
+            else
+              *pd++ = *pp;
+          }
+          *pd = 0;
+        }
+        else if (enc == ENCBASE64)
+        {
+          /* b holds the bits not yet written out, k counts them */
+          int c, b = 0, k = 0;
+
+          for (; pp < pp1; pp++)
+          {
+            if (*pp == '=')
+              break;
+            /* Silently skip anything that is not a base64 digit */
+            if ((*pp & ~127) || (c = base64val(*pp)) == -1)
+              continue;
+            if (k + 6 >= 8)
+            {
+              k -= 2;
+              *pd++ = b | (c >> k);
+              b = c << (8 - k);
+            }
+            else
+            {
+              b |= c << (k + 2);
+              k += 6;
+            }
+          }
+          *pd = 0;
+        }
+        break;
+      default:
+        break;
+    }
+  }
+
+  if (charset && system_charset)
+  {
+    char *converted = NULL;
+    size_t clen;
+
+    /* convert_string() allocates a new buffer and leaves its input alone,
+     * so the undecoded buffer has to be released here. */
+    if (convert_string (d0, strlen (d0), charset, system_charset,
+                        &converted, &clen) != (size_t)(-1))
+    {
+      free (d0);
+      d0 = converted;
+    }
+  }
+  /* With len == 0 there is no room even for the terminator */
+  if (len > 0)
+    strfcpy (d, d0, len);
+  free (charset);
+  free (d0);
+  return (0);
+}
+
+/*
+ * Find the start and end of the first encoded word in the string.
+ * We use the grammar in section 2 of RFC 2047, but the "encoding"
+ * must be B or Q. Also, we don't require the encoded word to be
+ * separated by linear-white-space (section 5(1)).
+ *
+ * Returns a pointer to the "=?" that opens the word and stores a pointer
+ * just past its closing "?=" in *x.  Returns NULL and leaves *x untouched
+ * when s contains no encoded word.
+ */
+static const char *find_encoded_word (const char *s, const char **x)
+{
+  const char *p, *q;
+
+  q = s;
+  while ((p = strstr (q, "=?")))
+  {
+    /* charset: printable ASCII except the characters listed below */
+    for (q = p + 2;
+        0x20 < *q && *q < 0x7f && !strchr ("()<>@,;:\"/[]?.=", *q);
+        q++)
+      ;
+    /* strchr() also matches the terminating NUL, so the end of the string
+     * has to be ruled out before q[2] may be inspected. */
+    if (q[0] != '?' || q[1] == '\0' || !strchr ("BbQq", q[1]) || q[2] != '?')
+      continue;
+    /* encoded text: printable ASCII up to the next '?' */
+    for (q = q + 3; 0x20 < *q && *q < 0x7f && *q != '?'; q++)
+      ;
+    if (q[0] != '?' || q[1] != '=')
+    {
+      /* Step back one character so that an '=' just before the stopping
+       * point can still open the next candidate. */
+      --q;
+      continue;
+    }
+
+    *x = q + 2;
+    return p;
+  }
+
+  return 0;
+}
+
+/*
+ * Return the length of the linear white space (space, tab, CR, LF) at the
+ * start of the first n bytes of s.  The result is 0 when that run ends
+ * with CR or LF, because linear white space does not end with a line
+ * break.
+ */
+static size_t lwslen (const char *s, size_t n)
+{
+  size_t len = 0;
+
+  while (len < n && strchr (" \t\r\n", s[len]))
+    len++;
+  /* Look at the last character of the run only if there is one; for an
+   * empty run that byte would lie in front of the buffer. */
+  if (len > 0 && strchr ("\r\n", s[len - 1])) /* LWS doesn't end with CRLF */
+    len = 0;
+  return len;
+}
+
+/*
+ * Return the length of the linear white space (space, tab, CR, LF) at the
+ * end of the first n bytes of s.  The result is 0 when the last byte is
+ * CR or LF, because linear white space does not end with a line break.
+ */
+static size_t lwsrlen (const char *s, size_t n)
+{
+  size_t len = 0;
+
+  if (n == 0)
+    return 0;
+
+  if (strchr ("\r\n", s[n - 1])) /* LWS doesn't end with CRLF */
+    return 0;
+
+  /* Count backwards by index so that no pointer in front of s is formed */
+  while (len < n && strchr (" \t\r\n", s[n - 1 - len]))
+    len++;
+  return len;
+}
+
+/* Try to decode anything that looks like a valid RFC2047 encoded
+ * header field, ignoring RFC822 parsing rules.
+ *
+ * *pd is the NUL-terminated header text.  It is replaced by a newly
+ * allocated string with every encoded word decoded, and the old string
+ * is freed.  Nothing happens when *pd is NULL or empty.
+ */
+static void rfc2047_decode (char **pd)
+{
+  const char *p, *q;            /* start of / first byte after the next encoded word */
+  size_t m, n;
+  int found_encoded = 0;        /* set once the first encoded word has been decoded */
+  char *d0, *d;                 /* output buffer and current write position */
+  const char *s = *pd;          /* current read position */
+  size_t dlen;                  /* output bytes still available */
+
+  if (!s || !*s)
+    return;
+
+  dlen = 4 * strlen (s); /* should be enough */
+  d = d0 = xmalloc (dlen + 1);  /* the extra byte holds the final NUL */
+
+  /* NOTE(review): the `dlen--' steps below are not checked against zero;
+   * confirm that the 4x budget can never be used up before they run. */
+  while (*s && dlen > 0)
+  {
+    if (!(p = find_encoded_word (s, &q)))
+    {
+      /* no encoded words */
+      if (option (OPTIGNORELWS))
+      {
+        n = strlen (s);
+        /* After an encoded word, leading white space is replaced by one
+         * space, or dropped when nothing but white space remains. */
+        if (found_encoded && (m = lwslen (s, n)) != 0)
+        {
+          if (m != n)
+            *d = ' ', d++, dlen--;
+          s += m;
+        }
+      }
+      /* strncpy zero-fills whatever it does not use, so the output stays
+       * terminated although d is advanced by the whole remaining budget. */
+      strncpy (d, s, dlen);
+      d += dlen;
+      break;
+    }
+
+    if (p != s)
+    {
+      /* n bytes of plain text precede the encoded word */
+      n = (size_t) (p - s);
+      /* ignore spaces between encoded word
+       * and linear-white-space between encoded word and *text */
+      if (option (OPTIGNORELWS))
+      {
+        /* Leading white space after a previous encoded word: one space,
+         * or nothing at all when the gap is white space only. */
+        if (found_encoded && (m = lwslen (s, n)) != 0)
+        {
+          if (m != n)
+            *d = ' ', d++, dlen--;
+          n -= m, s += m;
+        }
+
+        /* Copy the text without its trailing white space, then stand in
+         * one space for whatever white space was cut off. */
+        if ((m = n - lwsrlen (s, n)) != 0)
+        {
+          if (m > dlen)
+            m = dlen;
+          memcpy (d, s, m);
+          d += m;
+          dlen -= m;
+          if (m != n)
+            *d = ' ', d++, dlen--;
+        }
+      }
+      /* Not reached in this file, because option() always returns 1:
+       * copies the text verbatim unless it is only white space between
+       * two encoded words. */
+      else if (!found_encoded || strspn (s, " \t\r\n") != n)
+      {
+       if (n > dlen)
+         n = dlen;
+       memcpy (d, s, n);
+       d += n;
+       dlen -= n;
+      }
+    }
+
+    /* Decode the word straight into the output and skip past it */
+    rfc2047_decode_word (d, p, dlen);
+    found_encoded = 1;
+    s = q;
+    n = strlen (d);
+    dlen -= n;
+    d += n;
+  }
+  *d = 0;
+
+  /* Hand the new string to the caller in place of the old one */
+  free (*pd);
+  *pd = d0;
+}
+
+/*
+ * Initialize the whole machinery: take the LC_CTYPE locale from the
+ * environment and remember its character set in system_charset, which
+ * stays NULL when the locale reports an empty codeset name.
+ */
+void
+charset_init(void)
+{
+  char *codeset;
+
+  setlocale(LC_CTYPE, "");
+  codeset = nl_langinfo(CODESET);
+  /* The string returned by nl_langinfo() may be overwritten by later
+   * nl_langinfo()/setlocale() calls, so keep a private copy. */
+  system_charset = (codeset && codeset[0]) ? xstrdup(codeset) : NULL;
+  /* Passing a NULL pointer for %s is undefined, so substitute a string */
+  debug("Charset is %s\n", system_charset ? system_charset : "(null)");
+}
+
+/*
+ * Append a cleaned-up copy of the multibyte string add to the text being
+ * built at *ppos.
+ *
+ *   ppos  in: where to start writing; out: position of the terminating
+ *         NUL that was written
+ *   term  end of the output buffer; copying stops while at least
+ *         MB_CUR_MAX bytes are still free in front of it
+ *   add   NUL-terminated input in the encoding of the current locale
+ *
+ * Runs of white space are collapsed to their first character (a plain
+ * space if that character is not printable), white space at the very
+ * start is dropped, and invalid or non-printable characters are replaced
+ * by '?'.
+ */
+static void
+do_add_snippet(char **ppos, char *term, unsigned char *add)
+{
+  char *pos = *ppos;
+  int space = 1;                /* pretend we follow a space: drops leading white space */
+  mbtowc(NULL, NULL, 0);        /* reset the conversion state */
+
+  while (pos + MB_CUR_MAX < term)
+    {
+      wchar_t c;
+      int l = mbtowc(&c, (const char *) add, MB_CUR_MAX);
+      if (!l)
+        break;
+      if (l < 0)
+        {
+          /* Invalid sequence: resynchronize and skip a single byte */
+          mbtowc(NULL, NULL, 0);
+          l = 1;
+          c = '?';
+        }
+      add += l;
+      /* Classify white space before filtering non-printable characters;
+       * otherwise tabs and newlines would come out as '?'. */
+      if (iswspace(c))
+        {
+          if (space)
+            continue;
+          space = 1;
+          if (!iswprint(c))
+            c = ' ';
+        }
+      else
+        {
+          space = 0;
+          if (!iswprint(c))
+            c = '?';
+        }
+      l = wctomb(pos, c);
+      if (l < 0)
+        {
+          /* Not representable in this locale: never move pos backwards */
+          wctomb(NULL, 0);
+          *pos = '?';
+          l = 1;
+        }
+      pos += l;
+    }
+  *ppos = pos;
+  *pos = 0;
+}
+
+/*
+ * Decode the RFC 2047 encoded words in add and append the cleaned-up
+ * result to the text being built at *ppos (see do_add_snippet() for the
+ * meaning of ppos and term).  add itself is not modified; the work is
+ * done on a private copy.
+ */
+void
+add_snippet(char **ppos, char *term, char *add)
+{
+  char *buf = xstrdup(add);
+  rfc2047_decode(&buf);         /* may replace buf with a new allocation */
+  /* do_add_snippet() takes unsigned bytes; convert the pointer explicitly */
+  do_add_snippet(ppos, term, (unsigned char *) buf);
+  free(buf);
+}
diff --git a/charset.h b/charset.h

new file mode 100644 (file)

index 0000000..bb3bf27
--- /dev/null
+++ b/charset.h
@@ -0,0 +1,4 @@
+/* Charset handling */
+
+void add_snippet(char **ppos, char *term, char *add);
+void charset_init(void);
diff --git a/cm.c b/cm.c

index 76db2488bd547a3e653fc9a24da4b6af60ef8b60..963fe049541f4b031d53c3fc5e20f70bc3e97600 100644 (file)
--- a/cm.c
+++ b/cm.c
@@ -20,7 +20,7 @@
  
  #include "util.h"
  #include "clists.h"
-#include "rfc2047.h"
+#include "charset.h"
  
  static int check_interval = 30;
  static int force_refresh;
@@ -65,7 +65,7 @@ struct mbox {
    time_t last_time;
    int last_size, last_pos;
    int total, new, flagged;
-  int last_total, last_new;
+  int last_total, last_new, last_flagged;
    int last_beep_new;
    int force_refresh;
    int snippet_is_new;
@@ -204,49 +204,6 @@ mbox_visible_p(struct mbox *b)
    return 1;
  }
  
-static void
-do_add_snippet(char **ppos, char *term, unsigned char *add)
-{
-  char *pos = *ppos;
-  int space = 1;
-  while (*add && pos < term)
-    {
-      if (*add <= ' ')
-       {
-         if (!space)
-           *pos++ = ' ';
-         space = 1;
-       }
-      else if (*add >= 0x7f)
-       {
-         *pos++ = '?';
-         space = 0;
-       }
-      else
-       {
-         *pos++ = *add;
-         space = 0;
-       }
-      add++;
-    }
-  *ppos = pos;
-  *pos = 0;
-}
-
-static void
-add_snippet(char **ppos, char *term, unsigned char *add)
-{
-#if 1
-  char *buf = xmalloc(strlen(add) + 1);
-  strcpy(buf, add);
-  rfc2047_decode(&buf);
-  do_add_snippet(ppos, term, buf);
-  free(buf);
-#else
-  do_add_snippet(ppos, term, add);
-#endif
-}
-
  static void
  prepare_snippet(struct mbox *b, char *sender, char *subject)
  {
@@ -342,7 +299,7 @@ scan_mbox(struct mbox *b, struct stat *st)
  
    if (!st->st_size)
      {
-      b->total = b->new = 0;
+      b->total = b->new = b->flagged = 0;
        b->last_pos = 0;
        return;
      }
@@ -353,7 +310,7 @@ scan_mbox(struct mbox *b, struct stat *st)
    if (mb_fd < 0)
      {
        debug("[open failed: %m] ");
-      b->total = b->new = -1;
+      b->total = b->new = b->flagged = -1;
        return;
      }
    mb_reset(0);
@@ -378,17 +335,18 @@ scan_mbox(struct mbox *b, struct stat *st)
        if (!mb_check(from+1, 5))
         {
           debug("[inconsistent] ");
-         b->total = b->new = -1;
+         b->total = b->new = b->flagged = -1;
           goto done;
         }
-      b->total = b->new = 0;
-      b->last_total = b->last_new = 0;
+      b->total = b->new = b->flagged = 0;
+      b->last_total = b->last_new = b->last_flagged = 0;
        b->snippet_is_new = 0;
      }
    else
      {
        b->total = b->last_total;
        b->new = b->last_new;
+      b->flagged = b->last_flagged;
      }
  
    for(;;)
@@ -398,6 +356,7 @@ scan_mbox(struct mbox *b, struct stat *st)
         b->last_pos--;          // last_pos should be the previous \n character
        b->last_total = b->total;
        b->last_new = b->new;
+      b->last_flagged = b->flagged;
        while ((c = mb_get()) >= 0 && c != '\n')
         ;
  
@@ -530,7 +489,7 @@ scan(void)
         b->force_refresh = 1;
        if (stat(b->path, &st) < 0)
         {
-         b->total = b->new = -1;
+         b->total = b->new = b->flagged = -1;
           debug("%m\n");
         }
        else if (!b->last_time || st.st_mtime != b->last_time || st.st_size != b->last_size || b->force_refresh)
@@ -542,7 +501,7 @@ scan(void)
           scan_mbox(b, &st);
           b->last_time = st.st_mtime;
           b->last_size = st.st_size;
-         debug("%d %d (stopped at %d of %d)\n", b->total, b->new, b->last_pos, b->last_size);
+         debug("%d %d %d (stopped at %d of %d)\n", b->total, b->new, b->flagged, b->last_pos, b->last_size);
  
           b->scanning = 0;
           redraw_line(b->index);
@@ -934,7 +893,7 @@ main(int argc, char **argv)
    while (optind < argc)
      add_pattern(argv[optind++]);
  
-  rfc2047_init();
+  charset_init();
    term_init();
    scan_and_redraw();
    next_active(0, 1);
diff --git a/rfc2047.c b/rfc2047.c

deleted file mode 100644 (file)

index c94e259..0000000
--- a/rfc2047.c
+++ /dev/null
@@ -1,375 +0,0 @@
-/*
- * This code for parsing rfc2047 encoding of headers has been adapted
- * from the Mutt 1.5.16 MUA by Martin Mares <mj@ucw.cz>.
- *
- * Copyright (C) 1996-2000 Michael R. Elkins <me@mutt.org>
- * Copyright (C) 2000-2001 Edmund Grimley Evans <edmundo@rano.org>
- *
- *     This program is free software; you can redistribute it and/or modify
- *     it under the terms of the GNU General Public License as published by
- *     the Free Software Foundation; either version 2 of the License, or
- *     (at your option) any later version.
- *
- *     This program is distributed in the hope that it will be useful,
- *     but WITHOUT ANY WARRANTY; without even the implied warranty of
- *     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- *     GNU General Public License for more details.
- *
- *     You should have received a copy of the GNU General Public License
- *     along with this program; if not, write to the Free Software
- *     Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
- */
-
-#include "util.h"
-#include "rfc2047.h"
-
-#include <sys/types.h>
-#include <ctype.h>
-#include <errno.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <locale.h>
-#include <langinfo.h>
-#include <iconv.h>
-
-static char *system_charset;
-
-#define assert(x)
-#define ICONV_CONST
-#define strfcpy(A,B,C) strncpy(A,B,C), *(A+(C)-1)=0
-
-enum encoding {
-  ENCOTHER,
-  ENCQUOTEDPRINTABLE,
-  ENCBASE64,
-};
-
-static int Index_hex[128] = {
-    -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
-    -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
-    -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
-     0, 1, 2, 3,  4, 5, 6, 7,  8, 9,-1,-1, -1,-1,-1,-1,
-    -1,10,11,12, 13,14,15,-1, -1,-1,-1,-1, -1,-1,-1,-1,
-    -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
-    -1,10,11,12, 13,14,15,-1, -1,-1,-1,-1, -1,-1,-1,-1,
-    -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1
-};
-
-static int Index_64[128] = {
-    -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
-    -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
-    -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,62, -1,-1,-1,63,
-    52,53,54,55, 56,57,58,59, 60,61,-1,-1, -1,-1,-1,-1,
-    -1, 0, 1, 2,  3, 4, 5, 6,  7, 8, 9,10, 11,12,13,14,
-    15,16,17,18, 19,20,21,22, 23,24,25,-1, -1,-1,-1,-1,
-    -1,26,27,28, 29,30,31,32, 33,34,35,36, 37,38,39,40,
-    41,42,43,44, 45,46,47,48, 49,50,51,-1, -1,-1,-1,-1
-};
-
-#define hexval(c) Index_hex[(unsigned int)(c)]
-#define base64val(c) Index_64[(unsigned int)(c)]
-
-#define OPTIGNORELWS 0
-
-static int option(int opt UNUSED)
-{
-  return 1;
-}
-
-static size_t convert_string (ICONV_CONST char *f, size_t flen,
-                             const char *from, const char *to,
-                             char **t, size_t *tlen)
-{
-  iconv_t cd;
-  char *buf, *ob;
-  size_t obl, n;
-  int e;
-
-  cd = iconv_open (to, from);
-  if (cd == (iconv_t)(-1))
-    return (size_t)(-1);
-  obl = 4 * flen + 1;
-  ob = buf = xmalloc (obl);
-  n = iconv (cd, &f, &flen, &ob, &obl);
-  if (n == (size_t)(-1) || iconv (cd, 0, 0, &ob, &obl) == (size_t)(-1))
-  {
-    e = errno;
-    free(buf);
-    iconv_close (cd);
-    errno = e;
-    return (size_t)(-1);
-  }
-  *ob = '\0';
-
-  *tlen = ob - buf;
-
-  buf = xrealloc (buf, ob - buf + 1);
-  *t = buf;
-  iconv_close (cd);
-
-  return n;
-}
-
-static int rfc2047_decode_word (char *d, const char *s, size_t len)
-{
-  const char *pp, *pp1;
-  char *pd, *d0;
-  const char *t, *t1;
-  int enc = 0, count = 0;
-  char *charset = NULL;
-
-  pd = d0 = xmalloc (strlen (s));
-
-  for (pp = s; (pp1 = strchr (pp, '?')); pp = pp1 + 1)
-  {
-    count++;
-    switch (count)
-    {
-      case 2:
-       /* ignore language specification a la RFC 2231 */
-       t = pp1;
-        if ((t1 = memchr (pp, '*', t - pp)))
-         t = t1;
-       charset = xmalloc (t - pp + 1);
-       memcpy (charset, pp, t - pp);
-       charset[t-pp] = '\0';
-       break;
-      case 3:
-       if (toupper ((unsigned char) *pp) == 'Q')
-         enc = ENCQUOTEDPRINTABLE;
-       else if (toupper ((unsigned char) *pp) == 'B')
-         enc = ENCBASE64;
-       else
-       {
-         free(charset);
-         free(d0);
-         return (-1);
-       }
-       break;
-      case 4:
-       if (enc == ENCQUOTEDPRINTABLE)
-       {
-         for (; pp < pp1; pp++)
-         {
-           if (*pp == '_')
-             *pd++ = ' ';
-           else if (*pp == '=' &&
-                    (!(pp[1] & ~127) && hexval(pp[1]) != -1) &&
-                    (!(pp[2] & ~127) && hexval(pp[2]) != -1))
-           {
-             *pd++ = (hexval(pp[1]) << 4) | hexval(pp[2]);
-             pp += 2;
-           }
-           else
-             *pd++ = *pp;
-         }
-         *pd = 0;
-       }
-       else if (enc == ENCBASE64)
-       {
-         int c, b = 0, k = 0;
-
-         for (; pp < pp1; pp++)
-         {
-           if (*pp == '=')
-             break;
-           if ((*pp & ~127) || (c = base64val(*pp)) == -1)
-             continue;
-           if (k + 6 >= 8)
-           {
-             k -= 2;
-             *pd++ = b | (c >> k);
-             b = c << (8 - k);
-           }
-           else
-           {
-             b |= c << (k + 2);
-             k += 6;
-           }
-         }
-         *pd = 0;
-       }
-       break;
-    }
-  }
-
-  size_t dlen;
-  if (charset && system_charset)
-    convert_string (d0, strlen(d0), charset, system_charset, &d0, &dlen);
-  strfcpy (d, d0, len);
-  free (charset);
-  free (d0);
-  return (0);
-}
-
-/*
- * Find the start and end of the first encoded word in the string.
- * We use the grammar in section 2 of RFC 2047, but the "encoding"
- * must be B or Q. Also, we don't require the encoded word to be
- * separated by linear-white-space (section 5(1)).
- */
-static const char *find_encoded_word (const char *s, const char **x)
-{
-  const char *p, *q;
-
-  q = s;
-  while ((p = strstr (q, "=?")))
-  {
-    for (q = p + 2;
-        0x20 < *q && *q < 0x7f && !strchr ("()<>@,;:\"/[]?.=", *q);
-        q++)
-      ;
-    if (q[0] != '?' || !strchr ("BbQq", q[1]) || q[2] != '?')
-      continue;
-    for (q = q + 3; 0x20 < *q && *q < 0x7f && *q != '?'; q++)
-      ;
-    if (q[0] != '?' || q[1] != '=')
-    {
-      --q;
-      continue;
-    }
-
-    *x = q + 2;
-    return p;
-  }
-
-  return 0;
-}
-
-/* return length of linear-white-space */
-static size_t lwslen (const char *s, size_t n)
-{
-  const char *p = s;
-  size_t len = n;
-
-  if (n <= 0)
-    return 0;
-
-  for (; p < s + n; p++)
-    if (!strchr (" \t\r\n", *p))
-    {
-      len = (size_t)(p - s);
-      break;
-    }
-  if (strchr ("\r\n", *(p-1))) /* LWS doesn't end with CRLF */
-    len = (size_t)0;
-  return len;
-}
-
-/* return length of linear-white-space : reverse */
-static size_t lwsrlen (const char *s, size_t n)
-{
-  const char *p = s + n - 1;
-  size_t len = n;
-
-  if (n <= 0)
-    return 0;
-
-  if (strchr ("\r\n", *p)) /* LWS doesn't end with CRLF */
-    return (size_t)0;
-
-  for (; p >= s; p--)
-    if (!strchr (" \t\r\n", *p))
-    {
-      len = (size_t)(s + n - 1 - p);
-      break;
-    }
-  return len;
-}
-
-/* try to decode anything that looks like a valid RFC2047 encoded
- * header field, ignoring RFC822 parsing rules
- */
-void rfc2047_decode (char **pd)
-{
-  const char *p, *q;
-  size_t m, n;
-  int found_encoded = 0;
-  char *d0, *d;
-  const char *s = *pd;
-  size_t dlen;
-
-  if (!s || !*s)
-    return;
-
-  dlen = 4 * strlen (s); /* should be enough */
-  d = d0 = xmalloc (dlen + 1);
-
-  while (*s && dlen > 0)
-  {
-    if (!(p = find_encoded_word (s, &q)))
-    {
-      /* no encoded words */
-      if (option (OPTIGNORELWS))
-      {
-        n = strlen (s);
-        if (found_encoded && (m = lwslen (s, n)) != 0)
-        {
-          if (m != n)
-            *d = ' ', d++, dlen--;
-          s += m;
-        }
-      }
-      strncpy (d, s, dlen);
-      d += dlen;
-      break;
-    }
-
-    if (p != s)
-    {
-      n = (size_t) (p - s);
-      /* ignore spaces between encoded word
-       * and linear-white-space between encoded word and *text */
-      if (option (OPTIGNORELWS))
-      {
-        if (found_encoded && (m = lwslen (s, n)) != 0)
-        {
-          if (m != n)
-            *d = ' ', d++, dlen--;
-          n -= m, s += m;
-        }
-
-        if ((m = n - lwsrlen (s, n)) != 0)
-        {
-          if (m > dlen)
-            m = dlen;
-          memcpy (d, s, m);
-          d += m;
-          dlen -= m;
-          if (m != n)
-            *d = ' ', d++, dlen--;
-        }
-      }
-      else if (!found_encoded || strspn (s, " \t\r\n") != n)
-      {
-       if (n > dlen)
-         n = dlen;
-       memcpy (d, s, n);
-       d += n;
-       dlen -= n;
-      }
-    }
-
-    rfc2047_decode_word (d, p, dlen);
-    found_encoded = 1;
-    s = q;
-    n = strlen (d);
-    dlen -= n;
-    d += n;
-  }
-  *d = 0;
-
-  free (*pd);
-  *pd = d0;
-}
-
-/* Initialize the whole machinery */
-void rfc2047_init(void)
-{
-  setlocale(LC_CTYPE, "");
-  system_charset = nl_langinfo(CODESET);
-  if (!system_charset[0])
-    system_charset = NULL;
-  debug("Charset is %s\n", system_charset);
-}
diff --git a/rfc2047.h b/rfc2047.h

deleted file mode 100644 (file)

index 0d1750b..0000000
--- a/rfc2047.h
+++ /dev/null
@@ -1,23 +0,0 @@
-/*
- * This code for parsing rfc2047 encoding of headers has been adapted
- * from the Mutt 1.5.16 MUA by Martin Mares <mj@ucw.cz>.
- *
- * Copyright (C) 1996-2000 Michael R. Elkins <me@mutt.org>
- *
- *     This program is free software; you can redistribute it and/or modify
- *     it under the terms of the GNU General Public License as published by
- *     the Free Software Foundation; either version 2 of the License, or
- *     (at your option) any later version.
- *
- *     This program is distributed in the hope that it will be useful,
- *     but WITHOUT ANY WARRANTY; without even the implied warranty of
- *     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- *     GNU General Public License for more details.
- *
- *     You should have received a copy of the GNU General Public License
- *     along with this program; if not, write to the Free Software
- *     Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
- */
-
-void rfc2047_decode (char **);
-void rfc2047_init (void);
author	Martin Mares <mj@ucw.cz>
	Mon, 25 Jun 2007 16:43:43 +0000 (18:43 +0200)
committer	Martin Mares <mj@ucw.cz>
	Mon, 25 Jun 2007 16:43:43 +0000 (18:43 +0200)
Makefile		patch \| blob \| history
charset.c	[new file with mode: 0644]	patch \| blob
charset.h	[new file with mode: 0644]	patch \| blob
cm.c		patch \| blob \| history
rfc2047.c	[deleted file]	patch \| blob \| history
rfc2047.h	[deleted file]	patch \| blob \| history