First attempt at rfc2047 parsing.

author Martin Mares <mj@ucw.cz>

Mon, 25 Jun 2007 15:53:40 +0000 (17:53 +0200)

committer Martin Mares <mj@ucw.cz>

Mon, 25 Jun 2007 15:53:40 +0000 (17:53 +0200)
author Martin Mares <mj@ucw.cz>
Mon, 25 Jun 2007 15:53:40 +0000 (17:53 +0200)
committer Martin Mares <mj@ucw.cz>
Mon, 25 Jun 2007 15:53:40 +0000 (17:53 +0200)
diff --git a/Makefile b/Makefile

index bf1f5a954d30475d1400254ba9d29e963e3a0039..9b28ac6de76243f49566f79e913962dd98f356bc 100644 (file)
--- a/Makefile
+++ b/Makefile
@@ -7,10 +7,11 @@ YEAR=2006
  
  all: cm
  
-cm: cm.o util.o
+cm: cm.o util.o rfc2047.o
  
  cm.o: cm.c clists.h util.h
  util.o: util.c util.h
+rfc2047.o: rfc2047.c util.h
  
  clean:
         rm -f `find . -name "*~" -or -name "*.[oa]" -or -name "\#*\#" -or -name TAGS -or -name core`
diff --git a/cm.c b/cm.c

index cce8c273e4fa6e735761e33a9296cd2f7d09c6bd..76db2488bd547a3e653fc9a24da4b6af60ef8b60 100644 (file)
--- a/cm.c
+++ b/cm.c
@@ -20,6 +20,7 @@
  
  #include "util.h"
  #include "clists.h"
+#include "rfc2047.h"
  
  static int check_interval = 30;
  static int force_refresh;
@@ -204,7 +205,7 @@ mbox_visible_p(struct mbox *b)
  }
  
  static void
-add_snippet(char **ppos, char *term, unsigned char *add)
+do_add_snippet(char **ppos, char *term, unsigned char *add)
  {
    char *pos = *ppos;
    int space = 1;
@@ -232,6 +233,20 @@ add_snippet(char **ppos, char *term, unsigned char *add)
    *pos = 0;
  }
  
+/* Decode RFC 2047 encoded words in ADD, then hand the result to do_add_snippet()
+ * together with PPOS and TERM, which are passed through untouched. */
+static void
+add_snippet(char **ppos, char *term, unsigned char *add)
+{
+  /* Decode a heap copy: rfc2047_decode() may free the buffer it is given
+   * and store a freshly allocated replacement through the pointer. */
+  char *buf = xmalloc(strlen((char *) add) + 1);
+  strcpy(buf, (char *) add);
+  rfc2047_decode(&buf);
+  do_add_snippet(ppos, term, (unsigned char *) buf);
+  free(buf);
+}
+
  static void
  prepare_snippet(struct mbox *b, char *sender, char *subject)
  {
@@ -919,6 +934,7 @@ main(int argc, char **argv)
    while (optind < argc)
      add_pattern(argv[optind++]);
  
+  rfc2047_init();
    term_init();
    scan_and_redraw();
    next_active(0, 1);
diff --git a/rfc2047.c b/rfc2047.c

index f61784e03ff5b17793782a6e20173e5aa76ceffa..c94e2597c9b447b2e5861797a9157464386a3958 100644 (file)
--- a/rfc2047.c
+++ b/rfc2047.c
@@ -1,57 +1,81 @@
  /*
+ * This code for parsing rfc2047 encoding of headers has been adapted
+ * from the Mutt 1.5.16 MUA by Martin Mares <mj@ucw.cz>.
+ *
   * Copyright (C) 1996-2000 Michael R. Elkins <me@mutt.org>
   * Copyright (C) 2000-2001 Edmund Grimley Evans <edmundo@rano.org>
- * 
+ *
   *     This program is free software; you can redistribute it and/or modify
   *     it under the terms of the GNU General Public License as published by
   *     the Free Software Foundation; either version 2 of the License, or
   *     (at your option) any later version.
- * 
+ *
   *     This program is distributed in the hope that it will be useful,
   *     but WITHOUT ANY WARRANTY; without even the implied warranty of
   *     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   *     GNU General Public License for more details.
- * 
+ *
   *     You should have received a copy of the GNU General Public License
   *     along with this program; if not, write to the Free Software
   *     Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
- */ 
-
-#if HAVE_CONFIG_H
-# include "config.h"
-#endif
+ */
  
-#include "mutt.h"
-#include "mime.h"
-#include "charset.h"
+#include "util.h"
  #include "rfc2047.h"
  
+#include <sys/types.h>
  #include <ctype.h>
  #include <errno.h>
  #include <stdio.h>
  #include <stdlib.h>
  #include <string.h>
+#include <locale.h>
+#include <langinfo.h>
+#include <iconv.h>
  
-/* If you are debugging this file, comment out the following line. */
-/*#define NDEBUG*/
+static char *system_charset;
  
-#ifdef NDEBUG
  #define assert(x)
-#else
-#include <assert.h>
-#endif
-
-#define ENCWORD_LEN_MAX 75
-#define ENCWORD_LEN_MIN 9 /* strlen ("=?.?.?.?=") */
-
-#define HSPACE(x) ((x) == '\0' || (x) == ' ' || (x) == '\t')
-
-#define CONTINUATION_BYTE(c) (((c) & 0xc0) == 0x80)
-
-extern char RFC822Specials[];
-
-typedef size_t (*encoder_t) (char *, ICONV_CONST char *, size_t,
-                            const char *);
+#define ICONV_CONST	/* expands to nothing, so "ICONV_CONST char *" is a plain "char *" */
+#define strfcpy(A,B,C) (strncpy((A),(B),(C)), *((A)+(C)-1)=0)	/* bounded copy that always NUL-terminates; C must be > 0; A and C are evaluated twice */
+
+enum encoding {
+  ENCOTHER,
+  ENCQUOTEDPRINTABLE,
+  ENCBASE64,
+};
+
+static int Index_hex[128] = {
+    -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
+    -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
+    -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
+     0, 1, 2, 3,  4, 5, 6, 7,  8, 9,-1,-1, -1,-1,-1,-1,
+    -1,10,11,12, 13,14,15,-1, -1,-1,-1,-1, -1,-1,-1,-1,
+    -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
+    -1,10,11,12, 13,14,15,-1, -1,-1,-1,-1, -1,-1,-1,-1,
+    -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1
+};
+
+static int Index_64[128] = {
+    -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
+    -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
+    -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,62, -1,-1,-1,63,
+    52,53,54,55, 56,57,58,59, 60,61,-1,-1, -1,-1,-1,-1,
+    -1, 0, 1, 2,  3, 4, 5, 6,  7, 8, 9,10, 11,12,13,14,
+    15,16,17,18, 19,20,21,22, 23,24,25,-1, -1,-1,-1,-1,
+    -1,26,27,28, 29,30,31,32, 33,34,35,36, 37,38,39,40,
+    41,42,43,44, 45,46,47,48, 49,50,51,-1, -1,-1,-1,-1
+};
+
+#define hexval(c) Index_hex[(unsigned int)(c)]
+#define base64val(c) Index_64[(unsigned int)(c)]
+
+#define OPTIGNORELWS 0
+
+static int option(int opt UNUSED)
+{
+  return 1;
+}
  
  static size_t convert_string (ICONV_CONST char *f, size_t flen,
                               const char *from, const char *to,
@@ -62,566 +86,31 @@ static size_t convert_string (ICONV_CONST char *f, size_t flen,
    size_t obl, n;
    int e;
  
-  cd = mutt_iconv_open (to, from, 0);
+  cd = iconv_open (to, from);
    if (cd == (iconv_t)(-1))
      return (size_t)(-1);
    obl = 4 * flen + 1;
-  ob = buf = safe_malloc (obl);
+  ob = buf = xmalloc (obl);
    n = iconv (cd, &f, &flen, &ob, &obl);
    if (n == (size_t)(-1) || iconv (cd, 0, 0, &ob, &obl) == (size_t)(-1))
    {
      e = errno;
-    FREE (&buf);
+    free(buf);
      iconv_close (cd);
      errno = e;
      return (size_t)(-1);
    }
    *ob = '\0';
-  
+
    *tlen = ob - buf;
  
-  safe_realloc (&buf, ob - buf + 1);
+  buf = xrealloc (buf, ob - buf + 1);
    *t = buf;
    iconv_close (cd);
  
    return n;
  }
  
-int convert_nonmime_string (char **ps)
-{
-  const char *c, *c1;
-
-  for (c = AssumedCharset; c; c = c1 ? c1 + 1 : 0)
-  {
-    char *u = *ps;
-    char *s;
-    char *fromcode;
-    size_t m, n;
-    size_t ulen = mutt_strlen (*ps);
-    size_t slen;
-
-    if (!u || !*u)
-      return 0;
-
-    c1 = strchr (c, ':');
-    n = c1 ? c1 - c : mutt_strlen (c);
-    if (!n)
-      return 0;
-    fromcode = safe_malloc (n + 1);
-    strfcpy (fromcode, c, n + 1);
-    m = convert_string (u, ulen, fromcode, Charset, &s, &slen);
-    FREE (&fromcode);
-    if (m != (size_t)(-1))
-    {
-      FREE (ps); /* __FREE_CHECKED__ */
-      *ps = s;
-      return 0;
-    }
-  }
-  mutt_convert_string (ps,
-      (const char *)mutt_get_default_charset (AssumedCharset),
-      Charset, M_ICONV_HOOK_FROM);
-  return -1;
-}
-
-char *mutt_choose_charset (const char *fromcode, const char *charsets,
-                     char *u, size_t ulen, char **d, size_t *dlen)
-{
-  char canonical_buff[LONG_STRING];
-  char *e = 0, *tocode = 0;
-  size_t elen = 0, bestn = 0;
-  const char *p, *q;
-
-  for (p = charsets; p; p = q ? q + 1 : 0)
-  {
-    char *s, *t;
-    size_t slen, n;
-
-    q = strchr (p, ':');
-
-    n = q ? q - p : strlen (p);
-
-    if (!n ||
-       /* Assume that we never need more than 12 characters of
-          encoded-text to encode a single character. */
-       n > (ENCWORD_LEN_MAX - ENCWORD_LEN_MIN + 2 - 12))
-      continue;
-
-    t = safe_malloc (n + 1);
-    memcpy (t, p, n);
-    t[n] = '\0';
-
-    n = convert_string (u, ulen, fromcode, t, &s, &slen);
-    if (n == (size_t)(-1))
-      continue;
-
-    if (!tocode || n < bestn)
-    {
-      bestn = n;
-      FREE (&tocode);
-      tocode = t;
-      if (d)
-      {
-       FREE (&e);
-       e = s;
-      }
-      else
-       FREE (&s);
-      elen = slen;
-      if (!bestn)
-       break;
-    }
-    else
-    {
-      FREE (&t);
-      FREE (&s);
-    }
-  }
-  if (tocode)
-  {
-    if (d)
-      *d = e;
-    if (dlen)
-      *dlen = elen;
-    
-    mutt_canonical_charset (canonical_buff, sizeof (canonical_buff), tocode);
-    mutt_str_replace (&tocode, canonical_buff);
-  }
-  return tocode;
-}
-
-static size_t b_encoder (char *s, ICONV_CONST char *d, size_t dlen,
-                        const char *tocode)
-{
-  char *s0 = s;
-
-  memcpy (s, "=?", 2), s += 2;
-  memcpy (s, tocode, strlen (tocode)), s += strlen (tocode);
-  memcpy (s, "?B?", 3), s += 3;
-  for (;;)
-  {
-    if (!dlen)
-      break;
-    else if (dlen == 1)
-    {
-      *s++ = B64Chars[(*d >> 2) & 0x3f];
-      *s++ = B64Chars[(*d & 0x03) << 4];
-      *s++ = '=';
-      *s++ = '=';
-      break;
-    }
-    else if (dlen == 2)
-    {
-      *s++ = B64Chars[(*d >> 2) & 0x3f];
-      *s++ = B64Chars[((*d & 0x03) << 4) | ((d[1] >> 4) & 0x0f)];
-      *s++ = B64Chars[(d[1] & 0x0f) << 2];
-      *s++ = '=';
-      break;
-    }
-    else
-    {
-      *s++ = B64Chars[(*d >> 2) & 0x3f];
-      *s++ = B64Chars[((*d & 0x03) << 4) | ((d[1] >> 4) & 0x0f)];
-      *s++ = B64Chars[((d[1] & 0x0f) << 2) | ((d[2] >> 6) & 0x03)];
-      *s++ = B64Chars[d[2] & 0x3f];
-      d += 3, dlen -= 3;
-    }
-  }
-  memcpy (s, "?=", 2), s += 2;
-  return s - s0;
-}
-
-static size_t q_encoder (char *s, ICONV_CONST char *d, size_t dlen,
-                        const char *tocode)
-{
-  char hex[] = "0123456789ABCDEF";
-  char *s0 = s;
-
-  memcpy (s, "=?", 2), s += 2;
-  memcpy (s, tocode, strlen (tocode)), s += strlen (tocode);
-  memcpy (s, "?Q?", 3), s += 3;
-  while (dlen--)
-  {
-    unsigned char c = *d++;
-    if (c == ' ')
-      *s++ = '_';
-    else if (c >= 0x7f || c < 0x20 || c == '_' ||  strchr (MimeSpecials, c))
-    {
-      *s++ = '=';
-      *s++ = hex[(c & 0xf0) >> 4];
-      *s++ = hex[c & 0x0f];
-    }
-    else
-      *s++ = c;
-  }
-  memcpy (s, "?=", 2), s += 2;
-  return s - s0;
-}
-
-/*
- * Return 0 if and set *encoder and *wlen if the data (d, dlen) could
- * be converted to an encoded word of length *wlen using *encoder.
- * Otherwise return an upper bound on the maximum length of the data
- * which could be converted.
- * The data is converted from fromcode (which must be stateless) to
- * tocode, unless fromcode is 0, in which case the data is assumed to
- * be already in tocode, which should be 8-bit and stateless.
- */
-static size_t try_block (ICONV_CONST char *d, size_t dlen,
-                        const char *fromcode, const char *tocode,
-                        encoder_t *encoder, size_t *wlen)
-{
-  char buf1[ENCWORD_LEN_MAX - ENCWORD_LEN_MIN + 1];
-  iconv_t cd;
-  ICONV_CONST char *ib;
-  char *ob, *p;
-  size_t ibl, obl;
-  int count, len, len_b, len_q;
-
-  if (fromcode)
-  {
-    cd = mutt_iconv_open (tocode, fromcode, 0);
-    assert (cd != (iconv_t)(-1));
-    ib = d, ibl = dlen, ob = buf1, obl = sizeof (buf1) - strlen (tocode);
-    if (iconv (cd, &ib, &ibl, &ob, &obl) == (size_t)(-1) ||
-       iconv (cd, 0, 0, &ob, &obl) == (size_t)(-1))
-    {
-      assert (errno == E2BIG);
-      iconv_close (cd);
-      assert (ib > d);
-      return (ib - d == dlen) ? dlen : ib - d + 1;
-    }
-    iconv_close (cd);
-  }
-  else
-  {
-    if (dlen > sizeof (buf1) - strlen (tocode))
-      return sizeof (buf1) - strlen (tocode) + 1;
-    memcpy (buf1, d, dlen);
-    ob = buf1 + dlen;
-  }
-
-  count = 0;
-  for (p = buf1; p < ob; p++)
-  {
-    unsigned char c = *p;
-    assert (strchr (MimeSpecials, '?'));
-    if (c >= 0x7f || c < 0x20 || *p == '_' ||
-       (c != ' ' && strchr (MimeSpecials, *p)))
-      ++count;
-  }
-
-  len = ENCWORD_LEN_MIN - 2 + strlen (tocode);
-  len_b = len + (((ob - buf1) + 2) / 3) * 4;
-  len_q = len + (ob - buf1) + 2 * count;
-
-  /* Apparently RFC 1468 says to use B encoding for iso-2022-jp. */
-  if (!ascii_strcasecmp (tocode, "ISO-2022-JP"))
-    len_q = ENCWORD_LEN_MAX + 1;
-
-  if (len_b < len_q && len_b <= ENCWORD_LEN_MAX)
-  {
-    *encoder = b_encoder;
-    *wlen = len_b;
-    return 0;
-  }
-  else if (len_q <= ENCWORD_LEN_MAX)
-  {
-    *encoder = q_encoder;
-    *wlen = len_q;
-    return 0;
-  }
-  else
-    return dlen;
-}
-
-/*
- * Encode the data (d, dlen) into s using the encoder.
- * Return the length of the encoded word.
- */
-static size_t encode_block (char *s, char *d, size_t dlen,
-                           const char *fromcode, const char *tocode,
-                           encoder_t encoder)
-{
-  char buf1[ENCWORD_LEN_MAX - ENCWORD_LEN_MIN + 1];
-  iconv_t cd;
-  ICONV_CONST char *ib;
-  char *ob;
-  size_t ibl, obl, n1, n2;
-
-  if (fromcode)
-  {
-    cd = mutt_iconv_open (tocode, fromcode, 0);
-    assert (cd != (iconv_t)(-1));
-    ib = d, ibl = dlen, ob = buf1, obl = sizeof (buf1) - strlen (tocode);
-    n1 = iconv (cd, &ib, &ibl, &ob, &obl);
-    n2 = iconv (cd, 0, 0, &ob, &obl);
-    assert (n1 != (size_t)(-1) && n2 != (size_t)(-1));
-    iconv_close (cd);
-    return (*encoder) (s, buf1, ob - buf1, tocode);
-  }
-  else
-    return (*encoder) (s, d, dlen, tocode);
-}
-
-/*
- * Discover how much of the data (d, dlen) can be converted into
- * a single encoded word. Return how much data can be converted,
- * and set the length *wlen of the encoded word and *encoder.
- * We start in column col, which limits the length of the word.
- */
-static size_t choose_block (char *d, size_t dlen, int col,
-                           const char *fromcode, const char *tocode,
-                           encoder_t *encoder, size_t *wlen)
-{
-  size_t n, nn;
-  int utf8 = fromcode && !ascii_strcasecmp (fromcode, "utf-8");
-
-  n = dlen;
-  for (;;)
-  {
-    assert (d + n > d);
-    nn = try_block (d, n, fromcode, tocode, encoder, wlen);
-    if (!nn && (col + *wlen <= ENCWORD_LEN_MAX + 1 || n <= 1))
-      break;
-    n = (nn ? nn : n) - 1;
-    assert (n > 0);
-    if (utf8)
-      while (n > 1 && CONTINUATION_BYTE(d[n]))
-       --n;
-  }
-  return n;
-}
-
-/*
- * Place the result of RFC-2047-encoding (d, dlen) into the dynamically
- * allocated buffer (e, elen). The input data is in charset fromcode
- * and is converted into a charset chosen from charsets.
- * Return 1 if the conversion to UTF-8 failed, 2 if conversion from UTF-8
- * failed, otherwise 0. If conversion failed, fromcode is assumed to be
- * compatible with us-ascii and the original data is used.
- * The input data is assumed to be a single line starting at column col;
- * if col is non-zero, the preceding character was a space.
- */
-static int rfc2047_encode (ICONV_CONST char *d, size_t dlen, int col,
-                          const char *fromcode, const char *charsets,
-                          char **e, size_t *elen, char *specials)
-{
-  int ret = 0;
-  char *buf;
-  size_t bufpos, buflen;
-  char *u, *t0, *t1, *t;
-  char *s0, *s1;
-  size_t ulen, r, n, wlen;
-  encoder_t encoder;
-  char *tocode1 = 0;
-  const char *tocode;
-  char *icode = "utf-8";
-
-  /* Try to convert to UTF-8. */
-  if (convert_string (d, dlen, fromcode, icode, &u, &ulen))
-  {
-    ret = 1; 
-    icode = 0;
-    u = safe_malloc ((ulen = dlen) + 1);
-    memcpy (u, d, dlen);
-    u[ulen] = 0;
-  }
-
-  /* Find earliest and latest things we must encode. */
-  s0 = s1 = t0 = t1 = 0;
-  for (t = u; t < u + ulen; t++)
-  {
-    if ((*t & 0x80) || 
-       (*t == '=' && t[1] == '?' && (t == u || HSPACE(*(t-1)))))
-    {
-      if (!t0) t0 = t;
-      t1 = t;
-    }
-    else if (specials && strchr (specials, *t))
-    {
-      if (!s0) s0 = t;
-      s1 = t;
-    }
-  }
-
-  /* If we have something to encode, include RFC822 specials */
-  if (t0 && s0 && s0 < t0)
-    t0 = s0;
-  if (t1 && s1 && s1 > t1)
-    t1 = s1;
-
-  if (!t0)
-  {
-    /* No encoding is required. */
-    *e = u;
-    *elen = ulen;
-    return ret;
-  }
-
-  /* Choose target charset. */
-  tocode = fromcode;
-  if (icode)
-  {
-    if ((tocode1 = mutt_choose_charset (icode, charsets, u, ulen, 0, 0)))
-      tocode = tocode1;
-    else
-      ret = 2, icode = 0;
-  }
-
-  /* Hack to avoid labelling 8-bit data as us-ascii. */
-  if (!icode && mutt_is_us_ascii (tocode))
-    tocode = "unknown-8bit";
-  
-  /* Adjust t0 for maximum length of line. */
-  t = u + (ENCWORD_LEN_MAX + 1) - col - ENCWORD_LEN_MIN;
-  if (t < u)  t = u;
-  if (t < t0) t0 = t;
-  
-
-  /* Adjust t0 until we can encode a character after a space. */
-  for (; t0 > u; t0--)
-  {
-    if (!HSPACE(*(t0-1)))
-      continue;
-    t = t0 + 1;
-    if (icode)
-      while (t < u + ulen && CONTINUATION_BYTE(*t))
-       ++t;
-    if (!try_block (t0, t - t0, icode, tocode, &encoder, &wlen) &&
-       col + (t0 - u) + wlen <= ENCWORD_LEN_MAX + 1)
-      break;
-  }
-
-  /* Adjust t1 until we can encode a character before a space. */
-  for (; t1 < u + ulen; t1++)
-  {
-    if (!HSPACE(*t1))
-      continue;
-    t = t1 - 1;
-    if (icode)
-      while (CONTINUATION_BYTE(*t))
-       --t;
-    if (!try_block (t, t1 - t, icode, tocode, &encoder, &wlen) &&
-       1 + wlen + (u + ulen - t1) <= ENCWORD_LEN_MAX + 1)
-      break;
-  }
-
-  /* We shall encode the region [t0,t1). */
-
-  /* Initialise the output buffer with the us-ascii prefix. */
-  buflen = 2 * ulen;
-  buf = safe_malloc (buflen);
-  bufpos = t0 - u;
-  memcpy (buf, u, t0 - u);
-
-  col += t0 - u;
-
-  t = t0;
-  for (;;)
-  {
-    /* Find how much we can encode. */
-    n = choose_block (t, t1 - t, col, icode, tocode, &encoder, &wlen);
-    if (n == t1 - t)
-    {
-      /* See if we can fit the us-ascii suffix, too. */
-      if (col + wlen + (u + ulen - t1) <= ENCWORD_LEN_MAX + 1)
-       break;
-      n = t1 - t - 1;
-      if (icode)
-       while (CONTINUATION_BYTE(t[n]))
-         --n;
-      assert (t + n >= t);
-      if (!n)
-      {
-       /* This should only happen in the really stupid case where the
-          only word that needs encoding is one character long, but
-          there is too much us-ascii stuff after it to use a single
-          encoded word. We add the next word to the encoded region
-          and try again. */
-       assert (t1 < u + ulen);
-       for (t1++; t1 < u + ulen && !HSPACE(*t1); t1++)
-         ;
-       continue;
-      }
-      n = choose_block (t, n, col, icode, tocode, &encoder, &wlen);
-    }
-
-    /* Add to output buffer. */
-#define LINEBREAK "\n\t"
-    if (bufpos + wlen + strlen (LINEBREAK) > buflen)
-    {
-      buflen = bufpos + wlen + strlen (LINEBREAK);
-      safe_realloc (&buf, buflen);
-    }
-    r = encode_block (buf + bufpos, t, n, icode, tocode, encoder);
-    assert (r == wlen);
-    bufpos += wlen;
-    memcpy (buf + bufpos, LINEBREAK, strlen (LINEBREAK));
-    bufpos += strlen (LINEBREAK);
-#undef LINEBREAK
-
-    col = 1;
-
-    t += n;
-  }
-
-  /* Add last encoded word and us-ascii suffix to buffer. */
-  buflen = bufpos + wlen + (u + ulen - t1);
-  safe_realloc (&buf, buflen + 1);
-  r = encode_block (buf + bufpos, t, t1 - t, icode, tocode, encoder);
-  assert (r == wlen);
-  bufpos += wlen;
-  memcpy (buf + bufpos, t1, u + ulen - t1);
-
-  FREE (&tocode1);
-  FREE (&u);
-
-  buf[buflen] = '\0';
-  
-  *e = buf;
-  *elen = buflen + 1;
-  return ret;
-}
-
-void _rfc2047_encode_string (char **pd, int encode_specials, int col)
-{
-  char *e;
-  size_t elen;
-  char *charsets;
-
-  if (!Charset || !*pd)
-    return;
-
-  charsets = SendCharset;
-  if (!charsets || !*charsets)
-    charsets = "utf-8";
-
-  rfc2047_encode (*pd, strlen (*pd), col,
-                 Charset, charsets, &e, &elen,
-                 encode_specials ? RFC822Specials : NULL);
-
-  FREE (pd);           /* __FREE_CHECKED__ */
-  *pd = e;
-}
-
-void rfc2047_encode_adrlist (ADDRESS *addr, const char *tag)
-{
-  ADDRESS *ptr = addr;
-  int col = tag ? strlen (tag) + 2 : 32;
-  
-  while (ptr)
-  {
-    if (ptr->personal)
-      _rfc2047_encode_string (&ptr->personal, 1, col);
-#ifdef EXACT_ADDRESS
-    if (ptr->val)
-      _rfc2047_encode_string (&ptr->val, 1, col);
-#endif
-    ptr = ptr->next;
-  }
-}
-
  static int rfc2047_decode_word (char *d, const char *s, size_t len)
  {
    const char *pp, *pp1;
@@ -630,7 +119,7 @@ static int rfc2047_decode_word (char *d, const char *s, size_t len)
    int enc = 0, count = 0;
    char *charset = NULL;
  
-  pd = d0 = safe_malloc (strlen (s));
+  pd = d0 = xmalloc (strlen (s));
  
    for (pp = s; (pp1 = strchr (pp, '?')); pp = pp1 + 1)
    {
@@ -638,11 +127,11 @@ static int rfc2047_decode_word (char *d, const char *s, size_t len)
      switch (count)
      {
        case 2:
-       /* ignore language specification a la RFC 2231 */        
+       /* ignore language specification a la RFC 2231 */
         t = pp1;
          if ((t1 = memchr (pp, '*', t - pp)))
           t = t1;
-       charset = safe_malloc (t - pp + 1);
+       charset = xmalloc (t - pp + 1);
         memcpy (charset, pp, t - pp);
         charset[t-pp] = '\0';
         break;
@@ -653,8 +142,8 @@ static int rfc2047_decode_word (char *d, const char *s, size_t len)
           enc = ENCBASE64;
         else
         {
-         FREE (&charset);
-         FREE (&d0);
+         free(charset);
+         free(d0);
           return (-1);
         }
         break;
@@ -704,13 +193,13 @@ static int rfc2047_decode_word (char *d, const char *s, size_t len)
         break;
      }
    }
-  
-  if (charset)
-    mutt_convert_string (&d0, charset, Charset, M_ICONV_HOOK_FROM);
-  mutt_filter_unprintable (&d0);
+
+  size_t dlen;
+  if (charset && system_charset)
+    convert_string (d0, strlen(d0), charset, system_charset, &d0, &dlen);
    strfcpy (d, d0, len);
-  FREE (&charset);
-  FREE (&d0);
+  free (charset);
+  free (d0);
    return (0);
  }
  
@@ -805,7 +294,7 @@ void rfc2047_decode (char **pd)
      return;
  
    dlen = 4 * strlen (s); /* should be enough */
-  d = d0 = safe_malloc (dlen + 1);
+  d = d0 = xmalloc (dlen + 1);
  
    while (*s && dlen > 0)
    {
@@ -814,7 +303,7 @@ void rfc2047_decode (char **pd)
        /* no encoded words */
        if (option (OPTIGNORELWS))
        {
-        n = mutt_strlen (s);
+        n = strlen (s);
          if (found_encoded && (m = lwslen (s, n)) != 0)
          {
            if (m != n)
@@ -822,21 +311,6 @@ void rfc2047_decode (char **pd)
            s += m;
          }
        }
-      if (AssumedCharset && *AssumedCharset)
-      {
-       char *t;
-       size_t tlen;
-
-       n = mutt_strlen (s);
-       t = safe_malloc (n + 1);
-       strfcpy (t, s, n + 1);
-       convert_nonmime_string (&t);
-       tlen = mutt_strlen (t);
-       strncpy (d, t, tlen);
-       d += tlen;
-       FREE (&t);
-       break;
-      }
        strncpy (d, s, dlen);
        d += dlen;
        break;
@@ -880,28 +354,22 @@ void rfc2047_decode (char **pd)
      rfc2047_decode_word (d, p, dlen);
      found_encoded = 1;
      s = q;
-    n = mutt_strlen (d);
+    n = strlen (d);
      dlen -= n;
      d += n;
    }
    *d = 0;
  
-  FREE (pd);           /* __FREE_CHECKED__ */
+  free (*pd);
    *pd = d0;
-  mutt_str_adjust (pd);
  }
  
-void rfc2047_decode_adrlist (ADDRESS *a)
+/* Set LC_CTYPE from the environment and cache a private copy of the locale's charset name (NULL when it is empty) */
+void rfc2047_init(void)
  {
-  while (a)
-  {
-    if (a->personal && ((strstr (a->personal, "=?") != NULL) || 
-                       (AssumedCharset && *AssumedCharset)))
-      rfc2047_decode (&a->personal);
-#ifdef EXACT_ADDRESS
-    if (a->val && strstr (a->val, "=?") != NULL)
-      rfc2047_decode (&a->val);
-#endif
-    a = a->next;
-  }
+  setlocale(LC_CTYPE, "");
+  const char *codeset = nl_langinfo(CODESET);  /* may be overwritten by later locale calls, so copy it */
+  system_charset = codeset[0] ? strcpy(xmalloc(strlen(codeset) + 1), codeset) : NULL;
+  /* "%s" must not receive NULL, so substitute a placeholder when no charset is known */
+  debug("Charset is %s\n", system_charset ? system_charset : "(unknown)");
  }
diff --git a/rfc2047.h b/rfc2047.h

index 9e15d2f21927671d69f2acd4365465a64e97d21b..0d1750b1fcab4fb1a80ddd9f508c6e0760cbe598 100644 (file)
--- a/rfc2047.h
+++ b/rfc2047.h
@@ -1,29 +1,23 @@
  /*
+ * This code for parsing rfc2047 encoding of headers has been adapted
+ * from the Mutt 1.5.16 MUA by Martin Mares <mj@ucw.cz>.
+ *
   * Copyright (C) 1996-2000 Michael R. Elkins <me@mutt.org>
- * 
+ *
   *     This program is free software; you can redistribute it and/or modify
   *     it under the terms of the GNU General Public License as published by
   *     the Free Software Foundation; either version 2 of the License, or
   *     (at your option) any later version.
- * 
+ *
   *     This program is distributed in the hope that it will be useful,
   *     but WITHOUT ANY WARRANTY; without even the implied warranty of
   *     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   *     GNU General Public License for more details.
- * 
+ *
   *     You should have received a copy of the GNU General Public License
   *     along with this program; if not, write to the Free Software
   *     Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
- */ 
-
-char *mutt_choose_charset (const char *fromcode, const char *charsets,
-                     char *u, size_t ulen, char **d, size_t *dlen);
-int convert_nonmime_string (char **);
-
-void _rfc2047_encode_string (char **, int, int);
-void rfc2047_encode_adrlist (ADDRESS *, const char *);
-
-#define rfc2047_encode_string(a) _rfc2047_encode_string (a, 0, 32);
+ */
  
  void rfc2047_decode (char **);
-void rfc2047_decode_adrlist (ADDRESS *);
+void rfc2047_init (void);
author	Martin Mares <mj@ucw.cz>
	Mon, 25 Jun 2007 15:53:40 +0000 (17:53 +0200)
committer	Martin Mares <mj@ucw.cz>
	Mon, 25 Jun 2007 15:53:40 +0000 (17:53 +0200)
Makefile		patch \| blob \| history
cm.c		patch \| blob \| history
rfc2047.c		patch \| blob \| history
rfc2047.h		patch \| blob \| history