rfc2047.c

   1 /*
   2  * Copyright (C) 1996-2000 Michael R. Elkins <me@mutt.org>
   3  * Copyright (C) 2000-2001 Edmund Grimley Evans <edmundo@rano.org>
   4  *
   5  *     This program is free software; you can redistribute it and/or modify
   6  *     it under the terms of the GNU General Public License as published by
   7  *     the Free Software Foundation; either version 2 of the License, or
   8  *     (at your option) any later version.
   9  *
  10  *     This program is distributed in the hope that it will be useful,
  11  *     but WITHOUT ANY WARRANTY; without even the implied warranty of
  12  *     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  13  *     GNU General Public License for more details.
  14  *
  15  *     You should have received a copy of the GNU General Public License
  16  *     along with this program; if not, write to the Free Software
  17  *     Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
  18  */
  19
  20 #if HAVE_CONFIG_H
  21 # include "config.h"
  22 #endif
  23
  24 #include "mutt.h"
  25 #include "mime.h"
  26 #include "charset.h"
  27 #include "rfc2047.h"
  28
  29 #include <ctype.h>
  30 #include <errno.h>
  31 #include <stdio.h>
  32 #include <stdlib.h>
  33 #include <string.h>
  34
   35 /* If you are debugging this file, comment out the following line.
         (In this copy the line is already commented out, so NDEBUG is not
         defined here and the real <assert.h> is used unless NDEBUG is
         defined elsewhere.) */
   36 /*#define NDEBUG*/
   37
   38 #ifdef NDEBUG
   39 #define assert(x) /* assertions expand to nothing when NDEBUG is defined */
   40 #else
   41 #include <assert.h>
   42 #endif
   43
   44 #define ENCWORD_LEN_MAX 75 /* longest encoded word try_block() will accept */
   45 #define ENCWORD_LEN_MIN 9 /* strlen ("=?.?.?.?=") */
   46
   47 #define HSPACE(x) ((x) == '\0' || (x) == ' ' || (x) == '\t') /* NUL, space or tab */
   48
   49 #define CONTINUATION_BYTE(c) (((c) & 0xc0) == 0x80) /* top two bits are 10, i.e. a UTF-8 continuation byte */
   50
   51 extern char RFC822Specials[]; /* defined elsewhere; passed to rfc2047_encode() as the extra "specials" set */
   52
   53 typedef size_t (*encoder_t) (char *, ICONV_CONST char *, size_t,
   54                              const char *); /* (output, data, data length, charset name) -> bytes written; see b_encoder and q_encoder */
  55
  56 static size_t convert_string (ICONV_CONST char *f, size_t flen,
  57                               const char *from, const char *to,
  58                               char **t, size_t *tlen)
  59 {
  60   iconv_t cd;
  61   char *buf, *ob;
  62   size_t obl, n;
  63   int e;
  64
  65   cd = mutt_iconv_open (to, from, 0);
  66   if (cd == (iconv_t)(-1))
  67     return (size_t)(-1);
  68   obl = 4 * flen + 1;
  69   ob = buf = safe_malloc (obl);
  70   n = iconv (cd, &f, &flen, &ob, &obl);
  71   if (n == (size_t)(-1) || iconv (cd, 0, 0, &ob, &obl) == (size_t)(-1))
  72   {
  73     e = errno;
  74     FREE (&buf);
  75     iconv_close (cd);
  76     errno = e;
  77     return (size_t)(-1);
  78   }
  79   *ob = '\0';
  80
  81   *tlen = ob - buf;
  82
  83   safe_realloc (&buf, ob - buf + 1);
  84   *t = buf;
  85   iconv_close (cd);
  86
  87   return n;
  88 }
  89
  90 int convert_nonmime_string (char **ps)
  91 {
  92   const char *c, *c1;
  93
  94   for (c = AssumedCharset; c; c = c1 ? c1 + 1 : 0)
  95   {
  96     char *u = *ps;
  97     char *s;
  98     char *fromcode;
  99     size_t m, n;
 100     size_t ulen = mutt_strlen (*ps);
 101     size_t slen;
 102
 103     if (!u || !*u)
 104       return 0;
 105
 106     c1 = strchr (c, ':');
 107     n = c1 ? c1 - c : mutt_strlen (c);
 108     if (!n)
 109       return 0;
 110     fromcode = safe_malloc (n + 1);
 111     strfcpy (fromcode, c, n + 1);
 112     m = convert_string (u, ulen, fromcode, Charset, &s, &slen);
 113     FREE (&fromcode);
 114     if (m != (size_t)(-1))
 115     {
 116       FREE (ps); /* __FREE_CHECKED__ */
 117       *ps = s;
 118       return 0;
 119     }
 120   }
 121   mutt_convert_string (ps,
 122       (const char *)mutt_get_default_charset (AssumedCharset),
 123       Charset, M_ICONV_HOOK_FROM);
 124   return -1;
 125 }
 126
 127 char *mutt_choose_charset (const char *fromcode, const char *charsets,
 128                       char *u, size_t ulen, char **d, size_t *dlen)
 129 {
 130   char canonical_buff[LONG_STRING];
 131   char *e = 0, *tocode = 0;
 132   size_t elen = 0, bestn = 0;
 133   const char *p, *q;
 134
 135   for (p = charsets; p; p = q ? q + 1 : 0)
 136   {
 137     char *s, *t;
 138     size_t slen, n;
 139
 140     q = strchr (p, ':');
 141
 142     n = q ? q - p : strlen (p);
 143
 144     if (!n ||
 145         /* Assume that we never need more than 12 characters of
 146            encoded-text to encode a single character. */
 147         n > (ENCWORD_LEN_MAX - ENCWORD_LEN_MIN + 2 - 12))
 148       continue;
 149
 150     t = safe_malloc (n + 1);
 151     memcpy (t, p, n);
 152     t[n] = '\0';
 153
 154     n = convert_string (u, ulen, fromcode, t, &s, &slen);
 155     if (n == (size_t)(-1))
 156       continue;
 157
 158     if (!tocode || n < bestn)
 159     {
 160       bestn = n;
 161       FREE (&tocode);
 162       tocode = t;
 163       if (d)
 164       {
 165         FREE (&e);
 166         e = s;
 167       }
 168       else
 169         FREE (&s);
 170       elen = slen;
 171       if (!bestn)
 172         break;
 173     }
 174     else
 175     {
 176       FREE (&t);
 177       FREE (&s);
 178     }
 179   }
 180   if (tocode)
 181   {
 182     if (d)
 183       *d = e;
 184     if (dlen)
 185       *dlen = elen;
 186
 187     mutt_canonical_charset (canonical_buff, sizeof (canonical_buff), tocode);
 188     mutt_str_replace (&tocode, canonical_buff);
 189   }
 190   return tocode;
 191 }
 192
 193 static size_t b_encoder (char *s, ICONV_CONST char *d, size_t dlen,
 194                          const char *tocode)
 195 {
 196   char *s0 = s;
 197
 198   memcpy (s, "=?", 2), s += 2;
 199   memcpy (s, tocode, strlen (tocode)), s += strlen (tocode);
 200   memcpy (s, "?B?", 3), s += 3;
 201   for (;;)
 202   {
 203     if (!dlen)
 204       break;
 205     else if (dlen == 1)
 206     {
 207       *s++ = B64Chars[(*d >> 2) & 0x3f];
 208       *s++ = B64Chars[(*d & 0x03) << 4];
 209       *s++ = '=';
 210       *s++ = '=';
 211       break;
 212     }
 213     else if (dlen == 2)
 214     {
 215       *s++ = B64Chars[(*d >> 2) & 0x3f];
 216       *s++ = B64Chars[((*d & 0x03) << 4) | ((d[1] >> 4) & 0x0f)];
 217       *s++ = B64Chars[(d[1] & 0x0f) << 2];
 218       *s++ = '=';
 219       break;
 220     }
 221     else
 222     {
 223       *s++ = B64Chars[(*d >> 2) & 0x3f];
 224       *s++ = B64Chars[((*d & 0x03) << 4) | ((d[1] >> 4) & 0x0f)];
 225       *s++ = B64Chars[((d[1] & 0x0f) << 2) | ((d[2] >> 6) & 0x03)];
 226       *s++ = B64Chars[d[2] & 0x3f];
 227       d += 3, dlen -= 3;
 228     }
 229   }
 230   memcpy (s, "?=", 2), s += 2;
 231   return s - s0;
 232 }
 233
 234 static size_t q_encoder (char *s, ICONV_CONST char *d, size_t dlen,
 235                          const char *tocode)
 236 {
 237   char hex[] = "0123456789ABCDEF";
 238   char *s0 = s;
 239
 240   memcpy (s, "=?", 2), s += 2;
 241   memcpy (s, tocode, strlen (tocode)), s += strlen (tocode);
 242   memcpy (s, "?Q?", 3), s += 3;
 243   while (dlen--)
 244   {
 245     unsigned char c = *d++;
 246     if (c == ' ')
 247       *s++ = '_';
 248     else if (c >= 0x7f || c < 0x20 || c == '_' ||  strchr (MimeSpecials, c))
 249     {
 250       *s++ = '=';
 251       *s++ = hex[(c & 0xf0) >> 4];
 252       *s++ = hex[c & 0x0f];
 253     }
 254     else
 255       *s++ = c;
 256   }
 257   memcpy (s, "?=", 2), s += 2;
 258   return s - s0;
 259 }
 260
 261 /*
 262  * Return 0 if and set *encoder and *wlen if the data (d, dlen) could
 263  * be converted to an encoded word of length *wlen using *encoder.
 264  * Otherwise return an upper bound on the maximum length of the data
 265  * which could be converted.
 266  * The data is converted from fromcode (which must be stateless) to
 267  * tocode, unless fromcode is 0, in which case the data is assumed to
 268  * be already in tocode, which should be 8-bit and stateless.
 269  */
 270 static size_t try_block (ICONV_CONST char *d, size_t dlen,
 271                          const char *fromcode, const char *tocode,
 272                          encoder_t *encoder, size_t *wlen)
 273 {
 274   char buf1[ENCWORD_LEN_MAX - ENCWORD_LEN_MIN + 1];
 275   iconv_t cd;
 276   ICONV_CONST char *ib;
 277   char *ob, *p;
 278   size_t ibl, obl;
 279   int count, len, len_b, len_q;
 280
 281   if (fromcode)
 282   {
 283     cd = mutt_iconv_open (tocode, fromcode, 0);
 284     assert (cd != (iconv_t)(-1));
 285     ib = d, ibl = dlen, ob = buf1, obl = sizeof (buf1) - strlen (tocode);
 286     if (iconv (cd, &ib, &ibl, &ob, &obl) == (size_t)(-1) ||
 287         iconv (cd, 0, 0, &ob, &obl) == (size_t)(-1))
 288     {
 289       assert (errno == E2BIG);
 290       iconv_close (cd);
 291       assert (ib > d);
 292       return (ib - d == dlen) ? dlen : ib - d + 1;
 293     }
 294     iconv_close (cd);
 295   }
 296   else
 297   {
 298     if (dlen > sizeof (buf1) - strlen (tocode))
 299       return sizeof (buf1) - strlen (tocode) + 1;
 300     memcpy (buf1, d, dlen);
 301     ob = buf1 + dlen;
 302   }
 303
 304   count = 0;
 305   for (p = buf1; p < ob; p++)
 306   {
 307     unsigned char c = *p;
 308     assert (strchr (MimeSpecials, '?'));
 309     if (c >= 0x7f || c < 0x20 || *p == '_' ||
 310         (c != ' ' && strchr (MimeSpecials, *p)))
 311       ++count;
 312   }
 313
 314   len = ENCWORD_LEN_MIN - 2 + strlen (tocode);
 315   len_b = len + (((ob - buf1) + 2) / 3) * 4;
 316   len_q = len + (ob - buf1) + 2 * count;
 317
 318   /* Apparently RFC 1468 says to use B encoding for iso-2022-jp. */
 319   if (!ascii_strcasecmp (tocode, "ISO-2022-JP"))
 320     len_q = ENCWORD_LEN_MAX + 1;
 321
 322   if (len_b < len_q && len_b <= ENCWORD_LEN_MAX)
 323   {
 324     *encoder = b_encoder;
 325     *wlen = len_b;
 326     return 0;
 327   }
 328   else if (len_q <= ENCWORD_LEN_MAX)
 329   {
 330     *encoder = q_encoder;
 331     *wlen = len_q;
 332     return 0;
 333   }
 334   else
 335     return dlen;
 336 }
 337
 338 /*
 339  * Encode the data (d, dlen) into s using the encoder.
 340  * Return the length of the encoded word.
 341  */
 342 static size_t encode_block (char *s, char *d, size_t dlen,
 343                             const char *fromcode, const char *tocode,
 344                             encoder_t encoder)
 345 {
 346   char buf1[ENCWORD_LEN_MAX - ENCWORD_LEN_MIN + 1];
 347   iconv_t cd;
 348   ICONV_CONST char *ib;
 349   char *ob;
 350   size_t ibl, obl, n1, n2;
 351
 352   if (fromcode)
 353   {
 354     cd = mutt_iconv_open (tocode, fromcode, 0);
 355     assert (cd != (iconv_t)(-1));
 356     ib = d, ibl = dlen, ob = buf1, obl = sizeof (buf1) - strlen (tocode);
 357     n1 = iconv (cd, &ib, &ibl, &ob, &obl);
 358     n2 = iconv (cd, 0, 0, &ob, &obl);
 359     assert (n1 != (size_t)(-1) && n2 != (size_t)(-1));
 360     iconv_close (cd);
 361     return (*encoder) (s, buf1, ob - buf1, tocode);
 362   }
 363   else
 364     return (*encoder) (s, d, dlen, tocode);
 365 }
 366
 367 /*
 368  * Discover how much of the data (d, dlen) can be converted into
 369  * a single encoded word. Return how much data can be converted,
 370  * and set the length *wlen of the encoded word and *encoder.
 371  * We start in column col, which limits the length of the word.
 372  */
 373 static size_t choose_block (char *d, size_t dlen, int col,
 374                             const char *fromcode, const char *tocode,
 375                             encoder_t *encoder, size_t *wlen)
 376 {
 377   size_t n, nn;
 378   int utf8 = fromcode && !ascii_strcasecmp (fromcode, "utf-8");
 379
 380   n = dlen;
 381   for (;;)
 382   {
 383     assert (d + n > d);
 384     nn = try_block (d, n, fromcode, tocode, encoder, wlen);
 385     if (!nn && (col + *wlen <= ENCWORD_LEN_MAX + 1 || n <= 1))
 386       break;
 387     n = (nn ? nn : n) - 1;
 388     assert (n > 0);
 389     if (utf8)
 390       while (n > 1 && CONTINUATION_BYTE(d[n]))
 391         --n;
 392   }
 393   return n;
 394 }
 395
 396 /*
 397  * Place the result of RFC-2047-encoding (d, dlen) into the dynamically
 398  * allocated buffer (e, elen). The input data is in charset fromcode
 399  * and is converted into a charset chosen from charsets.
 400  * Return 1 if the conversion to UTF-8 failed, 2 if conversion from UTF-8
 401  * failed, otherwise 0. If conversion failed, fromcode is assumed to be
 402  * compatible with us-ascii and the original data is used.
 403  * The input data is assumed to be a single line starting at column col;
 404  * if col is non-zero, the preceding character was a space.
 405  */
 406 static int rfc2047_encode (ICONV_CONST char *d, size_t dlen, int col,
 407                            const char *fromcode, const char *charsets,
 408                            char **e, size_t *elen, char *specials)
 409 {
 410   int ret = 0;
 411   char *buf;
 412   size_t bufpos, buflen;
 413   char *u, *t0, *t1, *t;
 414   char *s0, *s1;
 415   size_t ulen, r, n, wlen;
 416   encoder_t encoder;
 417   char *tocode1 = 0;
 418   const char *tocode;
 419   char *icode = "utf-8";
 420
 421   /* Try to convert to UTF-8. */
 422   if (convert_string (d, dlen, fromcode, icode, &u, &ulen))
 423   {
 424     ret = 1;
 425     icode = 0;
 426     u = safe_malloc ((ulen = dlen) + 1);
 427     memcpy (u, d, dlen);
 428     u[ulen] = 0;
 429   }
 430
 431   /* Find earliest and latest things we must encode. */
 432   s0 = s1 = t0 = t1 = 0;
 433   for (t = u; t < u + ulen; t++)
 434   {
 435     if ((*t & 0x80) ||
 436         (*t == '=' && t[1] == '?' && (t == u || HSPACE(*(t-1)))))
 437     {
 438       if (!t0) t0 = t;
 439       t1 = t;
 440     }
 441     else if (specials && strchr (specials, *t))
 442     {
 443       if (!s0) s0 = t;
 444       s1 = t;
 445     }
 446   }
 447
 448   /* If we have something to encode, include RFC822 specials */
 449   if (t0 && s0 && s0 < t0)
 450     t0 = s0;
 451   if (t1 && s1 && s1 > t1)
 452     t1 = s1;
 453
 454   if (!t0)
 455   {
 456     /* No encoding is required. */
 457     *e = u;
 458     *elen = ulen;
 459     return ret;
 460   }
 461
 462   /* Choose target charset. */
 463   tocode = fromcode;
 464   if (icode)
 465   {
 466     if ((tocode1 = mutt_choose_charset (icode, charsets, u, ulen, 0, 0)))
 467       tocode = tocode1;
 468     else
 469       ret = 2, icode = 0;
 470   }
 471
 472   /* Hack to avoid labelling 8-bit data as us-ascii. */
 473   if (!icode && mutt_is_us_ascii (tocode))
 474     tocode = "unknown-8bit";
 475
 476   /* Adjust t0 for maximum length of line. */
 477   t = u + (ENCWORD_LEN_MAX + 1) - col - ENCWORD_LEN_MIN;
 478   if (t < u)  t = u;
 479   if (t < t0) t0 = t;
 480
 481
 482   /* Adjust t0 until we can encode a character after a space. */
 483   for (; t0 > u; t0--)
 484   {
 485     if (!HSPACE(*(t0-1)))
 486       continue;
 487     t = t0 + 1;
 488     if (icode)
 489       while (t < u + ulen && CONTINUATION_BYTE(*t))
 490         ++t;
 491     if (!try_block (t0, t - t0, icode, tocode, &encoder, &wlen) &&
 492         col + (t0 - u) + wlen <= ENCWORD_LEN_MAX + 1)
 493       break;
 494   }
 495
 496   /* Adjust t1 until we can encode a character before a space. */
 497   for (; t1 < u + ulen; t1++)
 498   {
 499     if (!HSPACE(*t1))
 500       continue;
 501     t = t1 - 1;
 502     if (icode)
 503       while (CONTINUATION_BYTE(*t))
 504         --t;
 505     if (!try_block (t, t1 - t, icode, tocode, &encoder, &wlen) &&
 506         1 + wlen + (u + ulen - t1) <= ENCWORD_LEN_MAX + 1)
 507       break;
 508   }
 509
 510   /* We shall encode the region [t0,t1). */
 511
 512   /* Initialise the output buffer with the us-ascii prefix. */
 513   buflen = 2 * ulen;
 514   buf = safe_malloc (buflen);
 515   bufpos = t0 - u;
 516   memcpy (buf, u, t0 - u);
 517
 518   col += t0 - u;
 519
 520   t = t0;
 521   for (;;)
 522   {
 523     /* Find how much we can encode. */
 524     n = choose_block (t, t1 - t, col, icode, tocode, &encoder, &wlen);
 525     if (n == t1 - t)
 526     {
 527       /* See if we can fit the us-ascii suffix, too. */
 528       if (col + wlen + (u + ulen - t1) <= ENCWORD_LEN_MAX + 1)
 529         break;
 530       n = t1 - t - 1;
 531       if (icode)
 532         while (CONTINUATION_BYTE(t[n]))
 533           --n;
 534       assert (t + n >= t);
 535       if (!n)
 536       {
 537         /* This should only happen in the really stupid case where the
 538            only word that needs encoding is one character long, but
 539            there is too much us-ascii stuff after it to use a single
 540            encoded word. We add the next word to the encoded region
 541            and try again. */
 542         assert (t1 < u + ulen);
 543         for (t1++; t1 < u + ulen && !HSPACE(*t1); t1++)
 544           ;
 545         continue;
 546       }
 547       n = choose_block (t, n, col, icode, tocode, &encoder, &wlen);
 548     }
 549
 550     /* Add to output buffer. */
 551 #define LINEBREAK "\n\t"
 552     if (bufpos + wlen + strlen (LINEBREAK) > buflen)
 553     {
 554       buflen = bufpos + wlen + strlen (LINEBREAK);
 555       safe_realloc (&buf, buflen);
 556     }
 557     r = encode_block (buf + bufpos, t, n, icode, tocode, encoder);
 558     assert (r == wlen);
 559     bufpos += wlen;
 560     memcpy (buf + bufpos, LINEBREAK, strlen (LINEBREAK));
 561     bufpos += strlen (LINEBREAK);
 562 #undef LINEBREAK
 563
 564     col = 1;
 565
 566     t += n;
 567   }
 568
 569   /* Add last encoded word and us-ascii suffix to buffer. */
 570   buflen = bufpos + wlen + (u + ulen - t1);
 571   safe_realloc (&buf, buflen + 1);
 572   r = encode_block (buf + bufpos, t, t1 - t, icode, tocode, encoder);
 573   assert (r == wlen);
 574   bufpos += wlen;
 575   memcpy (buf + bufpos, t1, u + ulen - t1);
 576
 577   FREE (&tocode1);
 578   FREE (&u);
 579
 580   buf[buflen] = '\0';
 581
 582   *e = buf;
 583   *elen = buflen + 1;
 584   return ret;
 585 }
 586
 587 void _rfc2047_encode_string (char **pd, int encode_specials, int col)
 588 {
 589   char *e;
 590   size_t elen;
 591   char *charsets;
 592
 593   if (!Charset || !*pd)
 594     return;
 595
 596   charsets = SendCharset;
 597   if (!charsets || !*charsets)
 598     charsets = "utf-8";
 599
 600   rfc2047_encode (*pd, strlen (*pd), col,
 601                   Charset, charsets, &e, &elen,
 602                   encode_specials ? RFC822Specials : NULL);
 603
 604   FREE (pd);            /* __FREE_CHECKED__ */
 605   *pd = e;
 606 }
 607
 608 void rfc2047_encode_adrlist (ADDRESS *addr, const char *tag)
 609 {
 610   ADDRESS *ptr = addr;
 611   int col = tag ? strlen (tag) + 2 : 32;
 612
 613   while (ptr)
 614   {
 615     if (ptr->personal)
 616       _rfc2047_encode_string (&ptr->personal, 1, col);
 617 #ifdef EXACT_ADDRESS
 618     if (ptr->val)
 619       _rfc2047_encode_string (&ptr->val, 1, col);
 620 #endif
 621     ptr = ptr->next;
 622   }
 623 }
 624
 625 static int rfc2047_decode_word (char *d, const char *s, size_t len)
 626 {
 627   const char *pp, *pp1;
 628   char *pd, *d0;
 629   const char *t, *t1;
 630   int enc = 0, count = 0;
 631   char *charset = NULL;
 632
 633   pd = d0 = safe_malloc (strlen (s));
 634
 635   for (pp = s; (pp1 = strchr (pp, '?')); pp = pp1 + 1)
 636   {
 637     count++;
 638     switch (count)
 639     {
 640       case 2:
 641         /* ignore language specification a la RFC 2231 */
 642         t = pp1;
 643         if ((t1 = memchr (pp, '*', t - pp)))
 644           t = t1;
 645         charset = safe_malloc (t - pp + 1);
 646         memcpy (charset, pp, t - pp);
 647         charset[t-pp] = '\0';
 648         break;
 649       case 3:
 650         if (toupper ((unsigned char) *pp) == 'Q')
 651           enc = ENCQUOTEDPRINTABLE;
 652         else if (toupper ((unsigned char) *pp) == 'B')
 653           enc = ENCBASE64;
 654         else
 655         {
 656           FREE (&charset);
 657           FREE (&d0);
 658           return (-1);
 659         }
 660         break;
 661       case 4:
 662         if (enc == ENCQUOTEDPRINTABLE)
 663         {
 664           for (; pp < pp1; pp++)
 665           {
 666             if (*pp == '_')
 667               *pd++ = ' ';
 668             else if (*pp == '=' &&
 669                      (!(pp[1] & ~127) && hexval(pp[1]) != -1) &&
 670                      (!(pp[2] & ~127) && hexval(pp[2]) != -1))
 671             {
 672               *pd++ = (hexval(pp[1]) << 4) | hexval(pp[2]);
 673               pp += 2;
 674             }
 675             else
 676               *pd++ = *pp;
 677           }
 678           *pd = 0;
 679         }
 680         else if (enc == ENCBASE64)
 681         {
 682           int c, b = 0, k = 0;
 683
 684           for (; pp < pp1; pp++)
 685           {
 686             if (*pp == '=')
 687               break;
 688             if ((*pp & ~127) || (c = base64val(*pp)) == -1)
 689               continue;
 690             if (k + 6 >= 8)
 691             {
 692               k -= 2;
 693               *pd++ = b | (c >> k);
 694               b = c << (8 - k);
 695             }
 696             else
 697             {
 698               b |= c << (k + 2);
 699               k += 6;
 700             }
 701           }
 702           *pd = 0;
 703         }
 704         break;
 705     }
 706   }
 707
 708   if (charset)
 709     mutt_convert_string (&d0, charset, Charset, M_ICONV_HOOK_FROM);
 710   mutt_filter_unprintable (&d0);
 711   strfcpy (d, d0, len);
 712   FREE (&charset);
 713   FREE (&d0);
 714   return (0);
 715 }
 716
 717 /*
 718  * Find the start and end of the first encoded word in the string.
 719  * We use the grammar in section 2 of RFC 2047, but the "encoding"
 720  * must be B or Q. Also, we don't require the encoded word to be
 721  * separated by linear-white-space (section 5(1)).
 722  */
 723 static const char *find_encoded_word (const char *s, const char **x)
 724 {
 725   const char *p, *q;
 726
 727   q = s;
 728   while ((p = strstr (q, "=?")))
 729   {
 730     for (q = p + 2;
 731          0x20 < *q && *q < 0x7f && !strchr ("()<>@,;:\"/[]?.=", *q);
 732          q++)
 733       ;
 734     if (q[0] != '?' || !strchr ("BbQq", q[1]) || q[2] != '?')
 735       continue;
 736     for (q = q + 3; 0x20 < *q && *q < 0x7f && *q != '?'; q++)
 737       ;
 738     if (q[0] != '?' || q[1] != '=')
 739     {
 740       --q;
 741       continue;
 742     }
 743
 744     *x = q + 2;
 745     return p;
 746   }
 747
 748   return 0;
 749 }
 750
 751 /* return length of linear-white-space */
 752 static size_t lwslen (const char *s, size_t n)
 753 {
 754   const char *p = s;
 755   size_t len = n;
 756
 757   if (n <= 0)
 758     return 0;
 759
 760   for (; p < s + n; p++)
 761     if (!strchr (" \t\r\n", *p))
 762     {
 763       len = (size_t)(p - s);
 764       break;
 765     }
 766   if (strchr ("\r\n", *(p-1))) /* LWS doesn't end with CRLF */
 767     len = (size_t)0;
 768   return len;
 769 }
 770
 771 /* return length of linear-white-space : reverse */
 772 static size_t lwsrlen (const char *s, size_t n)
 773 {
 774   const char *p = s + n - 1;
 775   size_t len = n;
 776
 777   if (n <= 0)
 778     return 0;
 779
 780   if (strchr ("\r\n", *p)) /* LWS doesn't end with CRLF */
 781     return (size_t)0;
 782
 783   for (; p >= s; p--)
 784     if (!strchr (" \t\r\n", *p))
 785     {
 786       len = (size_t)(s + n - 1 - p);
 787       break;
 788     }
 789   return len;
 790 }
 791
 792 /* try to decode anything that looks like a valid RFC2047 encoded
 793  * header field, ignoring RFC822 parsing rules
 794  */
 795 void rfc2047_decode (char **pd)
 796 {
 797   const char *p, *q;
 798   size_t m, n;
 799   int found_encoded = 0;
 800   char *d0, *d;
 801   const char *s = *pd;
 802   size_t dlen;
 803
 804   if (!s || !*s)
 805     return;
 806
 807   dlen = 4 * strlen (s); /* should be enough */
 808   d = d0 = safe_malloc (dlen + 1);
 809
 810   while (*s && dlen > 0)
 811   {
 812     if (!(p = find_encoded_word (s, &q)))
 813     {
 814       /* no encoded words */
 815       if (option (OPTIGNORELWS))
 816       {
 817         n = mutt_strlen (s);
 818         if (found_encoded && (m = lwslen (s, n)) != 0)
 819         {
 820           if (m != n)
 821             *d = ' ', d++, dlen--;
 822           s += m;
 823         }
 824       }
 825       if (AssumedCharset && *AssumedCharset)
 826       {
 827         char *t;
 828         size_t tlen;
 829
 830         n = mutt_strlen (s);
 831         t = safe_malloc (n + 1);
 832         strfcpy (t, s, n + 1);
 833         convert_nonmime_string (&t);
 834         tlen = mutt_strlen (t);
 835         strncpy (d, t, tlen);
 836         d += tlen;
 837         FREE (&t);
 838         break;
 839       }
 840       strncpy (d, s, dlen);
 841       d += dlen;
 842       break;
 843     }
 844
 845     if (p != s)
 846     {
 847       n = (size_t) (p - s);
 848       /* ignore spaces between encoded word
 849        * and linear-white-space between encoded word and *text */
 850       if (option (OPTIGNORELWS))
 851       {
 852         if (found_encoded && (m = lwslen (s, n)) != 0)
 853         {
 854           if (m != n)
 855             *d = ' ', d++, dlen--;
 856           n -= m, s += m;
 857         }
 858
 859         if ((m = n - lwsrlen (s, n)) != 0)
 860         {
 861           if (m > dlen)
 862             m = dlen;
 863           memcpy (d, s, m);
 864           d += m;
 865           dlen -= m;
 866           if (m != n)
 867             *d = ' ', d++, dlen--;
 868         }
 869       }
 870       else if (!found_encoded || strspn (s, " \t\r\n") != n)
 871       {
 872         if (n > dlen)
 873           n = dlen;
 874         memcpy (d, s, n);
 875         d += n;
 876         dlen -= n;
 877       }
 878     }
 879
 880     rfc2047_decode_word (d, p, dlen);
 881     found_encoded = 1;
 882     s = q;
 883     n = mutt_strlen (d);
 884     dlen -= n;
 885     d += n;
 886   }
 887   *d = 0;
 888
 889   FREE (pd);            /* __FREE_CHECKED__ */
 890   *pd = d0;
 891   mutt_str_adjust (pd);
 892 }
 893
 894 void rfc2047_decode_adrlist (ADDRESS *a)
 895 {
 896   while (a)
 897   {
 898     if (a->personal && ((strstr (a->personal, "=?") != NULL) ||
 899                         (AssumedCharset && *AssumedCharset)))
 900       rfc2047_decode (&a->personal);
 901 #ifdef EXACT_ADDRESS
 902     if (a->val && strstr (a->val, "=?") != NULL)
 903       rfc2047_decode (&a->val);
 904 #endif
 905     a = a->next;
 906   }
 907 }