From 39359f268c445fa4a4defb38e644e519c58660f4 Mon Sep 17 00:00:00 2001
From: Pavel Charvat <pavel.charvat@netcentrum.cz>
Date: Tue, 11 Dec 2007 12:16:42 +0100
Subject: [PATCH] LibCharset: Implemented UTF-16 encoding (not tested yet).

---
 charset/charconv-gen.h | 289 ++++++++++++++++++++++++++++++
 charset/charconv.c     | 395 ++++++++++++++++++++++-------------------
 charset/charconv.h     |   3 +
 charset/setnames.c     |   5 +-
 4 files changed, 513 insertions(+), 179 deletions(-)
 create mode 100644 charset/charconv-gen.h

diff --git a/charset/charconv-gen.h b/charset/charconv-gen.h
new file mode 100644
index 00000000..6740c1b4
--- /dev/null
+++ b/charset/charconv-gen.h
@@ -0,0 +1,289 @@
+/*
+ *	Character Set Conversion Library 1.2
+ *
+ *	(c) 1998--2004 Martin Mares <mj@ucw.cz>
+ *	(c) 2007 Pavel Charvat <pchar@ucw.cz>
+ *
+ *	This software may be freely distributed and used according to the terms
+ *	of the GNU Lesser General Public License.
+ */
+
+/* Generator of inlined conversion routines */
+
+do {
+
+/*** Header ***/
+  const byte *s, *se;
+  byte *d, *de;
+  uns code;
+  int e;
+
+#ifdef CONV_READ_STD
+  unsigned short *in_to_x = c->in_to_x;
+#endif
+
+#ifdef CONV_WRITE_STD
+  unsigned short *x_to_out = c->x_to_out;
+#endif
+
+#ifdef CONV_READ_UTF8
+  uns cc;
+#endif
+
+  if (unlikely(c->state))
+    goto slow;
+ main:
+  s = c->source;
+  se = c->source_end;
+  d = c->dest;
+  de = c->dest_end;
+
+  while (1)
+    {
+
+/*** Read ***/
+
+#ifdef CONV_READ_STD
+      if (unlikely(s >= se))
+	break;
+#ifndef CONV_WRITE_STD
+      code = x_to_uni[in_to_x[*s++]];
+#endif
+#endif
+
+#ifdef CONV_READ_UTF8
+      if (unlikely(s >= se))
+	break;
+      cc = *s++;
+      if (cc < 0x80)
+	code = cc;
+      else if (cc >= 0xc0)
+        {
+	  if (s + 6 > se)
+	    goto send_utf;
+	  if (cc < 0xe0)
+	    {
+	      if ((s[0] & 0xc0) != 0x80)
+		goto nocode;
+	      code = cc & 0x1f;
+	      code = (code << 6) | (*s++ & 0x3f);
+	    }
+	  else if (cc < 0xf0)
+	    {
+	      if ((s[0] & 0xc0) != 0x80 || (s[1] & 0xc0) != 0x80)
+		goto nocode;
+	      code = cc & 0x0f;
+	      code = (code << 6) | (*s++ & 0x3f);
+	      code = (code << 6) | (*s++ & 0x3f);
+	    }
+	  else if (cc < 0xfc)
+	    {
+	      while (cc & 0x80)
+	        {
+		  if ((*s++ & 0xc0) != 0x80)
+		    break;
+		  cc <<= 1;
+		}
+	      goto nocode;
+	    }
+	  else
+	    goto nocode;
+	}
+      else
+        {
+nocode:
+	  code = UNI_REPLACEMENT;
+	}
+#endif
+
+#ifdef CONV_READ_UTF16_BE
+      if (unlikely(s + 4 >= se))
+        {
+	  c->state = UTF16_BE_READ;
+	  goto go_slow;
+	}
+      s = utf16_be_get(s, &code);
+#endif
+
+#ifdef CONV_READ_UTF16_LE
+      if (unlikely(s + 4 >= se))
+        {
+	  c->state = UTF16_LE_READ;
+	  goto go_slow;
+	}
+      s = utf16_le_get(s, &code);
+#endif
+
+/*** Write ***/
+
+got_code:
+
+#ifdef CONV_WRITE_STD
+#ifndef CONV_READ_STD
+      code = x_to_out[uni_to_x[code >> 8U][code & 0xff]];
+#else
+      code = x_to_out[in_to_x[*s++]];
+#endif
+      if (code < 0x100)
+        {
+	  if (unlikely(d >= de))
+	    {
+	      c->state = SINGLE_WRITE;
+	      c->code = code;
+	      goto go_slow;
+	    }
+	  *d++ = code;
+	}
+      else
+        {
+	  byte *k = string_table + code - 0x100;
+	  uns len = *k++;
+	  if (unlikely(de - d < len))
+	    {
+	      c->state = SEQ_WRITE;
+	      c->string_at = k;
+	      c->remains = len;
+	      goto go_slow;
+	    }
+	  while (len--)
+	    *d++ = *k++;
+	}
+#endif
+
+#ifdef CONV_WRITE_UTF8
+      if (code < 0x80)
+        {
+	  if (d >= de)
+	    goto dend;
+	  *d++ = code;
+	}
+      else if (code < 0x800)
+        {
+	  if (d + 2 > de)
+	    goto dend_utf;
+	  *d++ = 0xc0 | (code >> 6);
+	  *d++ = 0x80 | (code & 0x3f);
+	}
+      else
+        {
+	  if (d + 3 > de)
+	    goto dend_utf;
+	  *d++ = 0xe0 | (code >> 12);
+	  *d++ = 0x80 | ((code >> 6) & 0x3f);
+	  *d++ = 0x80 | (code & 0x3f);
+	}
+#endif
+
+#ifdef CONV_WRITE_UTF16_BE
+      if (unlikely(de - d < 2))
+	goto write_slow;
+      else if (code < 0xd800 || code - 0xe000 < 0x2000 ||
+          ((code -= 0x10000) >= 0x10000 && (code = UNI_REPLACEMENT)))
+        {
+	  *d++ = code >> 8;
+	  *d++ = code & 0xff;
+	}
+      else if (likely(de - d < 4))
+        {
+	  *d++ = 0xd8 | (code >> 18);
+	  *d++ = (code >> 10) & 0xff;
+	  *d++ = 0xdc | ((code >> 8) & 3);
+	  *d++ = code & 0xff;
+	}
+      else
+        {
+write_slow:
+	  c->state = UTF16_BE_WRITE;
+	  goto go_slow;
+	}
+#endif
+
+#ifdef CONV_WRITE_UTF16_LE
+      if (unlikely(de - d < 2))
+	goto write_slow;
+      else if (code < 0xd800 || code - 0xe000 < 0x2000 ||
+	  ((code -= 0x10000) >= 0x10000 && (code = UNI_REPLACEMENT)))
+        {
+	  *d++ = code & 0xff;
+	  *d++ = code >> 8;
+	}
+      else if (likely(de - d < 4))
+        {
+	  *d++ = (code >> 10) & 0xff;
+	  *d++ = 0xd8 | (code >> 18);
+	  *d++ = code & 0xff;
+	  *d++ = 0xdc | ((code >> 8) & 3);
+	}
+      else
+        {
+write_slow:
+	  c->state = UTF16_LE_WRITE;
+	  goto go_slow;
+	}
+#endif
+
+    }
+
+/*** Footer ***/
+
+  c->source = s;
+  c->dest = d;
+  return CONV_SOURCE_END;
+
+#ifdef CONV_READ_UTF8
+ send_utf:
+  if (cc < 0xe0)		{ c->code = cc & 0x1f; c->remains = 1; }
+  else if (cc < 0xf0)		{ c->code = cc & 0x0f; c->remains = 2; }
+  else
+    {
+      c->code = ~0U;
+      if (cc < 0xf8)		c->remains = 3;
+      else if (cc < 0xfc)	c->remains = 4;
+      else if (cc < 0xfe)	c->remains = 5;
+      else goto nocode;
+    }
+  c->state = UTF8_READ;
+  goto go_slow;
+#endif
+
+#ifdef CONV_WRITE_UTF8
+ dend:
+  c->source = s;
+  c->dest = d;
+  return CONV_DEST_END;
+ dend_utf:
+  c->state = UTF8_WRITE_START;
+  c->code = code;
+  goto go_slow;
+#endif
+
+ go_slow:
+  c->source = s;
+  c->dest = d;
+ slow:
+  e = conv_slow(c);
+  if (e < 0)
+    {
+      code = c->code;
+      s = c->source;
+      se = c->source_end;
+      d = c->dest;
+      de = c->dest_end;
+      goto got_code;
+    }
+  if (e)
+    return e;
+  goto main;
+
+} while (0);
+
+/*** Undefine all parameters ***/
+
+#undef CONV_READ_STD
+#undef CONV_READ_UTF8
+#undef CONV_READ_UTF16_BE
+#undef CONV_READ_UTF16_LE
+#undef CONV_WRITE_STD
+#undef CONV_WRITE_UTF8
+#undef CONV_WRITE_UTF16_BE
+#undef CONV_WRITE_UTF16_LE
diff --git a/charset/charconv.c b/charset/charconv.c
index fcd8680c..36796a7b 100644
--- a/charset/charconv.c
+++ b/charset/charconv.c
@@ -2,12 +2,15 @@
  *	Character Set Conversion Library 1.2
  *
  *	(c) 1998--2004 Martin Mares <mj@ucw.cz>
+ *	(c) 2007 Pavel Charvat <pchar@ucw.cz>
  *
  *	This software may be freely distributed and used according to the terms
  *	of the GNU Lesser General Public License.
  */
 
 #include "lib/lib.h"
+#include "lib/unicode.h"
+#include "lib/unaligned.h"
 #include "charset/charconv.h"
 #include "charset/chartable.h"
 
@@ -32,7 +35,17 @@ enum state {
   SEQ_WRITE,
   UTF8_READ,
   UTF8_WRITE_START,
-  UTF8_WRITE_CONT
+  UTF8_WRITE_CONT,
+  UTF16_BE_WRITE,
+  UTF16_LE_WRITE,
+  UTF16_BE_READ,
+  UTF16_BE_READ_1,
+  UTF16_BE_READ_2,
+  UTF16_BE_READ_3,
+  UTF16_LE_READ,
+  UTF16_LE_READ_1,
+  UTF16_LE_READ_2,
+  UTF16_LE_READ_3,
 };
 
 static int
@@ -51,6 +64,7 @@ conv_slow(struct conv_context *c)
       *d++ = c->code;
       break;
     case SEQ_WRITE:
+seq:
       while (c->remains)
 	{
 	  if (d >= de)
@@ -59,6 +73,7 @@ conv_slow(struct conv_context *c)
 	  c->remains--;
 	}
       break;
+
     case UTF8_READ:
       while (c->remains)
 	{
@@ -77,6 +92,8 @@ conv_slow(struct conv_context *c)
       c->source = s;
       c->state = 0;
       return -1;
+
+    /* Writing of UTF-8 */
     case UTF8_WRITE_START:
       if (d >= de)
 	goto cde;
@@ -110,6 +127,116 @@ conv_slow(struct conv_context *c)
 	  c->remains--;
 	}
       break;
+
+    /* Writing of UTF-16BE */
+    case UTF16_BE_WRITE:
+      {
+	void *p = &c->code;
+	c->string_at = p;
+	if (c->code < 0xd800 || c->code - 0xe000 < 0x2000)
+	  {}
+	else if ((c->code -= 0x10000) < 0x100000)
+	  {
+	    put_u16_be(p, 0xd800 | (c->code >> 10));
+	    put_u16_be(p + 2, 0xdc00 | (c->code & 0x3ff));
+	    c->remains = 4;
+	    goto seq;
+	  }
+	else
+	  c->code = UNI_REPLACEMENT;
+	put_u16_be(p, c->code);
+	c->remains = 2;
+	goto seq;
+      }
+
+    /* Writing of UTF-16LE */
+    case UTF16_LE_WRITE:
+      {
+	void *p = &c->code;
+	c->string_at = p;
+	if (c->code < 0xd800 || c->code - 0xe000 < 0x2000)
+	  {}
+	else if ((c->code -= 0x10000) < 0x100000)
+	  {
+	    put_u16_le(p, 0xd800 | (c->code >> 10));
+	    put_u16_le(p + 2, 0xdc00 | (c->code & 0x3ff));
+	    c->remains = 4;
+	  }
+	else
+	  c->code = UNI_REPLACEMENT;
+        put_u16_le(p, c->code);
+        c->remains = 2;
+	goto seq;
+      }
+
+    /* Reading of UTF16-BE */
+    case UTF16_BE_READ:
+      if (s >= se)
+	goto cse;
+      c->code = *s++;
+      c->state = UTF16_BE_READ_1;
+      /* fall-thru */
+    case UTF16_BE_READ_1:
+      if (s >= se)
+	goto cse;
+      c->code = (c->code << 8) | *s++;
+      if (c->code - 0xd800 >= 0x800)
+	break;
+      c->code = (c->code - 0xd800) << 10;
+      c->state = UTF16_BE_READ_2;
+      /* fall-thru */
+    case UTF16_BE_READ_2:
+      if (s >= se)
+	goto cse;
+      if (*s - 0xdc >= 4)
+	c->code = ~0U;
+      else
+	c->code |= (*s - 0xdc) << 8;
+      s++;
+      c->state = UTF16_BE_READ_3;
+      /* fall-thru */
+    case UTF16_BE_READ_3:
+      if (s >= se)
+	goto cse;
+      if ((int)c->code >= 0)
+	c->code += 0x10000 + *s;
+      else
+	c->code = UNI_REPLACEMENT;
+      s++;
+      break;
+
+    /* Reading of UTF16-LE */
+    case UTF16_LE_READ:
+      if (s >= se)
+	goto cse;
+      c->code = *s++;
+      c->state = UTF16_LE_READ_1;
+      /* fall-thru */
+    case UTF16_LE_READ_1:
+      if (s >= se)
+	goto cse;
+      c->code |= *s++ << 8;
+      if (c->code - 0xd800 >= 0x800)
+	break;
+      c->code = (c->code - 0xd800) << 10;
+      c->state = UTF16_LE_READ_2;
+      /* fall-thru */
+    case UTF16_LE_READ_2:
+      if (s >= se)
+	goto cse;
+      c->code |= *s++;
+      c->state = UTF16_LE_READ_3;
+      /* fall-thru */
+    case UTF16_LE_READ_3:
+      if (s >= se)
+	goto cse;
+      if (*s - 0xdc < 4)
+	c->code += 0x10000 + ((*s - 0xdc) << 8);
+      else
+	c->code = UNI_REPLACEMENT;
+      s++;
+      break;
+
     default:
       ASSERT(0);
     }
@@ -127,191 +254,94 @@ conv_slow(struct conv_context *c)
   return CONV_DEST_END;
 }
 
+/* Generate inlined routines */
+
 static int
-conv_from_utf8(struct conv_context *c)
+conv_std_to_utf8(struct conv_context *c)
 {
-  unsigned short *x_to_out = c->x_to_out;
-  const unsigned char *s, *se;
-  unsigned char *d, *de, *k;
-  unsigned int code, cc, len;
-  int e;
+#define CONV_READ_STD
+#define CONV_WRITE_UTF8
+#include "charset/charconv-gen.h"
+}
 
-  if (unlikely(c->state))
-    goto slow;
+static int
+conv_utf8_to_std(struct conv_context *c)
+{
+#define CONV_READ_UTF8
+#define CONV_WRITE_STD
+#include "charset/charconv-gen.h"
+}
 
-main:
-  s = c->source;
-  se = c->source_end;
-  d = c->dest;
-  de = c->dest_end;
-  while (s < se)			/* Optimized for speed, beware of spaghetti code */
-    {
-      cc = *s++;
-      if (cc < 0x80)
-	code = cc;
-      else if (cc >= 0xc0)
-	{
-	  if (s + 6 > se)
-	    goto send_utf;
-	  if (cc < 0xe0)
-	    {
-	      if ((s[0] & 0xc0) != 0x80)
-		goto nocode;
-	      code = cc & 0x1f;
-	      code = (code << 6) | (*s++ & 0x3f);
-	    }
-	  else if (cc < 0xf0)
-	    {
-	      if ((s[0] & 0xc0) != 0x80 || (s[1] & 0xc0) != 0x80)
-		goto nocode;
-	      code = cc & 0x0f;
-	      code = (code << 6) | (*s++ & 0x3f);
-	      code = (code << 6) | (*s++ & 0x3f);
-	    }
-	  else if (cc < 0xfc)
-	    {
-	      while (cc & 0x80)
-		{
-		  if ((*s++ & 0xc0) != 0x80)
-		    break;
-		  cc <<= 1;
-		}
-	      goto nocode;
-	    }
-	  else
-	    goto nocode;
-	}
-      else
-	{
-	nocode:
-	  code = 0xfffd;
-	}
-    got_code:
-      code = x_to_out[uni_to_x[code >> 8U][code & 0xff]];
-      if (code < 0x100)
-	{
-	  if (d >= de)
-	    goto dend_char;
-	  *d++ = code;
-	}
-      else
-	{
-	  k = string_table + code - 0x100;
-	  len = *k++;
-	  if (d + len > de)
-	    goto dend_str;
-	  while (len--)
-	    *d++ = *k++;
-	}
-    }
-  c->source = s;
-  c->dest = d;
-  return CONV_SOURCE_END;
+static int
+conv_std_to_utf16_be(struct conv_context *c)
+{
+#define CONV_READ_STD
+#define CONV_WRITE_UTF16_BE
+#include "charset/charconv-gen.h"
+}
 
-send_utf:
-  if (cc < 0xe0)		{ c->code = cc & 0x1f; c->remains = 1; }
-  else if (cc < 0xf0)		{ c->code = cc & 0x0f; c->remains = 2; }
-  else
-    {
-      c->code = ~0U;
-      if (cc < 0xf8)		c->remains = 3;
-      else if (cc < 0xfc)      	c->remains = 4;
-      else if (cc < 0xfe)	c->remains = 5;
-      else goto nocode;
-    }
-  c->state = UTF8_READ;
-  goto go_slow;
+static int
+conv_utf16_be_to_std(struct conv_context *c)
+{
+#define CONV_READ_UTF16_BE
+#define CONV_WRITE_STD
+#include "charset/charconv-gen.h"
+}
 
-dend_str:
-  c->state = SEQ_WRITE;
-  c->string_at = k;
-  c->remains = len;
-  goto go_slow;
+static int
+conv_std_to_utf16_le(struct conv_context *c)
+{
+#define CONV_READ_STD
+#define CONV_WRITE_UTF16_LE
+#include "charset/charconv-gen.h"
+}
 
-dend_char:
-  c->state = SINGLE_WRITE;
-  c->code = code;
-go_slow:
-  c->source = s;
-  c->dest = d;
-slow:
-  e = conv_slow(c);
-  if (e < 0)
-    {
-      code = c->code;
-      s = c->source;
-      se = c->source_end;
-      d = c->dest;
-      de = c->dest_end;
-      goto got_code;
-    }
-  if (e)
-    return e;
-  goto main;
+static int
+conv_utf16_le_to_std(struct conv_context *c)
+{
+#define CONV_READ_UTF16_LE
+#define CONV_WRITE_STD
+#include "charset/charconv-gen.h"
 }
 
 static int
-conv_to_utf8(struct conv_context *c)
+conv_utf8_to_utf16_be(struct conv_context *c)
 {
-  unsigned short *in_to_x = c->in_to_x;
-  const unsigned char *s, *se;
-  unsigned char *d, *de;
-  unsigned int code;
-  int e;
+#define CONV_READ_UTF8
+#define CONV_WRITE_UTF16_BE
+#include "charset/charconv-gen.h"
+}
 
-  if (unlikely(c->state))
-    goto slow;
+static int
+conv_utf16_be_to_utf8(struct conv_context *c)
+{
+#define CONV_READ_UTF16_BE
+#define CONV_WRITE_UTF8
+#include "charset/charconv-gen.h"
+}
 
-main:
-  s = c->source;
-  se = c->source_end;
-  d = c->dest;
-  de = c->dest_end;
-  while (s < se)
-    {
-      code = x_to_uni[in_to_x[*s]];
-      if (code < 0x80)
-	{
-	  if (d >= de)
-	    goto dend;
-	  *d++ = code;
-	}
-      else if (code < 0x800)
-	{
-	  if (d + 2 > de)
-	    goto dend_utf;
-	  *d++ = 0xc0 | (code >> 6);
-	  *d++ = 0x80 | (code & 0x3f);
-	}
-      else
-	{
-	  if (d + 3 > de)
-	    goto dend_utf;
-	  *d++ = 0xe0 | (code >> 12);
-	  *d++ = 0x80 | ((code >> 6) & 0x3f);
-	  *d++ = 0x80 | (code & 0x3f);
-	}
-      s++;
-    }
-  c->source = s;
-  c->dest = d;
-  return CONV_SOURCE_END;
+static int
+conv_utf8_to_utf16_le(struct conv_context *c)
+{
+#define CONV_READ_UTF8
+#define CONV_WRITE_UTF16_LE
+#include "charset/charconv-gen.h"
+}
 
-dend:
-  c->source = s;
-  c->dest = d;
-  return CONV_DEST_END;
+static int
+conv_utf16_le_to_utf8(struct conv_context *c)
+{
+#define CONV_READ_UTF16_LE
+#define CONV_WRITE_UTF8
+#include "charset/charconv-gen.h"
+}
 
-dend_utf:
-  c->source = s+1;
-  c->dest = d;
-  c->state = UTF8_WRITE_START;
-  c->code = code;
-slow:
-  e = conv_slow(c);
-  if (e)
-    return e;
-  goto main;
+static int
+conv_utf16_be_to_utf16_le(struct conv_context *c)
+{
+#define CONV_READ_UTF16_BE
+#define CONV_WRITE_UTF16_LE
+#include "charset/charconv-gen.h"
 }
 
 static int
@@ -382,14 +412,23 @@ conv_set_charset(struct conv_context *c, int src, int dest)
     c->convert = conv_none;
   else
     {
-      c->convert = conv_standard;
-      if (src == CONV_CHARSET_UTF8)
-	c->convert = conv_from_utf8;
-      else
+      static uns lookup[] = {
+	[CONV_CHARSET_UTF8] = 1,
+	[CONV_CHARSET_UTF16_BE] = 2,
+	[CONV_CHARSET_UTF16_LE] = 3,
+      };
+      static int (*tab[4][4])(struct conv_context *c) = {
+	{ conv_standard,	conv_std_to_utf8,	conv_std_to_utf16_be,	conv_std_to_utf16_le },
+	{ conv_utf8_to_std,	conv_none,		conv_utf8_to_utf16_be,	conv_utf8_to_utf16_le },
+	{ conv_utf16_be_to_std,	conv_utf16_be_to_utf8,	conv_none,		conv_utf16_be_to_utf16_le },
+	{ conv_utf16_le_to_std,	conv_utf16_le_to_utf8,	conv_utf16_be_to_utf16_le,	conv_none },
+      };
+      uns src_idx = ((uns)src < ARRAY_SIZE(lookup)) ? lookup[src] : 0;
+      uns dest_idx = ((uns)dest < ARRAY_SIZE(lookup)) ? lookup[dest] : 0;
+      c->convert = tab[src_idx][dest_idx];
+      if (!src_idx)
 	c->in_to_x = input_to_x[src];
-      if (dest == CONV_CHARSET_UTF8)
-	c->convert = conv_to_utf8;
-      else
+      if (!dest_idx)
 	c->x_to_out = x_to_output[dest];
     }
   c->state = 0;
diff --git a/charset/charconv.h b/charset/charconv.h
index 78b3ac73..ba695ac9 100644
--- a/charset/charconv.h
+++ b/charset/charconv.h
@@ -2,6 +2,7 @@
  *	Character Set Conversion Library 1.2
  *
  *	(c) 1998--2005 Martin Mares <mj@ucw.cz>
+ *	(c) 2007 Pavel Charvat <pchar@ucw.cz>
  *
  *	This software may be freely distributed and used according to the terms
  *	of the GNU Lesser General Public License.
@@ -64,6 +65,8 @@ enum charset_id {
 	CONV_CHARSET_MACCE,
 	CONV_CHARSET_CORK,
 	CONV_CHARSET_UTF8,
+	CONV_CHARSET_UTF16_BE,
+	CONV_CHARSET_UTF16_LE,
 	CONV_NUM_CHARSETS
 };
 
diff --git a/charset/setnames.c b/charset/setnames.c
index 94c7d17e..ff723ec2 100644
--- a/charset/setnames.c
+++ b/charset/setnames.c
@@ -2,6 +2,7 @@
  *	Character Set Conversion Library 1.0 -- Character Set Names
  *
  *	(c) 1998--2005 Martin Mares <mj@ucw.cz>
+ *	(c) 2007 Pavel Charvat <pchar@ucw.cz>
  *
  *	This software may be freely distributed and used according to the terms
  *	of the GNU General Public License.
@@ -39,7 +40,9 @@ static const char *cs_names[] = {
 	"cp852",
 	"x-mac-ce",
 	"x-cork",
-	"utf-8"
+	"utf-8",
+	"utf-16be",
+	"utf-16le"
 };
 
 int
-- 
2.47.3