mj.ucw.cz Git - libucw.git/commitdiff
Added functions for reading/writing UTF-8 characters on fastbuf streams.
author: Martin Mares <mj@ucw.cz>
Sun, 21 Jan 2001 17:59:58 +0000 (17:59 +0000)
committer: Martin Mares <mj@ucw.cz>
Sun, 21 Jan 2001 17:59:58 +0000 (17:59 +0000)
charset/unistream.h [new file with mode: 0644]

diff --git a/charset/unistream.h b/charset/unistream.h
new file mode 100644 (file)
index 0000000..5483d80
--- /dev/null
@@ -0,0 +1,72 @@
+/*
+ *     The UniCode Library: Reading and writing of UTF-8 on Fastbuf Streams
+ *
+ *     (c) 2001 Martin Mares <mj@ucw.cz>
+ */
+
+#ifndef _UNISTREAM_H
+#define _UNISTREAM_H
+
+/* FIXME: Do these need to be inline? */
+
+/*
+ *  Read a single UTF-8 encoded character from the fastbuf `b'.
+ *
+ *  Returns:
+ *    - the byte itself when it is below 0x80; a negative value obtained
+ *      from bgetc() (end of file) is passed through unchanged,
+ *    - the decoded value of a well-formed 2-byte or 3-byte sequence,
+ *    - UNI_REPLACEMENT for a stray continuation byte, for a lead byte
+ *      of 0xf0 or above (its continuation bytes are skipped), and for
+ *      a sequence cut short by a non-continuation byte or end of file.
+ *
+ *  The byte which terminated a broken sequence is pushed back with
+ *  bungetc() so that the next call starts with it.  Overlong encodings
+ *  are not detected.
+ */
+static inline int
+bget_utf8(struct fastbuf *b)
+{
+  int lead = bgetc(b);
+  int next, value, tail;
+
+  if (lead < 0x80)                     /* Plain ASCII or EOF */
+    return lead;
+  if (lead < 0xc0)                     /* Continuation byte without a lead byte */
+    return UNI_REPLACEMENT;
+
+  if (lead >= 0xf0)
+    {
+      /* Sequence too long for us: swallow all of its continuation bytes */
+      do
+        next = bgetc(b);
+      while (next >= 0x80 && next < 0xc0);
+    }
+  else
+    {
+      if (lead >= 0xe0)                /* 3-byte sequence: 4 payload bits in the lead */
+        {
+          value = lead & 0x0f;
+          tail = 2;
+        }
+      else                             /* 2-byte sequence: 5 payload bits in the lead */
+        {
+          value = lead & 0x1f;
+          tail = 1;
+        }
+      for (;;)
+        {
+          next = bgetc(b);
+          if (next < 0x80 || next >= 0xc0)
+            break;                     /* Not a continuation byte (or EOF) */
+          value = (value << 6) | (next & 0x3f);
+          if (!--tail)
+            return value;
+        }
+    }
+
+  /* Broken or unsupported sequence; `next' is the byte which ended it */
+  if (next >= 0)
+    bungetc(b, next);
+  return UNI_REPLACEMENT;
+}
+
+/*
+ *  Write the character `u' to the fastbuf `b' encoded as UTF-8.
+ *
+ *  Only values below 65536 are accepted (checked by ASSERT), so the
+ *  output is always 1 byte (u < 0x80), 2 bytes (u < 0x800) or 3 bytes.
+ */
+static inline void
+bput_utf8(struct fastbuf *b, uns u)
+{
+  ASSERT(u < 65536);
+
+  if (u < 0x80)                        /* ASCII is stored as is */
+    {
+      bputc(b, u);
+      return;
+    }
+
+  if (u >= 0x800)                      /* 3 bytes: lead byte and the middle 6 bits */
+    {
+      bputc(b, 0xe0 | (u >> 12));
+      bputc(b, 0x80 | ((u >> 6) & 0x3f));
+    }
+  else                                 /* 2 bytes: lead byte only */
+    {
+      bputc(b, 0xc0 | (u >> 6));
+    }
+
+  /* Both multi-byte forms end with the lowest 6 bits */
+  bputc(b, 0x80 | (u & 0x3f));
+}
+
+#endif