Slow case of b(get|put)_utf8 no longer inline.

author Martin Mares <mj@ucw.cz>

Tue, 27 Mar 2001 10:28:31 +0000 (10:28 +0000)

committer Martin Mares <mj@ucw.cz>

Tue, 27 Mar 2001 10:28:31 +0000 (10:28 +0000)
author Martin Mares <mj@ucw.cz>
Tue, 27 Mar 2001 10:28:31 +0000 (10:28 +0000)
committer Martin Mares <mj@ucw.cz>
Tue, 27 Mar 2001 10:28:31 +0000 (10:28 +0000)
diff --git a/charset/Makefile b/charset/Makefile

index 6454e26f56803cc690a048b4faf57fb998a852b8..40423122fc2daae7b542c7e4e9acd0e7862361b3 100644 (file)
--- a/charset/Makefile
+++ b/charset/Makefile
@@ -3,7 +3,7 @@
  DIRS+=charset
  
  UNI_OBJS=toupper.o tolower.o tocat.o utf8.o unaccent.o strlen.o debug.o \
-       charconv.o setnames.o
+       charconv.o setnames.o unistream.o
  
  obj/charset/libcharset.a: $(addprefix obj/charset/,$(UNI_OBJS))
  
diff --git a/charset/unistream.c b/charset/unistream.c

new file mode 100644 (file)

index 0000000..0e7bbb9
--- /dev/null
+++ b/charset/unistream.c
@@ -0,0 +1,70 @@
+/*
+ *     The UniCode Library: Reading and writing of UTF-8 on Fastbuf Streams
+ *
+ *     (c) 2001 Martin Mares <mj@ucw.cz>
+ */
+
+#include "lib/lib.h"
+#include "lib/fastbuf.h"
+#include "charset/unicode.h"
+#include "charset/unistream.h"
+
+int  /* Returns the decoded code point, UNI_REPLACEMENT for malformed input, or bgetc()'s EOF value if the stream ends before the first byte */
+bget_utf8_slow(struct fastbuf *b)  /* Out-of-line UTF-8 reader: decodes one sequence of up to 3 bytes, fetching each byte with bgetc() */
+{
+  int c = bgetc(b);
+  int code;
+
+  if (c < 0x80)                                /* ASCII byte, or the EOF value from bgetc(): return it unchanged */
+    return c;
+  if (c < 0xc0)                                /* 0x80..0xbf: continuation byte without a lead byte */
+    return UNI_REPLACEMENT;
+  if (c >= 0xf0)                       /* Lead byte of a sequence longer than 3 bytes: not supported, skip it */
+    {
+      while ((c = bgetc(b)) >= 0x80 && c < 0xc0)  /* Swallow the continuation bytes (0x80..0xbf) that follow */
+       ;
+      goto wrong;  /* c now holds the first byte that is not a continuation byte (or EOF) */
+    }
+  if (c >= 0xe0)                       /* 0xe0..0xef: 3-byte sequence */
+    {
+      code = c & 0x0f;  /* Lead byte contributes its low 4 bits */
+      if ((c = bgetc(b)) < 0x80 || c >= 0xc0)  /* Next byte must be a continuation byte */
+       goto wrong;
+      code = (code << 6) | (c & 0x3f);  /* Each continuation byte contributes 6 bits */
+      if ((c = bgetc(b)) < 0x80 || c >= 0xc0)
+       goto wrong;
+      code = (code << 6) | (c & 0x3f);
+    }
+  else                                 /* 0xc0..0xdf: 2-byte sequence */
+    {
+      code = c & 0x1f;  /* Lead byte contributes its low 5 bits */
+      if ((c = bgetc(b)) < 0x80 || c >= 0xc0)
+       goto wrong;
+      code = (code << 6) | (c & 0x3f);
+    }
+  return code;  /* NOTE(review): overlong forms (e.g. 0xc0 0x80) and surrogate values are not rejected here */
+
+ wrong:
+  if (c >= 0)  /* A negative c (EOF) is not handed back */
+    bungetc(b, c);  /* Hand the offending byte back to the stream; presumably the next read sees it again */
+  return UNI_REPLACEMENT;
+}
+
+void
+bput_utf8_slow(struct fastbuf *b, uns u)  /* Out-of-line UTF-8 writer: emits code point u as 1 to 3 bytes, one bputc() call per byte */
+{
+  ASSERT(u < 65536);  /* Only 16-bit code points are accepted, so 3 bytes always suffice */
+  if (u < 0x80)  /* 7-bit value: written as a single byte */
+    bputc(b, u);
+  else
+    {
+      if (u < 0x800)  /* Up to 11 bits: 2-byte sequence */
+       bputc(b, 0xc0 | (u >> 6));  /* Lead byte 110xxxxx carries the top 5 bits */
+      else
+       {
+         bputc(b, 0xe0 | (u >> 12));  /* Lead byte 1110xxxx carries the top 4 bits */
+         bputc(b, 0x80 | ((u >> 6) & 0x3f));  /* Middle continuation byte 10xxxxxx */
+       }
+      bputc(b, 0x80 | (u & 0x3f));  /* Final continuation byte with the low 6 bits, shared by both multi-byte forms */
+    }
+}
diff --git a/charset/unistream.h b/charset/unistream.h

index 5483d8097b22f8b509da87605922e85a271a60f5..3eff996950f8e3821ecaacbf58d041ba3f43048e 100644 (file)
--- a/charset/unistream.h
+++ b/charset/unistream.h
@@ -7,66 +7,31 @@
  #ifndef _UNISTREAM_H
  #define _UNISTREAM_H
  
-/* FIXME: Do these need to be inline? */
+int bget_utf8_slow(struct fastbuf *b);
+void bput_utf8_slow(struct fastbuf *b, uns u);
  
  static inline int
  bget_utf8(struct fastbuf *b)  /* Reads one UTF-8 character from b: inline fast path on the buffer, otherwise bget_utf8_slow() */
  {
-  int c = bgetc(b);
-  int code;
+  uns u;
  
-  if (c < 0x80)                                /* Includes EOF */
-    return c;
-  if (c < 0xc0)                                /* Incorrect combination */
-    return UNI_REPLACEMENT;
-  if (c >= 0xf0)                       /* Too large, skip it */
+  if (b->bptr + 5 <= b->bufend)  /* Fast path: at least 5 bytes lie between bptr and bufend */
      {
-      while ((c = bgetc(b)) >= 0x80 && c < 0xc0)
-       ;
-      goto wrong;
+      GET_UTF8(b->bptr, u);  /* Decodes directly from the buffer; presumably advances b->bptr -- confirm in charset/unicode.h */
+      return u;  /* NOTE(review): confirm that bufend bounds valid *read* data, not merely buffer capacity */
      }
-  if (c >= 0xe0)                       /* 3 bytes */
-    {
-      code = c & 0x0f;
-      if ((c = bgetc(b)) < 0x80 || c >= 0xc0)
-       goto wrong;
-      code = (code << 6) | (c & 0x3f);
-      if ((c = bgetc(b)) < 0x80 || c >= 0xc0)
-       goto wrong;
-      code = (code << 6) | (c & 0x3f);
-    }
-  else                                 /* 2 bytes */
-    {
-      code = c & 0x1f;
-      if ((c = bgetc(b)) < 0x80 || c >= 0xc0)
-       goto wrong;
-      code = (code << 6) | (c & 0x3f);
-    }
-  return code;
-
- wrong:
-  if (c >= 0)
-    bungetc(b, c);
-  return UNI_REPLACEMENT;
+  else
+    return bget_utf8_slow(b);  /* Fewer than 5 bytes before bufend: use the out-of-line byte-by-byte decoder */
  }
  
  static inline void
  bput_utf8(struct fastbuf *b, uns u)  /* Writes code point u to b as UTF-8: inline fast path on the buffer, otherwise bput_utf8_slow() */
  {
    ASSERT(u < 65536);  /* Only 16-bit code points are accepted */
-  if (u < 0x80)
-    bputc(b, u);
+  if (b->bptr + 5 <= b->bufend)  /* Fast path: at least 5 bytes of room between bptr and bufend */
+    PUT_UTF8(b->bptr, u);  /* Encodes directly into the buffer; presumably advances b->bptr -- confirm in charset/unicode.h */
    else
-    {
-      if (u < 0x800)
-       bputc(b, 0xc0 | (u >> 6));
-      else
-       {
-         bputc(b, 0xe0 | (u >> 12));
-         bputc(b, 0x80 | ((u >> 6) & 0x3f));
-       }
-      bputc(b, 0x80 | (u & 0x3f));
-    }
+    bput_utf8_slow(b, u);  /* Fewer than 5 bytes before bufend: use the out-of-line writer, one bputc() per byte */
  }
  
  #endif
author	Martin Mares <mj@ucw.cz>
	Tue, 27 Mar 2001 10:28:31 +0000 (10:28 +0000)
committer	Martin Mares <mj@ucw.cz>
	Tue, 27 Mar 2001 10:28:31 +0000 (10:28 +0000)
charset/Makefile		patch | blob | history
charset/unistream.c	[new file with mode: 0644]	patch | blob
charset/unistream.h		patch | blob | history