lib.h log.h tbf.h threads.h time.h \
alloc.h mempool.h eltpool.h \
clists.h slists.h simple-lists.h \
- string.h stkstring.h unicode.h varint.h chartype.h regex.h \
+ string.h stkstring.h unicode.h unicode-gen.h varint.h chartype.h regex.h \
wildmatch.h \
unaligned.h \
bbuf.h gbuf.h gary.h bitarray.h bitsig.h \
*
* (c) 2001--2015 Martin Mares <mj@ucw.cz>
* (c) 2004 Robert Spalek <robert@ucw.cz>
+ * (c) 2017 Pavel Charvat <pchar@ucw.cz>
*
* This software may be freely distributed and used according to the terms
* of the GNU Lesser General Public License.
int main(int argc, char **argv)
{
#define FUNCS \
- F(BGET_UTF8) F(BGET_UTF8_32) F(BGET_UTF16_BE) F(BGET_UTF16_LE) \
- F(BPUT_UTF8) F(BPUT_UTF8_32) F(BPUT_UTF16_BE) F(BPUT_UTF16_LE)
+ F(BGET_UTF8) F(BGET_UTF8_FULL) F(BGET_UTF8_32) F(BGET_UTF16_BE) F(BGET_UTF16_LE) \
+ F(BPUT_UTF8) F(BPUT_UTF8_FULL) F(BPUT_UTF8_32) F(BPUT_UTF16_BE) F(BPUT_UTF16_LE)
enum {
#define F(x) FUNC_##x,
case FUNC_BGET_UTF8:
u = bget_utf8_slow(b, UNI_REPLACEMENT);
break;
+ case FUNC_BGET_UTF8_FULL:
+ u = bget_utf8_full_slow(b, UNI_REPLACEMENT);
+ break;
case FUNC_BGET_UTF8_32:
u = bget_utf8_32_slow(b, UNI_REPLACEMENT);
break;
case FUNC_BPUT_UTF8:
bput_utf8_slow(b, u);
break;
+ case FUNC_BPUT_UTF8_FULL:
+ bput_utf8_full_slow(b, u);
+ break;
case FUNC_BPUT_UTF8_32:
bput_utf8_32_slow(b, u);
break;
*
* (c) 2001--2015 Martin Mares <mj@ucw.cz>
* (c) 2004 Robert Spalek <robert@ucw.cz>
- * (c) 2007--2008 Pavel Charvat <pchar@ucw.cz>
+ * (c) 2007--2017 Pavel Charvat <pchar@ucw.cz>
*
* This software may be freely distributed and used according to the terms
* of the GNU Lesser General Public License.
In: c1 bf e0 9f bf
Out: fffc fffc
+Name: bput_utf8_full
+Run: ../obj/ucw/ff-unicode-t bput_utf8_full
+In: 15a5a 2a5a5 5a5a5 a5a5a 10ffff
+Out: f0 95 a9 9a f0 aa 96 a5 f1 9a 96 a5 f2 a5 a9 9a f4 8f bf bf
+
+Name: bget_utf8_full
+Run: ../obj/ucw/ff-unicode-t bget_utf8_full
+In: f0 95 a9 9a f0 aa 96 a5 f1 9a 96 a5 f2 a5 a9 9a f4 8f bf bf
+Out: 15a5a 2a5a5 5a5a5 a5a5a 10ffff
+
+Name: bget_utf8_full garbage
+In: fe 83 81 f4 90 80 80
+Out: fffc fffc fffc fffc
+
+Name: bget_utf8_full denormalized
+In: c1 bf e0 9f bf f0 8f bf bf
+Out: fffc fffc fffc
+
Name: bput_utf8_32
Run: ../obj/ucw/ff-unicode-t bput_utf8_32
In: 15a5a 2a5a5 5a5a5 a5a5a 15a5a5 2a5a5a 5a5a5a a5a5a5 15a5a5a 2a5a5a5 5a5a5a5 a5a5a5a 15a5a5a5 2a5a5a5a 5a5a5a5a
+/*
+ * UCW Library -- Generator of UTF-8 functions (for internal use only; do not include it directly)
+ *
+ * (c) 2017 Pavel Charvat <pchar@ucw.cz>
+ *
+ * This software may be freely distributed and used according to the terms
+ * of the GNU Lesser General Public License.
+ */
+
#if defined(UNI_WANT_UTF8_32)
#define UNI_MAX_UTF8_BYTES 6
#elif defined(UNI_WANT_UTF8_FULL)
ASSERT(0);
}
+#undef UNI_PUT_NEXT
+
#endif
/* Reading UTF-8 */
UNI_GIVE_OK;
}
+#undef UNI_GET_NEXT
+
#endif
+/* Clean up the macros before the next use */
+
#undef UNI_WANT_PUT_UTF8
#undef UNI_WANT_GET_UTF8
#undef UNI_GIVE_SKIPC
#undef UNI_GIVE_OK
#undef UNI_GIVE_BAD
-
-#undef UNI_PUT_NEXT
-#undef UNI_GET_NEXT
*
* (c) 1997--2004 Martin Mares <mj@ucw.cz>
* (c) 2003 Robert Spalek <robert@ucw.cz>
+ * (c) 2017 Pavel Charvat <pchar@ucw.cz>
*
* This software may be freely distributed and used according to the terms
* of the GNU Lesser General Public License.
byte buf[256];
#define FUNCS \
- F(UTF8_GET) F(UTF8_32_GET) F(UTF16_BE_GET) F(UTF16_LE_GET) \
- F(UTF8_PUT) F(UTF8_32_PUT) F(UTF16_BE_PUT) F(UTF16_LE_PUT)
+ F(UTF8_GET) F(UTF8_FULL_GET) F(UTF8_32_GET) F(UTF16_BE_GET) F(UTF16_LE_GET) \
+ F(UTF8_PUT) F(UTF8_FULL_PUT) F(UTF8_32_PUT) F(UTF16_BE_PUT) F(UTF16_LE_PUT)
enum {
#define F(x) FUNC_##x,
case FUNC_UTF8_GET:
p = utf8_get(p, &u);
break;
+ case FUNC_UTF8_FULL_GET:
+ p = utf8_full_get(p, &u);
+ break;
case FUNC_UTF8_32_GET:
p = utf8_32_get(p, &u);
break;
case FUNC_UTF8_PUT:
p = utf8_put(p, u);
break;
+ case FUNC_UTF8_FULL_PUT:
+ p = utf8_full_put(p, u);
+ break;
case FUNC_UTF8_32_PUT:
p = utf8_32_put(p, u);
break;
*
* (c) 1997--2004 Martin Mares <mj@ucw.cz>
* (c) 2004 Robert Spalek <robert@ucw.cz>
- * (c) 2007 Pavel Charvat <pchar@ucw.cz>
+ * (c) 2007--2017 Pavel Charvat <pchar@ucw.cz>
*
* This software may be freely distributed and used according to the terms
* of the GNU Lesser General Public License.
In: c1 bf e0 9f bf
Out: fffc fffc
+Name: utf8_full_put
+Run: ../obj/ucw/unicode-t utf8_full_put
+In: 15a5a 2a5a5 5a5a5 a5a5a 10ffff
+Out: f0 95 a9 9a f0 aa 96 a5 f1 9a 96 a5 f2 a5 a9 9a f4 8f bf bf
+
+Name: utf8_full_get
+Run: ../obj/ucw/unicode-t utf8_full_get
+In: f0 95 a9 9a f0 aa 96 a5 f1 9a 96 a5 f2 a5 a9 9a f4 8f bf bf
+Out: 15a5a 2a5a5 5a5a5 a5a5a 10ffff
+
+Name: utf8_full_get garbage
+In: fe 83 81 f4 90 80 80
+Out: fffc fffc fffc fffc
+
+Name: utf8_full_get denormalized
+In: c1 bf e0 9f bf f0 8f bf bf
+Out: fffc fffc fffc
+
Name: utf8_32_put
Run: ../obj/ucw/unicode-t utf8_32_put
In: 15a5a 2a5a5 5a5a5 a5a5a 15a5a5 2a5a5a 5a5a5a a5a5a5 15a5a5a 2a5a5a5 5a5a5a5 a5a5a5a 15a5a5a5 2a5a5a5a 5a5a5a5a