X-Git-Url: http://mj.ucw.cz/gitweb/?a=blobdiff_plain;f=charset%2Fcharconv.c;h=2418f4d2567223672dcedfd6226d5848d13640e8;hb=1bc3bb66e47ec02003658fb3040aef0ffd7b7540;hp=36796a7bf2f7e47e37bfc27c1e1c1cf9eed6f7b3;hpb=39359f268c445fa4a4defb38e644e519c58660f4;p=libucw.git diff --git a/charset/charconv.c b/charset/charconv.c index 36796a7b..2418f4d2 100644 --- a/charset/charconv.c +++ b/charset/charconv.c @@ -8,11 +8,11 @@ * of the GNU Lesser General Public License. */ -#include "lib/lib.h" -#include "lib/unicode.h" -#include "lib/unaligned.h" -#include "charset/charconv.h" -#include "charset/chartable.h" +#include +#include +#include +#include +#include void conv_init(struct conv_context *c) @@ -89,6 +89,7 @@ seq: } if (c->code >= 0x10000) c->code = 0xfffd; +got_char: c->source = s; c->state = 0; return -1; @@ -133,19 +134,23 @@ seq: { void *p = &c->code; c->string_at = p; - if (c->code < 0xd800 || c->code - 0xe000 < 0x2000) + uint code = c->code; + c->string_at = p; + if (code < 0xd800 || code - 0xe000 < 0x2000) {} - else if ((c->code -= 0x10000) < 0x100000) + else if ((code -= 0x10000) < 0x100000) { - put_u16_be(p, 0xd800 | (c->code >> 10)); - put_u16_be(p + 2, 0xdc00 | (c->code & 0x3ff)); + put_u16_be(p, 0xd800 | (code >> 10)); + put_u16_be(p + 2, 0xdc00 | (code & 0x3ff)); c->remains = 4; + c->state = SEQ_WRITE; goto seq; } else - c->code = UNI_REPLACEMENT; - put_u16_be(p, c->code); + code = UNI_REPLACEMENT; + put_u16_be(p, code); c->remains = 2; + c->state = SEQ_WRITE; goto seq; } @@ -154,18 +159,22 @@ seq: { void *p = &c->code; c->string_at = p; - if (c->code < 0xd800 || c->code - 0xe000 < 0x2000) + uint code = c->code; + c->string_at = p; + if (code < 0xd800 || code - 0xe000 < 0x2000) {} - else if ((c->code -= 0x10000) < 0x100000) + else if ((code -= 0x10000) < 0x100000) { - put_u16_le(p, 0xd800 | (c->code >> 10)); - put_u16_le(p + 2, 0xdc00 | (c->code & 0x3ff)); + put_u16_le(p, 0xd800 | (code >> 10)); + put_u16_le(p + 2, 0xdc00 | (code & 0x3ff)); c->remains = 4; + c->state = SEQ_WRITE; } else - c->code = UNI_REPLACEMENT; - put_u16_le(p, c->code); + code = UNI_REPLACEMENT; + put_u16_le(p, code); c->remains = 2; + c->state = SEQ_WRITE; goto seq; } @@ -181,7 +190,7 @@ seq: goto cse; c->code = (c->code << 8) | *s++; if (c->code - 0xd800 >= 0x800) - break; + goto got_char; c->code = (c->code - 0xd800) << 10; c->state = UTF16_BE_READ_2; /* fall-thru */ @@ -203,7 +212,7 @@ seq: else c->code = UNI_REPLACEMENT; s++; - break; + goto got_char; /* Reading of UTF16-LE */ case UTF16_LE_READ: @@ -217,7 +226,7 @@ seq: goto cse; c->code |= *s++ << 8; if (c->code - 0xd800 >= 0x800) - break; + goto got_char; c->code = (c->code - 0xd800) << 10; c->state = UTF16_LE_READ_2; /* fall-thru */ @@ -235,7 +244,7 @@ seq: else c->code = UNI_REPLACEMENT; s++; - break; + goto got_char; default: ASSERT(0); @@ -261,7 +270,7 @@ conv_std_to_utf8(struct conv_context *c) { #define CONV_READ_STD #define CONV_WRITE_UTF8 -#include "charset/charconv-gen.h" +#include } static int @@ -269,7 +278,7 @@ conv_utf8_to_std(struct conv_context *c) { #define CONV_READ_UTF8 #define CONV_WRITE_STD -#include "charset/charconv-gen.h" +#include } static int @@ -277,7 +286,7 @@ conv_std_to_utf16_be(struct conv_context *c) { #define CONV_READ_STD #define CONV_WRITE_UTF16_BE -#include "charset/charconv-gen.h" +#include } static int @@ -285,7 +294,7 @@ conv_utf16_be_to_std(struct conv_context *c) { #define CONV_READ_UTF16_BE #define CONV_WRITE_STD -#include "charset/charconv-gen.h" +#include } static int @@ -293,7 +302,7 @@ conv_std_to_utf16_le(struct conv_context *c) { #define CONV_READ_STD #define CONV_WRITE_UTF16_LE -#include "charset/charconv-gen.h" +#include } static int @@ -301,7 +310,7 @@ conv_utf16_le_to_std(struct conv_context *c) { #define CONV_READ_UTF16_LE #define CONV_WRITE_STD -#include "charset/charconv-gen.h" +#include } static int @@ -309,7 +318,7 @@ conv_utf8_to_utf16_be(struct conv_context *c) { #define CONV_READ_UTF8 #define CONV_WRITE_UTF16_BE -#include "charset/charconv-gen.h" +#include } static int @@ -317,7 +326,7 @@ conv_utf16_be_to_utf8(struct conv_context *c) { #define CONV_READ_UTF16_BE #define CONV_WRITE_UTF8 -#include "charset/charconv-gen.h" +#include } static int @@ -325,7 +334,7 @@ conv_utf8_to_utf16_le(struct conv_context *c) { #define CONV_READ_UTF8 #define CONV_WRITE_UTF16_LE -#include "charset/charconv-gen.h" +#include } static int @@ -333,7 +342,7 @@ conv_utf16_le_to_utf8(struct conv_context *c) { #define CONV_READ_UTF16_LE #define CONV_WRITE_UTF8 -#include "charset/charconv-gen.h" +#include } static int @@ -341,7 +350,7 @@ conv_utf16_be_to_utf16_le(struct conv_context *c) { #define CONV_READ_UTF16_BE #define CONV_WRITE_UTF16_LE -#include "charset/charconv-gen.h" +#include } static int @@ -351,7 +360,7 @@ conv_standard(struct conv_context *c) unsigned short *x_to_out = c->x_to_out; const unsigned char *s, *se; unsigned char *d, *de, *k; - unsigned int len, e; + uint len, e; if (unlikely(c->state)) goto slow; @@ -363,7 +372,7 @@ main: de = c->dest_end; while (s < se) { - unsigned int code = x_to_out[in_to_x[*s]]; + uint code = x_to_out[in_to_x[*s]]; if (code < 0x100) { if (unlikely(d >= de)) @@ -409,10 +418,14 @@ conv_set_charset(struct conv_context *c, int src, int dest) c->source_charset = src; c->dest_charset = dest; if (src == dest) - c->convert = conv_none; + { + c->convert = conv_none; + c->in_to_x = NULL; + c->x_to_out = NULL; + } else { - static uns lookup[] = { + static uint lookup[] = { [CONV_CHARSET_UTF8] = 1, [CONV_CHARSET_UTF16_BE] = 2, [CONV_CHARSET_UTF16_LE] = 3, @@ -423,44 +436,42 @@ conv_set_charset(struct conv_context *c, int src, int dest) { conv_utf16_be_to_std, conv_utf16_be_to_utf8, conv_none, conv_utf16_be_to_utf16_le }, { conv_utf16_le_to_std, conv_utf16_le_to_utf8, conv_utf16_be_to_utf16_le, conv_none }, }; - uns src_idx = ((uns)src < ARRAY_SIZE(lookup)) ? lookup[src] : 0; - uns dest_idx = ((uns)dest < ARRAY_SIZE(lookup)) ? lookup[dest] : 0; + uint src_idx = ((uint)src < ARRAY_SIZE(lookup)) ? lookup[src] : 0; + uint dest_idx = ((uint)dest < ARRAY_SIZE(lookup)) ? lookup[dest] : 0; c->convert = tab[src_idx][dest_idx]; - if (!src_idx) - c->in_to_x = input_to_x[src]; - if (!dest_idx) - c->x_to_out = x_to_output[dest]; + c->in_to_x = src_idx ? NULL : input_to_x[src]; + c->x_to_out = dest_idx ? NULL : x_to_output[dest]; } c->state = 0; } -unsigned int -conv_x_to_ucs(unsigned int x) +uint +conv_x_to_ucs(uint x) { return x_to_uni[x]; } -unsigned int -conv_ucs_to_x(unsigned int ucs) +uint +conv_ucs_to_x(uint ucs) { return uni_to_x[ucs >> 8U][ucs & 0xff]; } -unsigned int +uint conv_x_count(void) { return sizeof(x_to_uni) / sizeof(x_to_uni[0]); } int -conv_in_to_ucs(struct conv_context *c, unsigned int y) +conv_in_to_ucs(struct conv_context *c, uint y) { return x_to_uni[c->in_to_x[y]]; } -int conv_ucs_to_out(struct conv_context *c, unsigned int ucs) +int conv_ucs_to_out(struct conv_context *c, uint ucs) { - uns x = uni_to_x[ucs >> 8U][ucs & 0xff]; + uint x = uni_to_x[ucs >> 8U][ucs & 0xff]; if (x == 256 || c->x_to_out[x] >= 256) return -1; else