From: Pavel Charvat Date: Sun, 5 Mar 2006 22:26:54 +0000 (+0100) Subject: Charset conversion with allocation on the stack does not X-Git-Tag: holmes-import~664 X-Git-Url: http://mj.ucw.cz/gitweb/?a=commitdiff_plain;h=47fec72b685482a58561be68759e9a8f9059e6b5;p=libucw.git Charset conversion with allocation on the stack does not depend on string_table's lengths now. --- diff --git a/charset/misc/gen-charconv b/charset/misc/gen-charconv index a731d9ab..ab22b156 100755 --- a/charset/misc/gen-charconv +++ b/charset/misc/gen-charconv @@ -182,9 +182,6 @@ print "static unsigned char string_table[] = {\n"; $i = 256; while ($i < $pstr) { $w = $strval{$i}; - if (length($w) > 3) { - print STDERR "Warning: Entries should have at most 3 characters, see charset/stk-charconv.c\n"; - } print length $w, ","; foreach $x (unpack("C256", $w)) { print " $x,"; diff --git a/charset/stk-charconv.c b/charset/stk-charconv.c index 3fbc07bb..2d5c2066 100644 --- a/charset/stk-charconv.c +++ b/charset/stk-charconv.c @@ -8,25 +8,90 @@ #include "charset/stk-charconv.h" #include -uns -stk_conv_internal(struct conv_context *c, byte *s, uns in_cs, uns out_cs) +#define INITIAL_MIN_SIZE 16 +#define INITIAL_SCALE 2 + +void +stk_conv_init(struct stk_conv_context *c, byte *s, uns in_cs, uns out_cs) { - /* We do not allocate anything for identical charsets. 
*/ + uns l = strlen(s); + c->count = 0; + + /* For in_cs == out_cs, we emulate stk_strdup */ if (in_cs == out_cs) + { + c->size[0] = c->request = l + 1; + c->buf[0] = s; + c->c.source = NULL; + return; + } + + /* Initialization */ + conv_init(&c->c); + conv_set_charset(&c->c, in_cs, out_cs); + c->c.source = s; + c->c.source_end = s + l + 1; + c->sum = 0; + + /* Size of the first buffer */ + if (l < (INITIAL_MIN_SIZE - 1) / INITIAL_SCALE) + c->request = INITIAL_MIN_SIZE; + else + c->request = l * INITIAL_SCALE + 1; +} + +int +stk_conv_step(struct stk_conv_context *c, byte *buf) +{ + /* Merge all buffers to the new one and exit */ + if (!c->c.source) + { + c->c.dest_start = buf; + for (uns i = 0; i <= c->count; i++) { - c->dest_start = s; - return 0; + memcpy(buf, c->buf[i], c->size[i]); + buf += c->size[i]; } + return 0; + } - uns l = strlen(s); + /* Run conv_run using the new buffer */ + c->buf[c->count] = c->c.dest_start = c->c.dest = buf; + c->c.dest_end = buf + c->request; + if (!(conv_run(&c->c) & CONV_SOURCE_END)) + { + + /* Buffer is too small, continue with a new one */ + c->size[c->count++] = c->request; + c->sum += c->request; + c->request <<= 1; /* This can be freely changed */ + return 1; + } + + /* We have used only one buffer for the conversion, no merges are needed */ + if (!c->count) + return 0; - conv_init(c); - conv_set_charset(c, in_cs, out_cs); - c->source = s; - c->source_end = s + l + 1; - - /* Resulting string can be longer after the conversion. - * The following constant must be at least 3 for conversion to UTF-8 - * and at least the maximum length of the strings in string_table for other charsets. */ - return 3 * l + 1; + /* We can merge everything to the current buffer ... */ + uns s = c->c.dest - c->c.dest_start; + if (c->sum + s <= c->request) + { + memmove(buf + c->sum, buf, s); + for (uns i = 0; i < c->count; i++) + { + memcpy(buf, c->buf[i], c->size[i]); + buf += c->size[i]; + } + return 0; + } + + /* ... 
or we allocate a new one */ + else + { + c->request = c->sum + s; + c->size[c->count] = s; + c->c.source = NULL; + return 1; + } } + diff --git a/charset/stk-charconv.h b/charset/stk-charconv.h index d63ba746..d8ae585c 100644 --- a/charset/stk-charconv.h +++ b/charset/stk-charconv.h @@ -10,18 +10,27 @@ #include "charset/charconv.h" #include -/* The following macros convert strings between given charsets (CONV_CHARSET_x). - * The resulting string is allocated on the stack with the exception of cs_in == cs_out, - * when the pointer to the input string is returned. */ +/* The following macros convert strings between given charsets (CONV_CHARSET_x). */ #define stk_conv(s, cs_in, cs_out) \ - ({ struct conv_context _c; uns _l=stk_conv_internal(&_c, (s), (cs_in), (cs_out)); \ - if (_l) { _c.dest=_c.dest_start=alloca(_l); _c.dest_end=_c.dest+_l; conv_run(&_c); } \ - _c.dest_start; }) + ({ struct stk_conv_context _c; stk_conv_init(&_c, (s), (cs_in), (cs_out)); \ + while (stk_conv_step(&_c, alloca(_c.request))); _c.c.dest_start; }) #define stk_conv_to_utf8(s, cs_in) stk_conv(s, cs_in, CONV_CHARSET_UTF8) #define stk_conv_from_utf8(s, cs_out) stk_conv(s, CONV_CHARSET_UTF8, cs_out) - -uns stk_conv_internal(struct conv_context *c, byte *s, uns cs_in, uns cs_out); + +/* Internal structure and routines. */ + +struct stk_conv_context { + struct conv_context c; + uns count; + uns sum; + uns request; + byte *buf[16]; + uns size[16]; +}; + +void stk_conv_init(struct stk_conv_context *c, byte *s, uns cs_in, uns cs_out); +int stk_conv_step(struct stk_conv_context *c, byte *buf); #endif