From: Pavel Charvat Date: Sat, 4 Mar 2006 18:10:05 +0000 (+0100) Subject: Conversion between charsets with allocation on the stack X-Git-Tag: holmes-import~666 X-Git-Url: http://mj.ucw.cz/gitweb/?a=commitdiff_plain;h=7af80cdb684884505ceeb2d71414b2b9c24ab02b;p=libucw.git Conversion between charsets with allocation on the stack or a given memory pool. I will probably add some optimizations... --- diff --git a/charset/Makefile b/charset/Makefile index 64a8119e..dc8f4681 100644 --- a/charset/Makefile +++ b/charset/Makefile @@ -2,8 +2,8 @@ DIRS+=charset -LIBCHARSET_MODS=toupper tolower tocat toligatures unaccent charconv setnames fb-charconv -LIBCHARSET_INCLUDES=charconv.h unicat.h fb-charconv.h +LIBCHARSET_MODS=toupper tolower tocat toligatures unaccent charconv setnames fb-charconv stk-charconv mp-charconv +LIBCHARSET_INCLUDES=charconv.h unicat.h fb-charconv.h stk-charconv.h mp-charconv.h $(o)/charset/libcharset.a: $(addsuffix .o,$(addprefix $(o)/charset/,$(LIBCHARSET_MODS))) $(o)/charset/libcharset.so: $(addsuffix .oo,$(addprefix $(o)/charset/,$(LIBCHARSET_MODS))) diff --git a/charset/charconv.h b/charset/charconv.h index d3482438..d14493c1 100644 --- a/charset/charconv.h +++ b/charset/charconv.h @@ -7,6 +7,9 @@ * of the GNU Lesser General Public License. 
*/ +#ifndef _CHARCONV_H +#define _CHARCONV_H + struct conv_context { /* Parameters supplied by the caller */ @@ -77,3 +80,5 @@ unsigned int conv_x_count(void); int find_charset_by_name(char *); char *charset_name(int); + +#endif diff --git a/charset/mp-charconv.c b/charset/mp-charconv.c new file mode 100644 index 00000000..731d591e --- /dev/null +++ b/charset/mp-charconv.c @@ -0,0 +1,17 @@ +/* + * Sherlock Library -- Character Conversion with Allocation on a Memory Pool + * + * (c) 2006 Pavel Charvat + */ + +#include "lib/lib.h" +#include "lib/mempool.h" +#include "charset/mp-charconv.h" +#include "charset/stk-charconv.h" +#include + +byte * +mp_conv(struct mempool *mp, byte *s, uns in_cs, uns out_cs) +{ + return mp_strdup(mp, stk_conv(s, in_cs, out_cs)); +} diff --git a/charset/mp-charconv.h b/charset/mp-charconv.h new file mode 100644 index 00000000..6519535a --- /dev/null +++ b/charset/mp-charconv.h @@ -0,0 +1,24 @@ +/* + * Sherlock Library -- Character Conversion with Allocation on a Memory Pool + * + * (c) 2006 Pavel Charvat + */ + +#ifndef _MP_CHARCONV_H +#define _MP_CHARCONV_H + +#include "lib/mempool.h" +#include "charset/charconv.h" + +byte * +mp_conv(struct mempool *mp, byte *s, uns cs_in, uns cs_out); + +static inline byte * +mp_conv_to_utf8(struct mempool *mp, byte *s, uns cs_in) +{ return mp_conv(mp, s, cs_in, CONV_CHARSET_UTF8); } + +static inline byte * +mp_conv_from_utf8(struct mempool *mp, byte *s, uns cs_out) +{ return mp_conv(mp, s, CONV_CHARSET_UTF8, cs_out); } + +#endif diff --git a/charset/stk-charconv.c b/charset/stk-charconv.c new file mode 100644 index 00000000..6550ea35 --- /dev/null +++ b/charset/stk-charconv.c @@ -0,0 +1,32 @@ +/* + * Sherlock Library -- Character Conversion with Allocation on the Stack + * + * (c) 2006 Pavel Charvat + */ + +#include "lib/lib.h" +#include "charset/stk-charconv.h" +#include <string.h> + +uns +stk_conv_internal(struct conv_context *c, byte *s, uns in_cs, uns out_cs) +{ + /* We do not allocate anything for identical 
charsets. */ + if (in_cs == out_cs) + { + c->dest_start = s; + return 0; + } + + uns l = strlen(s); + + conv_init(c); + conv_set_charset(c, in_cs, out_cs); + c->source = s; + c->source_end = s + l + 1; + + /* Resulting string can be longer after the conversion. + * The following constant must be at least 4 for conversion to UTF-8 + * and at least the maximum length of the strings in string_table for other charsets. */ + return 4 * l + 1; +} diff --git a/charset/stk-charconv.h b/charset/stk-charconv.h new file mode 100644 index 00000000..c0d8568f --- /dev/null +++ b/charset/stk-charconv.h @@ -0,0 +1,23 @@ +/* + * Sherlock Library -- Character Conversion with Allocation on the Stack + * + * (c) 2006 Pavel Charvat + */ + +#ifndef _STK_CHARCONV_H +#define _STK_CHARCONV_H + +#include "charset/charconv.h" +#include <alloca.h> + +#define stk_conv(s, cs_in, cs_out) \ + ({ struct conv_context _c; uns _l=stk_conv_internal(&_c, (s), (cs_in), (cs_out)); \ + if (_l) { _c.dest=_c.dest_start=alloca(_l); _c.dest_end=_c.dest+_l; conv_run(&_c); } \ + _c.dest_start; }) + +#define stk_conv_to_utf8(s, cs_in) stk_conv(s, cs_in, CONV_CHARSET_UTF8) +#define stk_conv_from_utf8(s, cs_out) stk_conv(s, CONV_CHARSET_UTF8, cs_out) + +uns stk_conv_internal(struct conv_context *, byte *, uns, uns); + +#endif