2 * Character Set Conversion Library 1.2
4 * (c) 1998--2004 Martin Mares <mj@ucw.cz>
6 * This software may be freely distributed and used according to the terms
7 * of the GNU Lesser General Public License.
11 #include "charset/charconv.h"
12 #include "charset/chartable.h"
/*
 * conv_init: clear the buffer pointers held in a conversion context.
 *
 * After the two statements below, c->source, c->source_end, c->dest,
 * c->dest_start and c->dest_end are all NULL.
 *
 * NOTE(review): only these statements are visible in this excerpt; whether
 * other fields of *c are initialised here cannot be told from this view.
 */
15 conv_init(struct conv_context *c)
/* No input buffer attached yet. */
17 c->source = c->source_end = NULL;
/* No output buffer attached yet. */
18 c->dest = c->dest_start = c->dest_end = NULL;
/*
 * conv_none: pass-through "conversion" that copies no data.
 *
 * Instead of writing into a destination buffer, it makes the destination
 * window alias the source buffer and returns the flag combination
 * CONV_SOURCE_END | CONV_DEST_END | CONV_SKIP.
 */
22 conv_none(struct conv_context *c)
/* The output is reported as starting at the first input byte ... */
24 c->dest_start = (char *) c->source;
/* ... and the output position as being at the end of the input. */
25 c->dest = (char *) c->source_end;
/*
 * NOTE(review): CONV_SKIP presumably tells the caller that dest_start/dest
 * point into the source rather than into its own buffer -- confirm against
 * the flag's definition in charconv.h.
 */
26 return CONV_SOURCE_END | CONV_DEST_END | CONV_SKIP;
/*
 * conv_slow: state-driven conversion step working on the context's
 * source/dest windows.
 *
 * NOTE(review): most of this function (the switch header, several case
 * labels, bounds checks and the code that writes s/d back to the context)
 * is not visible in this excerpt. The comments below describe only the
 * statements that are shown.
 */
39 conv_slow(struct conv_context *c)
/* Local cursors over the input window [s, se) and the output window [d, de). */
41 const unsigned char *s = c->source;
42 const unsigned char *se = c->source_end;
43 unsigned char *d = c->dest;
44 unsigned char *de = c->dest_end;
/* Copy one byte from the context's string_at cursor to the output; both advance. */
58 *d++ = *c->string_at++;
/* True when the next input byte is NOT a UTF-8 continuation byte (10xxxxxx). */
67 if ((*s & 0xc0) != 0x80)
/* Append the six payload bits of the continuation byte to the code and consume it. */
72 c->code = (c->code << 6) | (*s++ & 0x3f);
/* Decoded value does not fit in 16 bits; the handling is not visible in this excerpt. */
75 if (c->code >= 0x10000)
/* State: emit the first byte of the UTF-8 encoding of c->code. */
80 case UTF8_WRITE_START:
/* Codes below 0x800 take the two-byte form ... */
88 else if (c->code < 0x800)
/* ... whose lead byte is 110xxxxx carrying the bits above the low six. */
90 *d++ = 0xc0 | (c->code >> 6);
/* Otherwise use the three-byte form: lead byte 1110xxxx carrying bits 12 and up. */
96 *d++ = 0xe0 | (c->code >> 12);
/* The remaining bytes are produced in the continuation state. */
100 c->state = UTF8_WRITE_CONT;
/* State: emit continuation bytes. */
102 case UTF8_WRITE_CONT:
/*
 * Emit one continuation byte taken from c->code >> 10.
 * NOTE(review): this implies c->code was re-aligned by lines not shown here
 * so that the next six payload bits sit at bit 10 and above -- confirm.
 */
107 *d++ = 0x80 | (c->code >> 10);
/* Reported when the input is exhausted (the jump target/label is not visible here). */
122 return CONV_SOURCE_END;
/* Reported when the output is full (the jump target/label is not visible here). */
126 return CONV_DEST_END;
/*
 * conv_from_utf8: decode UTF-8 input, map each code point through
 * uni_to_x and the context's x_to_out table, and write the result to the
 * destination window.
 *
 * NOTE(review): labels, gotos, bounds checks and several branches are not
 * visible in this excerpt; the comments describe only the statements shown.
 */
130 conv_from_utf8(struct conv_context *c)
/* Cache the output mapping table of this context. */
132 unsigned short *x_to_out = c->x_to_out;
133 const unsigned char *s, *se;
134 unsigned char *d, *de, *k;
135 unsigned int code, cc, len;
/* A non-zero state means an earlier call left work unfinished (the branch taken is not shown). */
138 if (unlikely(c->state))
146 while (s < se) /* Optimized for speed, beware of spaghetti code */
/* Reject the sequence unless the next byte is a continuation byte (10xxxxxx). */
157 if ((s[0] & 0xc0) != 0x80)
/* Append its six payload bits and consume it. */
160 code = (code << 6) | (*s++ & 0x3f);
/* Both following bytes must be continuation bytes; they are checked before either is consumed. */
164 if ((s[0] & 0xc0) != 0x80 || (s[1] & 0xc0) != 0x80)
/* Append six payload bits from each of the two continuation bytes. */
167 code = (code << 6) | (*s++ & 0x3f);
168 code = (code << 6) | (*s++ & 0x3f);
/* Consume a byte and test whether it was a continuation byte. */
174 if ((*s++ & 0xc0) != 0x80)
/*
 * Two-level lookup: the bits above the low eight select a row of uni_to_x,
 * the low byte selects the entry; the result is then mapped through x_to_out.
 * NOTE(review): assumes code >> 8 is within the row count of uni_to_x --
 * confirm that every path reaching this line bounds the code.
 */
189 code = x_to_out[uni_to_x[code >> 8U][code & 0xff]];
/*
 * Mapped values are rebased by 0x100 into string_table.
 * NOTE(review): presumably values >= 0x100 denote multi-byte replacement
 * strings and smaller values are single output bytes -- the guarding
 * condition is not visible here.
 */
198 k = string_table + code - 0x100;
208 return CONV_SOURCE_END;
/*
 * NOTE(review): the lines that follow record a partially *read* sequence
 * (payload bits in c->code, outstanding continuation bytes in c->remains),
 * yet the state stored here is named UTF8_WRITE_START. Verify that this is
 * the intended state constant for resuming an incomplete input sequence.
 */
211 c->state = UTF8_WRITE_START;
/* Lead byte below 0xe0: keep its low five bits, one continuation byte outstanding. */
212 if (cc < 0xe0) { c->code = cc & 0x1f; c->remains = 1; }
/* Lead byte below 0xf0: keep its low four bits, two continuation bytes outstanding. */
213 else if (cc < 0xf0) { c->code = cc & 0x0f; c->remains = 2; }
/* Longer forms: only the number of outstanding bytes (3, 4 or 5) is recorded on these lines. */
217 if (cc < 0xf8) c->remains = 3;
218 else if (cc < 0xfc) c->remains = 4;
219 else if (cc < 0xfe) c->remains = 5;
/* NOTE(review): presumably set when a string_table sequence is left unfinished -- confirm. */
225 c->state = SEQ_WRITE;
/* NOTE(review): presumably set when a single output byte is left pending -- confirm. */
230 c->state = SINGLE_WRITE;
/*
 * conv_to_utf8: map each input byte through the context's in_to_x table
 * and x_to_uni to a code, then write that code to the destination as UTF-8.
 *
 * NOTE(review): loop header, bounds checks, labels and the one-byte branch
 * are not visible in this excerpt; the comments describe only what is shown.
 */
253 conv_to_utf8(struct conv_context *c)
/* Cache the input mapping table of this context. */
255 unsigned short *in_to_x = c->in_to_x;
256 const unsigned char *s, *se;
257 unsigned char *d, *de;
/* A non-zero state means an earlier call left work unfinished (the branch taken is not shown). */
261 if (unlikely(c->state))
/* Input byte -> in_to_x entry -> x_to_uni entry. */
271 code = x_to_uni[in_to_x[*s]];
/* Codes below 0x800 use the two-byte form. */
278 else if (code < 0x800)
/* Lead byte 110xxxxx carries the bits above the low six ... */
282 *d++ = 0xc0 | (code >> 6);
/* ... and one continuation byte carries the low six bits. */
283 *d++ = 0x80 | (code & 0x3f);
/* Three-byte form: lead byte 1110xxxx carries bits 12 and up ... */
289 *d++ = 0xe0 | (code >> 12);
/* ... followed by bits 6-11 ... */
290 *d++ = 0x80 | ((code >> 6) & 0x3f);
/* ... and bits 0-5. */
291 *d++ = 0x80 | (code & 0x3f);
297 return CONV_SOURCE_END;
302 return CONV_DEST_END;
/*
 * NOTE(review): presumably records that a UTF-8 sequence still has to be
 * written, so that a later call can continue -- the code that consumes
 * this state is not visible here.
 */
307 c->state = UTF8_WRITE_START;
/*
 * conv_standard: table-driven conversion. Each input byte is mapped
 * through the context's in_to_x table and then through its x_to_out table.
 *
 * NOTE(review): loop header, labels and the copy loops are not visible in
 * this excerpt; the comments describe only the statements shown.
 */
317 conv_standard(struct conv_context *c)
/* Cache both mapping tables of this context. */
319 unsigned short *in_to_x = c->in_to_x;
320 unsigned short *x_to_out = c->x_to_out;
321 const unsigned char *s, *se;
322 unsigned char *d, *de, *k;
/* A non-zero state means an earlier call left work unfinished (the branch taken is not shown). */
325 if (unlikely(c->state))
/* Input byte -> in_to_x entry -> x_to_out entry. */
335 unsigned int code = x_to_out[in_to_x[*s]];
/* No room left for even one output byte. */
338 if (unlikely(d >= de))
/*
 * Mapped values are rebased by 0x100 into string_table.
 * NOTE(review): presumably only values >= 0x100 reach this line -- the
 * guarding condition is not visible here.
 */
344 k = string_table + code - 0x100;
/* The len bytes to be written would run past the end of the output window. */
346 if (unlikely(d + len > de))
355 return CONV_SOURCE_END;
360 return CONV_DEST_END;
/* NOTE(review): presumably marks a string_table sequence as unfinished so that a later call can continue -- confirm. */
365 c->state = SEQ_WRITE;
/*
 * conv_set_charset: record the source and destination charset ids in the
 * context and select the conversion routine and mapping tables to use.
 *
 * NOTE(review): the conditions choosing between conv_none and
 * conv_standard, and any else-keywords around the table assignments, are
 * not visible in this excerpt.
 */
376 conv_set_charset(struct conv_context *c, int src, int dest)
378 c->source_charset = src;
379 c->dest_charset = dest;
/* Pass-through routine (its guarding condition is not shown). */
381 c->convert = conv_none;
/* Table-driven routine, replaced below when either side is UTF-8. */
384 c->convert = conv_standard;
/* UTF-8 input gets the dedicated decoder. */
385 if (src == CONV_CHARSET_UTF8)
386 c->convert = conv_from_utf8;
/*
 * Input table for the source charset.
 * NOTE(review): no range check on src is visible -- confirm that callers
 * pass a valid index into input_to_x.
 */
388 c->in_to_x = input_to_x[src];
/* UTF-8 output gets the dedicated encoder. */
389 if (dest == CONV_CHARSET_UTF8)
390 c->convert = conv_to_utf8;
/*
 * Output table for the destination charset.
 * NOTE(review): no range check on dest is visible -- confirm that callers
 * pass a valid index into x_to_output.
 */
392 c->x_to_out = x_to_output[dest];
398 conv_x_to_ucs(unsigned int x)
/*
 * conv_ucs_to_x: look up the uni_to_x table entry for the value ucs.
 *
 * The table is indexed in two levels: ucs >> 8 selects the row and the low
 * eight bits select the entry within that row.
 *
 * NOTE(review): no range check on ucs is visible. If uni_to_x has 256 rows,
 * any ucs >= 0x10000 would index past the table -- confirm the table's
 * dimensions and what callers may pass.
 */
404 conv_ucs_to_x(unsigned int ucs)
406 return uni_to_x[ucs >> 8U][ucs & 0xff];
412 return sizeof(x_to_uni) / sizeof(x_to_uni[0]);