2 * Character Set Conversion Library 1.2
4 * (c) 1998--2004 Martin Mares <mj@ucw.cz>
6 * This software may be freely distributed and used according to the terms
7 * of the GNU Lesser General Public License.
11 #include "charset/charconv.h"
12 #include "charset/chartable.h"
/*
 * conv_init: clear the source and destination pointers of conversion
 * context `c`. After the two assignments below, c->source, c->source_end,
 * c->dest, c->dest_start and c->dest_end are all NULL.
 */
15 conv_init(struct conv_context *c)
17 c->source = c->source_end = NULL;
18 c->dest = c->dest_start = c->dest_end = NULL;
/*
 * conv_none: conversion routine that writes no bytes in the lines shown;
 * it makes the destination pointers refer to the source range instead.
 *
 * c->dest_start is set to c->source and c->dest to c->source_end (both
 * cast to char *). Returns CONV_SOURCE_END | CONV_DEST_END | CONV_SKIP.
 */
22 conv_none(struct conv_context *c)
/* The output "starts" where the input starts... */
24 c->dest_start = (char *) c->source;
/* ...and the output write position is placed at the end of the input. */
25 c->dest = (char *) c->source_end;
26 return CONV_SOURCE_END | CONV_DEST_END | CONV_SKIP;
/*
 * conv_slow: conversion step driven by state stored in the context
 * (the visible lines use c->state, c->code and c->string_at).
 *
 * NOTE(review): only part of this function's body is visible here (the
 * switch header, several case labels, bounds checks and branch bodies are
 * not shown). The comments below describe the visible lines only.
 */
39 conv_slow(struct conv_context *c)
/* Local copies of the source read position / end and the destination
 * write position / end taken from the context. */
41 const unsigned char *s = c->source;
42 const unsigned char *se = c->source_end;
43 unsigned char *d = c->dest;
44 unsigned char *de = c->dest_end;
/* Copy one byte from c->string_at to the output, advancing both. */
58 *d++ = *c->string_at++;
/* True when the top two bits of *s are not 10, i.e. the byte does not
 * have the 10xxxxxx form of a UTF-8 continuation byte. */
67 if ((*s & 0xc0) != 0x80)
/* Shift the accumulated code left by 6 and append the low 6 bits of the
 * current byte; s is advanced past it. */
72 c->code = (c->code << 6) | (*s++ & 0x3f);
/* Codes of 0x10000 and above get special treatment; the action taken is
 * not visible here. */
75 if (c->code >= 0x10000)
80 case UTF8_WRITE_START:
/* Codes below 0x800: emit a 110xxxxx byte carrying the bits above the
 * low six. */
88 else if (c->code < 0x800)
90 *d++ = 0xc0 | (c->code >> 6);
/* Emit a 1110xxxx byte carrying the bits above the low twelve.
 * NOTE(review): presumably the branch for codes >= 0x800 - the `else`
 * itself is not visible; confirm. */
96 *d++ = 0xe0 | (c->code >> 12);
/* Subsequent bytes are produced in the UTF8_WRITE_CONT state. */
101 c->state = UTF8_WRITE_CONT;
103 case UTF8_WRITE_CONT:
/* Emit a 10xxxxxx byte taken from c->code >> 10.
 * NOTE(review): the shift by 10 presumably relies on c->code having been
 * re-aligned by statements not shown here - confirm. */
108 *d++ = 0x80 | (c->code >> 10);
/* Two distinct exits; the conditions selecting them are not visible. */
123 return CONV_SOURCE_END;
127 return CONV_DEST_END;
/*
 * conv_from_utf8: decodes multi-byte sequences from the source into a
 * numeric code, maps the code through uni_to_x and c->x_to_out, and, when
 * it cannot finish, records a state (UTF8_READ, SEQ_WRITE or SINGLE_WRITE)
 * in the context.
 *
 * NOTE(review): large parts of the control flow (labels, gotos, bounds
 * checks, output writes) are not visible here. The comments describe the
 * visible lines only.
 */
131 conv_from_utf8(struct conv_context *c)
/* Local alias of the context's x_to_out table. */
133 unsigned short *x_to_out = c->x_to_out;
134 const unsigned char *s, *se;
135 unsigned char *d, *de, *k;
136 unsigned int code, cc, len;
/* A non-zero c->state is handled on a separate, unlikely path (its body
 * is not visible here). */
139 if (unlikely(c->state))
147 while (s < se) /* Optimized for speed, beware of spaghetti code */
/* One following byte expected: it must have the form 10xxxxxx. */
158 if ((s[0] & 0xc0) != 0x80)
/* Append its low 6 bits to the code and advance s. */
161 code = (code << 6) | (*s++ & 0x3f);
/* Two following bytes expected: both must have the form 10xxxxxx. */
165 if ((s[0] & 0xc0) != 0x80 || (s[1] & 0xc0) != 0x80)
/* Append the low 6 bits of each of the two bytes. */
168 code = (code << 6) | (*s++ & 0x3f);
169 code = (code << 6) | (*s++ & 0x3f);
/* Note that s is advanced as part of the test itself, whether or not the
 * byte has the 10xxxxxx form. */
175 if ((*s++ & 0xc0) != 0x80)
/* Two-level lookup: the bits above the low 8 select a row of uni_to_x,
 * the low 8 bits select the entry; the result indexes x_to_out. */
190 code = x_to_out[uni_to_x[code >> 8U][code & 0xff]];
/* k points into string_table at offset (code - 0x100).
 * NOTE(review): presumably reached only for code >= 0x100 - the guarding
 * condition is not visible; confirm. */
199 k = string_table + code - 0x100;
209 return CONV_SOURCE_END;
/* cc below 0xe0: keep the low 5 bits as the initial code, remains = 1. */
212 if (cc < 0xe0) { c->code = cc & 0x1f; c->remains = 1; }
/* cc below 0xf0: keep the low 4 bits as the initial code, remains = 2. */
213 else if (cc < 0xf0) { c->code = cc & 0x0f; c->remains = 2; }
/* Larger cc values: only c->remains (3, 4 or 5) is set on these lines;
 * no bits of cc are stored into c->code here. */
217 if (cc < 0xf8) c->remains = 3;
218 else if (cc < 0xfc) c->remains = 4;
219 else if (cc < 0xfe) c->remains = 5;
/* State recorded in the context; the surrounding conditions and exits are
 * not visible here. */
222 c->state = UTF8_READ;
226 c->state = SEQ_WRITE;
232 c->state = SINGLE_WRITE;
/*
 * conv_to_utf8: maps each source byte through c->in_to_x and x_to_uni to
 * a numeric code and writes that code as a multi-byte sequence.
 *
 * NOTE(review): the loop header, the single-byte case, bounds checks and
 * exit labels are not visible here. The comments describe the visible
 * lines only.
 */
254 conv_to_utf8(struct conv_context *c)
/* Local alias of the context's in_to_x table. */
256 unsigned short *in_to_x = c->in_to_x;
257 const unsigned char *s, *se;
258 unsigned char *d, *de;
/* A non-zero c->state is handled on a separate, unlikely path (its body
 * is not visible here). */
262 if (unlikely(c->state))
/* Input byte -> in_to_x entry -> x_to_uni entry. */
272 code = x_to_uni[in_to_x[*s]];
/* Codes below 0x800: two bytes, 110xxxxx then 10xxxxxx. */
279 else if (code < 0x800)
283 *d++ = 0xc0 | (code >> 6);
284 *d++ = 0x80 | (code & 0x3f);
/* Three bytes: 1110xxxx, then two 10xxxxxx bytes with the middle and low
 * six bits. NOTE(review): presumably the remaining (code >= 0x800)
 * branch - the `else` is not visible; confirm. */
290 *d++ = 0xe0 | (code >> 12);
291 *d++ = 0x80 | ((code >> 6) & 0x3f);
292 *d++ = 0x80 | (code & 0x3f);
/* Two distinct exits; the conditions selecting them are not visible. */
298 return CONV_SOURCE_END;
303 return CONV_DEST_END;
/* Record UTF8_WRITE_START in the context; the condition under which this
 * happens is not visible here. */
308 c->state = UTF8_WRITE_START;
/*
 * conv_standard: table-driven conversion. Each source byte is mapped
 * through c->in_to_x and then c->x_to_out; the resulting code is either
 * used directly or used to locate an entry in string_table.
 *
 * NOTE(review): the loop header, the output writes and the exit labels
 * are not visible here. The comments describe the visible lines only.
 */
318 conv_standard(struct conv_context *c)
/* Local aliases of the context's two mapping tables. */
320 unsigned short *in_to_x = c->in_to_x;
321 unsigned short *x_to_out = c->x_to_out;
322 const unsigned char *s, *se;
323 unsigned char *d, *de, *k;
/* A non-zero c->state is handled on a separate, unlikely path (its body
 * is not visible here). */
326 if (unlikely(c->state))
/* Input byte -> in_to_x entry -> x_to_out entry. */
336 unsigned int code = x_to_out[in_to_x[*s]];
/* Destination write position has reached the destination end. */
339 if (unlikely(d >= de))
/* k points into string_table at offset (code - 0x100).
 * NOTE(review): presumably reached only for code >= 0x100 - the guarding
 * condition is not visible; confirm. */
345 k = string_table + code - 0x100;
/* Fewer than len bytes remain before the destination end. */
347 if (unlikely(d + len > de))
/* Two distinct exits; the conditions selecting them are not visible. */
356 return CONV_SOURCE_END;
361 return CONV_DEST_END;
/* Record SEQ_WRITE in the context; the condition under which this happens
 * is not visible here. */
366 c->state = SEQ_WRITE;
/*
 * conv_set_charset: store the source and destination charset identifiers
 * in the context and choose the conversion routine and mapping tables.
 *
 *   c    - conversion context to configure
 *   src  - source charset identifier (stored in c->source_charset)
 *   dest - destination charset identifier (stored in c->dest_charset)
 *
 * NOTE(review): some of the conditions (`if`/`else` lines) that select
 * between the assignments below are not visible here.
 */
377 conv_set_charset(struct conv_context *c, int src, int dest)
379 c->source_charset = src;
380 c->dest_charset = dest;
/* conv_none is one possible routine; its selecting condition is not
 * visible here. */
382 c->convert = conv_none;
/* conv_standard is assigned first and then overridden below when either
 * side is CONV_CHARSET_UTF8. */
385 c->convert = conv_standard;
386 if (src == CONV_CHARSET_UTF8)
387 c->convert = conv_from_utf8;
/* Input table selected by the source charset.
 * NOTE(review): presumably the non-UTF-8 branch - confirm. */
389 c->in_to_x = input_to_x[src];
390 if (dest == CONV_CHARSET_UTF8)
391 c->convert = conv_to_utf8;
/* Output table selected by the destination charset.
 * NOTE(review): presumably the non-UTF-8 branch - confirm. */
393 c->x_to_out = x_to_output[dest];
/*
 * conv_x_to_ucs: takes an unsigned int `x`.
 * NOTE(review): the body of this function is not visible here; by its
 * name it presumably maps x to a UCS code - confirm.
 */
399 conv_x_to_ucs(unsigned int x)
/*
 * conv_ucs_to_x: look up `ucs` in the two-level table uni_to_x.
 * The bits above the low 8 select the row and the low 8 bits select the
 * entry within it. No range check on `ucs` appears in the visible lines.
 */
405 conv_ucs_to_x(unsigned int ucs)
407 return uni_to_x[ucs >> 8U][ucs & 0xff];
/* Returns the number of elements in the x_to_uni array (total size
 * divided by the size of one element).
 * NOTE(review): the header of the enclosing function is not visible. */
413 return sizeof(x_to_uni) / sizeof(x_to_uni[0]);
/*
 * conv_in_to_ucs: map `y` through the context's input table and then
 * through x_to_uni, i.e. return x_to_uni[c->in_to_x[y]].
 * No range check on `y` appears in the visible lines.
 */
417 conv_in_to_ucs(struct conv_context *c, unsigned int y)
419 return x_to_uni[c->in_to_x[y]];
/*
 * conv_ucs_to_out: map `ucs` to a value of the context's output table.
 *
 * The code first looks up x = uni_to_x[ucs >> 8][ucs & 0xff]. If x is 256
 * or c->x_to_out[x] is 256 or more, a separate branch is taken (its result
 * is not visible here). Otherwise c->x_to_out[x] is returned.
 */
422 int conv_ucs_to_out(struct conv_context *c, unsigned int ucs)
/* Two-level lookup, same indexing as in conv_ucs_to_x. */
424 uns x = uni_to_x[ucs >> 8U][ucs & 0xff];
/* NOTE(review): x == 256 presumably marks "no mapping", and output values
 * >= 256 presumably are not single bytes - confirm. */
425 if (x == 256 || c->x_to_out[x] >= 256)
428 return c->x_to_out[x];