2 * Character Set Conversion Library 1.1
4 * (c) 1998--2001 Martin Mares <mj@ucw.cz>
6 * This software may be freely distributed and used according to the terms
7 * of the GNU General Public License.
11 #include "charset/charconv.h"
12 #include "charset/chartable.h"
/*
 * conv_init: put a conversion context into an "no buffers attached" state.
 *
 * Sets the source window (source, source_end) and the destination window
 * (dest, dest_start, dest_end) of *c to NULL.  Nothing else in the context
 * is touched by the statements visible here.
 */
15 conv_init(struct conv_context *c)
17 c->source = c->source_end = NULL;
18 c->dest = c->dest_start = c->dest_end = NULL;
/*
 * conv_none: identity converter that copies no data.
 *
 * Instead of writing into a destination buffer, it aims the destination
 * pointers at the source buffer itself: dest_start at the first source byte
 * and dest at source_end.
 *
 * Returns CONV_SOURCE_END | CONV_DEST_END | CONV_SKIP.
 */
22 conv_none(struct conv_context *c)
/* Cast needed because the dest pointers are char * (presumably the source
 * pointers are const-qualified -- confirm against struct conv_context). */
24 c->dest_start = (char *) c->source;
25 c->dest = (char *) c->source_end;
26 return CONV_SOURCE_END | CONV_DEST_END | CONV_SKIP;
/*
 * conv_from_utf8: decode UTF-8 from [c->source, c->source_end) and emit the
 * result in the output charset into [c->dest, c->dest_end).
 *
 * Each decoded code point is mapped through uni_to_x (indexed by high byte,
 * then low byte) and then through c->x_to_out.  The resulting value is used
 * as an offset into string_table (biased by 0x100) when it does not denote a
 * plain output byte.
 *
 * Returns CONV_SOURCE_END or CONV_DEST_END.
 * NOTE(review): presumably CONV_SOURCE_END means the input was used up and
 * CONV_DEST_END means the output buffer filled; the code that selects
 * between them is not visible here -- confirm.
 */
30 conv_from_utf8(struct conv_context *c)
32 unsigned short *x_to_out = c->x_to_out;
33 const unsigned char *s = c->source;
34 const unsigned char *se = c->source_end;
35 unsigned char *d = c->dest;
36 unsigned char *de = c->dest_end;
/* Biased base pointer: adding a value >= 0x100 lands inside string_table.
 * NOTE(review): forming a pointer 0x100 before the start of an array is
 * formally undefined behaviour in ISO C even if never dereferenced there. */
37 unsigned char *strings = string_table - 0x100;
38 unsigned int counter, code, cc;
43 while (s < se) /* Optimized for speed, beware of spaghetti code */
/* Two-byte sequence: the next byte must be a continuation byte (10xxxxxx);
 * its low 6 bits are appended to code. */
54 if ((s[0] & 0xc0) != 0x80)
57 code = (code << 6) | (*s++ & 0x3f);
/* Three-byte sequence: both following bytes must be continuation bytes;
 * each contributes 6 more bits. */
61 if ((s[0] & 0xc0) != 0x80 || (s[1] & 0xc0) != 0x80)
64 code = (code << 6) | (*s++ & 0x3f);
65 code = (code << 6) | (*s++ & 0x3f);
/* Consumes one byte and tests whether it was a continuation byte. */
71 if ((*s++ & 0xc0) != 0x80)
/* Unicode -> internal code (two-level table) -> output charset value. */
86 code = x_to_out[uni_to_x[code >> 8U][code & 0xff]];
/* Table entry in string_table: the first byte is read as a length
 * (presumably followed by that many replacement bytes -- confirm). */
96 unsigned char *k = strings + code;
97 unsigned int len = *k++;
109 return CONV_SOURCE_END;
116 return CONV_DEST_END;
/* Lead-byte classification: code receives the payload bits of the lead byte
 * and counter the number of continuation bytes still expected. */
126 if (cc < 0xe0) { code = cc & 0x1f; counter = 1; }
127 else if (cc < 0xf0) { code = cc & 0x0f; counter = 2; }
/* Longer forms (lead bytes below 0xf8 / 0xfc / 0xfe): only the number of
 * continuation bytes is recorded on these lines. */
131 if (cc < 0xf8) counter = 3;
132 else if (cc < 0xfc) counter = 4;
133 else if (cc < 0xfe) counter = 5;
/* Validate the next byte as a continuation byte, then append its 6 bits. */
145 if ((*s & 0xc0) != 0x80)
147 code = (code << 6) | (*s++ & 0x3f);
/*
 * conv_to_utf8: convert bytes of the input charset from
 * [c->source, c->source_end) into UTF-8 written to [c->dest, c->dest_end).
 *
 * Every input byte is mapped through c->in_to_x and then x_to_uni to obtain
 * a Unicode code point, which is then encoded as UTF-8:
 *   code <  0x800 : 110xxxxx 10xxxxxx
 *   otherwise     : 1110xxxx 10xxxxxx 10xxxxxx
 * (a shorter form for small codes is presumably handled by the branch that
 * precedes the "else if" below -- confirm).
 *
 * Returns CONV_SOURCE_END or CONV_DEST_END.
 * NOTE(review): presumably "input exhausted" vs. "output buffer full";
 * the selecting code is not visible here -- confirm.
 */
156 conv_to_utf8(struct conv_context *c)
158 unsigned short *in_to_x = c->in_to_x;
159 const unsigned char *s = c->source;
160 const unsigned char *se = c->source_end;
161 unsigned char *d = c->dest;
162 unsigned char *de = c->dest_end;
/* Input byte -> internal code -> Unicode code point. */
166 unsigned int code = x_to_uni[in_to_x[*s]];
173 else if (code < 0x800)
/* Two-byte form: lead byte 110xxxxx carries the top 5 bits. */
177 *d++ = 0xc0 | (code >> 6);
178 *d++ = 0x80 | (code & 0x3f);
/* Three-byte form: the lead byte must be 1110xxxx (0xe0), not 0xc0.
 * Using 0xc0 here would emit a two-byte lead followed by two continuation
 * bytes, which is not valid UTF-8. */
184 *d++ = 0xe0 | (code >> 12);
185 *d++ = 0x80 | ((code >> 6) & 0x3f);
186 *d++ = 0x80 | (code & 0x3f);
192 return CONV_SOURCE_END;
197 return CONV_DEST_END;
/*
 * conv_standard: table-driven conversion between two byte-oriented charsets.
 *
 * Reads from [c->source, c->source_end) and writes to
 * [c->dest, c->dest_end).  Each input byte is mapped through c->in_to_x and
 * then c->x_to_out; the resulting value is used as an offset into
 * string_table (biased by 0x100) when it does not denote a plain output byte.
 *
 * Returns CONV_SOURCE_END or CONV_DEST_END.
 * NOTE(review): presumably "input exhausted" vs. "output buffer full";
 * the selecting code is not visible here -- confirm.
 */
201 conv_standard(struct conv_context *c)
203 unsigned short *in_to_x = c->in_to_x;
204 unsigned short *x_to_out = c->x_to_out;
205 const unsigned char *s = c->source;
206 const unsigned char *se = c->source_end;
207 unsigned char *d = c->dest;
208 unsigned char *de = c->dest_end;
/* Biased base pointer: adding a value >= 0x100 lands inside string_table.
 * NOTE(review): forming a pointer 0x100 before the start of an array is
 * formally undefined behaviour in ISO C even if never dereferenced there. */
209 unsigned char *strings = string_table - 0x100;
/* Input byte -> internal code -> output charset value. */
213 unsigned int code = x_to_out[in_to_x[*s]];
/* Table entry in string_table: the first byte is read as a length
 * (presumably followed by that many replacement bytes -- confirm). */
222 unsigned char *k = strings + code;
223 unsigned int len = *k++;
234 return CONV_SOURCE_END;
239 return CONV_DEST_END;
/*
 * conv_set_charset: configure context *c to convert from charset `src` to
 * charset `dest` by choosing the converter function and the lookup tables.
 *
 * Converter selection as far as visible here:
 *   - conv_none is installed in one case (NOTE(review): presumably when
 *     src == dest; the guarding condition is not visible -- confirm);
 *   - otherwise conv_standard is the default,
 *     replaced by conv_from_utf8 when src  is CONV_CHARSET_UTF8 and
 *     by          conv_to_utf8   when dest is CONV_CHARSET_UTF8.
 *
 * Tables: c->in_to_x is taken from input_to_x[src] and c->x_to_out from
 * x_to_output[dest].  NOTE(review): these assignments are probably the
 * "else" arms of the two UTF-8 tests -- confirm.  src and dest are used as
 * array indices without a range check in the visible code.
 */
243 conv_set_charset(struct conv_context *c, int src, int dest)
246 c->convert = conv_none;
249 c->convert = conv_standard;
250 if (src == CONV_CHARSET_UTF8)
251 c->convert = conv_from_utf8;
253 c->in_to_x = input_to_x[src];
254 if (dest == CONV_CHARSET_UTF8)
255 c->convert = conv_to_utf8;
257 c->x_to_out = x_to_output[dest];
/*
 * conv_x_to_ucs: takes an internal code `x`.
 * NOTE(review): the body is not visible here; by its name and by symmetry
 * with conv_ucs_to_x it presumably returns the Unicode code point for `x`
 * (likely via x_to_uni) -- confirm.
 */
263 conv_x_to_ucs(unsigned int x)
/*
 * conv_ucs_to_x: look up `ucs` in the two-level table uni_to_x and return
 * the entry found there.
 *
 * The first index is ucs >> 8 and the second is the low 8 bits of ucs.
 * NOTE(review): no range check on `ucs` is visible; if uni_to_x has only
 * 256 first-level entries, any ucs > 0xFFFF would index out of bounds --
 * confirm the table dimensions or the callers' guarantees.
 */
269 conv_ucs_to_x(unsigned int ucs)
271 return uni_to_x[ucs >> 8U][ucs & 0xff];
/* Yields the number of elements in the x_to_uni array (total size divided by
 * the size of one element).  NOTE(review): the header of the enclosing
 * function is not visible here. */
277 return sizeof(x_to_uni) / sizeof(x_to_uni[0]);