2 * Character Set Conversion Library 1.0
4 * (c) 1998 Martin Mares <mj@atrey.karlin.mff.cuni.cz>
6 * This software may be freely distributed and used according to the terms
7 * of the GNU General Public License.
10 #include "charset/charconv.h"
11 #include "charset/chartable.h"
/*
 * Local null pointer constant.  The visible part of this file includes only
 * the two charset headers and no standard header.
 * NOTE(review): any preprocessor guard around this definition is not
 * visible in this listing -- confirm it cannot clash with a system NULL.
 */
14 #define NULL ((void *)0)
/*
 * conv_init: reset the buffer pointers of conversion context `c`.
 *
 * Sets source, source_end, dest, dest_start and dest_end to NULL.  The
 * visible statements touch no other member of the context.
 *
 * NOTE(review): the return type and the braces of this function are not
 * visible in this listing.
 */
18 conv_init(struct conv_context *c)
/* No input range attached yet. */
20 c->source = c->source_end = NULL;
/* No output buffer attached yet. */
21 c->dest = c->dest_start = c->dest_end = NULL;
/*
 * conv_none: pass-through conversion.
 *
 * The visible statements copy no bytes.  Instead the output pointers are
 * aimed at the input range itself: dest_start is set to source and dest to
 * source_end.  The function returns CONV_SOURCE_END, CONV_DEST_END and
 * CONV_SKIP or-ed together.
 *
 * NOTE(review): the return type and braces are not visible in this listing.
 */
25 conv_none(struct conv_context *c)
/* The casts drop the const qualifier that the source pointers carry elsewhere in this file. */
27 c->dest_start = (char *) c->source;
28 c->dest = (char *) c->source_end;
29 return CONV_SOURCE_END | CONV_DEST_END | CONV_SKIP;
/*
 * conv_from_utf8: decode UTF-8 input from c->source and translate it through
 * the uni_to_x and x_to_out tables into the output buffer at c->dest.
 *
 * The visible return statements yield CONV_SOURCE_END or CONV_DEST_END.
 *
 * NOTE(review): many lines of this function (braces, labels, gotos, bounds
 * checks and the bodies of the error branches) are not visible in this
 * listing.  The comments below describe only the statements that are shown;
 * what happens when a check fails cannot be told from here.
 */
33 conv_from_utf8(struct conv_context *c)
35 unsigned short *x_to_out = c->x_to_out;
36 const unsigned char *s = c->source;
37 const unsigned char *se = c->source_end;
38 unsigned char *d = c->dest;
39 unsigned char *de = c->dest_end;
/* Biased base pointer: strings + code addresses string_table[code - 0x100]. */
40 unsigned char *strings = string_table - 0x100;
41 unsigned int counter, code, cc;
46 while (s < se) /* Optimized for speed, beware of spaghetti code */
/* Tests whether the next byte lacks the 10xxxxxx continuation pattern. */
57 if ((s[0] & 0xc0) != 0x80)
/* Append the six payload bits of one continuation byte. */
60 code = (code << 6) | (*s++ & 0x3f);
/* Same test for the next two bytes at once. */
64 if ((s[0] & 0xc0) != 0x80 || (s[1] & 0xc0) != 0x80)
67 code = (code << 6) | (*s++ & 0x3f);
68 code = (code << 6) | (*s++ & 0x3f);
/* Continuation test that also consumes the byte it inspected. */
74 if ((*s++ & 0xc0) != 0x80)
/* Two-level lookup keyed by the high and low byte of the code point, then mapped through x_to_out. */
/* NOTE(review): assumes code >> 8 stays inside uni_to_x -- confirm against the missing range checks. */
89 code = x_to_out[uni_to_x[code >> 8U][code & 0xff]];
/* Entry in the biased string table; its first byte is read as a length and k then points past it. */
99 unsigned char *k = strings + code;
100 unsigned int len = *k++;
112 return CONV_SOURCE_END;
119 return CONV_DEST_END;
/* Classification by cc, presumably the lead byte (its assignment is not visible here). */
/* Below 0xe0: five payload bits, one continuation byte to follow. */
129 if (cc < 0xe0) { code = cc & 0x1f; counter = 1; }
/* Below 0xf0: four payload bits, two continuation bytes to follow. */
130 else if (cc < 0xf0) { code = cc & 0x0f; counter = 2; }
/* Longer forms: only a continuation count is set in these lines, no payload bits are taken from cc. */
134 if (cc < 0xf8) counter = 3;
135 else if (cc < 0xfc) counter = 4;
136 else if (cc < 0xfe) counter = 5;
/* Continuation check followed by accumulation of six more payload bits. */
148 if ((*s & 0xc0) != 0x80)
150 code = (code << 6) | (*s++ & 0x3f);
/*
 * conv_to_utf8: translate each input byte at c->source through in_to_x and
 * x_to_uni and write the resulting code point to c->dest as UTF-8.
 *
 * The visible return statements yield CONV_SOURCE_END or CONV_DEST_END.
 *
 * NOTE(review): the loop header, the single-byte branch, the output-space
 * checks, the labels and the pointer write-backs are not visible in this
 * listing.  The comments below describe only the statements that are shown.
 */
159 conv_to_utf8(struct conv_context *c)
161 unsigned short *in_to_x = c->in_to_x;
162 const unsigned char *s = c->source;
163 const unsigned char *se = c->source_end;
164 unsigned char *d = c->dest;
165 unsigned char *de = c->dest_end;
/* Input byte -> intermediate index -> code point. */
169 unsigned int code = x_to_uni[in_to_x[*s]];
176 else if (code < 0x800)
/* Two-byte form: 110xxxxx 10xxxxxx. */
180 *d++ = 0xc0 | (code >> 6);
181 *d++ = 0x80 | (code & 0x3f);
/*
 * Three-byte form: 1110xxxx 10xxxxxx 10xxxxxx.  The lead byte must carry the
 * 0xe0 tag.  A 0xc0 tag would announce a two-byte sequence, which
 * conv_from_utf8 in this file (lead byte below 0xe0) would decode wrongly.
 * Assumes code fits in 16 bits so that code >> 12 is at most 0x0f --
 * TODO confirm against the element type of x_to_uni.
 */
187 *d++ = 0xe0 | (code >> 12);
188 *d++ = 0x80 | ((code >> 6) & 0x3f);
189 *d++ = 0x80 | (code & 0x3f);
195 return CONV_SOURCE_END;
200 return CONV_DEST_END;
/*
 * conv_standard: table-driven conversion between two byte-oriented charsets.
 *
 * Each input byte is mapped through c->in_to_x and then c->x_to_out.  The
 * visible return statements yield CONV_SOURCE_END or CONV_DEST_END.
 *
 * NOTE(review): the loop header, the branch that decides between a direct
 * byte and a string_table entry, the output-space checks and the pointer
 * write-backs are not visible in this listing.
 */
204 conv_standard(struct conv_context *c)
206 unsigned short *in_to_x = c->in_to_x;
207 unsigned short *x_to_out = c->x_to_out;
208 const unsigned char *s = c->source;
209 const unsigned char *se = c->source_end;
210 unsigned char *d = c->dest;
211 unsigned char *de = c->dest_end;
/* Biased base pointer: strings + code addresses string_table[code - 0x100]. */
212 unsigned char *strings = string_table - 0x100;
/* Input byte -> intermediate index -> output code. */
216 unsigned int code = x_to_out[in_to_x[*s]];
/* Entry in the biased string table; its first byte is read as a length and k then points past it. */
225 unsigned char *k = strings + code;
226 unsigned int len = *k++;
237 return CONV_SOURCE_END;
242 return CONV_DEST_END;
/*
 * conv_set_charset: select the conversion routine and the translation tables
 * of context `c` for converting from charset `src` to charset `dest`.
 *
 * c->convert is assigned one of conv_none, conv_standard, conv_from_utf8 or
 * conv_to_utf8.  c->in_to_x is taken from input_to_x[src] and c->x_to_out
 * from x_to_output[dest].
 *
 * NOTE(review): the conditions guarding the conv_none / conv_standard
 * choice and the two table assignments (presumably if/else lines) are not
 * visible in this listing, and neither is any range check on `src` or
 * `dest` before they are used as array indices -- confirm against the full
 * file.
 */
246 conv_set_charset(struct conv_context *c, int src, int dest)
249 c->convert = conv_none;
252 c->convert = conv_standard;
/* UTF-8 on the input side selects the UTF-8 decoder. */
253 if (src == CONV_CHARSET_UTF8)
254 c->convert = conv_from_utf8;
256 c->in_to_x = input_to_x[src];
/* UTF-8 on the output side selects the UTF-8 encoder. */
257 if (dest == CONV_CHARSET_UTF8)
258 c->convert = conv_to_utf8;
260 c->x_to_out = x_to_output[dest];