]> mj.ucw.cz Git - libucw.git/blob - lib/tagged-text.h
Merged obj2buck.h and buck2obj.h to object.h, the number of includes
[libucw.git] / lib / tagged-text.h
1 /*
2  *      Sherlock: Processing of tagged characters
3  *
4  *      (c) 2001--2003 Martin Mares <mj@ucw.cz>
5  */
6
7 #ifndef _SHERLOCK_TAGGED_TEXT_H
8 #define _SHERLOCK_TAGGED_TEXT_H
9
10 #include "lib/fastbuf.h"
11 #include "charset/unistream.h"
12
13 /* Reading of tagged text (Unicode values, tags mapped to 0x80000000 and higher) */
14
/*
 * GET_TAGGED_CHAR(p, u) -- decode one tagged character starting at p,
 * store its code in u and advance p past the bytes consumed.
 *
 *   p  modifiable pointer lvalue; it is expanded several times, so it must
 *      be free of side effects.  NOTE(review): the range comparisons assume
 *      the pointed-to bytes are unsigned -- confirm callers never pass a
 *      plain (possibly signed) char pointer.
 *   u  modifiable lvalue receiving the result; it must be able to hold
 *      unsigned values of 0x80000000 and above.
 *
 * Dispatch on the leading byte:
 *   0x00-0x7f  one byte, stored unchanged
 *   0x80-0x9f  one byte, stored as 0x80000000 + byte
 *   0xa0-0xaf  two bytes, stored as
 *              0x80010000 + ((first & 0x0f) << 6) + (second & 0x3f);
 *              the second byte is ASSERTed to lie in 0x80-0xbf
 *   0xb0-0xbf  one byte, stored as 0x80020000 + byte; only 0xb0 is
 *              accepted, any other value trips the ASSERT
 *   0xc0-0xff  delegated to GET_UTF8_CHAR with the same two arguments
 *
 * Every use of p and u is parenthesized so that lvalue expressions such as
 * *pp expand with the intended precedence.
 */
#define GET_TAGGED_CHAR(p,u) do {                                       \
  (u) = *(p);                                                           \
  if ((u) >= 0xc0)                                                      \
    GET_UTF8_CHAR((p), (u));                                            \
  else if ((u) >= 0x80)                                                 \
    {                                                                   \
      (p)++;                                                            \
      if ((u) >= 0xb0)                                                  \
        {                                                               \
          ASSERT((u) == 0xb0);                                          \
          (u) += 0x80020000;                                            \
        }                                                               \
      else if ((u) >= 0xa0)                                             \
        {                                                               \
          ASSERT(*(p) >= 0x80 && *(p) <= 0xbf);                         \
          (u) = 0x80010000 + (((u) & 0x0f) << 6) + (*(p)++ & 0x3f);     \
        }                                                               \
      else                                                              \
        (u) += 0x80000000;                                              \
    }                                                                   \
  else                                                                  \
    (p)++;                                                              \
} while (0)
38
/*
 * SKIP_TAGGED_CHAR(p) -- advance p past one tagged character without
 * decoding it.
 *
 *   p  modifiable pointer lvalue; it is expanded several times, so it must
 *      be free of side effects.  NOTE(review): the range comparisons assume
 *      the pointed-to bytes are unsigned -- confirm against callers.
 *
 * A leading byte in 0x80-0xbf is a tag: it takes two bytes when the lead is
 * in 0xa0-0xaf and the following byte is in 0x80-0xbf, and one byte in every
 * other case (the following byte is inspected only for 0xa0-0xaf leads).
 * Any other leading byte is passed on to UTF8_SKIP.
 *
 * The macro declares no local variables, so it cannot capture identifiers
 * used in the argument, and every use of p is parenthesized.
 */
#define SKIP_TAGGED_CHAR(p) do {                                        \
  if (*(p) >= 0x80 && *(p) < 0xc0)                                      \
    {                                                                   \
      if (*(p) >= 0xa0 && *(p) < 0xb0 &&                                \
          (p)[1] >= 0x80 && (p)[1] < 0xc0)                              \
        (p) += 2;                                                       \
      else                                                              \
        (p)++;                                                          \
    }                                                                   \
  else                                                                  \
    UTF8_SKIP((p));                                                     \
} while (0)
49
50 static inline uns
51 bget_tagged_char(struct fastbuf *f)
52 {
53   uns u = bgetc(f);
54   if ((int)u < 0x80)
55     ;
56   else if (u < 0xc0)
57     {
58       if (u >= 0xb0)
59         {
60           ASSERT(u == 0xb0);
61           u += 0x80020000;
62         }
63       else if (u >= 0xa0)
64         {
65           uns v = bgetc(f);
66           ASSERT(v >= 0x80 && v <= 0xbf);
67           u = 0x80010000 + ((u & 0x0f) << 6) + (v & 0x3f);
68         }
69       else
70         u += 0x80000000;
71     }
72   else
73     {
74       bungetc(f);
75       u = bget_utf8(f);
76     }
77   return u;
78 }
79
80 #endif