2 * UCW Library -- Coding of u64 into variable length bytecode.
4 * (c) 2013 Tomas Valla <tom@ucw.cz>
6 * This software may be freely distributed and used according to the terms
7 * of the GNU Lesser General Public License.
14 * The encoding works in the following way:
16 * First byte Stored values
19 * 10xxxxxx +1 byte 14 bits, value shifted by 2^7
20 * 110xxxxx +2 bytes 21 bits, value shifted by 2^7+2^14
21 * 1110xxxx +3 bytes 28 bits, value shifted by 2^7+2^14+2^21
23 * 11111110 +7 bytes 56 bits, value shifted by 2^7+2^14+2^21+2^28+2^35+2^42
24 * 11111111 +8 bytes full 64 bits, value shifted by 2^7+2^14+2^21+2^28+2^35+2^42+2^56
26 * The values are stored in bigendian to allow lexicographic sorting.
27 * The encoding and algorithms are aimed to be optimised for storing shorter numbers.
29 * There is an invalid combination: if the first two bytes are 0xff 0xff.
33 #define VARINT_SHIFT_L1 0x80
34 #define VARINT_SHIFT_L2 0x4080
35 #define VARINT_SHIFT_L3 0x204080
36 #define VARINT_SHIFT_L4 0x10204080
37 #define VARINT_SHIFT_L5 0x0810204080
38 #define VARINT_SHIFT_L6 0x040810204080
39 #define VARINT_SHIFT_L7 0x02040810204080
40 #define VARINT_SHIFT_L8 0x0102040810204080
43 * The following code is already unrolled, to maximize the speed.
47 #define PUTB(j,i) p[j] = (byte)((u >> (8*(i))));
48 #define PUTB4(b) PUTB(0,b-1) PUTB(1,b-2) PUTB(2,b-3) PUTB(3,b-4)
50 /* for internal use only, need the length > 4 */
51 uns varint_put_big(byte *p, u64 u);
52 const byte *varint_get_big(const byte *p, u64 *r);
55 * Encode u64 value u into byte sequence p.
56 * Returns the number of bytes used (at least 1 and at most 9).
58 static inline uns varint_put(byte *p, u64 u)
60 if (u < VARINT_SHIFT_L1) {
64 if (u < VARINT_SHIFT_L2) {
71 if (u < VARINT_SHIFT_L3) {
78 if (u < VARINT_SHIFT_L4) {
84 return varint_put_big(p, u);
90 * Decode u64 value from a byte sequence p into res.
91 * The invalid combination is not detected.
92 * Returns pointer to the next position after the sequence.
94 static inline const byte *varint_get(const byte *p, u64 *res)
103 *res = (p[0] & 0x3f)<<8 | p[1];
104 *res += VARINT_SHIFT_L1;
108 *res = (p[0] & 0x1f)<<16 | p[1]<<8 | p[2];
109 *res += VARINT_SHIFT_L2;
113 *res = (p[0] & 0xf)<<24 | p[1]<<16 | p[2]<<8 | p[3];
114 *res += VARINT_SHIFT_L3;
117 return varint_get_big(p, res);
121 * Decode at most u32 value from a byte sequence p into u32 res.
122 * The invalid combination is not detected.
123 * Returns pointer to the next position after the sequence.
124 * If the stored number cannot fit into u32, fill res by 0xffffffff and returns p.
126 static inline const byte *varint_get32(const byte *p, u32 *res)
135 *res = (p[0] & 0x3f)<<8 | p[1];
136 *res += VARINT_SHIFT_L1;
140 *res = (p[0] & 0x1f)<<16 | p[1]<<8 | p[2];
141 *res += VARINT_SHIFT_L2;
145 *res = (p[0] & 0xf)<<24 | p[1]<<16 | p[2]<<8 | p[3];
146 *res += VARINT_SHIFT_L3;
149 u64 r = (u64)(p[0] & 7)<<32 | (u64)p[1]<<24 | (u64)p[2]<<16 | (u64)p[3]<<8 | (u64)p[4];
150 r += VARINT_SHIFT_L4;
151 if (r > 0xffffffff) {
159 /** Does the byte sequence p code the invalid sequence? **/
160 static inline int varint_invalid(const byte *p)
162 return p[0] == 0xff && p[1] == 0xff;
166 * Store the invalid sequence.
167 * Returns always 2 (2 bytes were used, to be consistent with varint_put).
169 static inline uns varint_put_invalid(byte *p)
175 /** Compute the length of encoding in bytes from the first byte hdr of the encoding. **/
176 static inline uns varint_len(const byte hdr)
184 if (b & 0xf0) { l += 4; b &= 0xf0; }
185 if (b & 0xcc) { l += 2; b &= 0xcc; }
191 /** Compute the number of bytes needed to store the value u. **/
192 static inline uns varint_space(u64 u)
194 if (u < VARINT_SHIFT_L1)
196 if (u < VARINT_SHIFT_L2)
198 if (u < VARINT_SHIFT_L3)
200 if (u < VARINT_SHIFT_L4)
202 if (u < VARINT_SHIFT_L5)
204 if (u < VARINT_SHIFT_L6)
206 if (u < VARINT_SHIFT_L7)
208 if (u < VARINT_SHIFT_L8)