Written by Tomas Valla.
log log-stream log-file log-syslog log-conf tbf \
conf-context conf-alloc conf-dump conf-input conf-intr conf-journal conf-parse conf-section conf-getopt \
ipaccess \
- fastbuf ff-binary ff-string ff-printf ff-unicode ff-stkstring \
+ fastbuf ff-binary ff-string ff-printf ff-unicode ff-varint ff-stkstring \
fb-file fb-mem fb-temp tempfile fb-mmap fb-limfd fb-buffer fb-grow fb-pool fb-atomic fb-param fb-socket fb-multi \
- char-cat char-upper char-lower unicode stkstring \
+ char-cat char-upper char-lower unicode varint stkstring \
wildmatch regex \
prime primetable random \
time-stamp time-timer time-conf \
lib.h log.h threads.h time.h \
mempool.h eltpool.h \
clists.h slists.h simple-lists.h \
- string.h stkstring.h unicode.h chartype.h regex.h \
+ string.h stkstring.h unicode.h varint.h chartype.h regex.h \
wildmatch.h \
unaligned.h \
bbuf.h gbuf.h gary.h bitarray.h bitsig.h \
prime.h \
bitops.h \
conf.h getopt.h ipaccess.h \
- fastbuf.h io.h ff-unicode.h ff-binary.h \
+ fastbuf.h io.h ff-unicode.h ff-varint.h ff-binary.h \
url.h \
mainloop.h \
process.h \
$(o)/ucw/hashfunc.o $(o)/ucw/hashfunc.oo: CFLAGS += -funroll-loops
$(o)/ucw/lizard.o: CFLAGS += $(COPT2) -funroll-loops
+$(o)/ucw/ff-varint-t: $(LIBUCW)
+$(o)/ucw/varint-t: $(LIBUCW)
$(o)/ucw/conf-test: $(o)/ucw/conf-test.o $(LIBUCW)
$(o)/ucw/hash-test: $(o)/ucw/hash-test.o $(LIBUCW)
$(o)/ucw/hashfunc-test: $(o)/ucw/hashfunc-test.o $(LIBUCW)
$(o)/ucw/ipaccess-test: $(o)/ucw/ipaccess-test.o $(LIBUCW)
$(o)/ucw/trie-test: $(o)/ucw/trie-test.o $(LIBUCW)
-TESTS+=$(addprefix $(o)/ucw/,regex.test unicode.test hash-test.test mempool.test stkstring.test \
- slists.test bbuf.test kmp-test.test getopt.test ff-unicode.test eltpool.test \
+TESTS+=$(addprefix $(o)/ucw/,varint.test regex.test unicode.test hash-test.test mempool.test stkstring.test \
+ slists.test bbuf.test kmp-test.test getopt.test ff-unicode.test ff-varint.test eltpool.test \
fb-socket.test trie-test.test string.test sha1.test asort-test.test binheap-test.test \
redblack-test.test fb-file.test fb-grow.test fb-pool.test fb-atomic.test \
fb-limfd.test fb-temp.test fb-mem.test fb-buffer.test fb-mmap.test fb-multi.test url.test strtonum-test.test \
gary.test time.test crc.test signames.test md5.test)
+$(o)/ucw/varint.test: $(o)/ucw/varint-t
$(o)/ucw/regex.test: $(o)/ucw/regex-t
$(o)/ucw/unicode.test: $(o)/ucw/unicode-t
$(o)/ucw/hash-test.test: $(o)/ucw/hash-test
$(o)/ucw/bbuf.test: $(o)/ucw/bbuf-t
$(o)/ucw/getopt.test: $(o)/ucw/getopt-t
$(o)/ucw/ff-unicode.test: $(o)/ucw/ff-unicode-t
+$(o)/ucw/ff-varint.test: $(o)/ucw/ff-varint-t
$(o)/ucw/eltpool.test: $(o)/ucw/eltpool-t
$(o)/ucw/string.test: $(o)/ucw/str-hex-t $(o)/ucw/str-esc-t $(o)/ucw/str-fix-t
$(o)/ucw/sha1.test: $(o)/ucw/sha1-t $(o)/ucw/sha1-hmac-t
--- /dev/null
+/*
+ * UCW Library: Reading and writing Varints on Fastbuf Streams
+ *
+ * (c) 2013 Tomas Valla <tom@ucw.cz>
+ *
+ * This software may be freely distributed and used according to the terms
+ * of the GNU Lesser General Public License.
+ */
+
+#include <ucw/lib.h>
+#include <ucw/fastbuf.h>
+#include <ucw/varint.h>
+#include <ucw/ff-varint.h>
+#include <ucw/ff-binary.h>
+
+
+u64 bget_varint_slow(struct fastbuf *b, u64 repl)
+{
+ uns h = bgetc(b);
+ uns l = varint_len(h);
+ byte buf[l];
+ buf[0] = h;
+ l--;
+ if (breadb(b, buf+1, l) < l)
+ return repl;
+ varint_get(buf, &repl);
+ return repl;
+}
+
+void bput_varint_slow(struct fastbuf *b, u64 u)
+{
+ byte buf[9];
+ uns l = varint_put(buf, u);
+ bwrite(b, buf, l);
+}
+
+#ifdef TEST
+
+#include <stdlib.h>
+#include <stdio.h>
+
+int main(int argc, char **argv)
+{
+ #define FUNCS \
+ F(BGET_VARINT) F(BPUT_VARINT)
+
+ enum {
+ #define F(x) FUNC_##x,
+ FUNCS
+ #undef F
+ };
+ char *names[] = {
+ #define F(x) [FUNC_##x] = #x,
+ FUNCS
+ #undef F
+ };
+
+ uns func = ~0U;
+ if (argc > 1)
+ for (uns i = 0; i < ARRAY_SIZE(names); i++)
+ if (!strcasecmp(names[i], argv[1]))
+ func = i;
+ if (!~func) {
+ fprintf(stderr, "Invalid usage!\n");
+ return 1;
+ }
+
+ struct fastbuf *b = fbgrow_create(8);
+ switch (func) {
+ uns u;
+ u64 r;
+ int i;
+ case FUNC_BGET_VARINT:
+ while (scanf("%x", &u) == 1)
+ bputc(b, u);
+ fbgrow_rewind(b);
+ while (bpeekc(b) >= 0) {
+ if (btell(b))
+ putchar(' ');
+ r = bget_varint_slow(b, ~0LLU);
+ printf("%lx", r);
+ }
+ putchar('\n');
+ break;
+
+ case FUNC_BPUT_VARINT:
+ i = 0;
+ while (scanf("%lx", &r) == 1)
+ bput_varint_slow(b, r);
+ fbgrow_rewind(b);
+ while (bpeekc(b) >= 0) {
+ if (i++)
+ putchar(' ');
+ printf("%02x", bgetc(b));
+ }
+ putchar('\n');
+ break;
+ default:
+ ASSERT(0);
+ }
+
+ bclose(b);
+ return 0;
+}
+
+#endif
--- /dev/null
+/*
+ * UCW Library: Reading and writing Varints on Fastbuf Streams
+ *
+ * (c) 2013 Tomas Valla <tom@ucw.cz>
+ *
+ * This software may be freely distributed and used according to the terms
+ * of the GNU Lesser General Public License.
+ */
+
+#ifndef _UCW_FF_VARINT_H
+#define _UCW_FF_VARINT_H
+
+#include <ucw/fastbuf.h>
+#include <ucw/varint.h>
+
+u64 bget_varint_slow(struct fastbuf *b, u64 repl);
+void bput_varint_slow(struct fastbuf *b, u64 u);
+
+/**
+ * Reads u64 encoded as varint from the fastbuf b.
+ * If the read is unsuccessful, returns repl.
+ **/
+static inline u64 bget_varint_repl(struct fastbuf *b, u64 repl)
+{
+ uns l;
+ if (bavailr(b) >= 1) {
+ l = varint_len(*b->bptr);
+ if (bavailr(b) >= l) {
+ b->bptr += l;
+ varint_get(b->bptr, &repl);
+ return repl;
+ }
+ }
+ return bget_varint_slow(b, repl);
+}
+
+/**
+ * Reads u64 encoded as varint from the fastbuf b.
+ * If the read is unsuccessful, returns ~0LLU.
+ **/
+static inline int bget_varint(struct fastbuf *b)
+{
+ return bget_varint_repl(b, ~0LLU);
+}
+
+/** Writes u64 u encoded as varint to the fastbuf b. **/
+static inline void bput_varint(struct fastbuf *b, u64 u)
+{
+ uns l = varint_space(u);
+ if (bavailw(b) >= l)
+ b->bptr += varint_put(b->bptr, u);
+ else
+ bput_varint_slow(b, u);
+}
+
+#endif
--- /dev/null
+# Tests for the FF-Varint module
+
+Name: bput_varint
+Run: ../obj/ucw/ff-varint-t bput_varint
+In: deadbeef 2222 feeddeadbabebeef
+Out: f0 ce 8d 7e 6f a1 a2 ff fd eb da a5 aa 9e 7e 6f
+
+Name: bget_varint
+Run: ../obj/ucw/ff-varint-t bget_varint
+In: ff fd eb da a5 aa 9e 7e 6f f0 ce 8d 7e 6f a1 a2
+Out: feeddeadbabebeef deadbeef 2222
--- /dev/null
+/*
+ * UCW Library -- Coding of u64 into variable length bytecode.
+ *
+ * (c) 2013 Tomas Valla <tom@ucw.cz>
+ *
+ * This software may be freely distributed and used according to the terms
+ * of the GNU Lesser General Public License.
+ */
+
+#include <ucw/lib.h>
+#include <ucw/varint.h>
+
+#define PUTB(j,i) p[j] = (byte)((u >> (8*(i))));
+#define PUTB4(b) PUTB(0,b-1) PUTB(1,b-2) PUTB(2,b-3) PUTB(3,b-4)
+uns varint_put_big(byte *p, u64 u)
+{
+ ASSERT(u >= VARINT_SHIFT_L4);
+
+ if (u < VARINT_SHIFT_L5) {
+ u -= VARINT_SHIFT_L4;
+ PUTB4(5)
+ p[0] |= 0xf0;
+ PUTB(4,0);
+ return 5;
+ }
+ if (u < VARINT_SHIFT_L6) {
+ u -= VARINT_SHIFT_L5;
+ PUTB4(6)
+ PUTB(4,1) PUTB(5,0)
+ p[0] |= 0xf8;
+ return 6;
+ }
+ if (u < VARINT_SHIFT_L7) {
+ u -= VARINT_SHIFT_L6;
+ PUTB4(7)
+ p[0] |= 0xfc;
+ PUTB(4,2) PUTB(5,1) PUTB(6,0)
+ return 7;
+ }
+ if (u < VARINT_SHIFT_L8) {
+ u -= VARINT_SHIFT_L7;
+ p[0] = 0xfe;
+ PUTB(1,6) PUTB(2,5) PUTB(3,4) PUTB(4,3)
+ PUTB(5,2) PUTB(6,1) PUTB(7,0)
+ return 8;
+ }
+ u -= VARINT_SHIFT_L8;
+ p[0] = 0xff;
+ PUTB(1,7) PUTB(2,6) PUTB(3,5) PUTB(4,4)
+ PUTB(5,3) PUTB(6,2) PUTB(7,1) PUTB(8,0)
+ return 9;
+}
+
+const byte *varint_get_big(const byte *p, u64 *r)
+{
+ ASSERT((*p & 0xf0) == 0xf0);
+
+ byte h = ~*p;
+ if (h & 0x08) {
+ *r = (u64)(p[0] & 7)<<32 | (u64)p[1]<<24 | (u64)p[2]<<16 | (u64)p[3]<<8 | (u64)p[4];
+ *r += VARINT_SHIFT_L4;
+ return p+5;
+ }
+ if (h & 0x04) {
+ *r = (u64)(p[0] & 3)<<40 | (u64)p[1]<<32 | (u64)p[2]<<24 | (u64)p[3]<<16 | (u64)p[4]<<8 | (u64)p[5];
+ *r += VARINT_SHIFT_L5;
+ return p+6;
+ }
+ if (h & 0x02) {
+ *r = (u64)(p[0] & 1)<<48 | (u64)p[1]<<40 | (u64)p[2]<<32 | (u64)p[3]<<24 | (u64)p[4]<<16 | (u64)p[5]<<8 | (u64)p[6];
+ *r += VARINT_SHIFT_L6;
+ return p+7;
+ }
+ if (h & 0x01) {
+ *r = (u64)p[1]<<48 | (u64)p[2]<<40 | (u64)p[3]<<32 | (u64)p[4]<<24 | (u64)p[5]<<16 | (u64)p[6]<<8 | (u64)p[7];
+ *r += VARINT_SHIFT_L7;
+ return p+8;
+ }
+ *r = ((u64)p[1] << 56) | ((u64)p[2] << 48) | ((u64)p[3] << 40) | ((u64)p[4] << 32) | ((u64)p[5] << 24) | ((u64)p[6] << 16) | ((u64)p[7] << 8) | (u64)p[8];
+ *r += VARINT_SHIFT_L8;
+ return p+9;
+}
+
+
+#ifdef TEST
+
+#include <string.h>
+#include <stdio.h>
+
+int main(int argc, char **argv UNUSED)
+{
+ byte buf[16] = { 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa };
+ u64 u;
+
+ if (scanf("%lx", &u) != 1) {
+ fprintf(stderr, "Invalid usage!\n");
+ return 1;
+ }
+ varint_put(buf, u);
+ int l = varint_len(buf[0]);
+ varint_get(buf, &u);
+ printf("%u %d %jx", varint_space(u), l, (uintmax_t) u);
+ if (argc > 1) {
+ for (int i=0; i<l; i++)
+ printf(" %x", buf[i]);
+ }
+ printf("\n");
+
+ return 0;
+}
+
+#endif
--- /dev/null
+/*
+ * UCW Library -- Coding of u64 into variable length bytecode.
+ *
+ * (c) 2013 Tomas Valla <tom@ucw.cz>
+ *
+ * This software may be freely distributed and used according to the terms
+ * of the GNU Lesser General Public License.
+ */
+
+#ifndef _UCW_VARINT_H
+#define _UCW_VARINT_H
+
+/***
+ * The encoding works in the following way:
+ *
+ * First byte Stored values
+ *
+ * 0xxxxxxx 7 bits
+ * 10xxxxxx +1 byte 14 bits, value shifted by 2^7
+ * 110xxxxx +2 bytes 21 bits, value shifted by 2^7+2^14
+ * 1110xxxx +3 bytes 28 bits, value shifted by 2^7+2^14+2^21
+ * ....
+ * 11111110 +7 bytes 56 bits, value shifted by 2^7+2^14+2^21+2^28+2^35+2^42
+ * 11111111 +8 bytes full 64 bits, value shifted by 2^7+2^14+2^21+2^28+2^35+2^42+2^56
+ *
+ * The values are stored in bigendian to allow lexicographic sorting.
+ * The encoding and algorithms are aimed to be optimised for storing shorter numbers.
+ *
+ * There is an invalid combination: if the first two bytes are 0xff 0xff.
+ ***/
+
+
+#define VARINT_SHIFT_L1 0x80
+#define VARINT_SHIFT_L2 0x4080
+#define VARINT_SHIFT_L3 0x204080
+#define VARINT_SHIFT_L4 0x10204080
+#define VARINT_SHIFT_L5 0x0810204080
+#define VARINT_SHIFT_L6 0x040810204080
+#define VARINT_SHIFT_L7 0x02040810204080
+#define VARINT_SHIFT_L8 0x0102040810204080
+
+/*
+ * The following code is already unrolled, to maximize the speed.
+ * Yes, it is ugly.
+ */
+
+#define PUTB(j,i) p[j] = (byte)((u >> (8*(i))));
+#define PUTB4(b) PUTB(0,b-1) PUTB(1,b-2) PUTB(2,b-3) PUTB(3,b-4)
+
+/* for internal use only, need the length > 4 */
+uns varint_put_big(byte *p, u64 u);
+const byte *varint_get_big(const byte *p, u64 *r);
+
+/**
+ * Encode u64 value u into byte sequence p.
+ * Returns the number of bytes used (at least 1 and at most 9).
+ **/
+static inline uns varint_put(byte *p, u64 u)
+{
+ if (u < VARINT_SHIFT_L1) {
+ p[0] = (byte)u;
+ return 1;
+ }
+ if (u < VARINT_SHIFT_L2) {
+ u -= VARINT_SHIFT_L1;
+ PUTB(0,1)
+ p[0] |= 0x80;
+ PUTB(1,0)
+ return 2;
+ }
+ if (u < VARINT_SHIFT_L3) {
+ u -= VARINT_SHIFT_L2;
+ PUTB(0,2)
+ p[0] |= 0xc0;
+ PUTB(1,1) PUTB(2,0)
+ return 3;
+ }
+ if (u < VARINT_SHIFT_L4) {
+ u -= VARINT_SHIFT_L3;
+ PUTB4(4)
+ p[0] |= 0xe0;
+ return 4;
+ }
+ return varint_put_big(p, u);
+}
+#undef GETB
+#undef GETB4
+
+/**
+ * Decode u64 value from a byte sequence p into res.
+ * The invalid combination is not detected.
+ * Returns pointer to the next position after the sequence.
+ **/
+static inline const byte *varint_get(const byte *p, u64 *res)
+{
+ byte h = ~*p;
+
+ if (h & 0x80) {
+ *res = p[0];
+ return p+1;
+ }
+ if (h & 0x40) {
+ *res = (p[0] & 0x3f)<<8 | p[1];
+ *res += VARINT_SHIFT_L1;
+ return p+2;
+ }
+ if (h & 0x20) {
+ *res = (p[0] & 0x1f)<<16 | p[1]<<8 | p[2];
+ *res += VARINT_SHIFT_L2;
+ return p+3;
+ }
+ if (h & 0x10) {
+ *res = (p[0] & 0xf)<<24 | p[1]<<16 | p[2]<<8 | p[3];
+ *res += VARINT_SHIFT_L3;
+ return p+4;
+ }
+ return varint_get_big(p, res);
+}
+
+/**
+ * Decode at most u32 value from a byte sequence p into u32 res.
+ * The invalid combination is not detected.
+ * Returns pointer to the next position after the sequence.
+ * If the stored number cannot fit into u32, fill res by 0xffffffff and returns p.
+ **/
+static inline const byte *varint_get32(const byte *p, u32 *res)
+{
+ byte h = ~*p;
+
+ if (h & 0x80) {
+ *res = p[0];
+ return p+1;
+ }
+ if (h & 0x40) {
+ *res = (p[0] & 0x3f)<<8 | p[1];
+ *res += VARINT_SHIFT_L1;
+ return p+2;
+ }
+ if (h & 0x20) {
+ *res = (p[0] & 0x1f)<<16 | p[1]<<8 | p[2];
+ *res += VARINT_SHIFT_L2;
+ return p+3;
+ }
+ if (h & 0x10) {
+ *res = (p[0] & 0xf)<<24 | p[1]<<16 | p[2]<<8 | p[3];
+ *res += VARINT_SHIFT_L3;
+ return p+4;
+ }
+ u64 r = (u64)(p[0] & 7)<<32 | (u64)p[1]<<24 | (u64)p[2]<<16 | (u64)p[3]<<8 | (u64)p[4];
+ r += VARINT_SHIFT_L4;
+ if (r > 0xffffffff) {
+ *res = 0xffffffff;
+ return p;
+ }
+ *res = r;
+ return p+5;
+}
+
+/** Does the byte sequence p code the invalid sequence? **/
+static inline int varint_invalid(const byte *p)
+{
+ return p[0] == 0xff && p[1] == 0xff;
+}
+
+/**
+ * Store the invalid sequence.
+ * Returns always 2 (2 bytes were used, to be consistent with varint_put).
+ **/
+static inline uns varint_put_invalid(byte *p)
+{
+ p[0] = p[1] = 0xff;
+ return 2;
+}
+
+/** Compute the length of encoding in bytes from the first byte hdr of the encoding. **/
+static inline uns varint_len(const byte hdr)
+{
+ byte b = ~hdr;
+
+ uns l = 0;
+ if (!b)
+ l = -1;
+ else {
+ if (b & 0xf0) { l += 4; b &= 0xf0; }
+ if (b & 0xcc) { l += 2; b &= 0xcc; }
+ if (b & 0xaa) l++;
+ }
+ return 8 - l;
+}
+
+/** Compute the number of bytes needed to store the value u. **/
+static inline uns varint_space(u64 u)
+{
+ if (u < VARINT_SHIFT_L1)
+ return 1;
+ if (u < VARINT_SHIFT_L2)
+ return 2;
+ if (u < VARINT_SHIFT_L3)
+ return 3;
+ if (u < VARINT_SHIFT_L4)
+ return 4;
+ if (u < VARINT_SHIFT_L5)
+ return 5;
+ if (u < VARINT_SHIFT_L6)
+ return 6;
+ if (u < VARINT_SHIFT_L7)
+ return 7;
+ if (u < VARINT_SHIFT_L8)
+ return 8;
+ return 9;
+}
+
+#endif
--- /dev/null
+# Tests for the Varint module
+# In format: <hexnumber>
+# Out format: <space> <len> <hexnumber>
+# when run as varint-t full:
+# Out format: <space> <len> <hexnumber> <hexbyte0> <hexbyte1> ...
+
+Name: varint (1)
+Run: ../obj/ucw/varint-t full
+In: 41
+Out: 1 1 41 41
+
+Name: varint (2)
+Run: ../obj/ucw/varint-t full
+In: 80
+Out: 2 2 80 80 0
+
+Name: varint (3)
+Run: ../obj/ucw/varint-t full
+In: ab
+Out: 2 2 ab 80 2b
+
+Name: varint (4)
+Run: ../obj/ucw/varint-t full
+In: 0
+Out: 1 1 0 0
+
+Name: varint (5)
+Run: ../obj/ucw/varint-t full
+In: 2222
+Out: 2 2 2222 a1 a2
+
+Name: varint (6)
+Run: ../obj/ucw/varint-t
+In: abcd
+Out: 3 3 abcd
+
+Name: varint (7)
+Run: ../obj/ucw/varint-t
+In: abcde
+Out: 3 3 abcde
+
+Name: varint (8)
+Run: ../obj/ucw/varint-t
+In: 1020407f
+Out: 4 4 1020407f
+
+Name: varint (9)
+Run: ../obj/ucw/varint-t full
+In: deadbeef
+Out: 5 5 deadbeef f0 ce 8d 7e 6f
+
+Name: varint (10)
+Run: ../obj/ucw/varint-t
+In: 4081020400f
+Out: 6 6 4081020400f
+
+Name: varint (11)
+Run: ../obj/ucw/varint-t
+In: 20408010204070
+Out: 8 8 20408010204070
+
+Name: varint (12)
+Run: ../obj/ucw/varint-t
+In: feeddeadbabebeef
+Out: 9 9 feeddeadbabebeef
+
+Name: varint (13)
+Run: ../obj/ucw/varint-t
+In: 102040810204080
+Out: 9 9 102040810204080