From: Martin Mares
Date: Thu, 25 Apr 2002 17:37:11 +0000 (+0000)
Subject: Implemented base-224 encoder and decoder.
X-Git-Tag: holmes-import~1437
X-Git-Url: http://mj.ucw.cz/gitweb/?a=commitdiff_plain;h=0eea3757c0ee4462a641562ba03c893548971d8f;p=libucw.git

Implemented base-224 encoder and decoder.
---

diff --git a/lib/Makefile b/lib/Makefile
index 6c8ec220..01f79634 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -8,7 +8,7 @@ SHLIB_OBJS=alloc.o alloc_str.o ctmatch.o db.o fastbuf.o fb-file.o fb-mem.o lists
 	prime.o random.o realloc.o regex.o timer.o url.o wildmatch.o \
 	wordsplit.o str_ctype.o str_upper.o bucket.o conf.o object.o sorter.o \
 	finger.o proctitle.o ipaccess.o profile.o bitsig.o randomkey.o \
-	hash-string.o hash-istring.o custom.o
+	hash-string.o hash-istring.o custom.o base224.o
 
 obj/lib/libsh.a: $(addprefix obj/lib/,$(SHLIB_OBJS))
 
diff --git a/lib/base224.c b/lib/base224.c
new file mode 100644
index 00000000..d42b34d5
--- /dev/null
+++ b/lib/base224.c
@@ -0,0 +1,233 @@
+/*
+ *  Base 224 Encoding & Decoding
+ *
+ *  (c) 2002 Martin Mares
+ *
+ *  The `base-224' encoding transforms general sequences of bytes
+ *  to sequences of non-control 8-bit characters (0x20-0xff). Since
+ *  224 and 256 are incompatible bases (there is no k,l: 224^k=256^l)
+ *  and we want to avoid lengthy calculations, we cheat a bit:
+ *
+ *  Each base-224 digit can be represented as a (base-7 digit, base-32 digit)
+ *  pair, so we pass the lower 5 bits directly and use a base-7 encoder
+ *  for the upper part. We process blocks of 39 bits and encode them
+ *  to 5 base-224 digits: we take 5x5 bits as the lower halves and convert
+ *  the remaining 14 bits in base-7 (2^14 = 16384 < 16807 = 7^5) to get
+ *  the 5 base-7 upper parts we need (with a little redundancy). Little endian
+ *  ordering is used to make handling of partial blocks easy.
+ *
+ *  We transform 39 source bits to 40 destination bits, stretching the data
+ *  by 1/39 = approx. 2.56%.
+ */
+
+#undef LOCAL_DEBUG
+
+#include "lib/lib.h"
+#include "lib/base224.h"
+
+/*
+ *  Encode one block of up to 39 bits, passed in hi:lo (bit 0 of lo is the
+ *  first source bit), to 5 base-224 digits stored at w[0..4]. All five
+ *  bytes are always written, even when the block is only partially filled.
+ */
+static void
+encode_block(byte *w, u32 hi, u32 lo)
+{
+  uns x, y;
+
+  /*
+   *  Splitting of the 39-bit block: [a-e][0-4] are the base-32 digits, *'s are used for base-7.
+   *  +----------------+----------------+----------------+----------------+----------------+
+   *  +00******e4e3e2e1|e0******d4d3d2d1|d0******c4c3c2c1|c0******b4b3b2b1|b0****a4a3a2a1a0|
+   *  +----------------+----------------+----------------+----------------+----------------+
+   */
+
+  w[0] = lo & 0x1f;
+  w[1] = (lo >> 7) & 0x1f;
+  w[2] = (lo >> 15) & 0x1f;
+  w[3] = (lo >> 23) & 0x1f;
+  w[4] = (lo >> 31) | ((hi << 1) & 0x1e);
+  /* Gather the 14 bits marked by *'s; they are converted to base 7 below */
+  x =   (lo >> 5)  & 0x0003
+      | (lo >> 10) & 0x001c
+      | (lo >> 15) & 0x00e0
+      | (lo >> 20) & 0x0700
+      | (hi << 7)  & 0x3800;
+  DBG("<<< h=%08x l=%08x x=%d", hi, lo, x);
+  /* Each output byte is 0x20 + 32*(base-7 digit) + (base-32 digit) */
+  for (y=0; y<5; y++)
+    {
+      w[y] += 0x20 + ((x % 7) << 5);
+      x /= 7;
+    }
+}
+
+/*
+ *  Encode len bytes from src to base-224 at dest and return the number
+ *  of bytes produced. Source bytes are packed little-endian into 39-bit
+ *  blocks; a trailing partial block is encoded as well, but only the
+ *  digits carrying data are counted in the result. Since encode_block()
+ *  always stores 5 bytes, dest must have room for BASE224_ENC_LENGTH(len).
+ */
+uns
+base224_encode(byte *dest, byte *src, uns len)
+{
+  u32 lo=0, hi=0;                       /* 64-bit buffer accumulating input bits */
+  uns i=0;                              /* How many source bits do we have buffered */
+  u32 x;
+  byte *w=dest;
+
+  while (len--)
+    {
+      x = *src++;
+      if (i < 32)
+        {
+          lo |= x << i;
+          if (i > 24)
+            hi |= x >> (32-i);
+        }
+      else
+        hi |= x << (i-32);
+      i += 8;
+      if (i >= 39)
+        {
+          encode_block(w, hi, lo);
+          w += 5;
+          lo = hi >> 7;                 /* Keep the bits beyond the 39 just encoded */
+          hi = 0;
+          i -= 39;
+        }
+    }
+  if (i)                                /* Partial block */
+    {
+      encode_block(w, hi, lo);
+      w += (i+8)/8;                     /* Just check logarithms if you want to understand */
+    }
+  return w - dest;
+}
+
+/*
+ *  Decode len base-224 digits from src to dest and return the number of
+ *  bytes produced. Every input byte must be >= 0x20 (checked by ASSERT).
+ *  A block of n digits contributes 8*n-1 bits (39 for a full block of 5);
+ *  only complete bytes are stored to dest.
+ */
+uns
+base224_decode(byte *dest, byte *src, uns len)
+{
+  u32 hi=0, lo=0;                       /* 64-bit buffer accumulating output bits */
+  uns i=0;                              /* How many bits do we have accumulated */
+  u32 h, l;                             /* Decoding of the current block */
+  uns x;                                /* base-7 part of the current block */
+  uns len0;
+  byte *start = dest;
+
+  do
+    {
+      if (!len)
+        break;
+      len0 = len;
+
+      ASSERT(*src >= 0x20);             /* byte 0 */
+      h = 0;
+      l = *src & 0x1f;
+      x = (*src++ >> 5) - 1;
+      if (!--len)
+        goto blockend;
+
+      ASSERT(*src >= 0x20);             /* byte 1 */
+      l |= (*src & 0x1f) << 7;
+      x += ((*src++ >> 5) - 1) * 7;
+      if (!--len)
+        goto blockend;
+
+      ASSERT(*src >= 0x20);             /* byte 2 */
+      l |= (*src & 0x1f) << 15;
+      x += ((*src++ >> 5) - 1) * 7*7;
+      if (!--len)
+        goto blockend;
+
+      ASSERT(*src >= 0x20);             /* byte 3 */
+      l |= (*src & 0x1f) << 23;
+      x += ((*src++ >> 5) - 1) * 7*7*7;
+      if (!--len)
+        goto blockend;
+
+      ASSERT(*src >= 0x20);             /* byte 4 */
+      l |= (u32) *src << 31;            /* Unsigned shift: int << 31 would overflow */
+      h = (*src & 0x1f) >> 1;
+      x += ((*src++ >> 5) - 1) * 7*7*7*7;
+      --len;
+
+    blockend:
+      len0 -= len;
+      l |= ((x & 0x0003) <<  5)         /* Decode base-7 */
+        |  ((x & 0x001c) << 10)
+        |  ((x & 0x00e0) << 15)
+        |  ((x & 0x0700) << 20);
+      h |=  (x & 0x3800) >> 7;
+
+      DBG("<<< i=%d h=%08x l=%08x x=%d len0=%d", i, h, l, x, len0);
+      lo |= l << i;
+      hi |= h << i;
+      if (i)
+        hi |= l >> (32-i);
+      i += len0*8 - 1;
+
+      while (i >= 8)
+        {
+          *dest++ = lo;
+          lo = (lo >> 8U) | (hi << 24);
+          hi >>= 8;
+          i -= 8;
+        }
+    }
+  while (len0 == 5);
+  return dest-start;
+}
+
+#ifdef TEST
+
+#include <stdio.h>
+
+/*
+ *  Test filter: encodes stdin to stdout; with any argument it decodes instead.
+ */
+int main(int argc, char **argv)
+{
+#if 0
+  byte i[] = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39 };
+  byte o[256], w[256];
+  uns l;
+  l = base224_encode(o, i, sizeof(i));
+  fwrite(o, 1, l, stdout);
+  fputc(0xaa, stdout);
+  l = base224_decode(w, o, l);
+  fwrite(w, 1, l, stdout);
+#else
+  if (argc > 1)
+    {
+      byte i[BASE224_OUT_CHUNK*17], o[BASE224_IN_CHUNK*17];
+      uns l;
+      while ((l = fread(i, 1, sizeof(i), stdin)))
+        {
+          l = base224_decode(o, i, l);
+          fwrite(o, 1, l, stdout);
+        }
+    }
+  else
+    {
+      byte i[BASE224_IN_CHUNK*23], o[BASE224_OUT_CHUNK*23];
+      uns l;
+      while ((l = fread(i, 1, sizeof(i), stdin)))
+        {
+          l = base224_encode(o, i, l);
+          fwrite(o, 1, l, stdout);
+        }
+    }
+#endif
+
+  return 0;
+}
+
+#endif
diff --git a/lib/base224.h b/lib/base224.h
new file mode 100644
index 00000000..128bdfba
--- /dev/null
+++ b/lib/base224.h
@@ -0,0 +1,26 @@
+/*
+ *  Base 224 Encoding & Decoding
+ *
+ *  (c) 2002 Martin Mares
+ */
+
+/*
+ * Both functions process len bytes of src and return the number
+ * of bytes stored to dest.
+ */
+uns base224_encode(byte *dest, byte *src, uns len);
+uns base224_decode(byte *dest, byte *src, uns len);
+
+/*
+ * Warning: when encoding, at least 4 bytes of extra space are needed.
+ * Better use this macro to calculate buffer size.
+ */
+#define BASE224_ENC_LENGTH(x) (((x)*8+38)/39*5)
+
+/*
+ * When called for BASE224_IN_CHUNK-byte chunks, the result will be
+ * always BASE224_OUT_CHUNK bytes long. If a longer block is split
+ * to such chunks, the result will be identical.
+ */
+#define BASE224_IN_CHUNK 39
+#define BASE224_OUT_CHUNK 40