From: Robert Spalek
Date: Fri, 11 Jun 2004 15:28:29 +0000 (+0000)
Subject: implemented fast compression and decompression routines
X-Git-Tag: holmes-import~1059
X-Git-Url: http://mj.ucw.cz/gitweb/?a=commitdiff_plain;h=d94fc019538bc9c96b694bf0cb1924fbd9251a6b;p=libucw.git

implemented fast compression and decompression routines

I have spent a lot of time on them and they just compile.  I have never
tested them.  The test program will follow soon.
---
diff --git a/lib/lizard.c b/lib/lizard.c
new file mode 100644
index 00000000..c71fc9ac
--- /dev/null
+++ b/lib/lizard.c
@@ -0,0 +1,491 @@
+/*
+ * LiZzaRd -- Fast compression method based on Lempel-Ziv 77
+ *
+ * (c) 2004, Robert Spalek
+ *
+ * The file format is based on LZO1X and
+ * the compression method is based on zlib.
+ */
+
+#include "lib/lib.h"
+#include "lib/lizard.h"
+
+#include <string.h>
+
+typedef u16 hash_ptr_t;
+struct hash_record {
+  byte *string;                 // in the original text
+  hash_ptr_t next4, next3;      // 0=end of the list
+};
+
+#define HASH_SIZE ((1<<14) + 27)        // prime, size of the hash-table
+#define HASH_RECORDS ((1<<15) - 1)      // maximum number of records in the hash-table
+#define CHAIN_MAX_TESTS 16              // crop longer collision chains
+#define CHAIN_GOOD_MATCH 16             // we already have a good match => end
+
+static inline uns
+hashf(byte *string)
+{
+  uns hash;
+#ifdef CPU_ALLOW_UNALIGNED
+  hash = * (uns *) string;
+#elif CPU_LITTLE_ENDIAN
+  hash = string[0] | (string[1]<<8) | (string[2]<<16) | (string[3]<<24);
+#else
+  hash = string[3] | (string[2]<<8) | (string[1]<<16) | (string[0]<<24);
+#endif
+  return hash;
+}
+#ifdef CPU_LITTLE_ENDIAN
+#define MASK_4TH_CHAR (~(0xff << 24))
+#else
+#define MASK_4TH_CHAR (~0xff)
+#endif
+
+static inline uns
+find_match(uns list_id, hash_ptr_t *hash_tab, struct hash_record *hash_rec, uns hash, byte *string, byte *string_end, byte **best_ptr)
+  /* We heavily rely on gcc optimisations (e.g. list_id is a constant, hence
+   * some branches of the code will be pruned).
+   *
+   * hash_tab[hash] points to the 1st record of the linked list of strings
+   * with the same hash.  The records are statically stored in the circular
+   * array hash_rec (with the 1st entry unused), and the pointers are just
+   * 16-bit indices.  In one array, we store 2 independent linked lists: for
+   * 3 and for 4 hashed characters.
+   *
+   * Updates are performed more often than searches, so they are lazy: we only
+   * add new records to the front of the lists and never delete them.  Because
+   * we reuse already allocated space in the circular array, we necessarily
+   * modify records that are still pointed at from the middle of some list.
+   * To prevent confusion, this function checks 2 invariants: the strings in a
+   * collision chain are ordered, and the hash of the 1st record in the chain
+   * must match.  */
+{
+  if (list_id == 3)             /* Find the 1st record in the collision chain.  */
+    hash = hash & MASK_4TH_CHAR;
+  hash %= HASH_SIZE;
+  if (!hash_tab[hash])
+    return 0;
+  struct hash_record *record = hash_rec + hash_tab[hash];
+  uns hash2 = hashf(record->string);
+  if (list_id == 3)
+    hash2 = hash2 & MASK_4TH_CHAR;
+  hash2 %= HASH_SIZE;
+  if (hash2 != hash)            /* Verify the hash-value.  */
+    {
+      hash_tab[hash] = 0;       /* prune */
+      return 0;
+    }
+
+  uns count = CHAIN_MAX_TESTS;
+  uns best_len = 0;
+  byte *last_string = string_end;
+  hash_ptr_t *last_ptr = NULL;
+  while (count-- > 0)
+    {
+      byte *cmp = record->string;
+      if (cmp >= last_string)   /* the collision chain is ordered by age */
+        {
+          *last_ptr = 0;        /* prune */
+          break;
+        }
+      last_string = cmp;
+      if (cmp[0] == string[0] && cmp[2] == string[2])
+        {
+          if (cmp[3] == string[3])
+            {
+              /* implies cmp[1] == string[1], because we hash into 14+eps bits */
+              cmp += 4;
+              if (*cmp++ == string[4] && *cmp++ == string[5]
+                  && *cmp++ == string[6] && *cmp++ == string[7])
+                {
+                  byte *str = string + 8;
+                  while (str <= string_end && *cmp++ == *str++);
+                }
+            }
+          else if (list_id == 3)
+            /* hashing 3 characters implies cmp[1] == string[1],
+             * hashing 4 characters implies cmp[1] != string[1] */
+            cmp += 4;
+          else
+            goto next_collision;
+          uns len = cmp - record->string - 1;   /* cmp points 2 characters after the last match */
+          if (len > best_len)
+            {
+              best_len = len;
+              *best_ptr = record->string;
+              if (best_len >= CHAIN_GOOD_MATCH) /* optimisation */
+                break;
+            }
+        }
+next_collision:
+      if (list_id == 3)
+        {
+          if (!record->next3)
+            break;
+          last_ptr = &record->next3;
+          record = hash_rec + record->next3;
+        }
+      else
+        {
+          if (!record->next4)
+            break;
+          last_ptr = &record->next4;
+          record = hash_rec + record->next4;
+        }
+    }
+  return best_len;
+}
+
+static inline uns
+hash_string(byte *string, uns hash, hash_ptr_t *tab3, hash_ptr_t *tab4, struct hash_record *hash_rec, uns head)
+{
+  uns h3 = (hash & MASK_4TH_CHAR) % HASH_SIZE;
+  uns h4 = hash % HASH_SIZE;
+  struct hash_record *rec = hash_rec + head;
+  rec->string = string;
+  rec->next3 = tab3[h3];
+  rec->next4 = tab4[h4];
+  tab3[h3] = head;              /* add the new record to the front of both lists */
+  tab4[h4] = head;
+  head++;                       /* circular buffer, reuse old records */
+  if (head >= HASH_RECORDS)
+    head = 1;                   /* entry 0 stays unused, 0 marks the end of a list */
+  return head;
+}
+
+static inline byte *
+dump_unary_value(byte *out, uns l)
+{
+  while (l > 255)
+    {
+      l -= 255;
+      *out++ = 0;
+    }
+  *out++ = l;
+  return out;
+}
+
+static byte *
+flush_copy_command(uns bof, byte *out, byte *start, uns len)
+{
+  if (bof && len <= 238)
+    *out++ = len + 17;
+  else if (len < 4)
+    /* cannot happen when !!bof */
+    out[-2] |= len;             /* invariant: the lowest 2 bits 2 bytes back are free */
+  else
+    {
+      /* leave the 2 least significant bits of out[-2] set to 0 */
+      if (len <= 18)
+        *out++ = len - 3;
+      else
+        {
+          *out++ = 0;
+          out = dump_unary_value(out, len - 18);
+        }
+    }
+  while (len-- > 0)
+    *out++ = *start++;
+  return out;
+}
+
+int
+lizzard_compress(byte *in, uns in_len, byte *out)
+  /* Requires out to be allocated for at least in_len * LIZZARD_MAX_PROLONG_FACTOR.
+   * There must be at least 8 readable characters allocated after in.
+   * Returns the actual compressed length.  */
+{
+  hash_ptr_t hash_tab3[HASH_SIZE], hash_tab4[HASH_SIZE];
+  struct hash_record hash_rec[HASH_RECORDS];
+  byte *in_start = in;
+  byte *in_end = in + in_len;
+  byte *out_start = out;
+  byte *copy_start = in;
+  uns head = 1;                 /* entry 0 of hash_rec is unused */
+  bzero(hash_tab3, sizeof(hash_tab3));  /* init the hash-tables */
+  bzero(hash_tab4, sizeof(hash_tab4));
+  while (in < in_end)
+    {
+      uns hash = hashf(in);
+      byte *best;
+      uns len = find_match(4, hash_tab4, hash_rec, hash, in, in_end, &best);
+      if (len >= 3)
+        goto match;
+      len = find_match(3, hash_tab3, hash_rec, hash, in, in_end, &best);
+
+match_found:
+      if (len >= 3)
+        goto match;
+#if 0   // TODO: now, our routine does not detect matches of length 2
+      if (len == 2 && (in - best) < ((1<<10) + 1))
+        goto match;
+#endif
+literal:
+      head = hash_string(in, hash, hash_tab3, hash_tab4, hash_rec, head);
+      in++;                     /* add a literal */
+      continue;
+
+match:
+      if (in + len > in_end)    /* crop at EOF */
+        {
+          len = in_end - in;
+          if (len < 3)
+            goto match_found;
+        }
+      /* Record the match.  */
+      uns copy_len = in - copy_start;
+      uns is_in_copy_mode = copy_start==in_start || copy_len >= 4;
+      uns shift = in - best - 1;
+      /* Try to use a 2-byte sequence.  */
+      if (len == 2)
+        {
+          if (is_in_copy_mode || !copy_len)
+            /* cannot be used with 0 copied characters, because this bit
+             * pattern is reserved for the copy mode */
+            goto literal;
+dump_2sequence:
+          if (copy_len > 0)
+            out = flush_copy_command(copy_start==in_start, out, copy_start, copy_len);
+          *out++ = (shift>>6) & ~3;     /* shift fits into 10 bits */
+          *out++ = shift & 0xff;
+        }
+      else if (len == 3 && is_in_copy_mode && shift >= (1<<11) && shift < (1<<11) + (1<<10))
+        {
+          /* optimisation for length-3 matches after a copy command */
+          shift -= 1<<11;
+          goto dump_2sequence;
+        }
+      /* now, len >= 3 */
+      else if (shift < (1<<11) && len <= 8)
+        {
+          shift |= (len-3 + 2)<<11;
+          goto dump_2sequence;  /* shift has 11 bits and also contains len */
+        }
+      /* We have to use a 3-byte sequence.  */
+      else if (len == 3 && is_in_copy_mode)
+        /* avoid encoding a 3-character match by a 3-byte sequence when it can
+         * simply be appended to the pending copy command */
+        goto literal;
+      else
+        {
+          if (copy_len > 0)
+            out = flush_copy_command(copy_start==in_start, out, copy_start, copy_len);
+          if (shift < (1<<14))
+            {
+              if (len <= 33)
+                *out++ = (1<<5) | (len-2);
+              else
+                {
+                  *out++ = 1<<5;
+                  out = dump_unary_value(out, len - 33);
+                }
+            }
+          else                  /* shift < (1<<15)-1 because of HASH_RECORDS */
+            {
+              shift++;          /* because shift==0 is reserved for EOF */
+              byte pos_bit = (shift>>11) & (1<<3);
+              if (len <= 9)
+                *out++ = (1<<4) | pos_bit | (len-2);
+              else
+                {
+                  *out++ = (1<<4) | pos_bit;
+                  out = dump_unary_value(out, len - 9);
+                }
+            }
+          *out++ = (shift>>6) & ~3;     /* the rest of shift fits into 14 bits */
+          *out++ = shift & 0xff;
+        }
+      /* Update the hash-table.  */
+      head = hash_string(in, hash, hash_tab3, hash_tab4, hash_rec, head);
+      for (uns i=1; i<len; i++)
+        head = hash_string(in+i, hashf(in+i), hash_tab3, hash_tab4, hash_rec, head);
+      in += len;
+      copy_start = in;
+    }
+  if (in > in_end)              /* crop at EOF */
+    in = in_end;
+  uns copy_len = in - copy_start;
+  if (copy_len > 0)
+    out = flush_copy_command(copy_start==in_start, out, copy_start, copy_len);
+  *out++ = 17;                  /* add EOF */
+  *out++ = 0;
+  *out++ = 0;
+  return out - out_start;
+}
+
+static inline byte *
+read_unary_value(byte *in, uns *val)
+{
+  uns l = 0;
+  while (!*in++)
+    l += 255;
+  l += in[-1];
+  *val = l;
+  return in;
+}
+
+int
+lizzard_decompress(byte *in, byte *out)
+  /* Requires out to be allocated for the decompressed length; the length must
+   * be known beforehand.  It is desirable to lock the following memory page
+   * for read-only access to prevent buffer overflow.
+   * Returns the actual decompressed length or a negative number when an
+   * error has occurred.  */
+{
+  byte *out_start = out;
+  uns expect_copy_command = 1;
+  uns len;
+  if (*in > 17)                 /* short copy command at BOF */
+    {
+      len = *in++ - 17;
+      expect_copy_command = 2;
+      goto perform_copy_command;
+    }
+  while (1)
+    {
+      uns c = *in++;
+      uns pos;
+      if (c < 0x10)
+        {
+          if (expect_copy_command == 1)
+            {
+              if (!c)
+                {
+                  in = read_unary_value(in, &len);
+                  len += 18;
+                }
+              else
+                len = c + 3;
+              expect_copy_command = 2;
+              goto perform_copy_command;
+            }
+          else
+            {
+              pos = (c&0xc)<<6 | *in++;
+              if (expect_copy_command == 2)
+                {
+                  pos += 1<<11;
+                  len = 3;
+                }
+              else
+                len = 2;
+            }
+        }
+      else if (c < 0x20)
+        {
+          pos = (c&0x8)<<11;
+          len = c&0x7;
+          if (!len)
+            {
+              in = read_unary_value(in, &len);
+              len += 9;
+            }
+          else
+            len += 2;
+          pos |= (*in++ & 0xfc)<<6;
+          pos |= *in++;
+          if (!pos)             /* EOF */
+            break;
+          else                  /* shift it back */
+            pos--;
+        }
+      else if (c < 0x40)
+        {
+          len = c&0x1f;
+          if (!len)
+            {
+              in = read_unary_value(in, &len);
+              len += 33;
+            }
+          else
+            len += 2;
+          pos = (*in++ & 0xfc)<<6;
+          pos |= *in++;
+        }
+      else                      /* the high bits encode the length */
+        {
+          len = (c>>5) - 2 + 3;
+          pos = (c&0x1c)<<6;
+          pos |= *in++;
+        }
+      /* take the string from the sliding window of the output */
+      memcpy(out, out-1-pos, len);      //FIXME: the regions may overlap
+      out += len;
+      /* extract the copy-bits */
+      len = in[-2] & 0x3;
+      if (len)
+        expect_copy_command = 0;        /* and fall-thru */
+      else
+        {
+          expect_copy_command = 1;
+          continue;
+        }
+
+perform_copy_command:
+      memcpy(out, in, len);
+      out += len;
+      in += len;
+    }
+
+  return out - out_start;
+}
+
+/*
+
+Description of the LZO1X format:
+================================
+
+Beginning of file:
+------------------
+
+If the first byte is 00010001, it probably means EOF (an empty file), so
+switch to the compressed mode.  If it is bigger, subtract 17, copy that many
+of the following characters to the output, and switch to the compressed mode.
+If it is smaller, go to the copy mode.
+
+Compressed mode:
+----------------
+
+Read the first byte of the sequence and determine the type of bit-encoding by
+looking at the most significant bits.  The sequence is always at least 2 bytes
+long.  Decode sequences of these types until the EOF or END marker is read.
+
+  length L = length of the text taken from the sliding window
+
+    If L=0, count the number Z of the following zero bytes and add Z*255 to
+    the value of the following non-zero byte.  This allows setting L
+    arbitrarily high.
+
+  position p = relative position of the beginning of the text
+
+    Exception: 00010001 00000000 00000000 means EOF
+
+  copying C = length 1..3 of copied characters, or END=0
+
+    C following characters will be copied from the compressed text to the
+    output.  The number C is always stored in the 2 least significant bits of
+    the second last byte of the sequence.
+
+    If END is read, the algorithm switches to the copy mode.
+
+pattern                          length          position
+
+0000ppCC pppppppp                2               10 bits  (*)
+0001pLLL L* ppppppCC pppppppp    3..9 + extend   15 bits  + EOF
+001LLLLL L* ppppppCC pppppppp    3..33 + extend  14 bits
+01\
+10 \
+11  \
+LLLpppCC pppppppp                3..8            11 bits
+
+Copy mode:
+----------
+
+Read the first byte and, if the most significant bits are 0000, perform the
+following command; otherwise switch to the compressed mode (and evaluate the
+command).
+
+pattern                          length          position
+
+0000LLLL L*                      4..18 + extend  N/A
+
+  Copy L characters from the compressed text to the output.  The overhead for
+  incompressible strings is only roughly 1/256 + epsilon.
+
+(*) After reading one copy command, switch to the compressed mode with the
+following "optimisation": the pattern 0000ppCC expands to length 3 instead of
+2 and 2048 is added to the position (now it is slightly more than 11 bits),
+because a sequence of length 2 would never be used.
+
+*/
diff --git a/lib/lizard.h b/lib/lizard.h
new file mode 100644
index 00000000..656aab8d
--- /dev/null
+++ b/lib/lizard.h
@@ -0,0 +1,16 @@
+/*
+ * LiZzaRd -- Fast compression method based on Lempel-Ziv 77
+ *
+ * (c) 2004, Robert Spalek
+ *
+ * The file format is based on LZO1X and
+ * the compression method is based on zlib.
+ */
+
+#define LIZZARD_MAX_PROLONG_FACTOR 129/128
+  /* An incompressible string of length 256 has a 2-byte header.  Hence the
+   * file length gets multiplied by at most 129/128, plus at most 4 bytes are
+   * added for the header and the EOF marker.  */
+
+int lizzard_compress(byte *in, uns in_len, byte *out);
+int lizzard_decompress(byte *in, byte *out);
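
The test program is not part of this commit yet; below is only a minimal
round-trip sketch of the intended calling sequence.  It assumes the xmalloc()
and die() helpers from lib/lib.h and a caller that remembers the original
length, because lizzard_decompress() needs its output buffer pre-allocated to
the known decompressed size.  The function names and buffer sizes come from
lib/lizard.h above; everything else (try_lizzard and its arguments) is purely
illustrative:

/* Hypothetical round-trip check, not the promised test program. */
#include "lib/lib.h"
#include "lib/lizard.h"

#include <string.h>

static void
try_lizzard(byte *data, uns len)
{
  byte *in = xmalloc(len + 8);          /* lizzard_compress() may read up to 8 bytes past the end */
  memcpy(in, data, len);
  byte *comp = xmalloc(len * LIZZARD_MAX_PROLONG_FACTOR + 4);   /* worst-case growth + header and EOF */
  int comp_len = lizzard_compress(in, len, comp);
  byte *out = xmalloc(len);             /* the decompressed length is known beforehand */
  int out_len = lizzard_decompress(comp, out);
  if (out_len != (int) len || memcmp(out, data, len))
    die("lizzard: round-trip failed (%u bytes in, %d compressed, %d back)", len, comp_len, out_len);
}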