X-Git-Url: http://mj.ucw.cz/gitweb/?a=blobdiff_plain;f=lib%2Flizard.c;h=6167fee043ba90c07befef746cd6bd917c5c21d7;hb=8107422b6cc0fda08e55218a3a79ff1580779a95;hp=c71fc9accb52b85a7a49281bdf2e42c3d40df2fd;hpb=d94fc019538bc9c96b694bf0cb1924fbd9251a6b;p=libucw.git diff --git a/lib/lizard.c b/lib/lizard.c index c71fc9ac..6167fee0 100644 --- a/lib/lizard.c +++ b/lib/lizard.c @@ -1,154 +1,117 @@ /* - * LiZzaRd -- Fast compression method based on Lempel-Ziv 77 + * LiZaRd -- Fast compression method based on Lempel-Ziv 77 * * (c) 2004, Robert Spalek * + * This software may be freely distributed and used according to the terms + * of the GNU Lesser General Public License. + * * The file format is based on LZO1X and * the compression method is based on zlib. */ #include "lib/lib.h" -#include "lib/lizzard.h" +#include "lib/lizard.h" #include typedef u16 hash_ptr_t; struct hash_record { - byte *string; // in original text - hash_ptr_t next4, next3; // 0=end + /* the position in the original text is implicit; it is computed by locate_string() */ + hash_ptr_t next; // 0=end + hash_ptr_t prev; // high bit: 0=record in array, 1=head in hash-table (i.e. value of hashf) }; -#define HASH_SIZE ((1<<14) + 27) // prime, size of hash-table -#define HASH_RECORDS ((1<<15) - 1) // maximum number of records in hash-table -#define CHAIN_MAX_TESTS 16 // crop longer collision chains -#define CHAIN_GOOD_MATCH 16 // we already have a good match => end +#define HASH_SIZE (1<<14) // size of hash-table +#define HASH_RECORDS (1<<15) // maximum number of records in hash-table, 0 is unused ==> subtract 1 +#define CHAIN_MAX_TESTS 8 // crop longer collision chains +#define CHAIN_GOOD_MATCH 32 // we already have a good match => end static inline uns hashf(byte *string) + /* 0..HASH_SIZE-1 */ { - uns hash; -#ifdef CPU_ALLOW_UNALIGNED - hash = * (uns *) string; -#elif CPU_LITTLE_ENDIAN - hash = string[0] | (string[1]<<8) | (string[2]<<16) | (string[3]<<24); -#else - hash = string[3] | (string[2]<<8) | (string[1]<<16) | (string[0]<<24); -#endif - return hash; + return string[0] ^ (string[1]<<3) ^ (string[2]<<6); } -#ifdef CPU_LITTLE_ENDIAN -#define MASK_4TH_CHAR (~(0xff << 24)) -#else -#define MASK_4TH_CHAR (~0xff) -#endif -static inline uns -find_match(uns list_id, hash_ptr_t *hash_tab, struct hash_record *hash_rec, uns hash, byte *string, byte *string_end, byte **best_ptr) - /* We heavily rely on gcc optimisations (e.g. list_id is a constant hence - * some branches of the code will be pruned). - * - * hash_tab[hash] points to the 1st record of the link-list of strings with the - * same hash. The records are statically stored in circular array hash_rec - * (with 1st entry unused), and the pointers are just 16-bit indices. In one - * array, we store 2 independent link-lists: with 3 and 4 hashed characters. - * - * Updates are performed more often than searches, so they are lazy: we only - * add new records in front of link-lists and never delete them. By reusing - * an already allocated space in the circular array we necessarily modify - * records that are already pointed at in the middle of some link-list. To - * prevent confusion, this function checks 2 invariants: the strings in a - * collision chain are ordered, and the hash of the 1st record in the chain - * must match. */ +static inline byte * +locate_string(byte *string, uns record_id, uns head) + /* The strings are recorded into the hash-table regularly, hence there is no + * need to store the pointer there. */ { - if (list_id == 3) /* Find the 1st record in the collision chain. */ - hash = hash & MASK_4TH_CHAR; - hash %= HASH_SIZE; - if (!hash_tab[hash]) - return 0; - struct hash_record *record = hash_rec + hash_tab[hash]; - uns hash2 = hashf(record->string); - if (list_id == 3) - hash2 = hash2 & MASK_4TH_CHAR; - hash2 %= HASH_SIZE; - if (hash2 != hash) /* Verify the hash-value. */ - { - hash_tab[hash] = 0; /* prune */ - return 0; - } + string += record_id - head; + if (record_id >= head) + string -= HASH_RECORDS-1; + return string; +} +static inline uns +find_match(uns record_id, struct hash_record *hash_rec, byte *string, byte *string_end, byte **best_ptr, uns head) + /* hash_tab[hash] == record_id points to the head of the double-linked + * link-list of strings with the same hash. The records are statically + * stored in circular array hash_rec (with the 1st entry unused), and the + * pointers are just 16-bit indices. The strings in every collision chain + * are ordered by age. */ +{ uns count = CHAIN_MAX_TESTS; uns best_len = 0; - byte *last_string = string_end; - hash_ptr_t *last_ptr = NULL; - while (count-- > 0) + while (record_id && count-- > 0) { - byte *cmp = record->string; - if (cmp >= last_string) /* collision chain is ordered by age */ - { - *last_ptr = 0; /* prune */ - break; - } + byte *record_string = locate_string(string, record_id, head); + byte *cmp = record_string; if (cmp[0] == string[0] && cmp[2] == string[2]) + /* implies cmp[1] == string[1] */ { if (cmp[3] == string[3]) { - /* implies cmp[1] == string[1], becase we hash into 14+eps bits */ cmp += 4; if (*cmp++ == string[4] && *cmp++ == string[5] && *cmp++ == string[6] && *cmp++ == string[7]) { byte *str = string + 8; - while (str <= string_end && *cmp++ == *string++); + while (str <= string_end && *cmp++ == *str++); } } - else if (list_id == 3) - /* hashing 3 characters implies cmp[1] == string[1] - hashing 4 characters implies cmp[1] != string[1] */ - cmp += 4; else - goto next_collision; - uns len = cmp - record->string - 1; /* cmp points 2 characters after the last match */ + cmp += 4; + uns len = cmp - record_string - 1; /* cmp points 2 characters after the last match */ if (len > best_len) { best_len = len; - *best_ptr = record->string; - if (best_len >= CHAIN_GOOD_MATCH) /* optimisation */ + *best_ptr = record_string; + if (best_len >= CHAIN_GOOD_MATCH) /* optimization */ break; } } -next_collision: - if (list_id == 3) - { - if (!record->next3) - break; - last_ptr = &record->next3; - record = hash_rec + record->next3; - } - else - { - if (!record->next4) - break; - last_ptr = &record->next4; - record = hash_rec + record->next4; - } + record_id = hash_rec[record_id].next; } return best_len; } -static inline uns -hash_string(byte *string, uns hash, hash_ptr_t *tab3, hash_ptr_t *tab4, struct hash_record *hash_rec, uns head) +static uns +hash_string(hash_ptr_t *hash_tab, uns hash, struct hash_record *hash_rec, /*byte *string,*/ uns head, uns *to_delete) + /* We reuse hash-records stored in a circular array. First, delete the old + * one and then add the new one in front of the link-list. */ { - uns h3 = (hash & MASK_4TH_CHAR) % HASH_SIZE; - uns h4 = hash % HASH_SIZE; struct hash_record *rec = hash_rec + head; - rec->string = string; - rec->next3 = tab3[h3]; - rec->next4 = tab4[h4]; - tab3[h3] = head; /* add the new record before the link-list */ - tab4[h4] = head; - head++; /* circular buffer, reuse old records */ - if (head >= HASH_RECORDS) - head = 0; + if (*to_delete) /* unlink the original record */ + { + uns prev_id = rec->prev & ((1<<15)-1); + if (rec->prev & (1<<15)) /* was a head */ + hash_tab[prev_id] = 0; + else /* thanks to the ordering, this was a tail */ + hash_rec[prev_id].next = 0; + } + rec->next = hash_tab[hash]; + rec->prev = (1<<15) | hash; + hash_rec[rec->next].prev = head; + hash_tab[hash] = head; /* add the new record before the link-list */ + + if (++head >= HASH_RECORDS) /* circular buffer, reuse old records, 0 is unused */ + { + head = 1; + *to_delete = 1; + } return head; } @@ -170,8 +133,18 @@ flush_copy_command(uns bof, byte *out, byte *start, uns len) if (bof && len <= 238) *out++ = len + 17; else if (len < 4) + { /* cannot happen when !!bof */ out[-2] |= len; /* invariant: lowest 2 bits 2 bytes back */ +#ifdef CPU_ALLOW_UNALIGNED + * (u32*) out = * (u32*) start; + return out + len; +#else + while (len-- > 0) + *out++ = *start++; + return out; +#endif + } else { /* leave 2 least significant bits of out[-2] set to 0 */ @@ -183,90 +156,87 @@ flush_copy_command(uns bof, byte *out, byte *start, uns len) out = dump_unary_value(out, len - 18); } } - while (len-- > 0) - *out++ = *start++; - return out; + memcpy(out, start, len); + return out + len; } int -lizzard_compress(byte *in, uns in_len, byte *out) - /* Requires out being allocated for at least in_len * LIZZARD_MAX_PROLONG_FACTOR. - * There must be at least 8 characters allocated after in. - * Returns the actual compressed length. */ +lizard_compress(byte *in, uns in_len, byte *out) + /* Requires out being allocated for at least in_len * LIZARD_MAX_MULTIPLY + + * LIZARD_MAX_ADD. There must be at least LIZARD_NEEDS_CHARS characters + * allocated after in. Returns the actual compressed length. */ { - hash_ptr_t hash_tab3[HASH_SIZE], hash_tab4[HASH_SIZE]; + hash_ptr_t hash_tab[HASH_SIZE]; struct hash_record hash_rec[HASH_RECORDS]; - byte *in_start = in; byte *in_end = in + in_len; byte *out_start = out; byte *copy_start = in; - uns head = 0; - bzero(hash_tab3, sizeof(hash_tab3)); /* init the hash-table */ - bzero(hash_tab4, sizeof(hash_tab4)); + uns head = 1; /* 0 in unused */ + uns to_delete = 0, bof = 1; + bzero(hash_tab, sizeof(hash_tab)); /* init the hash-table */ while (in < in_end) { uns hash = hashf(in); byte *best; - uns len = find_match(4, hash_tab4, hash_rec, hash, in, in_end, &best); - if (len >= 3) - goto match; - len = find_match(3, hash_tab3, hash_rec, hash, in, in_end, &best); - -match_found: - if (len >= 3) - goto match; + uns len = find_match(hash_tab[hash], hash_rec, in, in_end, &best, head); + if (len < 3) #if 0 // TODO: now, our routine does not detect matches of length 2 - if (len == 2 && (in - best->string) < ((1<<10) + 1)) - goto match; + if (len == 2 && (in - best->string - 1) < (1<<10)) + { /* pass-thru */ } + else #endif + { literal: - head = hash_string(in, hash, hash_tab3, hash_tab4, hash_rec, head); - in++; /* add a literal */ - continue; + head = hash_string(hash_tab, hash, hash_rec, head, &to_delete); + in++; /* add a literal */ + continue; + } -match: if (in + len > in_end) /* crop EOF */ { len = in_end - in; if (len < 3) - goto match_found; + goto literal; } /* Record the match. */ uns copy_len = in - copy_start; - uns is_in_copy_mode = copy_start==in_start || copy_len >= 4; + uns is_in_copy_mode = bof || copy_len >= 4; uns shift = in - best - 1; /* Try to use a 2-byte sequence. */ +#if 0 if (len == 2) { if (is_in_copy_mode || !copy_len) /* cannot use with 0 copied characters, because this bit pattern is reserved for copy mode */ goto literal; - + else + goto dump_2sequence; + } else +#endif + /* now, len >= 3 */ + if (shift < (1<<11) && len <= 8) + { + shift |= (len-3 + 2)<<11; dump_2sequence: - if (copy_len > 0) - out = flush_copy_command(copy_start==in_start, out, copy_start, copy_len); + if (copy_len) + out = flush_copy_command(bof, out, copy_start, copy_len); *out++ = (shift>>6) & ~3; /* shift fits into 10 bits */ *out++ = shift & 0xff; } - else if (len == 3 && is_in_copy_mode && shift >= (1<<11) && shift < (1<<11 + 1<<10)) - { - /* optimisation for length-3 matches after a copy command */ - shift -= 1<<11; - goto dump_2sequence; - } - /* now, len >= 3 */ - else if (shift < (1<<11) && len <= 8) + else if (len == 3 && is_in_copy_mode) { - shift |= (len-3 + 2)<<11; - goto dump_2sequence; /* shift has 11 bits and contains also len */ + if (shift < (1<<11) + (1<<10)) /* optimisation for length-3 matches after a copy command */ + { + shift -= 1<<11; + goto dump_2sequence; /* shift has 11 bits and contains also len */ + } + else /* avoid 3-sequence compressed to 3 sequence if it can simply be appended */ + goto literal; } /* We have to use a 3-byte sequence. */ - else if (len == 3 && is_in_copy_mode) - /* avoid 3-sequence compressed to 3 sequence if it can simply be appended */ - goto literal; else { - if (copy_len > 0) - out = flush_copy_command(copy_start==in_start, out, copy_start, copy_len); + if (copy_len) + out = flush_copy_command(bof, out, copy_start, copy_len); if (shift < (1<<14)) { if (len <= 33) @@ -280,12 +250,12 @@ dump_2sequence: else /* shift < (1<<15)-1 becase of HASH_RECORDS */ { shift++; /* because shift==0 is reserved for EOF */ - byte pos_bit = (shift>>11) & (1<<3); + byte pos_bit = ((shift>>11) & (1<<3)) | (1<<4); if (len <= 9) - *out++ = (1<<4) | pos_bit | (len-2); + *out++ = pos_bit | (len-2); else { - *out++ = (1<<4) | pos_bit; + *out++ = pos_bit; out = dump_unary_value(out, len - 9); } } @@ -293,17 +263,16 @@ dump_2sequence: *out++ = shift & 0xff; } /* Update the hash-table. */ - head = hash_string(in, hash, hash_tab3, hash_tab4, hash_rec, head); + head = hash_string(hash_tab, hash, hash_rec, head, &to_delete); for (uns i=1; i in_end) /* crop at BOF */ - in = in_end; uns copy_len = in - copy_start; - if (copy_len > 0) - out = flush_copy_command(copy_start==in_start, out, copy_start, copy_len); + if (copy_len) + out = flush_copy_command(bof, out, copy_start, copy_len); *out++ = 17; /* add EOF */ *out++ = 0; *out++ = 0; @@ -322,7 +291,7 @@ read_unary_value(byte *in, uns *val) } int -lizzard_decompress(byte *in, byte *out) +lizard_decompress(byte *in, byte *out) /* Requires out being allocated for the decompressed length must be known * beforehand. It is desirable to lock the following memory page for * read-only access to prevent buffer overflow. Returns the actual @@ -334,7 +303,6 @@ lizzard_decompress(byte *in, byte *out) if (*in > 17) /* short copy command at BOF */ { len = *in++ - 17; - expect_copy_command = 2; goto perform_copy_command; } while (1) @@ -345,15 +313,17 @@ lizzard_decompress(byte *in, byte *out) if (expect_copy_command == 1) { if (!c) + { in = read_unary_value(in, &len); + len += 18; + } else - len = c; - expect_copy_command = 2; + len = c + 3; goto perform_copy_command; } else { - pos = (c&0xc)<<6 | *in++; + pos = ((c&0xc)<<6) | *in++; if (expect_copy_command == 2) { pos += 1<<11; @@ -361,12 +331,13 @@ lizzard_decompress(byte *in, byte *out) } else len = 2; + pos++; } else if (c < 0x20) { pos = (c&0x8)<<11; len = c&0x7; - if (len) + if (!len) { in = read_unary_value(in, &len); len += 9; @@ -377,13 +348,12 @@ lizzard_decompress(byte *in, byte *out) pos |= *in++; if (!pos) /* EOF */ break; - else /* shift it back */ - pos--; + /* do NOT pos++ */ } else if (c < 0x40) { len = c&0x1f; - if (len) + if (!len) { in = read_unary_value(in, &len); len += 33; @@ -392,27 +362,46 @@ lizzard_decompress(byte *in, byte *out) len += 2; pos = (*in++ & 0xfc)<<6; pos |= *in++; + pos++; } else /* high bits encode the length */ { - len = (c&0xe)>>5 -2 +3; + len = ((c&0xe0)>>5) -2 +3; pos = (c&0x1c)<<6; pos |= *in++; + pos++; } /* take from the sliding window */ - memcpy(out, in-1-pos, len); //FIXME: overlapping - out += len; + if (len <= pos) + { + memcpy(out, out-pos, len); + out += len; + } + else + { /* overlapping */ + for (; len-- > 0; out++) + *out = out[-pos]; + } /* extract the copy-bits */ len = in[-2] & 0x3; if (len) - expect_copy_command = 0; /* and fall-thru */ - else { - expect_copy_command = 1; - continue; + expect_copy_command = 0; +#ifdef CPU_ALLOW_UNALIGNED + * (u32*) out = * (u32*) in; + out += len; + in += len; +#else + while (len-- > 0) + *out++ = *in++; +#endif } + else + expect_copy_command = 1; + continue; perform_copy_command: + expect_copy_command = 2; memcpy(out, in, len); out += len; in += len; @@ -426,18 +415,22 @@ perform_copy_command: Description of the LZO1X format : ================================= +The meaning of the commands depends on the current mode. It can be either +the compressed mode or the copy mode. In some cases, the compressed mode +also distinguishes whether we just left the copy mode or not. + Beginning of file: ------------------ -If the first byte is 00010001, it means probably EOF (empty file), so switch -to the compressed mode. If it is bigger, subtract 17 and copy this number of -the following characters to the ouput and switch to the compressed mode. If -it is smaller, go to the copy mode. +Start in copy mode. If the first byte is 00010001, it means probably EOF (empty file), +so switch to the compressed mode. If it is bigger, subtract 17 and copy this number of +the following characters to the output and switch to the compressed mode. +If it is smaller, interpret it as a regular copy mode command. -Compressed mode : ------------------ +Compressed mode: +---------------- -Read the first byte of the sequence and determine the type of bit-encoding by +Read the first byte of the sequence and determine the type of bit encoding by looking at the most significant bits. The sequence is always at least 2 bytes long. Decode sequences of these types until the EOF or END marker is read. @@ -461,20 +454,18 @@ long. Decode sequences of these types until the EOF or END marker is read. pattern length position -0000ppCC pppppppp 2 10 bits (*) -0001pLLL L* ppppppCC pppppppp 3..9 + extend 15 bits + EOF +0000ppCC pppppppp 2 10 bits [default interpretation] +0000ppCC pppppppp 3 10 bits + 2048 [just after return from copy mode] +0001pLLL L* ppppppCC pppppppp 3..9 + extend 15 bits [pos 0 interpreted as EOF] 001LLLLL L* ppppppCC pppppppp 3..33 + extend 14 bits -01\ -10 \ -11 \ -LLLpppCC pppppppp 3..8 11 bits +LLLpppCC pppppppp 3..8 11 bits [LLL >= 010] -Copy mode : ------------ +Copy mode: +---------- Read the first byte and, if the most significant bits are 0000, perform the following command, otherwise switch to the compressed mode (and evaluate the -command). +command there). pattern length position @@ -483,9 +474,4 @@ pattern length position Copy L characters from the compressed text to the output. The overhead for incompressible strings is only roughly 1/256 + epsilon. -(*) After reading one copy command, switch to the compressed mode with the -following "optimisation": the pattern 0000ppCC expands to length 3 instead of 2 -and 2048 is added to the position (now it is slightly more than 11 bits), -because a sequence of length 2 would never be used. - */