X-Git-Url: http://mj.ucw.cz/gitweb/?a=blobdiff_plain;ds=sidebyside;f=lib%2Flizard.c;h=1da67ccddfc57dde8067e04689b9cbf034d39a8d;hb=3ff0c1878796f707ba889f1875190a578aea512e;hp=3b0026a0fa59ac7185d605ef9369c682401cbb11;hpb=3d551c5775e4d3ed84f8a1f9c7f4e7ca4b7b7d99;p=libucw.git diff --git a/lib/lizard.c b/lib/lizard.c index 3b0026a0..1da67ccd 100644 --- a/lib/lizard.c +++ b/lib/lizard.c @@ -6,7 +6,7 @@ * This software may be freely distributed and used according to the terms * of the GNU Lesser General Public License. * - * The file format is based on LZO1X and + * The file format is based on LZO1X and * the compression method is based on zlib. */ @@ -28,25 +28,25 @@ struct hash_record { #define CHAIN_GOOD_MATCH 32 // we already have a good match => end static inline uns -hashf(byte *string) +hashf(const byte *string) /* 0..HASH_SIZE-1 */ { return string[0] ^ (string[1]<<3) ^ (string[2]<<6); } static inline byte * -locate_string(byte *string, uns record_id, uns head) +locate_string(const byte *string, int record_id, int head) /* The strings are recorded into the hash-table regularly, hence there is no * need to store the pointer there. */ { string += record_id - head; if (record_id >= head) string -= HASH_RECORDS-1; - return string; + return (byte *)string; } static inline uns -find_match(uns record_id, struct hash_record *hash_rec, byte *string, byte *string_end, byte **best_ptr, uns head) +find_match(uns record_id, struct hash_record *hash_rec, const byte *string, const byte *string_end, byte **best_ptr, uns head) /* hash_tab[hash] == record_id points to the head of the double-linked * link-list of strings with the same hash. The records are statically * stored in circular array hash_rec (with the 1st entry unused), and the @@ -68,7 +68,7 @@ find_match(uns record_id, struct hash_record *hash_rec, byte *string, byte *stri if (*cmp++ == string[4] && *cmp++ == string[5] && *cmp++ == string[6] && *cmp++ == string[7]) { - byte *str = string + 8; + const byte *str = string + 8; while (str <= string_end && *cmp++ == *str++); } } @@ -128,7 +128,7 @@ dump_unary_value(byte *out, uns l) } static byte * -flush_copy_command(uns bof, byte *out, byte *start, uns len) +flush_copy_command(uns bof, byte *out, const byte *start, uns len) { if (bof && len <= 238) *out++ = len + 17; @@ -161,23 +161,23 @@ flush_copy_command(uns bof, byte *out, byte *start, uns len) } int -lizard_compress(byte *in, uns in_len, byte *out) +lizard_compress(const byte *in, uns in_len, byte *out) /* Requires out being allocated for at least in_len * LIZARD_MAX_MULTIPLY + * LIZARD_MAX_ADD. There must be at least LIZARD_NEEDS_CHARS characters * allocated after in. Returns the actual compressed length. */ { hash_ptr_t hash_tab[HASH_SIZE]; struct hash_record hash_rec[HASH_RECORDS]; - byte *in_end = in + in_len; + const byte *in_end = in + in_len; byte *out_start = out; - byte *copy_start = in; + const byte *copy_start = in; uns head = 1; /* 0 in unused */ uns to_delete = 0, bof = 1; bzero(hash_tab, sizeof(hash_tab)); /* init the hash-table */ while (in < in_end) { uns hash = hashf(in); - byte *best; + byte *best = NULL; uns len = find_match(hash_tab[hash], hash_rec, in, in_end, &best, head); if (len < 3) #if 0 // TODO: now, our routine does not detect matches of length 2 @@ -280,18 +280,18 @@ dump_2sequence: } static inline byte * -read_unary_value(byte *in, uns *val) +read_unary_value(const byte *in, uns *val) { uns l = 0; while (!*in++) l += 255; l += in[-1]; *val = l; - return in; + return (byte *)in; } int -lizard_decompress(byte *in, byte *out) +lizard_decompress(const byte *in, byte *out) /* Requires out being allocated for the decompressed length must be known * beforehand. It is desirable to lock the following memory page for * read-only access to prevent buffer overflow. Returns the actual @@ -380,7 +380,8 @@ lizard_decompress(byte *in, byte *out) else { /* overlapping */ for (; len-- > 0; out++) - *out = out[-pos]; + *out = *(out-pos); + /* It's tempting to use out[-pos] above, but unfortunately it's not the same */ } /* extract the copy-bits */ len = in[-2] & 0x3; @@ -415,18 +416,22 @@ perform_copy_command: Description of the LZO1X format : ================================= +The meaning of the commands depends on the current mode. It can be either +the compressed mode or the copy mode. In some cases, the compressed mode +also distinguishes whether we just left the copy mode or not. + Beginning of file: ------------------ -If the first byte is 00010001, it means probably EOF (empty file), so switch -to the compressed mode. If it is bigger, subtract 17 and copy this number of -the following characters to the ouput and switch to the compressed mode. If -it is smaller, go to the copy mode. +Start in copy mode. If the first byte is 00010001, it means probably EOF (empty file), +so switch to the compressed mode. If it is bigger, subtract 17 and copy this number of +the following characters to the output and switch to the compressed mode. +If it is smaller, interpret it as a regular copy mode command. -Compressed mode : ------------------ +Compressed mode: +---------------- -Read the first byte of the sequence and determine the type of bit-encoding by +Read the first byte of the sequence and determine the type of bit encoding by looking at the most significant bits. The sequence is always at least 2 bytes long. Decode sequences of these types until the EOF or END marker is read. @@ -445,25 +450,23 @@ long. Decode sequences of these types until the EOF or END marker is read. C following characters will be copied from the compressed text to the output. The number CC is always stored in the 2 least significant bits of the second last byte of the sequence. - + If END is read, the algorithm switches to the copy mode. pattern length position -0000ppCC pppppppp 2 10 bits (*) -0001pLLL L* ppppppCC pppppppp 3..9 + extend 15 bits + EOF +0000ppCC pppppppp 2 10 bits [default interpretation] +0000ppCC pppppppp 3 10 bits + 2048 [just after return from copy mode] +0001pLLL L* ppppppCC pppppppp 3..9 + extend 15 bits [pos 0 interpreted as EOF] 001LLLLL L* ppppppCC pppppppp 3..33 + extend 14 bits -01\ -10 \ -11 \ -LLLpppCC pppppppp 3..8 11 bits +LLLpppCC pppppppp 3..8 11 bits [LLL >= 010] -Copy mode : ------------ +Copy mode: +---------- Read the first byte and, if the most significant bits are 0000, perform the following command, otherwise switch to the compressed mode (and evaluate the -command). +command there). pattern length position @@ -472,9 +475,4 @@ pattern length position Copy L characters from the compressed text to the output. The overhead for incompressible strings is only roughly 1/256 + epsilon. -(*) After reading one copy command, switch to the compressed mode with the -following "optimisation": the pattern 0000ppCC expands to length 3 instead of 2 -and 2048 is added to the position (now it is slightly more than 11 bits), -because a sequence of length 2 would never be used. - */