2 * LiZaRd -- Fast compression method based on Lempel-Ziv 77
4 * (c) 2004, Robert Spalek <robert@ucw.cz>
6 * This software may be freely distributed and used according to the terms
7 * of the GNU Lesser General Public License.
9 * The file format is based on LZO1X and
10 * the compression method is based on zlib.
14 #include "lib/lizard.h"
18 typedef u16 hash_ptr_t;
20 /* the position in the original text is implicit; it is computed by locate_string() */
21 hash_ptr_t next; // 0=end
22 hash_ptr_t prev; // high bit: 0=record in array, 1=head in hash-table (i.e. value of hashf)
25 #define HASH_SIZE (1<<14) // size of hash-table
26 #define HASH_RECORDS (1<<15) // maximum number of records in hash-table, 0 is unused ==> subtract 1
27 #define CHAIN_MAX_TESTS 8 // crop longer collision chains
28 #define CHAIN_GOOD_MATCH 32 // we already have a good match => end
34 return string[0] ^ (string[1]<<3) ^ (string[2]<<6);
38 locate_string(byte *string, uns record_id, uns head)
39 /* The strings are recorded into the hash-table regularly, hence there is no
40 * need to store the pointer there. */
42 string += record_id - head;
43 if (record_id >= head)
44 string -= HASH_RECORDS-1;
49 find_match(uns record_id, struct hash_record *hash_rec, byte *string, byte *string_end, byte **best_ptr, uns head)
50 /* hash_tab[hash] == record_id points to the head of the double-linked
51 * link-list of strings with the same hash. The records are statically
52 * stored in circular array hash_rec (with the 1st entry unused), and the
53 * pointers are just 16-bit indices. The strings in every collision chain
54 * are ordered by age. */
56 uns count = CHAIN_MAX_TESTS;
58 while (record_id && count-- > 0)
60 byte *record_string = locate_string(string, record_id, head);
61 byte *cmp = record_string;
62 if (cmp[0] == string[0] && cmp[2] == string[2])
63 /* implies cmp[1] == string[1] */
65 if (cmp[3] == string[3])
68 if (*cmp++ == string[4] && *cmp++ == string[5]
69 && *cmp++ == string[6] && *cmp++ == string[7])
71 byte *str = string + 8;
72 while (str <= string_end && *cmp++ == *str++);
77 uns len = cmp - record_string - 1; /* cmp points 2 characters after the last match */
81 *best_ptr = record_string;
82 if (best_len >= CHAIN_GOOD_MATCH) /* optimization */
86 record_id = hash_rec[record_id].next;
92 hash_string(hash_ptr_t *hash_tab, uns hash, struct hash_record *hash_rec, /*byte *string,*/ uns head, uns *to_delete)
93 /* We reuse hash-records stored in a circular array. First, delete the old
94 * one and then add the new one in front of the link-list. */
96 struct hash_record *rec = hash_rec + head;
97 if (*to_delete) /* unlink the original record */
99 uns prev_id = rec->prev & ((1<<15)-1);
100 if (rec->prev & (1<<15)) /* was a head */
101 hash_tab[prev_id] = 0;
102 else /* thanks to the ordering, this was a tail */
103 hash_rec[prev_id].next = 0;
105 rec->next = hash_tab[hash];
106 rec->prev = (1<<15) | hash;
107 hash_rec[rec->next].prev = head;
108 hash_tab[hash] = head; /* add the new record before the link-list */
110 if (++head >= HASH_RECORDS) /* circular buffer, reuse old records, 0 is unused */
119 dump_unary_value(byte *out, uns l)
131 flush_copy_command(uns bof, byte *out, byte *start, uns len)
133 if (bof && len <= 238)
137 /* cannot happen when !!bof */
138 out[-2] |= len; /* invariant: lowest 2 bits 2 bytes back */
139 #ifdef CPU_ALLOW_UNALIGNED
140 * (u32*) out = * (u32*) start;
150 /* leave 2 least significant bits of out[-2] set to 0 */
156 out = dump_unary_value(out, len - 18);
159 memcpy(out, start, len);
164 lizard_compress(byte *in, uns in_len, byte *out)
165 /* Requires out being allocated for at least in_len * LIZARD_MAX_MULTIPLY +
166 * LIZARD_MAX_ADD. There must be at least LIZARD_NEEDS_CHARS characters
167 * allocated after in. Returns the actual compressed length. */
169 hash_ptr_t hash_tab[HASH_SIZE];
170 struct hash_record hash_rec[HASH_RECORDS];
171 byte *in_end = in + in_len;
172 byte *out_start = out;
173 byte *copy_start = in;
174 uns head = 1; /* 0 in unused */
175 uns to_delete = 0, bof = 1;
176 bzero(hash_tab, sizeof(hash_tab)); /* init the hash-table */
179 uns hash = hashf(in);
181 uns len = find_match(hash_tab[hash], hash_rec, in, in_end, &best, head);
183 #if 0 // TODO: now, our routine does not detect matches of length 2
184 if (len == 2 && (in - best->string - 1) < (1<<10))
190 head = hash_string(hash_tab, hash, hash_rec, head, &to_delete);
191 in++; /* add a literal */
195 if (in + len > in_end) /* crop EOF */
201 /* Record the match. */
202 uns copy_len = in - copy_start;
203 uns is_in_copy_mode = bof || copy_len >= 4;
204 uns shift = in - best - 1;
205 /* Try to use a 2-byte sequence. */
209 if (is_in_copy_mode || !copy_len) /* cannot use with 0 copied characters, because this bit pattern is reserved for copy mode */
216 if (shift < (1<<11) && len <= 8)
218 shift |= (len-3 + 2)<<11;
221 out = flush_copy_command(bof, out, copy_start, copy_len);
222 *out++ = (shift>>6) & ~3; /* shift fits into 10 bits */
223 *out++ = shift & 0xff;
225 else if (len == 3 && is_in_copy_mode)
227 if (shift < (1<<11) + (1<<10)) /* optimisation for length-3 matches after a copy command */
230 goto dump_2sequence; /* shift has 11 bits and contains also len */
232 else /* avoid 3-sequence compressed to 3 sequence if it can simply be appended */
235 /* We have to use a 3-byte sequence. */
239 out = flush_copy_command(bof, out, copy_start, copy_len);
243 *out++ = (1<<5) | (len-2);
247 out = dump_unary_value(out, len - 33);
250 else /* shift < (1<<15)-1 becase of HASH_RECORDS */
252 shift++; /* because shift==0 is reserved for EOF */
253 byte pos_bit = ((shift>>11) & (1<<3)) | (1<<4);
255 *out++ = pos_bit | (len-2);
259 out = dump_unary_value(out, len - 9);
262 *out++ = (shift>>6) & ~3; /* rest of shift fits into 14 bits */
263 *out++ = shift & 0xff;
265 /* Update the hash-table. */
266 head = hash_string(hash_tab, hash, hash_rec, head, &to_delete);
267 for (uns i=1; i<len; i++)
268 head = hash_string(hash_tab, hashf(in+i), hash_rec, head, &to_delete);
273 uns copy_len = in - copy_start;
275 out = flush_copy_command(bof, out, copy_start, copy_len);
276 *out++ = 17; /* add EOF */
279 return out - out_start;
283 read_unary_value(byte *in, uns *val)
294 lizard_decompress(byte *in, byte *out)
295 /* Requires out being allocated for the decompressed length must be known
296 * beforehand. It is desirable to lock the following memory page for
297 * read-only access to prevent buffer overflow. Returns the actual
298 * decompressed length or a negative number when an error has occured. */
300 byte *out_start = out;
301 uns expect_copy_command = 1;
303 if (*in > 17) /* short copy command at BOF */
306 goto perform_copy_command;
313 if (expect_copy_command == 1)
317 in = read_unary_value(in, &len);
322 goto perform_copy_command;
326 pos = ((c&0xc)<<6) | *in++;
327 if (expect_copy_command == 2)
342 in = read_unary_value(in, &len);
347 pos |= (*in++ & 0xfc)<<6;
358 in = read_unary_value(in, &len);
363 pos = (*in++ & 0xfc)<<6;
367 else /* high bits encode the length */
369 len = ((c&0xe0)>>5) -2 +3;
374 /* take from the sliding window */
377 memcpy(out, out-pos, len);
382 for (; len-- > 0; out++)
385 /* extract the copy-bits */
389 expect_copy_command = 0;
390 #ifdef CPU_ALLOW_UNALIGNED
391 * (u32*) out = * (u32*) in;
400 expect_copy_command = 1;
403 perform_copy_command:
404 expect_copy_command = 2;
405 memcpy(out, in, len);
410 return out - out_start;
415 Description of the LZO1X format :
416 =================================
418 The meaning of the commands depends on the current mode. It can be either
419 the compressed mode or the copy mode. In some cases, the compressed mode
420 also distinguishes whether we just left the copy mode or not.
425 Start in copy mode. If the first byte is 00010001, it means probably EOF (empty file),
426 so switch to the compressed mode. If it is bigger, subtract 17 and copy this number of
427 the following characters to the output and switch to the compressed mode.
428 If it is smaller, interpret it as a regular copy mode command.
433 Read the first byte of the sequence and determine the type of bit encoding by
434 looking at the most significant bits. The sequence is always at least 2 bytes
435 long. Decode sequences of these types until the EOF or END marker is read.
437 length L = length of the text taken from the sliding window
439 If L=0, then count the number Z of the following zero bytes and add Z*255
440 to the value of the following non-zero byte. This allows setting L
443 position p = relative position of the beginning of the text
445 Exception: 00010001 00000000 00000000 means EOF
447 copying C = length 1..3 of copied characters or END=0
449 C following characters will be copied from the compressed text to the
450 output. The number CC is always stored in the 2 least significant bits of
451 the second last byte of the sequence.
453 If END is read, the algorithm switches to the copy mode.
455 pattern length position
457 0000ppCC pppppppp 2 10 bits [default interpretation]
458 0000ppCC pppppppp 3 10 bits + 2048 [just after return from copy mode]
459 0001pLLL L* ppppppCC pppppppp 3..9 + extend 15 bits [pos 0 interpreted as EOF]
460 001LLLLL L* ppppppCC pppppppp 3..33 + extend 14 bits
461 LLLpppCC pppppppp 3..8 11 bits [LLL >= 010]
466 Read the first byte and, if the most significant bits are 0000, perform the
467 following command, otherwise switch to the compressed mode (and evaluate the
470 pattern length position
472 0000LLLL L* 4..18 + extend N/A
474 Copy L characters from the compressed text to the output. The overhead for
475 incompressible strings is only roughly 1/256 + epsilon.