2 * LiZaRd -- Fast compression method based on Lempel-Ziv 77
4 * (c) 2004, Robert Spalek <robert@ucw.cz>
6 * This software may be freely distributed and used according to the terms
7 * of the GNU Lesser General Public License.
9 * The file format is based on LZO1X and
10 * the compression method is based on zlib.
14 #include "lib/lizard.h"
18 typedef u16 hash_ptr_t;
20 /* the position in the original text is implicit; it is computed by locate_string() */
21 hash_ptr_t next; // 0=end
22 hash_ptr_t prev; // high bit: 0=record in array, 1=head in hash-table (i.e. value of hashf)
25 #define HASH_SIZE (1<<14) // size of hash-table
26 #define HASH_RECORDS (1<<15) // maximum number of records in hash-table, 0 is unused ==> subtract 1
27 #define CHAIN_MAX_TESTS 8 // crop longer collision chains
28 #define CHAIN_GOOD_MATCH 32 // we already have a good match => end
34 return string[0] ^ (string[1]<<3) ^ (string[2]<<6);
38 locate_string(byte *string, uns record_id, uns head)
39 /* The strings are recorded into the hash-table regularly, hence there is no
40 * need to store the pointer there. */
42 string += record_id - head;
43 if (record_id >= head)
44 string -= HASH_RECORDS-1;
49 find_match(uns record_id, struct hash_record *hash_rec, byte *string, byte *string_end, byte **best_ptr, uns head)
50 /* hash_tab[hash] == record_id points to the head of the double-linked
51 * link-list of strings with the same hash. The records are statically
52 * stored in circular array hash_rec (with the 1st entry unused), and the
53 * pointers are just 16-bit indices. The strings in every collision chain
54 * are ordered by age. */
56 uns count = CHAIN_MAX_TESTS;
58 while (record_id && count-- > 0)
60 byte *record_string = locate_string(string, record_id, head);
61 byte *cmp = record_string;
62 if (cmp[0] == string[0] && cmp[2] == string[2])
63 /* implies cmp[1] == string[1] */
65 if (cmp[3] == string[3])
68 if (*cmp++ == string[4] && *cmp++ == string[5]
69 && *cmp++ == string[6] && *cmp++ == string[7])
71 byte *str = string + 8;
72 while (str <= string_end && *cmp++ == *str++);
77 uns len = cmp - record_string - 1; /* cmp points 2 characters after the last match */
81 *best_ptr = record_string;
82 if (best_len >= CHAIN_GOOD_MATCH) /* optimization */
86 record_id = hash_rec[record_id].next;
92 hash_string(hash_ptr_t *hash_tab, uns hash, struct hash_record *hash_rec, /*byte *string,*/ uns head, uns *to_delete)
93 /* We reuse hash-records stored in a circular array. First, delete the old
94 * one and then add the new one in front of the link-list. */
96 struct hash_record *rec = hash_rec + head;
97 if (*to_delete) /* unlink the original record */
99 uns prev_id = rec->prev & ((1<<15)-1);
100 if (rec->prev & (1<<15)) /* was a head */
101 hash_tab[prev_id] = 0;
102 else /* thanks to the ordering, this was a tail */
103 hash_rec[prev_id].next = 0;
105 rec->next = hash_tab[hash];
106 rec->prev = (1<<15) | hash;
107 hash_rec[rec->next].prev = head;
108 hash_tab[hash] = head; /* add the new record before the link-list */
110 if (++head >= HASH_RECORDS) /* circular buffer, reuse old records, 0 is unused */
119 dump_unary_value(byte *out, uns l)
131 flush_copy_command(uns bof, byte *out, byte *start, uns len)
133 if (bof && len <= 238)
137 /* cannot happen when !!bof */
138 out[-2] |= len; /* invariant: lowest 2 bits 2 bytes back */
139 #ifdef CPU_ALLOW_UNALIGNED
140 * (u32*) out = * (u32*) start;
150 /* leave 2 least significant bits of out[-2] set to 0 */
156 out = dump_unary_value(out, len - 18);
159 memcpy(out, start, len);
164 lizard_compress(byte *in, uns in_len, byte *out)
165 /* Requires out being allocated for at least in_len * LIZARD_MAX_MULTIPLY +
166 * LIZARD_MAX_ADD. There must be at least LIZARD_NEEDS_CHARS characters
167 * allocated after in. Returns the actual compressed length. */
169 hash_ptr_t hash_tab[HASH_SIZE];
170 struct hash_record hash_rec[HASH_RECORDS];
171 byte *in_end = in + in_len;
172 byte *out_start = out;
173 byte *copy_start = in;
174 uns head = 1; /* 0 in unused */
175 uns to_delete = 0, bof = 1;
176 bzero(hash_tab, sizeof(hash_tab)); /* init the hash-table */
179 uns hash = hashf(in);
181 uns len = find_match(hash_tab[hash], hash_rec, in, in_end, &best, head);
183 #if 0 // TODO: now, our routine does not detect matches of length 2
184 if (len == 2 && (in - best->string) < ((1<<10) + 1))
190 head = hash_string(hash_tab, hash, hash_rec, head, &to_delete);
191 in++; /* add a literal */
195 if (in + len > in_end) /* crop EOF */
201 /* Record the match. */
202 uns copy_len = in - copy_start;
203 uns is_in_copy_mode = bof || copy_len >= 4;
204 uns shift = in - best - 1;
205 /* Try to use a 2-byte sequence. */
209 if (is_in_copy_mode || !copy_len) /* cannot use with 0 copied characters, because this bit pattern is reserved for copy mode */
215 if (len == 3 && is_in_copy_mode && shift >= (1<<11) && shift < (1<<11) + (1<<10))
217 /* optimisation for length-3 matches after a copy command */
221 out = flush_copy_command(bof, out, copy_start, copy_len);
222 *out++ = (shift>>6) & ~3; /* shift fits into 10 bits */
223 *out++ = shift & 0xff;
226 else if (shift < (1<<11) && len <= 8)
228 shift |= (len-3 + 2)<<11;
229 goto dump_2sequence; /* shift has 11 bits and contains also len */
231 /* We have to use a 3-byte sequence. */
232 else if (len == 3 && is_in_copy_mode)
233 /* avoid 3-sequence compressed to 3 sequence if it can simply be appended */
238 out = flush_copy_command(bof, out, copy_start, copy_len);
242 *out++ = (1<<5) | (len-2);
246 out = dump_unary_value(out, len - 33);
249 else /* shift < (1<<15)-1 becase of HASH_RECORDS */
251 shift++; /* because shift==0 is reserved for EOF */
252 byte pos_bit = ((shift>>11) & (1<<3)) | (1<<4);
254 *out++ = pos_bit | (len-2);
258 out = dump_unary_value(out, len - 9);
261 *out++ = (shift>>6) & ~3; /* rest of shift fits into 14 bits */
262 *out++ = shift & 0xff;
264 /* Update the hash-table. */
265 head = hash_string(hash_tab, hash, hash_rec, head, &to_delete);
266 for (uns i=1; i<len; i++)
267 head = hash_string(hash_tab, hashf(in+i), hash_rec, head, &to_delete);
272 uns copy_len = in - copy_start;
274 out = flush_copy_command(bof, out, copy_start, copy_len);
275 *out++ = 17; /* add EOF */
278 return out - out_start;
282 read_unary_value(byte *in, uns *val)
293 lizard_decompress(byte *in, byte *out)
294 /* Requires out being allocated for the decompressed length must be known
295 * beforehand. It is desirable to lock the following memory page for
296 * read-only access to prevent buffer overflow. Returns the actual
297 * decompressed length or a negative number when an error has occured. */
299 byte *out_start = out;
300 uns expect_copy_command = 1;
302 if (*in > 17) /* short copy command at BOF */
305 goto perform_copy_command;
312 if (expect_copy_command == 1)
316 in = read_unary_value(in, &len);
321 goto perform_copy_command;
325 pos = ((c&0xc)<<6) | *in++;
326 if (expect_copy_command == 2)
341 in = read_unary_value(in, &len);
346 pos |= (*in++ & 0xfc)<<6;
357 in = read_unary_value(in, &len);
362 pos = (*in++ & 0xfc)<<6;
366 else /* high bits encode the length */
368 len = ((c&0xe0)>>5) -2 +3;
373 /* take from the sliding window */
376 memcpy(out, out-pos, len);
381 for (; len-- > 0; out++)
384 /* extract the copy-bits */
388 expect_copy_command = 0;
389 #ifdef CPU_ALLOW_UNALIGNED
390 * (u32*) out = * (u32*) in;
399 expect_copy_command = 1;
402 perform_copy_command:
403 expect_copy_command = 2;
404 memcpy(out, in, len);
409 return out - out_start;
414 Description of the LZO1X format :
415 =================================
420 If the first byte is 00010001, it means probably EOF (empty file), so switch
421 to the compressed mode. If it is bigger, subtract 17 and copy this number of
422 the following characters to the ouput and switch to the compressed mode. If
423 it is smaller, go to the copy mode.
428 Read the first byte of the sequence and determine the type of bit-encoding by
429 looking at the most significant bits. The sequence is always at least 2 bytes
430 long. Decode sequences of these types until the EOF or END marker is read.
432 length L = length of the text taken from the sliding window
434 If L=0, then count the number Z of the following zero bytes and add Z*255
435 to the value of the following non-zero byte. This allows setting L
438 position p = relative position of the beginning of the text
440 Exception: 00010001 00000000 00000000 means EOF
442 copying C = length 1..3 of copied characters or END=0
444 C following characters will be copied from the compressed text to the
445 output. The number CC is always stored in the 2 least significant bits of
446 the second last byte of the sequence.
448 If END is read, the algorithm switches to the copy mode.
450 pattern length position
452 0000ppCC pppppppp 2 10 bits (*)
453 0001pLLL L* ppppppCC pppppppp 3..9 + extend 15 bits + EOF
454 001LLLLL L* ppppppCC pppppppp 3..33 + extend 14 bits
458 LLLpppCC pppppppp 3..8 11 bits
463 Read the first byte and, if the most significant bits are 0000, perform the
464 following command, otherwise switch to the compressed mode (and evaluate the
467 pattern length position
469 0000LLLL L* 4..18 + extend N/A
471 Copy L characters from the compressed text to the output. The overhead for
472 incompressible strings is only roughly 1/256 + epsilon.
474 (*) After reading one copy command, switch to the compressed mode with the
475 following "optimisation": the pattern 0000ppCC expands to length 3 instead of 2
476 and 2048 is added to the position (now it is slightly more than 11 bits),
477 because a sequence of length 2 would never be used.