2 * LiZzaRd -- Fast compression method based on Lempel-Ziv 77
4 * (c) 2004, Robert Spalek <robert@ucw.cz>
6 * The file format is based on LZO1X and
7 * the compression method is based on zlib.
11 #include "lib/lizzard.h"
15 typedef u16 hash_ptr_t;
17 byte *string; // in original text
18 hash_ptr_t next4, next3; // 0=end
21 #define HASH_SIZE ((1<<14) + 27) // prime, size of hash-table
22 #define HASH_RECORDS (1<<15) // maximum number of records in hash-table, 0 is unused ==> subtract 1
23 #define CHAIN_MAX_TESTS 16 // crop longer collision chains
24 #define CHAIN_GOOD_MATCH 16 // we already have a good match => end
30 #ifdef CPU_ALLOW_UNALIGNED
31 hash = * (uns *) string;
32 #elif CPU_LITTLE_ENDIAN
33 hash = string[0] | (string[1]<<8) | (string[2]<<16) | (string[3]<<24);
35 hash = string[3] | (string[2]<<8) | (string[1]<<16) | (string[0]<<24);
39 #ifdef CPU_LITTLE_ENDIAN
40 #define MASK_4TH_CHAR (~(0xff << 24))
42 #define MASK_4TH_CHAR (~0xff)
46 find_match(uns list_id, hash_ptr_t *hash_tab, struct hash_record *hash_rec, uns hash, byte *string, byte *string_end, byte **best_ptr)
48 * We heavily rely on gcc optimisations (e.g. list_id is a constant hence
49 * some branches of the code will be pruned).
51 * hash_tab[hash] points to the head of the link-list of strings with
52 * the same hash. The records are statically stored in circular array
53 * hash_rec (with 1st entry unused), and the pointers are just 16-bit
54 * indices. In one array, we store 2 independent link-lists: with 3 and 4
57 * The data structure fulfills 2 invariants: the strings in every collision
58 * chain are ordered by age, and the head of every link-list is placed
59 * correctly. The records in the middle of the link-lists may be placed
60 * wrong, and this is handled here by checking the string order.
63 if (list_id == 3) /* Find the 1st record in the collision chain. It is correct. */
64 hash &= MASK_4TH_CHAR;
68 struct hash_record *record = hash_rec + hash_tab[hash];
70 uns count = CHAIN_MAX_TESTS;
72 byte *last_cmp = string_end;
73 hash_ptr_t *last_ptr = NULL;
76 byte *cmp = record->string;
77 if (cmp >= last_cmp) /* collision chain is ordered by age */
79 *last_ptr = 0; /* prune */
83 if (cmp[0] == string[0] && cmp[2] == string[2])
85 if (cmp[3] == string[3])
87 /* implies cmp[1] == string[1], becase we hash into 14+eps bits */
89 if (*cmp++ == string[4] && *cmp++ == string[5]
90 && *cmp++ == string[6] && *cmp++ == string[7])
92 byte *str = string + 8;
93 while (str <= string_end && *cmp++ == *str++);
96 else if (list_id == 3)
97 /* hashing 3 characters implies cmp[1] == string[1]
98 hashing 4 characters implies cmp[1] != string[1] */
102 uns len = cmp - record->string - 1; /* cmp points 2 characters after the last match */
106 *best_ptr = record->string;
107 if (best_len >= CHAIN_GOOD_MATCH) /* optimisation */
112 if (list_id == 3) /* move to the next collision */
113 last_ptr = &record->next3;
115 last_ptr = &record->next4;
116 uns next = *last_ptr;
119 record = hash_rec + next;
125 hash_string(byte *string, uns hash, hash_ptr_t *tab3, hash_ptr_t *tab4, struct hash_record *hash_rec, uns head)
127 * Updates are performed more often than searches, so they are a bit lazy: we
128 * add new records in front of the link-lists, but only delete them when they
129 * are the _head_ of some link-list. This is done in constant time.
131 * By reusing an already allocated space in the circular array we necessarily
132 * alter records that are referred in the _middle_ of some link-list.
133 * However the newly inserted string is younger than anything else in the
134 * table and it can be checked easily.
137 struct hash_record *rec = hash_rec + head;
138 if (rec->string) /* unlink the original record if it was the _head_ of some link-list */
140 uns h4 = hashf(rec->string);
141 uns h3 = h4 & MASK_4TH_CHAR;
144 if (tab3[h3] == head)
146 if (tab4[h4] == head)
150 uns h3 = (hash & MASK_4TH_CHAR) % HASH_SIZE;
151 uns h4 = hash % HASH_SIZE;
152 rec->string = string; /* is younger than any string inserted before */
153 rec->next3 = tab3[h3];
154 rec->next4 = tab4[h4];
155 tab3[h3] = head; /* add the new record before the link-list */
157 head++; /* circular buffer, reuse old records, 0 is unused */
158 if (head >= HASH_RECORDS)
164 dump_unary_value(byte *out, uns l)
176 flush_copy_command(uns bof, byte *out, byte *start, uns len)
178 if (bof && len <= 238)
181 /* cannot happen when !!bof */
182 out[-2] |= len; /* invariant: lowest 2 bits 2 bytes back */
185 /* leave 2 least significant bits of out[-2] set to 0 */
191 out = dump_unary_value(out, len - 18);
200 lizzard_compress(byte *in, uns in_len, byte *out)
201 /* Requires out being allocated for at least in_len * LIZZARD_MAX_MULTIPLY +
202 * LIZZARD_MAX_ADD. There must be at least LIZZARD_NEEDS_CHARS characters
203 * allocated after in. Returns the actual compressed length. */
205 hash_ptr_t hash_tab3[HASH_SIZE], hash_tab4[HASH_SIZE];
206 struct hash_record hash_rec[HASH_RECORDS];
208 byte *in_end = in + in_len;
209 byte *out_start = out;
210 byte *copy_start = in;
211 uns head = 1; /* 0 in unused */
212 bzero(hash_tab3, sizeof(hash_tab3)); /* init the hash-table */
213 bzero(hash_tab4, sizeof(hash_tab4));
216 uns hash = hashf(in);
218 uns len = find_match(4, hash_tab4, hash_rec, hash, in, in_end, &best);
221 len = find_match(3, hash_tab3, hash_rec, hash, in, in_end, &best);
226 #if 0 // TODO: now, our routine does not detect matches of length 2
227 if (len == 2 && (in - best->string) < ((1<<10) + 1))
231 head = hash_string(in, hash, hash_tab3, hash_tab4, hash_rec, head);
232 in++; /* add a literal */
236 if (in + len > in_end) /* crop EOF */
242 /* Record the match. */
243 uns copy_len = in - copy_start;
244 uns is_in_copy_mode = copy_start==in_start || copy_len >= 4;
245 uns shift = in - best - 1;
246 /* Try to use a 2-byte sequence. */
249 if (is_in_copy_mode || !copy_len) /* cannot use with 0 copied characters, because this bit pattern is reserved for copy mode */
254 out = flush_copy_command(copy_start==in_start, out, copy_start, copy_len);
255 *out++ = (shift>>6) & ~3; /* shift fits into 10 bits */
256 *out++ = shift & 0xff;
258 else if (len == 3 && is_in_copy_mode && shift >= (1<<11) && shift < (1<<11) + (1<<10))
260 /* optimisation for length-3 matches after a copy command */
265 else if (shift < (1<<11) && len <= 8)
267 shift |= (len-3 + 2)<<11;
268 goto dump_2sequence; /* shift has 11 bits and contains also len */
270 /* We have to use a 3-byte sequence. */
271 else if (len == 3 && is_in_copy_mode)
272 /* avoid 3-sequence compressed to 3 sequence if it can simply be appended */
277 out = flush_copy_command(copy_start==in_start, out, copy_start, copy_len);
281 *out++ = (1<<5) | (len-2);
285 out = dump_unary_value(out, len - 33);
288 else /* shift < (1<<15)-1 becase of HASH_RECORDS */
290 shift++; /* because shift==0 is reserved for EOF */
291 byte pos_bit = (shift>>11) & (1<<3);
293 *out++ = (1<<4) | pos_bit | (len-2);
296 *out++ = (1<<4) | pos_bit;
297 out = dump_unary_value(out, len - 9);
300 *out++ = (shift>>6) & ~3; /* rest of shift fits into 14 bits */
301 *out++ = shift & 0xff;
303 /* Update the hash-table. */
304 head = hash_string(in, hash, hash_tab3, hash_tab4, hash_rec, head);
305 for (uns i=1; i<len; i++)
306 head = hash_string(in+i, hashf(in+i), hash_tab3, hash_tab4, hash_rec, head);
310 if (in > in_end) /* crop at BOF */
312 uns copy_len = in - copy_start;
314 out = flush_copy_command(copy_start==in_start, out, copy_start, copy_len);
315 *out++ = 17; /* add EOF */
318 return out - out_start;
322 read_unary_value(byte *in, uns *val)
333 lizzard_decompress(byte *in, byte *out)
334 /* Requires out being allocated for the decompressed length must be known
335 * beforehand. It is desirable to lock the following memory page for
336 * read-only access to prevent buffer overflow. Returns the actual
337 * decompressed length or a negative number when an error has occured. */
339 byte *out_start = out;
340 uns expect_copy_command = 1;
342 if (*in > 17) /* short copy command at BOF */
345 expect_copy_command = 2;
346 goto perform_copy_command;
353 if (expect_copy_command == 1)
357 in = read_unary_value(in, &len);
362 expect_copy_command = 2;
363 goto perform_copy_command;
367 pos = ((c&0xc)<<6) | *in++;
368 if (expect_copy_command == 2)
383 in = read_unary_value(in, &len);
388 pos |= (*in++ & 0xfc)<<6;
399 in = read_unary_value(in, &len);
404 pos = (*in++ & 0xfc)<<6;
408 else /* high bits encode the length */
410 len = ((c&0xe0)>>5) -2 +3;
415 /* take from the sliding window */
418 memcpy(out, out-pos, len);
423 for (; len-- > 0; out++)
426 /* extract the copy-bits */
430 expect_copy_command = 0;
435 expect_copy_command = 1;
438 perform_copy_command:
439 memcpy(out, in, len);
444 return out - out_start;
449 Description of the LZO1X format :
450 =================================
455 If the first byte is 00010001, it means probably EOF (empty file), so switch
456 to the compressed mode. If it is bigger, subtract 17 and copy this number of
457 the following characters to the ouput and switch to the compressed mode. If
458 it is smaller, go to the copy mode.
463 Read the first byte of the sequence and determine the type of bit-encoding by
464 looking at the most significant bits. The sequence is always at least 2 bytes
465 long. Decode sequences of these types until the EOF or END marker is read.
467 length L = length of the text taken from the sliding window
469 If L=0, then count the number Z of the following zero bytes and add Z*255
470 to the value of the following non-zero byte. This allows setting L
473 position p = relative position of the beginning of the text
475 Exception: 00010001 00000000 00000000 means EOF
477 copying C = length 1..3 of copied characters or END=0
479 C following characters will be copied from the compressed text to the
480 output. The number CC is always stored in the 2 least significant bits of
481 the second last byte of the sequence.
483 If END is read, the algorithm switches to the copy mode.
485 pattern length position
487 0000ppCC pppppppp 2 10 bits (*)
488 0001pLLL L* ppppppCC pppppppp 3..9 + extend 15 bits + EOF
489 001LLLLL L* ppppppCC pppppppp 3..33 + extend 14 bits
493 LLLpppCC pppppppp 3..8 11 bits
498 Read the first byte and, if the most significant bits are 0000, perform the
499 following command, otherwise switch to the compressed mode (and evaluate the
502 pattern length position
504 0000LLLL L* 4..18 + extend N/A
506 Copy L characters from the compressed text to the output. The overhead for
507 incompressible strings is only roughly 1/256 + epsilon.
509 (*) After reading one copy command, switch to the compressed mode with the
510 following "optimisation": the pattern 0000ppCC expands to length 3 instead of 2
511 and 2048 is added to the position (now it is slightly more than 11 bits),
512 because a sequence of length 2 would never be used.