2 * LiZzaRd -- Fast compression method based on Lempel-Ziv 77
4 * (c) 2004, Robert Spalek <robert@ucw.cz>
6 * The file format is based on LZO1X and
7 * the compression method is based on zlib.
11 #include "lib/lizzard.h"
15 typedef u16 hash_ptr_t;
17 byte *string; // in original text
18 hash_ptr_t next4, next3; // 0=end
21 #define HASH_SIZE ((1<<14) + 27) // prime, size of hash-table
22 #define HASH_RECORDS ((1<<15) - 1) // maximum number of records in hash-table
23 #define CHAIN_MAX_TESTS 16 // crop longer collision chains
24 #define CHAIN_GOOD_MATCH 16 // we already have a good match => end
30 #ifdef CPU_ALLOW_UNALIGNED
31 hash = * (uns *) string;
32 #elif CPU_LITTLE_ENDIAN
33 hash = string[0] | (string[1]<<8) | (string[2]<<16) | (string[3]<<24);
35 hash = string[3] | (string[2]<<8) | (string[1]<<16) | (string[0]<<24);
39 #ifdef CPU_LITTLE_ENDIAN
40 #define MASK_4TH_CHAR (~(0xff << 24))
42 #define MASK_4TH_CHAR (~0xff)
46 find_match(uns list_id, hash_ptr_t *hash_tab, struct hash_record *hash_rec, uns hash, byte *string, byte *string_end, byte **best_ptr)
47 /* We heavily rely on gcc optimisations (e.g. list_id is a constant hence
48 * some branches of the code will be pruned).
50 * hash_tab[hash] points to the 1st record of the link-list of strings with the
51 * same hash. The records are statically stored in circular array hash_rec
52 * (with 1st entry unused), and the pointers are just 16-bit indices. In one
53 * array, we store 2 independent link-lists: with 3 and 4 hashed characters.
55 * Updates are performed more often than searches, so they are lazy: we only
56 * add new records in front of link-lists and never delete them. By reusing
57 * an already allocated space in the circular array we necessarily modify
58 * records that are already pointed at in the middle of some link-list. To
59 * prevent confusion, this function checks 2 invariants: the strings in a
60 * collision chain are ordered, and the hash of the 1st record in the chain
63 if (list_id == 3) /* Find the 1st record in the collision chain. */
64 hash = hash & MASK_4TH_CHAR;
68 struct hash_record *record = hash_rec + hash_tab[hash];
69 uns hash2 = hashf(record->string);
71 hash2 = hash2 & MASK_4TH_CHAR;
73 if (hash2 != hash) /* Verify the hash-value. */
75 hash_tab[hash] = 0; /* prune */
79 uns count = CHAIN_MAX_TESTS;
81 byte *last_string = string_end;
82 hash_ptr_t *last_ptr = NULL;
85 byte *cmp = record->string;
86 if (cmp >= last_string) /* collision chain is ordered by age */
88 *last_ptr = 0; /* prune */
91 if (cmp[0] == string[0] && cmp[2] == string[2])
93 if (cmp[3] == string[3])
95 /* implies cmp[1] == string[1], becase we hash into 14+eps bits */
97 if (*cmp++ == string[4] && *cmp++ == string[5]
98 && *cmp++ == string[6] && *cmp++ == string[7])
100 byte *str = string + 8;
101 while (str <= string_end && *cmp++ == *string++);
104 else if (list_id == 3)
105 /* hashing 3 characters implies cmp[1] == string[1]
106 hashing 4 characters implies cmp[1] != string[1] */
110 uns len = cmp - record->string - 1; /* cmp points 2 characters after the last match */
114 *best_ptr = record->string;
115 if (best_len >= CHAIN_GOOD_MATCH) /* optimisation */
124 last_ptr = &record->next3;
125 record = hash_rec + record->next3;
131 last_ptr = &record->next4;
132 record = hash_rec + record->next4;
139 hash_string(byte *string, uns hash, hash_ptr_t *tab3, hash_ptr_t *tab4, struct hash_record *hash_rec, uns head)
141 uns h3 = (hash & MASK_4TH_CHAR) % HASH_SIZE;
142 uns h4 = hash % HASH_SIZE;
143 struct hash_record *rec = hash_rec + head;
144 rec->string = string;
145 rec->next3 = tab3[h3];
146 rec->next4 = tab4[h4];
147 tab3[h3] = head; /* add the new record before the link-list */
149 head++; /* circular buffer, reuse old records */
150 if (head >= HASH_RECORDS)
156 dump_unary_value(byte *out, uns l)
168 flush_copy_command(uns bof, byte *out, byte *start, uns len)
170 if (bof && len <= 238)
173 /* cannot happen when !!bof */
174 out[-2] |= len; /* invariant: lowest 2 bits 2 bytes back */
177 /* leave 2 least significant bits of out[-2] set to 0 */
183 out = dump_unary_value(out, len - 18);
192 lizzard_compress(byte *in, uns in_len, byte *out)
193 /* Requires out being allocated for at least in_len * LIZZARD_MAX_PROLONG_FACTOR.
194 * There must be at least 8 characters allocated after in.
195 * Returns the actual compressed length. */
197 hash_ptr_t hash_tab3[HASH_SIZE], hash_tab4[HASH_SIZE];
198 struct hash_record hash_rec[HASH_RECORDS];
200 byte *in_end = in + in_len;
201 byte *out_start = out;
202 byte *copy_start = in;
204 bzero(hash_tab3, sizeof(hash_tab3)); /* init the hash-table */
205 bzero(hash_tab4, sizeof(hash_tab4));
208 uns hash = hashf(in);
210 uns len = find_match(4, hash_tab4, hash_rec, hash, in, in_end, &best);
213 len = find_match(3, hash_tab3, hash_rec, hash, in, in_end, &best);
218 #if 0 // TODO: now, our routine does not detect matches of length 2
219 if (len == 2 && (in - best->string) < ((1<<10) + 1))
223 head = hash_string(in, hash, hash_tab3, hash_tab4, hash_rec, head);
224 in++; /* add a literal */
228 if (in + len > in_end) /* crop EOF */
234 /* Record the match. */
235 uns copy_len = in - copy_start;
236 uns is_in_copy_mode = copy_start==in_start || copy_len >= 4;
237 uns shift = in - best - 1;
238 /* Try to use a 2-byte sequence. */
241 if (is_in_copy_mode || !copy_len) /* cannot use with 0 copied characters, because this bit pattern is reserved for copy mode */
246 out = flush_copy_command(copy_start==in_start, out, copy_start, copy_len);
247 *out++ = (shift>>6) & ~3; /* shift fits into 10 bits */
248 *out++ = shift & 0xff;
250 else if (len == 3 && is_in_copy_mode && shift >= (1<<11) && shift < (1<<11 + 1<<10))
252 /* optimisation for length-3 matches after a copy command */
257 else if (shift < (1<<11) && len <= 8)
259 shift |= (len-3 + 2)<<11;
260 goto dump_2sequence; /* shift has 11 bits and contains also len */
262 /* We have to use a 3-byte sequence. */
263 else if (len == 3 && is_in_copy_mode)
264 /* avoid 3-sequence compressed to 3 sequence if it can simply be appended */
269 out = flush_copy_command(copy_start==in_start, out, copy_start, copy_len);
273 *out++ = (1<<5) | (len-2);
277 out = dump_unary_value(out, len - 33);
280 else /* shift < (1<<15)-1 becase of HASH_RECORDS */
282 shift++; /* because shift==0 is reserved for EOF */
283 byte pos_bit = (shift>>11) & (1<<3);
285 *out++ = (1<<4) | pos_bit | (len-2);
288 *out++ = (1<<4) | pos_bit;
289 out = dump_unary_value(out, len - 9);
292 *out++ = (shift>>6) & ~3; /* rest of shift fits into 14 bits */
293 *out++ = shift & 0xff;
295 /* Update the hash-table. */
296 head = hash_string(in, hash, hash_tab3, hash_tab4, hash_rec, head);
297 for (uns i=1; i<len; i++)
298 head = hash_string(in+i, hashf(in+i), hash_tab3, hash_tab4, hash_rec, head);
302 if (in > in_end) /* crop at BOF */
304 uns copy_len = in - copy_start;
306 out = flush_copy_command(copy_start==in_start, out, copy_start, copy_len);
307 *out++ = 17; /* add EOF */
310 return out - out_start;
314 read_unary_value(byte *in, uns *val)
325 lizzard_decompress(byte *in, byte *out)
326 /* Requires out being allocated for the decompressed length must be known
327 * beforehand. It is desirable to lock the following memory page for
328 * read-only access to prevent buffer overflow. Returns the actual
329 * decompressed length or a negative number when an error has occured. */
331 byte *out_start = out;
332 uns expect_copy_command = 1;
334 if (*in > 17) /* short copy command at BOF */
337 expect_copy_command = 2;
338 goto perform_copy_command;
345 if (expect_copy_command == 1)
348 in = read_unary_value(in, &len);
351 expect_copy_command = 2;
352 goto perform_copy_command;
356 pos = (c&0xc)<<6 | *in++;
357 if (expect_copy_command == 2)
371 in = read_unary_value(in, &len);
376 pos |= (*in++ & 0xfc)<<6;
380 else /* shift it back */
388 in = read_unary_value(in, &len);
393 pos = (*in++ & 0xfc)<<6;
396 else /* high bits encode the length */
398 len = (c&0xe)>>5 -2 +3;
402 /* take from the sliding window */
403 memcpy(out, in-1-pos, len); //FIXME: overlapping
405 /* extract the copy-bits */
408 expect_copy_command = 0; /* and fall-thru */
411 expect_copy_command = 1;
415 perform_copy_command:
416 memcpy(out, in, len);
421 return out - out_start;
426 Description of the LZO1X format :
427 =================================
432 If the first byte is 00010001, it means probably EOF (empty file), so switch
433 to the compressed mode. If it is bigger, subtract 17 and copy this number of
434 the following characters to the ouput and switch to the compressed mode. If
435 it is smaller, go to the copy mode.
440 Read the first byte of the sequence and determine the type of bit-encoding by
441 looking at the most significant bits. The sequence is always at least 2 bytes
442 long. Decode sequences of these types until the EOF or END marker is read.
444 length L = length of the text taken from the sliding window
446 If L=0, then count the number Z of the following zero bytes and add Z*255
447 to the value of the following non-zero byte. This allows setting L
450 position p = relative position of the beginning of the text
452 Exception: 00010001 00000000 00000000 means EOF
454 copying C = length 1..3 of copied characters or END=0
456 C following characters will be copied from the compressed text to the
457 output. The number CC is always stored in the 2 least significant bits of
458 the second last byte of the sequence.
460 If END is read, the algorithm switches to the copy mode.
462 pattern length position
464 0000ppCC pppppppp 2 10 bits (*)
465 0001pLLL L* ppppppCC pppppppp 3..9 + extend 15 bits + EOF
466 001LLLLL L* ppppppCC pppppppp 3..33 + extend 14 bits
470 LLLpppCC pppppppp 3..8 11 bits
475 Read the first byte and, if the most significant bits are 0000, perform the
476 following command, otherwise switch to the compressed mode (and evaluate the
479 pattern length position
481 0000LLLL L* 4..18 + extend N/A
483 Copy L characters from the compressed text to the output. The overhead for
484 incompressible strings is only roughly 1/256 + epsilon.
486 (*) After reading one copy command, switch to the compressed mode with the
487 following "optimisation": the pattern 0000ppCC expands to length 3 instead of 2
488 and 2048 is added to the position (now it is slightly more than 11 bits),
489 because a sequence of length 2 would never be used.