2 * LiZaRd -- Fast compression method based on Lempel-Ziv 77
4 * (c) 2004, Robert Spalek <robert@ucw.cz>
6 * The file format is based on LZO1X and
7 * the compression method is based on zlib.
11 #include "lib/lizard.h"
19 typedef u16 hash_ptr_t;
21 /* the position in the original text is implicit; it is computed by locate_string() */
22 hash_ptr_t next; // 0=end
23 hash_ptr_t prev; // high bit: 0=record in array, 1=head in hash-table (i.e. value of hashf)
26 #define HASH_SIZE (1<<14) // size of hash-table
27 #define HASH_RECORDS (1<<15) // maximum number of records in hash-table, 0 is unused ==> subtract 1
28 #define CHAIN_MAX_TESTS 8 // crop longer collision chains
29 #define CHAIN_GOOD_MATCH 32 // we already have a good match => end
35 return string[0] ^ (string[1]<<3) ^ (string[2]<<6);
39 locate_string(byte *string, uns record_id, uns head)
40 /* The strings are recorded into the hash-table regularly, hence there is no
41 * need to store the pointer there. */
43 string += record_id - head;
44 if (record_id >= head)
45 string -= HASH_RECORDS-1;
50 find_match(uns record_id, struct hash_record *hash_rec, byte *string, byte *string_end, byte **best_ptr, uns head)
51 /* hash_tab[hash] == record_id points to the head of the double-linked
52 * link-list of strings with the same hash. The records are statically
53 * stored in circular array hash_rec (with the 1st entry unused), and the
54 * pointers are just 16-bit indices. The strings in every collision chain
55 * are ordered by age. */
57 uns count = CHAIN_MAX_TESTS;
59 while (record_id && count-- > 0)
61 byte *record_string = locate_string(string, record_id, head);
62 byte *cmp = record_string;
63 if (cmp[0] == string[0] && cmp[2] == string[2])
64 /* implies cmp[1] == string[1] */
66 if (cmp[3] == string[3])
69 if (*cmp++ == string[4] && *cmp++ == string[5]
70 && *cmp++ == string[6] && *cmp++ == string[7])
72 byte *str = string + 8;
73 while (str <= string_end && *cmp++ == *str++);
78 uns len = cmp - record_string - 1; /* cmp points 2 characters after the last match */
82 *best_ptr = record_string;
83 if (best_len >= CHAIN_GOOD_MATCH) /* optimization */
87 record_id = hash_rec[record_id].next;
93 hash_string(hash_ptr_t *hash_tab, uns hash, struct hash_record *hash_rec, /*byte *string,*/ uns head, uns *to_delete)
94 /* We reuse hash-records stored in a circular array. First, delete the old
95 * one and then add the new one in front of the link-list. */
97 struct hash_record *rec = hash_rec + head;
98 if (*to_delete) /* unlink the original record */
100 uns prev_id = rec->prev & ((1<<15)-1);
101 if (rec->prev & (1<<15)) /* was a head */
102 hash_tab[prev_id] = 0;
103 else /* thanks to the ordering, this was a tail */
104 hash_rec[prev_id].next = 0;
106 rec->next = hash_tab[hash];
107 rec->prev = (1<<15) | hash;
108 hash_rec[rec->next].prev = head;
109 hash_tab[hash] = head; /* add the new record before the link-list */
111 if (++head >= HASH_RECORDS) /* circular buffer, reuse old records, 0 is unused */
120 dump_unary_value(byte *out, uns l)
132 flush_copy_command(uns bof, byte *out, byte *start, uns len)
134 if (bof && len <= 238)
137 /* cannot happen when !!bof */
138 out[-2] |= len; /* invariant: lowest 2 bits 2 bytes back */
141 /* leave 2 least significant bits of out[-2] set to 0 */
147 out = dump_unary_value(out, len - 18);
156 lizard_compress(byte *in, uns in_len, byte *out)
157 /* Requires out being allocated for at least in_len * LIZARD_MAX_MULTIPLY +
158 * LIZARD_MAX_ADD. There must be at least LIZARD_NEEDS_CHARS characters
159 * allocated after in. Returns the actual compressed length. */
161 hash_ptr_t hash_tab[HASH_SIZE];
162 struct hash_record hash_rec[HASH_RECORDS];
164 byte *in_end = in + in_len;
165 byte *out_start = out;
166 byte *copy_start = in;
167 uns head = 1; /* 0 in unused */
169 bzero(hash_tab, sizeof(hash_tab)); /* init the hash-table */
172 uns hash = hashf(in);
174 uns len = find_match(hash_tab[hash], hash_rec, in, in_end, &best, head);
176 #if 0 // TODO: now, our routine does not detect matches of length 2
177 if (len == 2 && (in - best->string) < ((1<<10) + 1))
183 head = hash_string(hash_tab, hash, hash_rec, head, &to_delete);
184 in++; /* add a literal */
188 if (in + len > in_end) /* crop EOF */
194 /* Record the match. */
195 uns copy_len = in - copy_start;
196 uns is_in_copy_mode = copy_start==in_start || copy_len >= 4;
197 uns shift = in - best - 1;
198 /* Try to use a 2-byte sequence. */
202 if (is_in_copy_mode || !copy_len) /* cannot use with 0 copied characters, because this bit pattern is reserved for copy mode */
208 if (len == 3 && is_in_copy_mode && shift >= (1<<11) && shift < (1<<11) + (1<<10))
210 /* optimisation for length-3 matches after a copy command */
214 out = flush_copy_command(copy_start==in_start, out, copy_start, copy_len);
215 *out++ = (shift>>6) & ~3; /* shift fits into 10 bits */
216 *out++ = shift & 0xff;
219 else if (shift < (1<<11) && len <= 8)
221 shift |= (len-3 + 2)<<11;
222 goto dump_2sequence; /* shift has 11 bits and contains also len */
224 /* We have to use a 3-byte sequence. */
225 else if (len == 3 && is_in_copy_mode)
226 /* avoid 3-sequence compressed to 3 sequence if it can simply be appended */
231 out = flush_copy_command(copy_start==in_start, out, copy_start, copy_len);
235 *out++ = (1<<5) | (len-2);
239 out = dump_unary_value(out, len - 33);
242 else /* shift < (1<<15)-1 becase of HASH_RECORDS */
244 shift++; /* because shift==0 is reserved for EOF */
245 byte pos_bit = (shift>>11) & (1<<3);
247 *out++ = (1<<4) | pos_bit | (len-2);
250 *out++ = (1<<4) | pos_bit;
251 out = dump_unary_value(out, len - 9);
254 *out++ = (shift>>6) & ~3; /* rest of shift fits into 14 bits */
255 *out++ = shift & 0xff;
257 /* Update the hash-table. */
258 head = hash_string(hash_tab, hash, hash_rec, head, &to_delete);
259 for (uns i=1; i<len; i++)
260 head = hash_string(hash_tab, hashf(in+i), hash_rec, head, &to_delete);
264 if (in > in_end) /* crop at BOF */
266 uns copy_len = in - copy_start;
268 out = flush_copy_command(copy_start==in_start, out, copy_start, copy_len);
269 *out++ = 17; /* add EOF */
272 return out - out_start;
276 read_unary_value(byte *in, uns *val)
287 lizard_decompress(byte *in, byte *out)
288 /* Requires out being allocated for the decompressed length must be known
289 * beforehand. It is desirable to lock the following memory page for
290 * read-only access to prevent buffer overflow. Returns the actual
291 * decompressed length or a negative number when an error has occured. */
293 byte *out_start = out;
294 uns expect_copy_command = 1;
296 if (*in > 17) /* short copy command at BOF */
299 expect_copy_command = 2;
300 goto perform_copy_command;
307 if (expect_copy_command == 1)
311 in = read_unary_value(in, &len);
316 expect_copy_command = 2;
317 goto perform_copy_command;
321 pos = ((c&0xc)<<6) | *in++;
322 if (expect_copy_command == 2)
337 in = read_unary_value(in, &len);
342 pos |= (*in++ & 0xfc)<<6;
353 in = read_unary_value(in, &len);
358 pos = (*in++ & 0xfc)<<6;
362 else /* high bits encode the length */
364 len = ((c&0xe0)>>5) -2 +3;
369 /* take from the sliding window */
372 memcpy(out, out-pos, len);
377 for (; len-- > 0; out++)
380 /* extract the copy-bits */
384 expect_copy_command = 0;
389 expect_copy_command = 1;
392 perform_copy_command:
393 memcpy(out, in, len);
398 return out - out_start;
401 struct lizard_buffer *
402 lizard_alloc(uns max_len)
404 int fd = open("/dev/zero", O_RDWR);
406 die("open(/dev/zero): %m");
407 struct lizard_buffer *buf = xmalloc(sizeof(struct lizard_buffer));
408 buf->len = (max_len + 2*PAGE_SIZE - 1) / PAGE_SIZE * PAGE_SIZE;
409 buf->ptr = mmap(NULL, buf->len, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
410 if (buf->ptr == MAP_FAILED)
411 die("mmap(/dev/zero): %m");
416 lizard_free(struct lizard_buffer *buf)
418 munmap(buf->ptr, buf->len);
423 lizard_decompress_safe(byte *in, struct lizard_buffer *buf, uns expected_length)
424 /* Decompresses into buf->ptr and returns the length of the uncompressed
425 * file. Negative return values signalise errors. If something goes wrong
426 * and buffer would overflow, SIGSEGV is raised. */
428 uns lock_offset = (expected_length + PAGE_SIZE - 1) / PAGE_SIZE * PAGE_SIZE;
429 if (lock_offset + PAGE_SIZE > buf->len)
431 mprotect(buf->ptr, lock_offset, PROT_READ | PROT_WRITE);
432 mprotect(buf->ptr + lock_offset, PAGE_SIZE, PROT_READ);
433 return lizard_decompress(in, buf->ptr);
438 Description of the LZO1X format :
439 =================================
444 If the first byte is 00010001, it means probably EOF (empty file), so switch
445 to the compressed mode. If it is bigger, subtract 17 and copy this number of
446 the following characters to the ouput and switch to the compressed mode. If
447 it is smaller, go to the copy mode.
452 Read the first byte of the sequence and determine the type of bit-encoding by
453 looking at the most significant bits. The sequence is always at least 2 bytes
454 long. Decode sequences of these types until the EOF or END marker is read.
456 length L = length of the text taken from the sliding window
458 If L=0, then count the number Z of the following zero bytes and add Z*255
459 to the value of the following non-zero byte. This allows setting L
462 position p = relative position of the beginning of the text
464 Exception: 00010001 00000000 00000000 means EOF
466 copying C = length 1..3 of copied characters or END=0
468 C following characters will be copied from the compressed text to the
469 output. The number CC is always stored in the 2 least significant bits of
470 the second last byte of the sequence.
472 If END is read, the algorithm switches to the copy mode.
474 pattern length position
476 0000ppCC pppppppp 2 10 bits (*)
477 0001pLLL L* ppppppCC pppppppp 3..9 + extend 15 bits + EOF
478 001LLLLL L* ppppppCC pppppppp 3..33 + extend 14 bits
482 LLLpppCC pppppppp 3..8 11 bits
487 Read the first byte and, if the most significant bits are 0000, perform the
488 following command, otherwise switch to the compressed mode (and evaluate the
491 pattern length position
493 0000LLLL L* 4..18 + extend N/A
495 Copy L characters from the compressed text to the output. The overhead for
496 incompressible strings is only roughly 1/256 + epsilon.
498 (*) After reading one copy command, switch to the compressed mode with the
499 following "optimisation": the pattern 0000ppCC expands to length 3 instead of 2
500 and 2048 is added to the position (now it is slightly more than 11 bits),
501 because a sequence of length 2 would never be used.