2 * LiZaRd -- Fast compression method based on Lempel-Ziv 77
4 * (c) 2004, Robert Spalek <robert@ucw.cz>
6 * The file format is based on LZO1X and
7 * the compression method is based on zlib.
11 #include "lib/lizard.h"
20 typedef u16 hash_ptr_t;
22 /* the position in the original text is implicit; it is computed by locate_string() */
23 hash_ptr_t next; // 0=end
24 hash_ptr_t prev; // high bit: 0=record in array, 1=head in hash-table (i.e. value of hashf)
27 #define HASH_SIZE (1<<14) // size of hash-table
28 #define HASH_RECORDS (1<<15) // maximum number of records in hash-table, 0 is unused ==> subtract 1
29 #define CHAIN_MAX_TESTS 8 // crop longer collision chains
30 #define CHAIN_GOOD_MATCH 32 // we already have a good match => end
36 return string[0] ^ (string[1]<<3) ^ (string[2]<<6);
40 locate_string(byte *string, uns record_id, uns head)
41 /* The strings are recorded into the hash-table regularly, hence there is no
42 * need to store the pointer there. */
44 string += record_id - head;
45 if (record_id >= head)
46 string -= HASH_RECORDS-1;
51 find_match(uns record_id, struct hash_record *hash_rec, byte *string, byte *string_end, byte **best_ptr, uns head)
52 /* hash_tab[hash] == record_id points to the head of the double-linked
53 * link-list of strings with the same hash. The records are statically
54 * stored in circular array hash_rec (with the 1st entry unused), and the
55 * pointers are just 16-bit indices. The strings in every collision chain
56 * are ordered by age. */
58 uns count = CHAIN_MAX_TESTS;
60 while (record_id && count-- > 0)
62 byte *record_string = locate_string(string, record_id, head);
63 byte *cmp = record_string;
64 if (cmp[0] == string[0] && cmp[2] == string[2])
65 /* implies cmp[1] == string[1] */
67 if (cmp[3] == string[3])
70 if (*cmp++ == string[4] && *cmp++ == string[5]
71 && *cmp++ == string[6] && *cmp++ == string[7])
73 byte *str = string + 8;
74 while (str <= string_end && *cmp++ == *str++);
79 uns len = cmp - record_string - 1; /* cmp points 2 characters after the last match */
83 *best_ptr = record_string;
84 if (best_len >= CHAIN_GOOD_MATCH) /* optimization */
88 record_id = hash_rec[record_id].next;
94 hash_string(hash_ptr_t *hash_tab, uns hash, struct hash_record *hash_rec, /*byte *string,*/ uns head, uns *to_delete)
95 /* We reuse hash-records stored in a circular array. First, delete the old
96 * one and then add the new one in front of the link-list. */
98 struct hash_record *rec = hash_rec + head;
99 if (*to_delete) /* unlink the original record */
101 uns prev_id = rec->prev & ((1<<15)-1);
102 if (rec->prev & (1<<15)) /* was a head */
103 hash_tab[prev_id] = 0;
104 else /* thanks to the ordering, this was a tail */
105 hash_rec[prev_id].next = 0;
107 rec->next = hash_tab[hash];
108 rec->prev = (1<<15) | hash;
109 hash_rec[rec->next].prev = head;
110 hash_tab[hash] = head; /* add the new record before the link-list */
112 if (++head >= HASH_RECORDS) /* circular buffer, reuse old records, 0 is unused */
121 dump_unary_value(byte *out, uns l)
133 flush_copy_command(uns bof, byte *out, byte *start, uns len)
135 if (bof && len <= 238)
138 /* cannot happen when !!bof */
139 out[-2] |= len; /* invariant: lowest 2 bits 2 bytes back */
142 /* leave 2 least significant bits of out[-2] set to 0 */
148 out = dump_unary_value(out, len - 18);
157 lizard_compress(byte *in, uns in_len, byte *out)
158 /* Requires out being allocated for at least in_len * LIZARD_MAX_MULTIPLY +
159 * LIZARD_MAX_ADD. There must be at least LIZARD_NEEDS_CHARS characters
160 * allocated after in. Returns the actual compressed length. */
162 hash_ptr_t hash_tab[HASH_SIZE];
163 struct hash_record hash_rec[HASH_RECORDS];
165 byte *in_end = in + in_len;
166 byte *out_start = out;
167 byte *copy_start = in;
168 uns head = 1; /* 0 in unused */
170 bzero(hash_tab, sizeof(hash_tab)); /* init the hash-table */
173 uns hash = hashf(in);
175 uns len = find_match(hash_tab[hash], hash_rec, in, in_end, &best, head);
177 #if 0 // TODO: now, our routine does not detect matches of length 2
178 if (len == 2 && (in - best->string) < ((1<<10) + 1))
184 head = hash_string(hash_tab, hash, hash_rec, head, &to_delete);
185 in++; /* add a literal */
189 if (in + len > in_end) /* crop EOF */
195 /* Record the match. */
196 uns copy_len = in - copy_start;
197 uns is_in_copy_mode = copy_start==in_start || copy_len >= 4;
198 uns shift = in - best - 1;
199 /* Try to use a 2-byte sequence. */
203 if (is_in_copy_mode || !copy_len) /* cannot use with 0 copied characters, because this bit pattern is reserved for copy mode */
209 if (len == 3 && is_in_copy_mode && shift >= (1<<11) && shift < (1<<11) + (1<<10))
211 /* optimisation for length-3 matches after a copy command */
215 out = flush_copy_command(copy_start==in_start, out, copy_start, copy_len);
216 *out++ = (shift>>6) & ~3; /* shift fits into 10 bits */
217 *out++ = shift & 0xff;
220 else if (shift < (1<<11) && len <= 8)
222 shift |= (len-3 + 2)<<11;
223 goto dump_2sequence; /* shift has 11 bits and contains also len */
225 /* We have to use a 3-byte sequence. */
226 else if (len == 3 && is_in_copy_mode)
227 /* avoid 3-sequence compressed to 3 sequence if it can simply be appended */
232 out = flush_copy_command(copy_start==in_start, out, copy_start, copy_len);
236 *out++ = (1<<5) | (len-2);
240 out = dump_unary_value(out, len - 33);
243 else /* shift < (1<<15)-1 becase of HASH_RECORDS */
245 shift++; /* because shift==0 is reserved for EOF */
246 byte pos_bit = (shift>>11) & (1<<3);
248 *out++ = (1<<4) | pos_bit | (len-2);
251 *out++ = (1<<4) | pos_bit;
252 out = dump_unary_value(out, len - 9);
255 *out++ = (shift>>6) & ~3; /* rest of shift fits into 14 bits */
256 *out++ = shift & 0xff;
258 /* Update the hash-table. */
259 head = hash_string(hash_tab, hash, hash_rec, head, &to_delete);
260 for (uns i=1; i<len; i++)
261 head = hash_string(hash_tab, hashf(in+i), hash_rec, head, &to_delete);
265 if (in > in_end) /* crop at BOF */
267 uns copy_len = in - copy_start;
269 out = flush_copy_command(copy_start==in_start, out, copy_start, copy_len);
270 *out++ = 17; /* add EOF */
273 return out - out_start;
277 read_unary_value(byte *in, uns *val)
288 lizard_decompress(byte *in, byte *out)
289 /* Requires out being allocated for the decompressed length must be known
290 * beforehand. It is desirable to lock the following memory page for
291 * read-only access to prevent buffer overflow. Returns the actual
292 * decompressed length or a negative number when an error has occured. */
294 byte *out_start = out;
295 uns expect_copy_command = 1;
297 if (*in > 17) /* short copy command at BOF */
300 expect_copy_command = 2;
301 goto perform_copy_command;
308 if (expect_copy_command == 1)
312 in = read_unary_value(in, &len);
317 expect_copy_command = 2;
318 goto perform_copy_command;
322 pos = ((c&0xc)<<6) | *in++;
323 if (expect_copy_command == 2)
338 in = read_unary_value(in, &len);
343 pos |= (*in++ & 0xfc)<<6;
354 in = read_unary_value(in, &len);
359 pos = (*in++ & 0xfc)<<6;
363 else /* high bits encode the length */
365 len = ((c&0xe0)>>5) -2 +3;
370 /* take from the sliding window */
373 memcpy(out, out-pos, len);
378 for (; len-- > 0; out++)
381 /* extract the copy-bits */
385 expect_copy_command = 0;
390 expect_copy_command = 1;
393 perform_copy_command:
394 memcpy(out, in, len);
399 return out - out_start;
402 struct lizard_buffer *
403 lizard_alloc(uns max_len)
405 int fd = open("/dev/zero", O_RDWR);
407 die("open(/dev/zero): %m");
408 struct lizard_buffer *buf = xmalloc(sizeof(struct lizard_buffer));
409 buf->len = (max_len + 2*PAGE_SIZE - 1) / PAGE_SIZE * PAGE_SIZE;
410 buf->ptr = mmap(NULL, buf->len, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
411 if (buf->ptr == MAP_FAILED)
412 die("mmap(/dev/zero): %m");
417 lizard_free(struct lizard_buffer *buf)
419 munmap(buf->ptr, buf->len);
424 sigsegv_handler(int UNUSED whatsit)
426 die("SIGSEGV caught when decompressing.");
430 lizard_decompress_safe(byte *in, struct lizard_buffer *buf, uns expected_length)
431 /* Decompresses into buf->ptr and returns the length of the uncompressed
432 * file. Negative return values signalise errors. If something goes wrong
433 * and buffer would overflow, SIGSEGV is raised. */
435 uns lock_offset = (expected_length + PAGE_SIZE - 1) / PAGE_SIZE * PAGE_SIZE;
436 if (lock_offset + PAGE_SIZE > buf->len)
438 mprotect(buf->ptr, lock_offset, PROT_READ | PROT_WRITE);
439 mprotect(buf->ptr + lock_offset, PAGE_SIZE, PROT_READ);
440 sighandler_t old_handler = signal(SIGSEGV, sigsegv_handler);
441 int len = lizard_decompress(in, buf->ptr);
442 signal(SIGSEGV, old_handler);
448 Description of the LZO1X format :
449 =================================
454 If the first byte is 00010001, it means probably EOF (empty file), so switch
455 to the compressed mode. If it is bigger, subtract 17 and copy this number of
456 the following characters to the ouput and switch to the compressed mode. If
457 it is smaller, go to the copy mode.
462 Read the first byte of the sequence and determine the type of bit-encoding by
463 looking at the most significant bits. The sequence is always at least 2 bytes
464 long. Decode sequences of these types until the EOF or END marker is read.
466 length L = length of the text taken from the sliding window
468 If L=0, then count the number Z of the following zero bytes and add Z*255
469 to the value of the following non-zero byte. This allows setting L
472 position p = relative position of the beginning of the text
474 Exception: 00010001 00000000 00000000 means EOF
476 copying C = length 1..3 of copied characters or END=0
478 C following characters will be copied from the compressed text to the
479 output. The number CC is always stored in the 2 least significant bits of
480 the second last byte of the sequence.
482 If END is read, the algorithm switches to the copy mode.
484 pattern length position
486 0000ppCC pppppppp 2 10 bits (*)
487 0001pLLL L* ppppppCC pppppppp 3..9 + extend 15 bits + EOF
488 001LLLLL L* ppppppCC pppppppp 3..33 + extend 14 bits
492 LLLpppCC pppppppp 3..8 11 bits
497 Read the first byte and, if the most significant bits are 0000, perform the
498 following command, otherwise switch to the compressed mode (and evaluate the
501 pattern length position
503 0000LLLL L* 4..18 + extend N/A
505 Copy L characters from the compressed text to the output. The overhead for
506 incompressible strings is only roughly 1/256 + epsilon.
508 (*) After reading one copy command, switch to the compressed mode with the
509 following "optimisation": the pattern 0000ppCC expands to length 3 instead of 2
510 and 2048 is added to the position (now it is slightly more than 11 bits),
511 because a sequence of length 2 would never be used.