/*
- * LiZzaRd -- Fast compression method based on Lempel-Ziv 77
+ * LiZaRd -- Fast compression method based on Lempel-Ziv 77
*
* (c) 2004, Robert Spalek <robert@ucw.cz>
*
+ * This software may be freely distributed and used according to the terms
+ * of the GNU Lesser General Public License.
+ *
* The file format is based on LZO1X and
* the compression method is based on zlib.
*/
#include "lib/lib.h"
-#include "lib/lizzard.h"
+#include "lib/lizard.h"
#include <string.h>
typedef u16 hash_ptr_t;
struct hash_record {
- byte *string; // in original text
- hash_ptr_t next4, next3; // 0=end
+ /* the position in the original text is implicit; it is computed by locate_string() */
+ hash_ptr_t next; // 0=end
+ hash_ptr_t prev; // high bit: 0=record in array, 1=head in hash-table (i.e. value of hashf)
};
-#define HASH_SIZE ((1<<14) + 27) // prime, size of hash-table
-#define HASH_RECORDS ((1<<15) - 1) // maximum number of records in hash-table
-#define CHAIN_MAX_TESTS 16 // crop longer collision chains
-#define CHAIN_GOOD_MATCH 16 // we already have a good match => end
+#define HASH_SIZE (1<<14) // size of hash-table
+#define HASH_RECORDS (1<<15) // maximum number of records in hash-table, 0 is unused ==> subtract 1
+#define CHAIN_MAX_TESTS 8 // crop longer collision chains
+#define CHAIN_GOOD_MATCH 32 // we already have a good match => end
static inline uns
hashf(byte *string)
+ /* 0..HASH_SIZE-1 */
{
- uns hash;
-#ifdef CPU_ALLOW_UNALIGNED
- hash = * (uns *) string;
-#elif CPU_LITTLE_ENDIAN
- hash = string[0] | (string[1]<<8) | (string[2]<<16) | (string[3]<<24);
-#else
- hash = string[3] | (string[2]<<8) | (string[1]<<16) | (string[0]<<24);
-#endif
- return hash;
+ return string[0] ^ (string[1]<<3) ^ (string[2]<<6);
}
-#ifdef CPU_LITTLE_ENDIAN
-#define MASK_4TH_CHAR (~(0xff << 24))
-#else
-#define MASK_4TH_CHAR (~0xff)
-#endif
-static inline uns
-find_match(uns list_id, hash_ptr_t *hash_tab, struct hash_record *hash_rec, uns hash, byte *string, byte *string_end, byte **best_ptr)
- /* We heavily rely on gcc optimisations (e.g. list_id is a constant hence
- * some branches of the code will be pruned).
- *
- * hash_tab[hash] points to the 1st record of the link-list of strings with the
- * same hash. The records are statically stored in circular array hash_rec
- * (with 1st entry unused), and the pointers are just 16-bit indices. In one
- * array, we store 2 independent link-lists: with 3 and 4 hashed characters.
- *
- * Updates are performed more often than searches, so they are lazy: we only
- * add new records in front of link-lists and never delete them. By reusing
- * an already allocated space in the circular array we necessarily modify
- * records that are already pointed at in the middle of some link-list. To
- * prevent confusion, this function checks 2 invariants: the strings in a
- * collision chain are ordered, and the hash of the 1st record in the chain
- * must match. */
+static inline byte *
+locate_string(byte *string, uns record_id, uns head)
+ /* One record is inserted per input position, so a record's position in the
+ * original text is implied by its index and need not be stored. */
{
- if (list_id == 3) /* Find the 1st record in the collision chain. */
- hash = hash & MASK_4TH_CHAR;
- hash %= HASH_SIZE;
- if (!hash_tab[hash])
- return 0;
- struct hash_record *record = hash_rec + hash_tab[hash];
- uns hash2 = hashf(record->string);
- if (list_id == 3)
- hash2 = hash2 & MASK_4TH_CHAR;
- hash2 %= HASH_SIZE;
- if (hash2 != hash) /* Verify the hash-value. */
- {
- hash_tab[hash] = 0; /* prune */
- return 0;
- }
+ string += record_id - head;
+ if (record_id >= head)
+ string -= HASH_RECORDS-1;
+ return string;
+}
+static inline uns
+find_match(uns record_id, struct hash_record *hash_rec, byte *string, byte *string_end, byte **best_ptr, uns head)
+ /* hash_tab[hash] == record_id points to the head of the double-linked
+ * link-list of strings with the same hash. The records are statically
+ * stored in circular array hash_rec (with the 1st entry unused), and the
+ * pointers are just 16-bit indices. The strings in every collision chain
+ * are ordered by age. */
+{
uns count = CHAIN_MAX_TESTS;
uns best_len = 0;
- byte *last_string = string_end;
- hash_ptr_t *last_ptr = NULL;
- while (count-- > 0)
+ while (record_id && count-- > 0)
{
- byte *cmp = record->string;
- if (cmp >= last_string) /* collision chain is ordered by age */
- {
- *last_ptr = 0; /* prune */
- break;
- }
+ byte *record_string = locate_string(string, record_id, head);
+ byte *cmp = record_string;
if (cmp[0] == string[0] && cmp[2] == string[2])
+ /* implies cmp[1] == string[1] */
{
if (cmp[3] == string[3])
{
- /* implies cmp[1] == string[1], becase we hash into 14+eps bits */
cmp += 4;
if (*cmp++ == string[4] && *cmp++ == string[5]
&& *cmp++ == string[6] && *cmp++ == string[7])
{
byte *str = string + 8;
- while (str <= string_end && *cmp++ == *string++);
+ while (str <= string_end && *cmp++ == *str++);
}
}
- else if (list_id == 3)
- /* hashing 3 characters implies cmp[1] == string[1]
- hashing 4 characters implies cmp[1] != string[1] */
- cmp += 4;
else
- goto next_collision;
- uns len = cmp - record->string - 1; /* cmp points 2 characters after the last match */
+ cmp += 4;
+ uns len = cmp - record_string - 1; /* cmp points 2 characters after the last match */
if (len > best_len)
{
best_len = len;
- *best_ptr = record->string;
- if (best_len >= CHAIN_GOOD_MATCH) /* optimisation */
+ *best_ptr = record_string;
+ if (best_len >= CHAIN_GOOD_MATCH) /* optimization */
break;
}
}
-next_collision:
- if (list_id == 3)
- {
- if (!record->next3)
- break;
- last_ptr = &record->next3;
- record = hash_rec + record->next3;
- }
- else
- {
- if (!record->next4)
- break;
- last_ptr = &record->next4;
- record = hash_rec + record->next4;
- }
+ record_id = hash_rec[record_id].next;
}
return best_len;
}
-static inline uns
-hash_string(byte *string, uns hash, hash_ptr_t *tab3, hash_ptr_t *tab4, struct hash_record *hash_rec, uns head)
+static uns
+hash_string(hash_ptr_t *hash_tab, uns hash, struct hash_record *hash_rec, /*byte *string,*/ uns head, uns *to_delete)
+ /* We reuse hash-records stored in a circular array. First, delete the old
+ * one and then add the new one in front of the link-list. */
{
- uns h3 = (hash & MASK_4TH_CHAR) % HASH_SIZE;
- uns h4 = hash % HASH_SIZE;
struct hash_record *rec = hash_rec + head;
- rec->string = string;
- rec->next3 = tab3[h3];
- rec->next4 = tab4[h4];
- tab3[h3] = head; /* add the new record before the link-list */
- tab4[h4] = head;
- head++; /* circular buffer, reuse old records */
- if (head >= HASH_RECORDS)
- head = 0;
+ if (*to_delete) /* unlink the original record */
+ {
+ uns prev_id = rec->prev & ((1<<15)-1);
+ if (rec->prev & (1<<15)) /* was a head */
+ hash_tab[prev_id] = 0;
+ else /* thanks to the ordering, this was a tail */
+ hash_rec[prev_id].next = 0;
+ }
+ rec->next = hash_tab[hash];
+ rec->prev = (1<<15) | hash;
+ hash_rec[rec->next].prev = head;
+ hash_tab[hash] = head; /* add the new record before the link-list */
+
+ if (++head >= HASH_RECORDS) /* circular buffer, reuse old records, 0 is unused */
+ {
+ head = 1;
+ *to_delete = 1;
+ }
return head;
}
if (bof && len <= 238)
*out++ = len + 17;
else if (len < 4)
+ {
/* cannot happen when !!bof */
out[-2] |= len; /* invariant: lowest 2 bits 2 bytes back */
+#ifdef CPU_ALLOW_UNALIGNED
+ * (u32*) out = * (u32*) start;
+ return out + len;
+#else
+ while (len-- > 0)
+ *out++ = *start++;
+ return out;
+#endif
+ }
else
{
/* leave 2 least significant bits of out[-2] set to 0 */
out = dump_unary_value(out, len - 18);
}
}
- while (len-- > 0)
- *out++ = *start++;
- return out;
+ memcpy(out, start, len);
+ return out + len;
}
int
-lizzard_compress(byte *in, uns in_len, byte *out)
- /* Requires out being allocated for at least in_len * LIZZARD_MAX_PROLONG_FACTOR.
- * There must be at least 8 characters allocated after in.
- * Returns the actual compressed length. */
+lizard_compress(byte *in, uns in_len, byte *out)
+ /* Requires out being allocated for at least in_len * LIZARD_MAX_MULTIPLY +
+ * LIZARD_MAX_ADD. There must be at least LIZARD_NEEDS_CHARS characters
+ * allocated after in. Returns the actual compressed length. */
{
- hash_ptr_t hash_tab3[HASH_SIZE], hash_tab4[HASH_SIZE];
+ hash_ptr_t hash_tab[HASH_SIZE];
struct hash_record hash_rec[HASH_RECORDS];
- byte *in_start = in;
byte *in_end = in + in_len;
byte *out_start = out;
byte *copy_start = in;
- uns head = 0;
- bzero(hash_tab3, sizeof(hash_tab3)); /* init the hash-table */
- bzero(hash_tab4, sizeof(hash_tab4));
+ uns head = 1; /* 0 is unused */
+ uns to_delete = 0, bof = 1;
+ bzero(hash_tab, sizeof(hash_tab)); /* init the hash-table */
while (in < in_end)
{
uns hash = hashf(in);
byte *best;
- uns len = find_match(4, hash_tab4, hash_rec, hash, in, in_end, &best);
- if (len >= 3)
- goto match;
- len = find_match(3, hash_tab3, hash_rec, hash, in, in_end, &best);
-
-match_found:
- if (len >= 3)
- goto match;
+ uns len = find_match(hash_tab[hash], hash_rec, in, in_end, &best, head);
+ if (len < 3)
#if 0 // TODO: now, our routine does not detect matches of length 2
- if (len == 2 && (in - best->string) < ((1<<10) + 1))
- goto match;
+ if (len == 2 && (in - best->string - 1) < (1<<10))
+ { /* pass-thru */ }
+ else
#endif
+ {
literal:
- head = hash_string(in, hash, hash_tab3, hash_tab4, hash_rec, head);
- in++; /* add a literal */
- continue;
+ head = hash_string(hash_tab, hash, hash_rec, head, &to_delete);
+ in++; /* add a literal */
+ continue;
+ }
-match:
if (in + len > in_end) /* crop EOF */
{
len = in_end - in;
if (len < 3)
- goto match_found;
+ goto literal;
}
/* Record the match. */
uns copy_len = in - copy_start;
- uns is_in_copy_mode = copy_start==in_start || copy_len >= 4;
+ uns is_in_copy_mode = bof || copy_len >= 4;
uns shift = in - best - 1;
/* Try to use a 2-byte sequence. */
+#if 0
if (len == 2)
{
if (is_in_copy_mode || !copy_len) /* cannot use with 0 copied characters, because this bit pattern is reserved for copy mode */
goto literal;
-
+ else
+ goto dump_2sequence;
+ } else
+#endif
+ /* now, len >= 3 */
+ if (shift < (1<<11) && len <= 8)
+ {
+ shift |= (len-3 + 2)<<11;
dump_2sequence:
- if (copy_len > 0)
- out = flush_copy_command(copy_start==in_start, out, copy_start, copy_len);
+ if (copy_len)
+ out = flush_copy_command(bof, out, copy_start, copy_len);
*out++ = (shift>>6) & ~3; /* shift fits into 10 bits */
*out++ = shift & 0xff;
}
- else if (len == 3 && is_in_copy_mode && shift >= (1<<11) && shift < (1<<11 + 1<<10))
- {
- /* optimisation for length-3 matches after a copy command */
- shift -= 1<<11;
- goto dump_2sequence;
- }
- /* now, len >= 3 */
- else if (shift < (1<<11) && len <= 8)
+ else if (len == 3 && is_in_copy_mode)
{
- shift |= (len-3 + 2)<<11;
- goto dump_2sequence; /* shift has 11 bits and contains also len */
+ if (shift < (1<<11) + (1<<10)) /* optimisation for length-3 matches after a copy command */
+ {
+ shift -= 1<<11;
+ goto dump_2sequence; /* shift has 11 bits and contains also len */
+ }
+ else /* avoid compressing a 3-character match into a 3-byte sequence when it can simply be appended as literals */
+ goto literal;
}
/* We have to use a 3-byte sequence. */
- else if (len == 3 && is_in_copy_mode)
- /* avoid 3-sequence compressed to 3 sequence if it can simply be appended */
- goto literal;
else
{
- if (copy_len > 0)
- out = flush_copy_command(copy_start==in_start, out, copy_start, copy_len);
+ if (copy_len)
+ out = flush_copy_command(bof, out, copy_start, copy_len);
if (shift < (1<<14))
{
if (len <= 33)
else /* shift < (1<<15)-1 becase of HASH_RECORDS */
{
shift++; /* because shift==0 is reserved for EOF */
- byte pos_bit = (shift>>11) & (1<<3);
+ byte pos_bit = ((shift>>11) & (1<<3)) | (1<<4);
if (len <= 9)
- *out++ = (1<<4) | pos_bit | (len-2);
+ *out++ = pos_bit | (len-2);
else
{
- *out++ = (1<<4) | pos_bit;
+ *out++ = pos_bit;
out = dump_unary_value(out, len - 9);
}
}
*out++ = shift & 0xff;
}
/* Update the hash-table. */
- head = hash_string(in, hash, hash_tab3, hash_tab4, hash_rec, head);
+ head = hash_string(hash_tab, hash, hash_rec, head, &to_delete);
for (uns i=1; i<len; i++)
- head = hash_string(in+i, hashf(in+i), hash_tab3, hash_tab4, hash_rec, head);
+ head = hash_string(hash_tab, hashf(in+i), hash_rec, head, &to_delete);
in += len;
copy_start = in;
+ bof = 0;
}
- if (in > in_end) /* crop at BOF */
- in = in_end;
uns copy_len = in - copy_start;
- if (copy_len > 0)
- out = flush_copy_command(copy_start==in_start, out, copy_start, copy_len);
+ if (copy_len)
+ out = flush_copy_command(bof, out, copy_start, copy_len);
*out++ = 17; /* add EOF */
*out++ = 0;
*out++ = 0;
}
int
-lizzard_decompress(byte *in, byte *out)
+lizard_decompress(byte *in, byte *out)
/* Requires out being allocated for the decompressed length must be known
* beforehand. It is desirable to lock the following memory page for
* read-only access to prevent buffer overflow. Returns the actual
if (*in > 17) /* short copy command at BOF */
{
len = *in++ - 17;
- expect_copy_command = 2;
goto perform_copy_command;
}
while (1)
if (expect_copy_command == 1)
{
if (!c)
+ {
in = read_unary_value(in, &len);
+ len += 18;
+ }
else
- len = c;
- expect_copy_command = 2;
+ len = c + 3;
goto perform_copy_command;
}
else
{
- pos = (c&0xc)<<6 | *in++;
+ pos = ((c&0xc)<<6) | *in++;
if (expect_copy_command == 2)
{
pos += 1<<11;
}
else
len = 2;
+ pos++;
}
else if (c < 0x20)
{
pos = (c&0x8)<<11;
len = c&0x7;
- if (len)
+ if (!len)
{
in = read_unary_value(in, &len);
len += 9;
pos |= *in++;
if (!pos) /* EOF */
break;
- else /* shift it back */
- pos--;
+ /* do NOT pos++ */
}
else if (c < 0x40)
{
len = c&0x1f;
- if (len)
+ if (!len)
{
in = read_unary_value(in, &len);
len += 33;
len += 2;
pos = (*in++ & 0xfc)<<6;
pos |= *in++;
+ pos++;
}
else /* high bits encode the length */
{
- len = (c&0xe)>>5 -2 +3;
+ len = ((c&0xe0)>>5) -2 +3;
pos = (c&0x1c)<<6;
pos |= *in++;
+ pos++;
}
/* take from the sliding window */
- memcpy(out, in-1-pos, len); //FIXME: overlapping
- out += len;
+ if (len <= pos)
+ {
+ memcpy(out, out-pos, len);
+ out += len;
+ }
+ else
+ { /* overlapping */
+ for (; len-- > 0; out++)
+ *out = out[-pos];
+ }
/* extract the copy-bits */
len = in[-2] & 0x3;
if (len)
- expect_copy_command = 0; /* and fall-thru */
- else
{
- expect_copy_command = 1;
- continue;
+ expect_copy_command = 0;
+#ifdef CPU_ALLOW_UNALIGNED
+ * (u32*) out = * (u32*) in;
+ out += len;
+ in += len;
+#else
+ while (len-- > 0)
+ *out++ = *in++;
+#endif
}
+ else
+ expect_copy_command = 1;
+ continue;
perform_copy_command:
+ expect_copy_command = 2;
memcpy(out, in, len);
out += len;
in += len;
Description of the LZO1X format :
=================================
+The meaning of the commands depends on the current mode. It can be either
+the compressed mode or the copy mode. In some cases, the compressed mode
+also distinguishes whether we just left the copy mode or not.
+
Beginning of file:
------------------
-If the first byte is 00010001, it means probably EOF (empty file), so switch
-to the compressed mode. If it is bigger, subtract 17 and copy this number of
-the following characters to the ouput and switch to the compressed mode. If
-it is smaller, go to the copy mode.
+Start in copy mode. If the first byte is 00010001, it probably means EOF (empty file),
+so switch to the compressed mode. If it is bigger, subtract 17 and copy this number of
+the following characters to the output and switch to the compressed mode.
+If it is smaller, interpret it as a regular copy mode command.
-Compressed mode :
------------------
+Compressed mode:
+----------------
-Read the first byte of the sequence and determine the type of bit-encoding by
+Read the first byte of the sequence and determine the type of bit encoding by
looking at the most significant bits. The sequence is always at least 2 bytes
long. Decode sequences of these types until the EOF or END marker is read.
pattern length position
-0000ppCC pppppppp 2 10 bits (*)
-0001pLLL L* ppppppCC pppppppp 3..9 + extend 15 bits + EOF
+0000ppCC pppppppp 2 10 bits [default interpretation]
+0000ppCC pppppppp 3 10 bits + 2048 [just after return from copy mode]
+0001pLLL L* ppppppCC pppppppp 3..9 + extend 15 bits [pos 0 interpreted as EOF]
001LLLLL L* ppppppCC pppppppp 3..33 + extend 14 bits
-01\
-10 \
-11 \
-LLLpppCC pppppppp 3..8 11 bits
+LLLpppCC pppppppp 3..8 11 bits [LLL >= 010]
-Copy mode :
------------
+Copy mode:
+----------
Read the first byte and, if the most significant bits are 0000, perform the
following command, otherwise switch to the compressed mode (and evaluate the
-command).
+command there).
pattern length position
Copy L characters from the compressed text to the output. The overhead for
incompressible strings is only roughly 1/256 + epsilon.
-(*) After reading one copy command, switch to the compressed mode with the
-following "optimisation": the pattern 0000ppCC expands to length 3 instead of 2
-and 2048 is added to the position (now it is slightly more than 11 bits),
-because a sequence of length 2 would never be used.
-
*/