* This software may be freely distributed and used according to the terms
* of the GNU Lesser General Public License.
*
- * The file format is based on LZO1X and
+ * The file format is based on LZO1X and
* the compression method is based on zlib.
*/
#define CHAIN_GOOD_MATCH 32 // we already have a good match => end
static inline uns
-hashf(byte *string)
+hashf(const byte *string)
/* 0..HASH_SIZE-1 */
{
return string[0] ^ (string[1]<<3) ^ (string[2]<<6);
}
static inline byte *
-locate_string(byte *string, uns record_id, uns head)
+locate_string(const byte *string, int record_id, int head)
/* The strings are recorded into the hash-table regularly, hence there is no
* need to store the pointer there. */
{
string += record_id - head;
if (record_id >= head)
string -= HASH_RECORDS-1;
- return string;
+ return (byte *)string;
}
static inline uns
-find_match(uns record_id, struct hash_record *hash_rec, byte *string, byte *string_end, byte **best_ptr, uns head)
+find_match(uns record_id, struct hash_record *hash_rec, const byte *string, const byte *string_end, byte **best_ptr, uns head)
/* hash_tab[hash] == record_id points to the head of the double-linked
* link-list of strings with the same hash. The records are statically
* stored in circular array hash_rec (with the 1st entry unused), and the
if (*cmp++ == string[4] && *cmp++ == string[5]
&& *cmp++ == string[6] && *cmp++ == string[7])
{
- byte *str = string + 8;
+ const byte *str = string + 8;
while (str <= string_end && *cmp++ == *str++);
}
}
}
static byte *
-flush_copy_command(uns bof, byte *out, byte *start, uns len)
+flush_copy_command(uns bof, byte *out, const byte *start, uns len)
{
if (bof && len <= 238)
*out++ = len + 17;
}
int
-lizard_compress(byte *in, uns in_len, byte *out)
+lizard_compress(const byte *in, uns in_len, byte *out)
/* Requires out being allocated for at least in_len * LIZARD_MAX_MULTIPLY +
* LIZARD_MAX_ADD. There must be at least LIZARD_NEEDS_CHARS characters
* allocated after in. Returns the actual compressed length. */
{
hash_ptr_t hash_tab[HASH_SIZE];
struct hash_record hash_rec[HASH_RECORDS];
- byte *in_end = in + in_len;
+ const byte *in_end = in + in_len;
byte *out_start = out;
- byte *copy_start = in;
+ const byte *copy_start = in;
uns head = 1; /* 0 in unused */
uns to_delete = 0, bof = 1;
bzero(hash_tab, sizeof(hash_tab)); /* init the hash-table */
while (in < in_end)
{
uns hash = hashf(in);
- byte *best;
+ byte *best = NULL;
uns len = find_match(hash_tab[hash], hash_rec, in, in_end, &best, head);
if (len < 3)
#if 0 // TODO: now, our routine does not detect matches of length 2
}
static inline byte *
-read_unary_value(byte *in, uns *val)
+read_unary_value(const byte *in, uns *val)
{
uns l = 0;
while (!*in++)
l += 255;
l += in[-1];
*val = l;
- return in;
+ return (byte *)in;
}
int
-lizard_decompress(byte *in, byte *out)
+lizard_decompress(const byte *in, byte *out)
/* Requires out being allocated for the decompressed length must be known
* beforehand. It is desirable to lock the following memory page for
* read-only access to prevent buffer overflow. Returns the actual
else
{ /* overlapping */
for (; len-- > 0; out++)
- *out = out[-pos];
+ *out = *(out-pos);
+ /* It's tempting to use out[-pos] above, but unfortunately it's not the same */
}
/* extract the copy-bits */
len = in[-2] & 0x3;
Description of the LZO1X format :
=================================
+The meaning of the commands depends on the current mode. It can be either
+the compressed mode or the copy mode. In some cases, the compressed mode
+also distinguishes whether we just left the copy mode or not.
+
Beginning of file:
------------------
-If the first byte is 00010001, it means probably EOF (empty file), so switch
-to the compressed mode. If it is bigger, subtract 17 and copy this number of
-the following characters to the ouput and switch to the compressed mode. If
-it is smaller, go to the copy mode.
+Start in copy mode. If the first byte is 00010001, it means probably EOF (empty file),
+so switch to the compressed mode. If it is bigger, subtract 17 and copy this number of
+the following characters to the output and switch to the compressed mode.
+If it is smaller, interpret it as a regular copy mode command.
-Compressed mode :
------------------
+Compressed mode:
+----------------
-Read the first byte of the sequence and determine the type of bit-encoding by
+Read the first byte of the sequence and determine the type of bit encoding by
looking at the most significant bits. The sequence is always at least 2 bytes
long. Decode sequences of these types until the EOF or END marker is read.
C following characters will be copied from the compressed text to the
output. The number CC is always stored in the 2 least significant bits of
the second last byte of the sequence.
-
+
If END is read, the algorithm switches to the copy mode.
pattern length position
-0000ppCC pppppppp 2 10 bits (*)
-0001pLLL L* ppppppCC pppppppp 3..9 + extend 15 bits + EOF
+0000ppCC pppppppp 2 10 bits [default interpretation]
+0000ppCC pppppppp 3 10 bits + 2048 [just after return from copy mode]
+0001pLLL L* ppppppCC pppppppp 3..9 + extend 15 bits [pos 0 interpreted as EOF]
001LLLLL L* ppppppCC pppppppp 3..33 + extend 14 bits
-01\
-10 \
-11 \
-LLLpppCC pppppppp 3..8 11 bits
+LLLpppCC pppppppp 3..8 11 bits [LLL >= 010]
-Copy mode :
------------
+Copy mode:
+----------
Read the first byte and, if the most significant bits are 0000, perform the
following command, otherwise switch to the compressed mode (and evaluate the
-command).
+command there).
pattern length position
Copy L characters from the compressed text to the output. The overhead for
incompressible strings is only roughly 1/256 + epsilon.
-(*) After reading one copy command, switch to the compressed mode with the
-following "optimisation": the pattern 0000ppCC expands to length 3 instead of 2
-and 2048 is added to the position (now it is slightly more than 11 bits),
-because a sequence of length 2 would never be used.
-
*/