uns len = find_match(hash_tab[hash], hash_rec, in, in_end, &best, head);
if (len < 3)
#if 0 // TODO: now, our routine does not detect matches of length 2
- if (len == 2 && (in - best->string) < ((1<<10) + 1))
+ if (len == 2 && (in - best->string - 1) < (1<<10))
{ /* pass-thru */ }
else
#endif
goto dump_2sequence;
} else
#endif
- if (len == 3 && is_in_copy_mode && shift >= (1<<11) && shift < (1<<11) + (1<<10))
+ /* now, len >= 3 */
+ if (shift < (1<<11) && len <= 8)
{
- /* optimisation for length-3 matches after a copy command */
- shift -= 1<<11;
+ shift |= (len-3 + 2)<<11;
dump_2sequence:
if (copy_len)
out = flush_copy_command(bof, out, copy_start, copy_len);
*out++ = (shift>>6) & ~3; /* shift fits into 10 bits */
*out++ = shift & 0xff;
}
- /* now, len >= 3 */
- else if (shift < (1<<11) && len <= 8)
+ else if (len == 3 && is_in_copy_mode)
{
- shift |= (len-3 + 2)<<11;
- goto dump_2sequence; /* shift has 11 bits and contains also len */
+ if (shift < (1<<11) + (1<<10)) /* optimisation for length-3 matches after a copy command */
+ {
+ shift -= 1<<11;
+ goto dump_2sequence; /* shift has 11 bits and contains also len */
+ }
+ else /* avoid 3-sequence compressed to 3 sequence if it can simply be appended */
+ goto literal;
}
/* We have to use a 3-byte sequence. */
- else if (len == 3 && is_in_copy_mode)
- /* avoid 3-sequence compressed to 3 sequence if it can simply be appended */
- goto literal;
else
{
if (copy_len)
Description of the LZO1X format :
=================================
+The meaning of the commands depends on the current mode. It can be either
+the compressed mode or the copy mode. In some cases, the compressed mode
+also distinguishes whether we just left the copy mode or not.
+
Beginning of file:
------------------
-If the first byte is 00010001, it means probably EOF (empty file), so switch
-to the compressed mode. If it is bigger, subtract 17 and copy this number of
-the following characters to the ouput and switch to the compressed mode. If
-it is smaller, go to the copy mode.
+Start in copy mode. If the first byte is 00010001, it means probably EOF (empty file),
+so switch to the compressed mode. If it is bigger, subtract 17 and copy this number of
+the following characters to the output and switch to the compressed mode.
+If it is smaller, interpret it as a regular copy mode command.
-Compressed mode :
------------------
+Compressed mode:
+----------------
-Read the first byte of the sequence and determine the type of bit-encoding by
+Read the first byte of the sequence and determine the type of bit encoding by
looking at the most significant bits. The sequence is always at least 2 bytes
long. Decode sequences of these types until the EOF or END marker is read.
pattern length position
-0000ppCC pppppppp 2 10 bits (*)
-0001pLLL L* ppppppCC pppppppp 3..9 + extend 15 bits + EOF
+0000ppCC pppppppp 2 10 bits [default interpretation]
+0000ppCC pppppppp 3 10 bits + 2048 [just after return from copy mode]
+0001pLLL L* ppppppCC pppppppp 3..9 + extend 15 bits [pos 0 interpreted as EOF]
001LLLLL L* ppppppCC pppppppp 3..33 + extend 14 bits
-01\
-10 \
-11 \
-LLLpppCC pppppppp 3..8 11 bits
+LLLpppCC pppppppp 3..8 11 bits [LLL >= 010]
-Copy mode :
------------
+Copy mode:
+----------
Read the first byte and, if the most significant bits are 0000, perform the
following command, otherwise switch to the compressed mode (and evaluate the
-command).
+command there).
pattern length position
Copy L characters from the compressed text to the output. The overhead for
incompressible strings is only roughly 1/256 + epsilon.
-(*) After reading one copy command, switch to the compressed mode with the
-following "optimisation": the pattern 0000ppCC expands to length 3 instead of 2
-and 2048 is added to the position (now it is slightly more than 11 bits),
-because a sequence of length 2 would never be used.
-
*/