X-Git-Url: http://mj.ucw.cz/gitweb/?a=blobdiff_plain;f=lib%2Flizard.c;h=6167fee043ba90c07befef746cd6bd917c5c21d7;hb=ff36b07f44efa12a78809ee05bd6d0c25fc60495;hp=ee2e245cd199992450ba7684de0cc08981a591a7;hpb=8bf50727d2f7427cc9d6d04284de4c564d9f9d36;p=libucw.git diff --git a/lib/lizard.c b/lib/lizard.c index ee2e245c..6167fee0 100644 --- a/lib/lizard.c +++ b/lib/lizard.c @@ -181,7 +181,7 @@ lizard_compress(byte *in, uns in_len, byte *out) uns len = find_match(hash_tab[hash], hash_rec, in, in_end, &best, head); if (len < 3) #if 0 // TODO: now, our routine does not detect matches of length 2 - if (len == 2 && (in - best->string) < ((1<<10) + 1)) + if (len == 2 && (in - best->string - 1) < (1<<10)) { /* pass-thru */ } else #endif @@ -212,26 +212,27 @@ literal: goto dump_2sequence; } else #endif - if (len == 3 && is_in_copy_mode && shift >= (1<<11) && shift < (1<<11) + (1<<10)) + /* now, len >= 3 */ + if (shift < (1<<11) && len <= 8) { - /* optimisation for length-3 matches after a copy command */ - shift -= 1<<11; + shift |= (len-3 + 2)<<11; dump_2sequence: if (copy_len) out = flush_copy_command(bof, out, copy_start, copy_len); *out++ = (shift>>6) & ~3; /* shift fits into 10 bits */ *out++ = shift & 0xff; } - /* now, len >= 3 */ - else if (shift < (1<<11) && len <= 8) + else if (len == 3 && is_in_copy_mode) { - shift |= (len-3 + 2)<<11; - goto dump_2sequence; /* shift has 11 bits and contains also len */ + if (shift < (1<<11) + (1<<10)) /* optimisation for length-3 matches after a copy command */ + { + shift -= 1<<11; + goto dump_2sequence; /* shift has 11 bits and contains also len */ + } + else /* avoid 3-sequence compressed to 3 sequence if it can simply be appended */ + goto literal; } /* We have to use a 3-byte sequence. */ - else if (len == 3 && is_in_copy_mode) - /* avoid 3-sequence compressed to 3 sequence if it can simply be appended */ - goto literal; else { if (copy_len) @@ -414,18 +415,22 @@ perform_copy_command: Description of the LZO1X format : ================================= +The meaning of the commands depends on the current mode. It can be either +the compressed mode or the copy mode. In some cases, the compressed mode +also distinguishes whether we just left the copy mode or not. + Beginning of file: ------------------ -If the first byte is 00010001, it means probably EOF (empty file), so switch -to the compressed mode. If it is bigger, subtract 17 and copy this number of -the following characters to the ouput and switch to the compressed mode. If -it is smaller, go to the copy mode. +Start in copy mode. If the first byte is 00010001, it means probably EOF (empty file), +so switch to the compressed mode. If it is bigger, subtract 17 and copy this number of +the following characters to the output and switch to the compressed mode. +If it is smaller, interpret it as a regular copy mode command. -Compressed mode : ------------------ +Compressed mode: +---------------- -Read the first byte of the sequence and determine the type of bit-encoding by +Read the first byte of the sequence and determine the type of bit encoding by looking at the most significant bits. The sequence is always at least 2 bytes long. Decode sequences of these types until the EOF or END marker is read. @@ -449,20 +454,18 @@ long. Decode sequences of these types until the EOF or END marker is read. pattern length position -0000ppCC pppppppp 2 10 bits (*) -0001pLLL L* ppppppCC pppppppp 3..9 + extend 15 bits + EOF +0000ppCC pppppppp 2 10 bits [default interpretation] +0000ppCC pppppppp 3 10 bits + 2048 [just after return from copy mode] +0001pLLL L* ppppppCC pppppppp 3..9 + extend 15 bits [pos 0 interpreted as EOF] 001LLLLL L* ppppppCC pppppppp 3..33 + extend 14 bits -01\ -10 \ -11 \ -LLLpppCC pppppppp 3..8 11 bits +LLLpppCC pppppppp 3..8 11 bits [LLL >= 010] -Copy mode : ------------ +Copy mode: +---------- Read the first byte and, if the most significant bits are 0000, perform the following command, otherwise switch to the compressed mode (and evaluate the -command). +command there). pattern length position @@ -471,9 +474,4 @@ pattern length position Copy L characters from the compressed text to the output. The overhead for incompressible strings is only roughly 1/256 + epsilon. -(*) After reading one copy command, switch to the compressed mode with the -following "optimisation": the pattern 0000ppCC expands to length 3 instead of 2 -and 2048 is added to the position (now it is slightly more than 11 bits), -because a sequence of length 2 would never be used. - */