Merge with git+ssh://git.ucw.cz/projects/sherlock/GIT/sherlock.git

[libucw.git] / lib / lizard.c
diff --git a/lib/lizard.c b/lib/lizard.c

index 8d53234b3d5b3846b34e58c5760420f329ed7e3e..1da67ccddfc57dde8067e04689b9cbf034d39a8d 100644 (file)
--- a/lib/lizard.c
+++ b/lib/lizard.c
@@ -1,14 +1,17 @@
  /*
- *     LiZzaRd -- Fast compression method based on Lempel-Ziv 77
+ *     LiZaRd -- Fast compression method based on Lempel-Ziv 77
   *
   *     (c) 2004, Robert Spalek <robert@ucw.cz>
   *
- *     The file format is based on LZO1X and 
+ *     This software may be freely distributed and used according to the terms
+ *     of the GNU Lesser General Public License.
+ *
+ *     The file format is based on LZO1X and
   *     the compression method is based on zlib.
   */
  
  #include "lib/lib.h"
-#include "lib/lizzard.h"
+#include "lib/lizard.h"
  
  #include <string.h>
  
@@ -25,25 +28,25 @@ struct hash_record {
  #define        CHAIN_GOOD_MATCH        32      // we already have a good match => end
  
  static inline uns
-hashf(byte *string)
+hashf(const byte *string)
    /* 0..HASH_SIZE-1 */
  {
      return string[0] ^ (string[1]<<3) ^ (string[2]<<6);
  }
  
  static inline byte *
-locate_string(byte *string, uns record_id, uns head)
+locate_string(const byte *string, int record_id, int head)
    /* The strings are recorded into the hash-table regularly, hence there is no
     * need to store the pointer there.  */
  {
    string += record_id - head;
    if (record_id >= head)
      string -= HASH_RECORDS-1;
-  return string;
+  return (byte *)string;
  }
  
  static inline uns
-find_match(uns record_id, struct hash_record *hash_rec, byte *string, byte *string_end, byte **best_ptr, uns head)
+find_match(uns record_id, struct hash_record *hash_rec, const byte *string, const byte *string_end, byte **best_ptr, uns head)
    /* hash_tab[hash] == record_id points to the head of the double-linked
     * link-list of strings with the same hash.  The records are statically
     * stored in circular array hash_rec (with the 1st entry unused), and the
@@ -65,7 +68,7 @@ find_match(uns record_id, struct hash_record *hash_rec, byte *string, byte *stri
         if (*cmp++ == string[4] && *cmp++ == string[5]
             && *cmp++ == string[6] && *cmp++ == string[7])
         {
-         byte *str = string + 8;
+         const byte *str = string + 8;
           while (str <= string_end && *cmp++ == *str++);
         }
        }
@@ -125,13 +128,23 @@ dump_unary_value(byte *out, uns l)
  }
  
  static byte *
-flush_copy_command(uns bof, byte *out, byte *start, uns len)
+flush_copy_command(uns bof, byte *out, const byte *start, uns len)
  {
    if (bof && len <= 238)
      *out++ = len + 17;
    else if (len < 4)
+  {
      /* cannot happen when !!bof */
      out[-2] |= len;                            /* invariant: lowest 2 bits 2 bytes back */
+#ifdef CPU_ALLOW_UNALIGNED
+    * (u32*) out = * (u32*) start;
+    return out + len;
+#else
+    while (len-- > 0)
+      *out++ = *start++;
+    return out;
+#endif
+  }
    else
    {
      /* leave 2 least significant bits of out[-2] set to 0 */
@@ -143,34 +156,32 @@ flush_copy_command(uns bof, byte *out, byte *start, uns len)
        out = dump_unary_value(out, len - 18);
      }
    }
-  while (len-- > 0)
-    *out++ = *start++;
-  return out;
+  memcpy(out, start, len);
+  return out + len;
  }
  
  int
-lizzard_compress(byte *in, uns in_len, byte *out)
-  /* Requires out being allocated for at least in_len * LIZZARD_MAX_MULTIPLY +
-   * LIZZARD_MAX_ADD.  There must be at least LIZZARD_NEEDS_CHARS characters
+lizard_compress(const byte *in, uns in_len, byte *out)
+  /* Requires out being allocated for at least in_len * LIZARD_MAX_MULTIPLY +
+   * LIZARD_MAX_ADD.  There must be at least LIZARD_NEEDS_CHARS characters
     * allocated after in.  Returns the actual compressed length. */
  {
    hash_ptr_t hash_tab[HASH_SIZE];
    struct hash_record hash_rec[HASH_RECORDS];
-  byte *in_start = in;
-  byte *in_end = in + in_len;
+  const byte *in_end = in + in_len;
    byte *out_start = out;
-  byte *copy_start = in;
+  const byte *copy_start = in;
    uns head = 1;                                        /* 0 in unused */
-  uns to_delete = 0;
+  uns to_delete = 0, bof = 1;
    bzero(hash_tab, sizeof(hash_tab));           /* init the hash-table */
    while (in < in_end)
    {
      uns hash = hashf(in);
-    byte *best;
+    byte *best = NULL;
      uns len = find_match(hash_tab[hash], hash_rec, in, in_end, &best, head);
      if (len < 3)
  #if 0                  // TODO: now, our routine does not detect matches of length 2
-      if (len == 2 && (in - best->string) < ((1<<10) + 1))
+      if (len == 2 && (in - best->string - 1) < (1<<10))
        { /* pass-thru */ }
        else
  #endif
@@ -189,7 +200,7 @@ literal:
      }
      /* Record the match.  */
      uns copy_len = in - copy_start;
-    uns is_in_copy_mode = copy_start==in_start || copy_len >= 4;
+    uns is_in_copy_mode = bof || copy_len >= 4;
      uns shift = in - best - 1;
      /* Try to use a 2-byte sequence.  */
  #if 0
@@ -201,30 +212,31 @@ literal:
         goto dump_2sequence;
      } else
  #endif
-    if (len == 3 && is_in_copy_mode && shift >= (1<<11) && shift < (1<<11) + (1<<10))
+    /* now, len >= 3 */
+    if (shift < (1<<11) && len <= 8)
      {
-      /* optimisation for length-3 matches after a copy command */
-      shift -= 1<<11;
+      shift |= (len-3 + 2)<<11;
  dump_2sequence:
-      if (copy_len > 0)
-       out = flush_copy_command(copy_start==in_start, out, copy_start, copy_len);
+      if (copy_len)
+       out = flush_copy_command(bof, out, copy_start, copy_len);
        *out++ = (shift>>6) & ~3;                        /* shift fits into 10 bits */
        *out++ = shift & 0xff;
      }
-    /* now, len >= 3 */
-    else if (shift < (1<<11) && len <= 8)
+    else if (len == 3 && is_in_copy_mode)
      {
-      shift |= (len-3 + 2)<<11;
-      goto dump_2sequence;                     /* shift has 11 bits and contains also len */
+      if (shift < (1<<11) + (1<<10))           /* optimisation for length-3 matches after a copy command */
+      {
+       shift -= 1<<11;
+       goto dump_2sequence;                    /* shift has 11 bits and contains also len */
+      }
+      else                                     /* avoid 3-sequence compressed to 3 sequence if it can simply be appended */
+       goto literal;
      }
      /* We have to use a 3-byte sequence.  */
-    else if (len == 3 && is_in_copy_mode)
-      /* avoid 3-sequence compressed to 3 sequence if it can simply be appended */
-      goto literal;
      else
      {
-      if (copy_len > 0)
-       out = flush_copy_command(copy_start==in_start, out, copy_start, copy_len);
+      if (copy_len)
+       out = flush_copy_command(bof, out, copy_start, copy_len);
        if (shift < (1<<14))
        {
         if (len <= 33)
@@ -238,12 +250,12 @@ dump_2sequence:
        else /* shift < (1<<15)-1 becase of HASH_RECORDS */
        {
         shift++;                                /* because shift==0 is reserved for EOF */
-       byte pos_bit = (shift>>11) & (1<<3);
+       byte pos_bit = ((shift>>11) & (1<<3)) | (1<<4);
         if (len <= 9)
-         *out++ = (1<<4) | pos_bit | (len-2);
+         *out++ = pos_bit | (len-2);
         else
         {
-         *out++ = (1<<4) | pos_bit;
+         *out++ = pos_bit;
           out = dump_unary_value(out, len - 9);
         }
        }
@@ -256,12 +268,11 @@ dump_2sequence:
        head = hash_string(hash_tab, hashf(in+i), hash_rec, head, &to_delete);
      in += len;
      copy_start = in;
+    bof = 0;
    }
-  if (in > in_end)                             /* crop at BOF */
-    in = in_end;
    uns copy_len = in - copy_start;
-  if (copy_len > 0)
-    out = flush_copy_command(copy_start==in_start, out, copy_start, copy_len);
+  if (copy_len)
+    out = flush_copy_command(bof, out, copy_start, copy_len);
    *out++ = 17;                                 /* add EOF */
    *out++ = 0;
    *out++ = 0;
@@ -269,18 +280,18 @@ dump_2sequence:
  }
  
  static inline byte *
-read_unary_value(byte *in, uns *val)
+read_unary_value(const byte *in, uns *val)
  {
    uns l = 0;
    while (!*in++)
      l += 255;
    l += in[-1];
    *val = l;
-  return in;
+  return (byte *)in;
  }
  
  int
-lizzard_decompress(byte *in, byte *out)
+lizard_decompress(const byte *in, byte *out)
    /* Requires out being allocated for the decompressed length must be known
     * beforehand.  It is desirable to lock the following memory page for
     * read-only access to prevent buffer overflow.  Returns the actual
@@ -292,7 +303,6 @@ lizzard_decompress(byte *in, byte *out)
    if (*in > 17)                                        /* short copy command at BOF */
    {
      len = *in++ - 17;
-    expect_copy_command = 2;
      goto perform_copy_command;
    }
    while (1)
@@ -309,7 +319,6 @@ lizzard_decompress(byte *in, byte *out)
         }
         else
           len = c + 3;
-       expect_copy_command = 2;
         goto perform_copy_command;
        }
        else
@@ -371,21 +380,29 @@ lizzard_decompress(byte *in, byte *out)
      else
      {                                          /* overlapping */
        for (; len-- > 0; out++)
-       *out = out[-pos];
+       *out = *(out-pos);
+      /* It's tempting to use out[-pos] above, but unfortunately it's not the same */
      }
      /* extract the copy-bits */
      len = in[-2] & 0x3;
      if (len)
      {
        expect_copy_command = 0;
+#ifdef CPU_ALLOW_UNALIGNED
+      * (u32*) out = * (u32*) in;
+      out += len;
+      in += len;
+#else
        while (len-- > 0)
         *out++ = *in++;
+#endif
      }
      else
        expect_copy_command = 1;
      continue;
  
  perform_copy_command:
+    expect_copy_command = 2;
      memcpy(out, in, len);
      out += len;
      in += len;
@@ -399,18 +416,22 @@ perform_copy_command:
  Description of the LZO1X format :
  =================================
  
+The meaning of the commands depends on the current mode. It can be either
+the compressed mode or the copy mode. In some cases, the compressed mode
+also distinguishes whether we just left the copy mode or not.
+
  Beginning of file:
  ------------------
  
-If the first byte is 00010001, it means probably EOF (empty file), so switch
-to the compressed mode.  If it is bigger, subtract 17 and copy this number of
-the following characters to the ouput and switch to the compressed mode.  If
-it is smaller, go to the copy mode.
+Start in copy mode. If the first byte is 00010001, it means probably EOF (empty file),
+so switch to the compressed mode.  If it is bigger, subtract 17 and copy this number of
+the following characters to the output and switch to the compressed mode.
+If it is smaller, interpret it as a regular copy mode command.
  
-Compressed mode :
------------------
+Compressed mode:
+----------------
  
-Read the first byte of the sequence and determine the type of bit-encoding by
+Read the first byte of the sequence and determine the type of bit encoding by
  looking at the most significant bits.  The sequence is always at least 2 bytes
  long.  Decode sequences of these types until the EOF or END marker is read.
  
@@ -429,25 +450,23 @@ long.  Decode sequences of these types until the EOF or END marker is read.
      C following characters will be copied from the compressed text to the
      output.  The number CC is always stored in the 2 least significant bits of
      the second last byte of the sequence.
-    
+
      If END is read, the algorithm switches to the copy mode.
  
  pattern                                        length          position
  
-0000ppCC                pppppppp       2               10 bits (*)
-0001pLLL L*    ppppppCC pppppppp       3..9 + extend   15 bits + EOF
+0000ppCC                pppppppp       2               10 bits         [default interpretation]
+0000ppCC                pppppppp       3               10 bits + 2048  [just after return from copy mode]
+0001pLLL L*    ppppppCC pppppppp       3..9 + extend   15 bits         [pos 0 interpreted as EOF]
  001LLLLL L*    ppppppCC pppppppp       3..33 + extend  14 bits
-01\
-10 \
-11  \
-LLLpppCC                pppppppp       3..8            11 bits
+LLLpppCC                pppppppp       3..8            11 bits         [LLL >= 010]
  
-Copy mode :
------------
+Copy mode:
+----------
  
  Read the first byte and, if the most significant bits are 0000, perform the
  following command, otherwise switch to the compressed mode (and evaluate the
-command).
+command there).
  
  pattern                                        length          position
  
@@ -456,9 +475,4 @@ pattern                                     length          position
    Copy L characters from the compressed text to the output.  The overhead for
    incompressible strings is only roughly 1/256 + epsilon.
  
-(*) After reading one copy command, switch to the compressed mode with the
-following "optimisation": the pattern 0000ppCC expands to length 3 instead of 2
-and 2048 is added to the position (now it is slightly more than 11 bits),
-because a sequence of length 2 would never be used.
-
  */