X-Git-Url: http://mj.ucw.cz/gitweb/?a=blobdiff_plain;f=lib%2Flizard.c;h=6167fee043ba90c07befef746cd6bd917c5c21d7;hb=8107422b6cc0fda08e55218a3a79ff1580779a95;hp=c71fc9accb52b85a7a49281bdf2e42c3d40df2fd;hpb=d94fc019538bc9c96b694bf0cb1924fbd9251a6b;p=libucw.git

diff --git a/lib/lizard.c b/lib/lizard.c
index c71fc9ac..6167fee0 100644
--- a/lib/lizard.c
+++ b/lib/lizard.c
@@ -1,154 +1,117 @@
 /*
- *	LiZzaRd -- Fast compression method based on Lempel-Ziv 77
+ *	LiZaRd -- Fast compression method based on Lempel-Ziv 77
  *
  *	(c) 2004, Robert Spalek <robert@ucw.cz>
  *
+ *	This software may be freely distributed and used according to the terms
+ *	of the GNU Lesser General Public License.
+ *
  *	The file format is based on LZO1X and 
  *	the compression method is based on zlib.
  */
 
 #include "lib/lib.h"
-#include "lib/lizzard.h"
+#include "lib/lizard.h"
 
 #include <string.h>
 
 typedef u16 hash_ptr_t;
 struct hash_record {
-  byte *string;				// in original text
-  hash_ptr_t next4, next3;		// 0=end
+  /* the position in the original text is implicit; it is computed by locate_string() */
+  hash_ptr_t next;			// 0=end
+  hash_ptr_t prev;			// high bit: 0=record in array, 1=head in hash-table (i.e. value of hashf)
 };
 
-#define	HASH_SIZE	((1<<14) + 27)	// prime, size of hash-table
-#define	HASH_RECORDS	((1<<15) - 1)	// maximum number of records in hash-table
-#define	CHAIN_MAX_TESTS	16		// crop longer collision chains
-#define	CHAIN_GOOD_MATCH	16	// we already have a good match => end
+#define	HASH_SIZE	(1<<14)		// size of hash-table
+#define	HASH_RECORDS	(1<<15)		// maximum number of records in hash-table, 0 is unused ==> subtract 1
+#define	CHAIN_MAX_TESTS		8	// crop longer collision chains
+#define	CHAIN_GOOD_MATCH	32	// we already have a good match => end
 
 static inline uns
 hashf(byte *string)
+  /* 0..HASH_SIZE-1 */
 {
-    uns hash;
-#ifdef	CPU_ALLOW_UNALIGNED
-    hash = * (uns *) string;
-#elif	CPU_LITTLE_ENDIAN
-    hash = string[0] | (string[1]<<8) | (string[2]<<16) | (string[3]<<24);
-#else
-    hash = string[3] | (string[2]<<8) | (string[1]<<16) | (string[0]<<24);
-#endif
-    return hash;
+    return string[0] ^ (string[1]<<3) ^ (string[2]<<6);
 }
-#ifdef	CPU_LITTLE_ENDIAN
-#define	MASK_4TH_CHAR	(~(0xff << 24))
-#else
-#define	MASK_4TH_CHAR	(~0xff)
-#endif
 
-static inline uns
-find_match(uns list_id, hash_ptr_t *hash_tab, struct hash_record *hash_rec, uns hash, byte *string, byte *string_end, byte **best_ptr)
-  /* We heavily rely on gcc optimisations (e.g. list_id is a constant hence
-   * some branches of the code will be pruned).
-   *
-   * hash_tab[hash] points to the 1st record of the link-list of strings with the
-   * same hash.  The records are statically stored in circular array hash_rec
-   * (with 1st entry unused), and the pointers are just 16-bit indices.  In one
-   * array, we store 2 independent link-lists: with 3 and 4 hashed characters.
-   *
-   * Updates are performed more often than searches, so they are lazy: we only
-   * add new records in front of link-lists and never delete them.  By reusing
-   * an already allocated space in the circular array we necessarily modify
-   * records that are already pointed at in the middle of some link-list.  To
-   * prevent confusion, this function checks 2 invariants: the strings in a
-   * collision chain are ordered, and the hash of the 1st record in the chain
-   * must match.  */
+static inline byte *
+locate_string(byte *string, uns record_id, uns head)
+  /* The strings are recorded into the hash-table regularly, hence there is no
+   * need to store the pointer there.  */
 {
-  if (list_id == 3)				/* Find the 1st record in the collision chain.  */
-    hash = hash & MASK_4TH_CHAR;
-  hash %= HASH_SIZE;
-  if (!hash_tab[hash])
-    return 0;
-  struct hash_record *record = hash_rec + hash_tab[hash];
-  uns hash2 = hashf(record->string);
-  if (list_id == 3)
-    hash2 = hash2 & MASK_4TH_CHAR;
-  hash2 %= HASH_SIZE;
-  if (hash2 != hash)				/* Verify the hash-value.  */
-  {
-    hash_tab[hash] = 0;				/* prune */
-    return 0;
-  }
+  string += record_id - head;
+  if (record_id >= head)
+    string -= HASH_RECORDS-1;
+  return string;
+}
 
+static inline uns
+find_match(uns record_id, struct hash_record *hash_rec, byte *string, byte *string_end, byte **best_ptr, uns head)
+  /* hash_tab[hash] == record_id points to the head of the double-linked
+   * link-list of strings with the same hash.  The records are statically
+   * stored in circular array hash_rec (with the 1st entry unused), and the
+   * pointers are just 16-bit indices.  The strings in every collision chain
+   * are ordered by age.  */
+{
   uns count = CHAIN_MAX_TESTS;
   uns best_len = 0;
-  byte *last_string = string_end;
-  hash_ptr_t *last_ptr = NULL;
-  while (count-- > 0)
+  while (record_id && count-- > 0)
   {
-    byte *cmp = record->string;
-    if (cmp >= last_string)			/* collision chain is ordered by age */
-    {
-      *last_ptr = 0;				/* prune */
-      break;
-    }
+    byte *record_string = locate_string(string, record_id, head);
+    byte *cmp = record_string;
     if (cmp[0] == string[0] && cmp[2] == string[2])
+    /* implies cmp[1] == string[1] */
     {
       if (cmp[3] == string[3])
       {
-	/* implies cmp[1] == string[1], becase we hash into 14+eps bits */
 	cmp += 4;
 	if (*cmp++ == string[4] && *cmp++ == string[5]
 	    && *cmp++ == string[6] && *cmp++ == string[7])
 	{
 	  byte *str = string + 8;
-	  while (str <= string_end && *cmp++ == *string++);
+	  while (str <= string_end && *cmp++ == *str++);
 	}
       }
-      else if (list_id == 3)
-	/* hashing 3 characters implies cmp[1] == string[1]
-	   hashing 4 characters implies cmp[1] != string[1] */
-	cmp += 4;
       else
-	goto next_collision;
-      uns len = cmp - record->string - 1;	/* cmp points 2 characters after the last match */
+	cmp += 4;
+      uns len = cmp - record_string - 1;	/* cmp points 2 characters after the last match */
       if (len > best_len)
       {
 	best_len = len;
-	*best_ptr = record->string;
-	if (best_len >= CHAIN_GOOD_MATCH)	/* optimisation */
+	*best_ptr = record_string;
+	if (best_len >= CHAIN_GOOD_MATCH)	/* optimization */
 	  break;
       }
     }
-next_collision:
-    if (list_id == 3)
-    {
-      if (!record->next3)
-	break;
-      last_ptr = &record->next3;
-      record = hash_rec + record->next3;
-    }
-    else
-    {
-      if (!record->next4)
-	break;
-      last_ptr = &record->next4;
-      record = hash_rec + record->next4;
-    }
+    record_id = hash_rec[record_id].next;
   }
   return best_len;
 }
 
-static inline uns
-hash_string(byte *string, uns hash, hash_ptr_t *tab3, hash_ptr_t *tab4, struct hash_record *hash_rec, uns head)
+static uns
+hash_string(hash_ptr_t *hash_tab, uns hash, struct hash_record *hash_rec, /*byte *string,*/ uns head, uns *to_delete)
+  /* We reuse hash-records stored in a circular array.  First, delete the old
+   * one and then add the new one in front of the link-list.  */
 {
-  uns h3 = (hash & MASK_4TH_CHAR) % HASH_SIZE;
-  uns h4 = hash % HASH_SIZE;
   struct hash_record *rec = hash_rec + head;
-  rec->string = string;
-  rec->next3 = tab3[h3];
-  rec->next4 = tab4[h4];
-  tab3[h3] = head;				/* add the new record before the link-list */
-  tab4[h4] = head;
-  head++;					/* circular buffer, reuse old records */
-  if (head >= HASH_RECORDS)
-    head = 0;
+  if (*to_delete)				/* unlink the original record */
+  {
+    uns prev_id = rec->prev & ((1<<15)-1);
+    if (rec->prev & (1<<15))			/* was a head */
+      hash_tab[prev_id] = 0;
+    else					/* thanks to the ordering, this was a tail */
+      hash_rec[prev_id].next = 0;
+  }
+  rec->next = hash_tab[hash];
+  rec->prev = (1<<15) | hash;
+  hash_rec[rec->next].prev = head;
+  hash_tab[hash] = head;			/* add the new record before the link-list */
+
+  if (++head >= HASH_RECORDS)			/* circular buffer, reuse old records, 0 is unused */
+  {
+    head = 1;
+    *to_delete = 1;
+  }
   return head;
 }
 
@@ -170,8 +133,18 @@ flush_copy_command(uns bof, byte *out, byte *start, uns len)
   if (bof && len <= 238)
     *out++ = len + 17;
   else if (len < 4)
+  {
     /* cannot happen when !!bof */
     out[-2] |= len;				/* invariant: lowest 2 bits 2 bytes back */
+#ifdef	CPU_ALLOW_UNALIGNED
+    * (u32*) out = * (u32*) start;
+    return out + len;
+#else
+    while (len-- > 0)
+      *out++ = *start++;
+    return out;
+#endif
+  }
   else
   {
     /* leave 2 least significant bits of out[-2] set to 0 */
@@ -183,90 +156,87 @@ flush_copy_command(uns bof, byte *out, byte *start, uns len)
       out = dump_unary_value(out, len - 18);
     }
   }
-  while (len-- > 0)
-    *out++ = *start++;
-  return out;
+  memcpy(out, start, len);
+  return out + len;
 }
 
 int
-lizzard_compress(byte *in, uns in_len, byte *out)
-  /* Requires out being allocated for at least in_len * LIZZARD_MAX_PROLONG_FACTOR.
-   * There must be at least 8 characters allocated after in.
-   * Returns the actual compressed length. */
+lizard_compress(byte *in, uns in_len, byte *out)
+  /* Requires out being allocated for at least in_len * LIZARD_MAX_MULTIPLY +
+   * LIZARD_MAX_ADD.  There must be at least LIZARD_NEEDS_CHARS characters
+   * allocated after in.  Returns the actual compressed length. */
 {
-  hash_ptr_t hash_tab3[HASH_SIZE], hash_tab4[HASH_SIZE];
+  hash_ptr_t hash_tab[HASH_SIZE];
   struct hash_record hash_rec[HASH_RECORDS];
-  byte *in_start = in;
   byte *in_end = in + in_len;
   byte *out_start = out;
   byte *copy_start = in;
-  uns head = 0;
-  bzero(hash_tab3, sizeof(hash_tab3));		/* init the hash-table */
-  bzero(hash_tab4, sizeof(hash_tab4));
+  uns head = 1;					/* 0 in unused */
+  uns to_delete = 0, bof = 1;
+  bzero(hash_tab, sizeof(hash_tab));		/* init the hash-table */
   while (in < in_end)
   {
     uns hash = hashf(in);
     byte *best;
-    uns len = find_match(4, hash_tab4, hash_rec, hash, in, in_end, &best);
-    if (len >= 3)
-      goto match;
-    len = find_match(3, hash_tab3, hash_rec, hash, in, in_end, &best);
-
-match_found:
-    if (len >= 3)
-      goto match;
+    uns len = find_match(hash_tab[hash], hash_rec, in, in_end, &best, head);
+    if (len < 3)
 #if 0			// TODO: now, our routine does not detect matches of length 2
-    if (len == 2 && (in - best->string) < ((1<<10) + 1))
-      goto match;
+      if (len == 2 && (in - best->string - 1) < (1<<10))
+      { /* pass-thru */ }
+      else
 #endif
+      {
 literal:
-    head = hash_string(in, hash, hash_tab3, hash_tab4, hash_rec, head);
-    in++;					/* add a literal */
-    continue;
+	head = hash_string(hash_tab, hash, hash_rec, head, &to_delete);
+	in++;					/* add a literal */
+	continue;
+      }
 
-match:
     if (in + len > in_end)			/* crop EOF */
     {
       len = in_end - in;
       if (len < 3)
-	goto match_found;
+	goto literal;
     }
     /* Record the match.  */
     uns copy_len = in - copy_start;
-    uns is_in_copy_mode = copy_start==in_start || copy_len >= 4;
+    uns is_in_copy_mode = bof || copy_len >= 4;
     uns shift = in - best - 1;
     /* Try to use a 2-byte sequence.  */
+#if 0
     if (len == 2)
     {
       if (is_in_copy_mode || !copy_len)		/* cannot use with 0 copied characters, because this bit pattern is reserved for copy mode */
 	goto literal;
-
+      else
+	goto dump_2sequence;
+    } else
+#endif
+    /* now, len >= 3 */
+    if (shift < (1<<11) && len <= 8)
+    {
+      shift |= (len-3 + 2)<<11;
 dump_2sequence:
-      if (copy_len > 0)
-	out = flush_copy_command(copy_start==in_start, out, copy_start, copy_len);
+      if (copy_len)
+	out = flush_copy_command(bof, out, copy_start, copy_len);
       *out++ = (shift>>6) & ~3;			/* shift fits into 10 bits */
       *out++ = shift & 0xff;
     }
-    else if (len == 3 && is_in_copy_mode && shift >= (1<<11) && shift < (1<<11 + 1<<10))
-    {
-      /* optimisation for length-3 matches after a copy command */
-      shift -= 1<<11;
-      goto dump_2sequence;
-    }
-    /* now, len >= 3 */
-    else if (shift < (1<<11) && len <= 8)
+    else if (len == 3 && is_in_copy_mode)
     {
-      shift |= (len-3 + 2)<<11;
-      goto dump_2sequence;			/* shift has 11 bits and contains also len */
+      if (shift < (1<<11) + (1<<10))		/* optimisation for length-3 matches after a copy command */
+      {
+	shift -= 1<<11;
+	goto dump_2sequence;			/* shift has 11 bits and contains also len */
+      }
+      else					/* avoid 3-sequence compressed to 3 sequence if it can simply be appended */
+	goto literal;
     }
     /* We have to use a 3-byte sequence.  */
-    else if (len == 3 && is_in_copy_mode)
-      /* avoid 3-sequence compressed to 3 sequence if it can simply be appended */
-      goto literal;
     else
     {
-      if (copy_len > 0)
-	out = flush_copy_command(copy_start==in_start, out, copy_start, copy_len);
+      if (copy_len)
+	out = flush_copy_command(bof, out, copy_start, copy_len);
       if (shift < (1<<14))
       {
 	if (len <= 33)
@@ -280,12 +250,12 @@ dump_2sequence:
       else /* shift < (1<<15)-1 becase of HASH_RECORDS */
       {
 	shift++;				/* because shift==0 is reserved for EOF */
-	byte pos_bit = (shift>>11) & (1<<3);
+	byte pos_bit = ((shift>>11) & (1<<3)) | (1<<4);
 	if (len <= 9)
-	  *out++ = (1<<4) | pos_bit | (len-2);
+	  *out++ = pos_bit | (len-2);
 	else
 	{
-	  *out++ = (1<<4) | pos_bit;
+	  *out++ = pos_bit;
 	  out = dump_unary_value(out, len - 9);
 	}
       }
@@ -293,17 +263,16 @@ dump_2sequence:
       *out++ = shift & 0xff;
     }
     /* Update the hash-table.  */
-    head = hash_string(in, hash, hash_tab3, hash_tab4, hash_rec, head);
+    head = hash_string(hash_tab, hash, hash_rec, head, &to_delete);
     for (uns i=1; i<len; i++)
-      head = hash_string(in+i, hashf(in+i), hash_tab3, hash_tab4, hash_rec, head);
+      head = hash_string(hash_tab, hashf(in+i), hash_rec, head, &to_delete);
     in += len;
     copy_start = in;
+    bof = 0;
   }
-  if (in > in_end)				/* crop at BOF */
-    in = in_end;
   uns copy_len = in - copy_start;
-  if (copy_len > 0)
-    out = flush_copy_command(copy_start==in_start, out, copy_start, copy_len);
+  if (copy_len)
+    out = flush_copy_command(bof, out, copy_start, copy_len);
   *out++ = 17;					/* add EOF */
   *out++ = 0;
   *out++ = 0;
@@ -322,7 +291,7 @@ read_unary_value(byte *in, uns *val)
 }
 
 int
-lizzard_decompress(byte *in, byte *out)
+lizard_decompress(byte *in, byte *out)
   /* Requires out being allocated for the decompressed length must be known
    * beforehand.  It is desirable to lock the following memory page for
    * read-only access to prevent buffer overflow.  Returns the actual
@@ -334,7 +303,6 @@ lizzard_decompress(byte *in, byte *out)
   if (*in > 17)					/* short copy command at BOF */
   {
     len = *in++ - 17;
-    expect_copy_command = 2;
     goto perform_copy_command;
   }
   while (1)
@@ -345,15 +313,17 @@ lizzard_decompress(byte *in, byte *out)
       if (expect_copy_command == 1)
       {
 	if (!c)
+	{
 	  in = read_unary_value(in, &len);
+	  len += 18;
+	}
 	else
-	  len = c;
-	expect_copy_command = 2;
+	  len = c + 3;
 	goto perform_copy_command;
       }
       else
       {
-	pos = (c&0xc)<<6 | *in++;
+	pos = ((c&0xc)<<6) | *in++;
 	if (expect_copy_command == 2)
 	{
 	  pos += 1<<11;
@@ -361,12 +331,13 @@ lizzard_decompress(byte *in, byte *out)
 	}
 	else
 	  len = 2;
+	pos++;
       }
     else if (c < 0x20)
     {
       pos = (c&0x8)<<11;
       len = c&0x7;
-      if (len)
+      if (!len)
       {
 	in = read_unary_value(in, &len);
 	len += 9;
@@ -377,13 +348,12 @@ lizzard_decompress(byte *in, byte *out)
       pos |= *in++;
       if (!pos)					/* EOF */
 	break;
-      else					/* shift it back */
-	pos--;
+      /* do NOT pos++ */
     }
     else if (c < 0x40)
     {
       len = c&0x1f;
-      if (len)
+      if (!len)
       {
 	in = read_unary_value(in, &len);
 	len += 33;
@@ -392,27 +362,46 @@ lizzard_decompress(byte *in, byte *out)
 	len += 2;
       pos = (*in++ & 0xfc)<<6;
       pos |= *in++;
+      pos++;
     }
     else /* high bits encode the length */
     {
-      len = (c&0xe)>>5 -2 +3;
+      len = ((c&0xe0)>>5) -2 +3;
       pos = (c&0x1c)<<6;
       pos |= *in++;
+      pos++;
     }
     /* take from the sliding window */
-    memcpy(out, in-1-pos, len);			//FIXME: overlapping
-    out += len;
+    if (len <= pos)
+    {
+      memcpy(out, out-pos, len);
+      out += len;
+    }
+    else
+    {						/* overlapping */
+      for (; len-- > 0; out++)
+	*out = out[-pos];
+    }
     /* extract the copy-bits */
     len = in[-2] & 0x3;
     if (len)
-      expect_copy_command = 0;			/* and fall-thru */
-    else
     {
-      expect_copy_command = 1;
-      continue;
+      expect_copy_command = 0;
+#ifdef	CPU_ALLOW_UNALIGNED
+      * (u32*) out = * (u32*) in;
+      out += len;
+      in += len;
+#else
+      while (len-- > 0)
+	*out++ = *in++;
+#endif
     }
+    else
+      expect_copy_command = 1;
+    continue;
 
 perform_copy_command:
+    expect_copy_command = 2;
     memcpy(out, in, len);
     out += len;
     in += len;
@@ -426,18 +415,22 @@ perform_copy_command:
 Description of the LZO1X format :
 =================================
 
+The meaning of the commands depends on the current mode. It can be either
+the compressed mode or the copy mode. In some cases, the compressed mode
+also distinguishes whether we just left the copy mode or not.
+
 Beginning of file:
 ------------------
 
-If the first byte is 00010001, it means probably EOF (empty file), so switch
-to the compressed mode.  If it is bigger, subtract 17 and copy this number of
-the following characters to the ouput and switch to the compressed mode.  If
-it is smaller, go to the copy mode.
+Start in copy mode. If the first byte is 00010001, it means probably EOF (empty file),
+so switch to the compressed mode.  If it is bigger, subtract 17 and copy this number of
+the following characters to the output and switch to the compressed mode.
+If it is smaller, interpret it as a regular copy mode command.
 
-Compressed mode :
------------------
+Compressed mode:
+----------------
 
-Read the first byte of the sequence and determine the type of bit-encoding by
+Read the first byte of the sequence and determine the type of bit encoding by
 looking at the most significant bits.  The sequence is always at least 2 bytes
 long.  Decode sequences of these types until the EOF or END marker is read.
 
@@ -461,20 +454,18 @@ long.  Decode sequences of these types until the EOF or END marker is read.
 
 pattern					length		position
 
-0000ppCC 		 pppppppp	2		10 bits (*)
-0001pLLL L*	ppppppCC pppppppp	3..9 + extend	15 bits + EOF
+0000ppCC 		 pppppppp	2		10 bits		[default interpretation]
+0000ppCC 		 pppppppp	3		10 bits + 2048	[just after return from copy mode]
+0001pLLL L*	ppppppCC pppppppp	3..9 + extend	15 bits		[pos 0 interpreted as EOF]
 001LLLLL L*	ppppppCC pppppppp	3..33 + extend	14 bits
-01\
-10 \
-11  \
-LLLpppCC		 pppppppp	3..8		11 bits
+LLLpppCC		 pppppppp	3..8		11 bits		[LLL >= 010]
 
-Copy mode :
------------
+Copy mode:
+----------
 
 Read the first byte and, if the most significant bits are 0000, perform the
 following command, otherwise switch to the compressed mode (and evaluate the
-command).
+command there).
 
 pattern					length		position
 
@@ -483,9 +474,4 @@ pattern					length		position
   Copy L characters from the compressed text to the output.  The overhead for
   incompressible strings is only roughly 1/256 + epsilon.
 
-(*) After reading one copy command, switch to the compressed mode with the
-following "optimisation": the pattern 0000ppCC expands to length 3 instead of 2
-and 2048 is added to the position (now it is slightly more than 11 bits),
-because a sequence of length 2 would never be used.
-
 */