lib/lizard.c

   1 /*
   2  *      LiZaRd -- Fast compression method based on Lempel-Ziv 77
   3  *
   4  *      (c) 2004, Robert Spalek <robert@ucw.cz>
   5  *
   6  *      This software may be freely distributed and used according to the terms
   7  *      of the GNU Lesser General Public License.
   8  *
   9  *      The file format is based on LZO1X and
  10  *      the compression method is based on zlib.
  11  */
  12
  13 #include "lib/lib.h"
  14 #include "lib/lizard.h"
  15
  16 #include <string.h>
  17
  18 typedef u16 hash_ptr_t;
  19 struct hash_record {
  20   /* the position in the original text is implicit; it is computed by locate_string() */
  21   hash_ptr_t next;                      // 0=end
  22   hash_ptr_t prev;                      // high bit: 0=record in array, 1=head in hash-table (i.e. value of hashf)
  23 };
  24
  25 #define HASH_SIZE       (1<<14)         // size of hash-table
  26 #define HASH_RECORDS    (1<<15)         // maximum number of records in hash-table, 0 is unused ==> subtract 1
  27 #define CHAIN_MAX_TESTS         8       // crop longer collision chains
  28 #define CHAIN_GOOD_MATCH        32      // we already have a good match => end
  29
  30 static inline uns
  31 hashf(byte *string)
  32   /* 0..HASH_SIZE-1 */
  33 {
  34     return string[0] ^ (string[1]<<3) ^ (string[2]<<6);
  35 }
  36
  37 static inline byte *
  38 locate_string(byte *string, uns record_id, uns head)
  39   /* The strings are recorded into the hash-table regularly, hence there is no
  40    * need to store the pointer there.  */
  41 {
  42   string += record_id - head;
  43   if (record_id >= head)
  44     string -= HASH_RECORDS-1;
  45   return string;
  46 }
  47
  48 static inline uns
  49 find_match(uns record_id, struct hash_record *hash_rec, byte *string, byte *string_end, byte **best_ptr, uns head)
  50   /* hash_tab[hash] == record_id points to the head of the double-linked
  51    * link-list of strings with the same hash.  The records are statically
  52    * stored in circular array hash_rec (with the 1st entry unused), and the
  53    * pointers are just 16-bit indices.  The strings in every collision chain
  54    * are ordered by age.  */
  55 {
  56   uns count = CHAIN_MAX_TESTS;
  57   uns best_len = 0;
  58   while (record_id && count-- > 0)
  59   {
  60     byte *record_string = locate_string(string, record_id, head);
  61     byte *cmp = record_string;
  62     if (cmp[0] == string[0] && cmp[2] == string[2])
  63     /* implies cmp[1] == string[1] */
  64     {
  65       if (cmp[3] == string[3])
  66       {
  67         cmp += 4;
  68         if (*cmp++ == string[4] && *cmp++ == string[5]
  69             && *cmp++ == string[6] && *cmp++ == string[7])
  70         {
  71           byte *str = string + 8;
  72           while (str <= string_end && *cmp++ == *str++);
  73         }
  74       }
  75       else
  76         cmp += 4;
  77       uns len = cmp - record_string - 1;        /* cmp points 2 characters after the last match */
  78       if (len > best_len)
  79       {
  80         best_len = len;
  81         *best_ptr = record_string;
  82         if (best_len >= CHAIN_GOOD_MATCH)       /* optimization */
  83           break;
  84       }
  85     }
  86     record_id = hash_rec[record_id].next;
  87   }
  88   return best_len;
  89 }
  90
  91 static uns
  92 hash_string(hash_ptr_t *hash_tab, uns hash, struct hash_record *hash_rec, /*byte *string,*/ uns head, uns *to_delete)
  93   /* We reuse hash-records stored in a circular array.  First, delete the old
  94    * one and then add the new one in front of the link-list.  */
  95 {
  96   struct hash_record *rec = hash_rec + head;
  97   if (*to_delete)                               /* unlink the original record */
  98   {
  99     uns prev_id = rec->prev & ((1<<15)-1);
 100     if (rec->prev & (1<<15))                    /* was a head */
 101       hash_tab[prev_id] = 0;
 102     else                                        /* thanks to the ordering, this was a tail */
 103       hash_rec[prev_id].next = 0;
 104   }
 105   rec->next = hash_tab[hash];
 106   rec->prev = (1<<15) | hash;
 107   hash_rec[rec->next].prev = head;
 108   hash_tab[hash] = head;                        /* add the new record before the link-list */
 109
 110   if (++head >= HASH_RECORDS)                   /* circular buffer, reuse old records, 0 is unused */
 111   {
 112     head = 1;
 113     *to_delete = 1;
 114   }
 115   return head;
 116 }
 117
 118 static inline byte *
 119 dump_unary_value(byte *out, uns l)
 120 {
 121   while (l > 255)
 122   {
 123     l -= 255;
 124     *out++ = 0;
 125   }
 126   *out++ = l;
 127   return out;
 128 }
 129
 130 static byte *
 131 flush_copy_command(uns bof, byte *out, byte *start, uns len)
 132 {
 133   if (bof && len <= 238)
 134     *out++ = len + 17;
 135   else if (len < 4)
 136   {
 137     /* cannot happen when !!bof */
 138     out[-2] |= len;                             /* invariant: lowest 2 bits 2 bytes back */
 139 #ifdef  CPU_ALLOW_UNALIGNED
 140     * (u32*) out = * (u32*) start;
 141     return out + len;
 142 #else
 143     while (len-- > 0)
 144       *out++ = *start++;
 145     return out;
 146 #endif
 147   }
 148   else
 149   {
 150     /* leave 2 least significant bits of out[-2] set to 0 */
 151     if (len <= 18)
 152       *out++ = len - 3;
 153     else
 154     {
 155       *out++ = 0;
 156       out = dump_unary_value(out, len - 18);
 157     }
 158   }
 159   memcpy(out, start, len);
 160   return out + len;
 161 }
 162
 163 int
 164 lizard_compress(byte *in, uns in_len, byte *out)
 165   /* Requires out being allocated for at least in_len * LIZARD_MAX_MULTIPLY +
 166    * LIZARD_MAX_ADD.  There must be at least LIZARD_NEEDS_CHARS characters
 167    * allocated after in.  Returns the actual compressed length. */
 168 {
 169   hash_ptr_t hash_tab[HASH_SIZE];
 170   struct hash_record hash_rec[HASH_RECORDS];
 171   byte *in_end = in + in_len;
 172   byte *out_start = out;
 173   byte *copy_start = in;
 174   uns head = 1;                                 /* 0 in unused */
 175   uns to_delete = 0, bof = 1;
 176   bzero(hash_tab, sizeof(hash_tab));            /* init the hash-table */
 177   while (in < in_end)
 178   {
 179     uns hash = hashf(in);
 180     byte *best;
 181     uns len = find_match(hash_tab[hash], hash_rec, in, in_end, &best, head);
 182     if (len < 3)
 183 #if 0                   // TODO: now, our routine does not detect matches of length 2
 184       if (len == 2 && (in - best->string) < ((1<<10) + 1))
 185       { /* pass-thru */ }
 186       else
 187 #endif
 188       {
 189 literal:
 190         head = hash_string(hash_tab, hash, hash_rec, head, &to_delete);
 191         in++;                                   /* add a literal */
 192         continue;
 193       }
 194
 195     if (in + len > in_end)                      /* crop EOF */
 196     {
 197       len = in_end - in;
 198       if (len < 3)
 199         goto literal;
 200     }
 201     /* Record the match.  */
 202     uns copy_len = in - copy_start;
 203     uns is_in_copy_mode = bof || copy_len >= 4;
 204     uns shift = in - best - 1;
 205     /* Try to use a 2-byte sequence.  */
 206 #if 0
 207     if (len == 2)
 208     {
 209       if (is_in_copy_mode || !copy_len)         /* cannot use with 0 copied characters, because this bit pattern is reserved for copy mode */
 210         goto literal;
 211       else
 212         goto dump_2sequence;
 213     } else
 214 #endif
 215     if (len == 3 && is_in_copy_mode && shift >= (1<<11) && shift < (1<<11) + (1<<10))
 216     {
 217       /* optimisation for length-3 matches after a copy command */
 218       shift -= 1<<11;
 219 dump_2sequence:
 220       if (copy_len)
 221         out = flush_copy_command(bof, out, copy_start, copy_len);
 222       *out++ = (shift>>6) & ~3;                 /* shift fits into 10 bits */
 223       *out++ = shift & 0xff;
 224     }
 225     /* now, len >= 3 */
 226     else if (shift < (1<<11) && len <= 8)
 227     {
 228       shift |= (len-3 + 2)<<11;
 229       goto dump_2sequence;                      /* shift has 11 bits and contains also len */
 230     }
 231     /* We have to use a 3-byte sequence.  */
 232     else if (len == 3 && is_in_copy_mode)
 233       /* avoid 3-sequence compressed to 3 sequence if it can simply be appended */
 234       goto literal;
 235     else
 236     {
 237       if (copy_len)
 238         out = flush_copy_command(bof, out, copy_start, copy_len);
 239       if (shift < (1<<14))
 240       {
 241         if (len <= 33)
 242           *out++ = (1<<5) | (len-2);
 243         else
 244         {
 245           *out++ = 1<<5;
 246           out = dump_unary_value(out, len - 33);
 247         }
 248       }
 249       else /* shift < (1<<15)-1 becase of HASH_RECORDS */
 250       {
 251         shift++;                                /* because shift==0 is reserved for EOF */
 252         byte pos_bit = ((shift>>11) & (1<<3)) | (1<<4);
 253         if (len <= 9)
 254           *out++ = pos_bit | (len-2);
 255         else
 256         {
 257           *out++ = pos_bit;
 258           out = dump_unary_value(out, len - 9);
 259         }
 260       }
 261       *out++ = (shift>>6) & ~3;                 /* rest of shift fits into 14 bits */
 262       *out++ = shift & 0xff;
 263     }
 264     /* Update the hash-table.  */
 265     head = hash_string(hash_tab, hash, hash_rec, head, &to_delete);
 266     for (uns i=1; i<len; i++)
 267       head = hash_string(hash_tab, hashf(in+i), hash_rec, head, &to_delete);
 268     in += len;
 269     copy_start = in;
 270     bof = 0;
 271   }
 272   uns copy_len = in - copy_start;
 273   if (copy_len)
 274     out = flush_copy_command(bof, out, copy_start, copy_len);
 275   *out++ = 17;                                  /* add EOF */
 276   *out++ = 0;
 277   *out++ = 0;
 278   return out - out_start;
 279 }
 280
 281 static inline byte *
 282 read_unary_value(byte *in, uns *val)
 283 {
 284   uns l = 0;
 285   while (!*in++)
 286     l += 255;
 287   l += in[-1];
 288   *val = l;
 289   return in;
 290 }
 291
 292 int
 293 lizard_decompress(byte *in, byte *out)
 294   /* Requires out being allocated for the decompressed length must be known
 295    * beforehand.  It is desirable to lock the following memory page for
 296    * read-only access to prevent buffer overflow.  Returns the actual
 297    * decompressed length or a negative number when an error has occured.  */
 298 {
 299   byte *out_start = out;
 300   uns expect_copy_command = 1;
 301   uns len;
 302   if (*in > 17)                                 /* short copy command at BOF */
 303   {
 304     len = *in++ - 17;
 305     goto perform_copy_command;
 306   }
 307   while (1)
 308   {
 309     uns c = *in++;
 310     uns pos;
 311     if (c < 0x10)
 312       if (expect_copy_command == 1)
 313       {
 314         if (!c)
 315         {
 316           in = read_unary_value(in, &len);
 317           len += 18;
 318         }
 319         else
 320           len = c + 3;
 321         goto perform_copy_command;
 322       }
 323       else
 324       {
 325         pos = ((c&0xc)<<6) | *in++;
 326         if (expect_copy_command == 2)
 327         {
 328           pos += 1<<11;
 329           len = 3;
 330         }
 331         else
 332           len = 2;
 333         pos++;
 334       }
 335     else if (c < 0x20)
 336     {
 337       pos = (c&0x8)<<11;
 338       len = c&0x7;
 339       if (!len)
 340       {
 341         in = read_unary_value(in, &len);
 342         len += 9;
 343       }
 344       else
 345         len += 2;
 346       pos |= (*in++ & 0xfc)<<6;
 347       pos |= *in++;
 348       if (!pos)                                 /* EOF */
 349         break;
 350       /* do NOT pos++ */
 351     }
 352     else if (c < 0x40)
 353     {
 354       len = c&0x1f;
 355       if (!len)
 356       {
 357         in = read_unary_value(in, &len);
 358         len += 33;
 359       }
 360       else
 361         len += 2;
 362       pos = (*in++ & 0xfc)<<6;
 363       pos |= *in++;
 364       pos++;
 365     }
 366     else /* high bits encode the length */
 367     {
 368       len = ((c&0xe0)>>5) -2 +3;
 369       pos = (c&0x1c)<<6;
 370       pos |= *in++;
 371       pos++;
 372     }
 373     /* take from the sliding window */
 374     if (len <= pos)
 375     {
 376       memcpy(out, out-pos, len);
 377       out += len;
 378     }
 379     else
 380     {                                           /* overlapping */
 381       for (; len-- > 0; out++)
 382         *out = out[-pos];
 383     }
 384     /* extract the copy-bits */
 385     len = in[-2] & 0x3;
 386     if (len)
 387     {
 388       expect_copy_command = 0;
 389 #ifdef  CPU_ALLOW_UNALIGNED
 390       * (u32*) out = * (u32*) in;
 391       out += len;
 392       in += len;
 393 #else
 394       while (len-- > 0)
 395         *out++ = *in++;
 396 #endif
 397     }
 398     else
 399       expect_copy_command = 1;
 400     continue;
 401
 402 perform_copy_command:
 403     expect_copy_command = 2;
 404     memcpy(out, in, len);
 405     out += len;
 406     in += len;
 407   }
 408
 409   return out - out_start;
 410 }
 411
 412 /*
 413
 414 Description of the LZO1X format :
 415 =================================
 416
 417 Beginning of file:
 418 ------------------
 419
 420 If the first byte is 00010001, it means probably EOF (empty file), so switch
 421 to the compressed mode.  If it is bigger, subtract 17 and copy this number of
 422 the following characters to the ouput and switch to the compressed mode.  If
 423 it is smaller, go to the copy mode.
 424
 425 Compressed mode :
 426 -----------------
 427
 428 Read the first byte of the sequence and determine the type of bit-encoding by
 429 looking at the most significant bits.  The sequence is always at least 2 bytes
 430 long.  Decode sequences of these types until the EOF or END marker is read.
 431
 432   length L = length of the text taken from the sliding window
 433
 434     If L=0, then count the number Z of the following zero bytes and add Z*255
 435     to the value of the following non-zero byte.  This allows setting L
 436     arbitrarily high.
 437
 438   position p = relative position of the beginning of the text
 439
 440     Exception: 00010001 00000000 00000000 means EOF
 441
 442   copying C = length 1..3 of copied characters or END=0
 443
 444     C following characters will be copied from the compressed text to the
 445     output.  The number CC is always stored in the 2 least significant bits of
 446     the second last byte of the sequence.
 447
 448     If END is read, the algorithm switches to the copy mode.
 449
 450 pattern                                 length          position
 451
 452 0000ppCC                 pppppppp       2               10 bits (*)
 453 0001pLLL L*     ppppppCC pppppppp       3..9 + extend   15 bits + EOF
 454 001LLLLL L*     ppppppCC pppppppp       3..33 + extend  14 bits
 455 01\
 456 10 \
 457 11  \
 458 LLLpppCC                 pppppppp       3..8            11 bits
 459
 460 Copy mode :
 461 -----------
 462
 463 Read the first byte and, if the most significant bits are 0000, perform the
 464 following command, otherwise switch to the compressed mode (and evaluate the
 465 command).
 466
 467 pattern                                 length          position
 468
 469 0000LLLL L*                             4..18 + extend  N/A
 470
 471   Copy L characters from the compressed text to the output.  The overhead for
 472   incompressible strings is only roughly 1/256 + epsilon.
 473
 474 (*) After reading one copy command, switch to the compressed mode with the
 475 following "optimisation": the pattern 0000ppCC expands to length 3 instead of 2
 476 and 2048 is added to the position (now it is slightly more than 11 bits),
 477 because a sequence of length 2 would never be used.
 478
 479 */