lib/lizard.c

   1 /*
   2  *      LiZzaRd -- Fast compression method based on Lempel-Ziv 77
   3  *
   4  *      (c) 2004, Robert Spalek <robert@ucw.cz>
   5  *
   6  *      The file format is based on LZO1X and
   7  *      the compression method is based on zlib.
   8  */
   9
  10 #include "lib/lib.h"
  11 #include "lib/lizzard.h"
  12
  13 #include <string.h>
  14
  15 typedef u16 hash_ptr_t;
  16 struct hash_record {
  17   /* the position in the original text is implicit; it is computed by locate_string() */
  18   hash_ptr_t next;                      // 0=end
  19   hash_ptr_t prev;                      // high bit: 0=record in array, 1=head in hash-table (i.e. value of hashf)
  20 };
  21
  22 #define HASH_SIZE       (1<<14)         // size of hash-table
  23 #define HASH_RECORDS    (1<<15)         // maximum number of records in hash-table, 0 is unused ==> subtract 1
  24 #define CHAIN_MAX_TESTS         8       // crop longer collision chains
  25 #define CHAIN_GOOD_MATCH        32      // we already have a good match => end
  26
  27 static inline uns
  28 hashf(byte *string)
  29   /* 0..HASH_SIZE-1 */
  30 {
  31     return string[0] ^ (string[1]<<3) ^ (string[2]<<6);
  32 }
  33
  34 static inline byte *
  35 locate_string(byte *string, uns record_id, uns head)
  36   /* The strings are recorded into the hash-table regularly, hence there is no
  37    * need to store the pointer there.  */
  38 {
  39   string += record_id - head;
  40   if (record_id >= head)
  41     string -= HASH_RECORDS-1;
  42   return string;
  43 }
  44
  45 static inline uns
  46 find_match(uns record_id, struct hash_record *hash_rec, byte *string, byte *string_end, byte **best_ptr, uns head)
  47   /* hash_tab[hash] == record_id points to the head of the double-linked
  48    * link-list of strings with the same hash.  The records are statically
  49    * stored in circular array hash_rec (with the 1st entry unused), and the
  50    * pointers are just 16-bit indices.  The strings in every collision chain
  51    * are ordered by age.  */
  52 {
  53   uns count = CHAIN_MAX_TESTS;
  54   uns best_len = 0;
  55   while (record_id && count-- > 0)
  56   {
  57     byte *record_string = locate_string(string, record_id, head);
  58     byte *cmp = record_string;
  59     if (cmp[0] == string[0] && cmp[2] == string[2])
  60     /* implies cmp[1] == string[1] */
  61     {
  62       if (cmp[3] == string[3])
  63       {
  64         cmp += 4;
  65         if (*cmp++ == string[4] && *cmp++ == string[5]
  66             && *cmp++ == string[6] && *cmp++ == string[7])
  67         {
  68           byte *str = string + 8;
  69           while (str <= string_end && *cmp++ == *str++);
  70         }
  71       }
  72       else
  73         cmp += 4;
  74       uns len = cmp - record_string - 1;        /* cmp points 2 characters after the last match */
  75       if (len > best_len)
  76       {
  77         best_len = len;
  78         *best_ptr = record_string;
  79         if (best_len >= CHAIN_GOOD_MATCH)       /* optimization */
  80           break;
  81       }
  82     }
  83     record_id = hash_rec[record_id].next;
  84   }
  85   return best_len;
  86 }
  87
  88 static uns
  89 hash_string(hash_ptr_t *hash_tab, uns hash, struct hash_record *hash_rec, /*byte *string,*/ uns head, uns *to_delete)
  90   /* We reuse hash-records stored in a circular array.  First, delete the old
  91    * one and then add the new one in front of the link-list.  */
  92 {
  93   struct hash_record *rec = hash_rec + head;
  94   if (*to_delete)                               /* unlink the original record */
  95   {
  96     uns prev_id = rec->prev & ((1<<15)-1);
  97     if (rec->prev & (1<<15))                    /* was a head */
  98       hash_tab[prev_id] = 0;
  99     else                                        /* thanks to the ordering, this was a tail */
 100       hash_rec[prev_id].next = 0;
 101   }
 102   rec->next = hash_tab[hash];
 103   rec->prev = (1<<15) | hash;
 104   hash_rec[rec->next].prev = head;
 105   hash_tab[hash] = head;                        /* add the new record before the link-list */
 106
 107   if (++head >= HASH_RECORDS)                   /* circular buffer, reuse old records, 0 is unused */
 108   {
 109     head = 1;
 110     *to_delete = 1;
 111   }
 112   return head;
 113 }
 114
 115 static inline byte *
 116 dump_unary_value(byte *out, uns l)
 117 {
 118   while (l > 255)
 119   {
 120     l -= 255;
 121     *out++ = 0;
 122   }
 123   *out++ = l;
 124   return out;
 125 }
 126
 127 static byte *
 128 flush_copy_command(uns bof, byte *out, byte *start, uns len)
 129 {
 130   if (bof && len <= 238)
 131     *out++ = len + 17;
 132   else if (len < 4)
 133     /* cannot happen when !!bof */
 134     out[-2] |= len;                             /* invariant: lowest 2 bits 2 bytes back */
 135   else
 136   {
 137     /* leave 2 least significant bits of out[-2] set to 0 */
 138     if (len <= 18)
 139       *out++ = len - 3;
 140     else
 141     {
 142       *out++ = 0;
 143       out = dump_unary_value(out, len - 18);
 144     }
 145   }
 146   while (len-- > 0)
 147     *out++ = *start++;
 148   return out;
 149 }
 150
 151 int
 152 lizzard_compress(byte *in, uns in_len, byte *out)
 153   /* Requires out being allocated for at least in_len * LIZZARD_MAX_MULTIPLY +
 154    * LIZZARD_MAX_ADD.  There must be at least LIZZARD_NEEDS_CHARS characters
 155    * allocated after in.  Returns the actual compressed length. */
 156 {
 157   hash_ptr_t hash_tab[HASH_SIZE];
 158   struct hash_record hash_rec[HASH_RECORDS];
 159   byte *in_start = in;
 160   byte *in_end = in + in_len;
 161   byte *out_start = out;
 162   byte *copy_start = in;
 163   uns head = 1;                                 /* 0 in unused */
 164   uns to_delete = 0;
 165   bzero(hash_tab, sizeof(hash_tab));            /* init the hash-table */
 166   while (in < in_end)
 167   {
 168     uns hash = hashf(in);
 169     byte *best;
 170     uns len = find_match(hash_tab[hash], hash_rec, in, in_end, &best, head);
 171     if (len < 3)
 172 #if 0                   // TODO: now, our routine does not detect matches of length 2
 173       if (len == 2 && (in - best->string) < ((1<<10) + 1))
 174       { /* pass-thru */ }
 175       else
 176 #endif
 177       {
 178 literal:
 179         head = hash_string(hash_tab, hash, hash_rec, head, &to_delete);
 180         in++;                                   /* add a literal */
 181         continue;
 182       }
 183
 184     if (in + len > in_end)                      /* crop EOF */
 185     {
 186       len = in_end - in;
 187       if (len < 3)
 188         goto literal;
 189     }
 190     /* Record the match.  */
 191     uns copy_len = in - copy_start;
 192     uns is_in_copy_mode = copy_start==in_start || copy_len >= 4;
 193     uns shift = in - best - 1;
 194     /* Try to use a 2-byte sequence.  */
 195 #if 0
 196     if (len == 2)
 197     {
 198       if (is_in_copy_mode || !copy_len)         /* cannot use with 0 copied characters, because this bit pattern is reserved for copy mode */
 199         goto literal;
 200       else
 201         goto dump_2sequence;
 202     } else
 203 #endif
 204     if (len == 3 && is_in_copy_mode && shift >= (1<<11) && shift < (1<<11) + (1<<10))
 205     {
 206       /* optimisation for length-3 matches after a copy command */
 207       shift -= 1<<11;
 208 dump_2sequence:
 209       if (copy_len > 0)
 210         out = flush_copy_command(copy_start==in_start, out, copy_start, copy_len);
 211       *out++ = (shift>>6) & ~3;                 /* shift fits into 10 bits */
 212       *out++ = shift & 0xff;
 213     }
 214     /* now, len >= 3 */
 215     else if (shift < (1<<11) && len <= 8)
 216     {
 217       shift |= (len-3 + 2)<<11;
 218       goto dump_2sequence;                      /* shift has 11 bits and contains also len */
 219     }
 220     /* We have to use a 3-byte sequence.  */
 221     else if (len == 3 && is_in_copy_mode)
 222       /* avoid 3-sequence compressed to 3 sequence if it can simply be appended */
 223       goto literal;
 224     else
 225     {
 226       if (copy_len > 0)
 227         out = flush_copy_command(copy_start==in_start, out, copy_start, copy_len);
 228       if (shift < (1<<14))
 229       {
 230         if (len <= 33)
 231           *out++ = (1<<5) | (len-2);
 232         else
 233         {
 234           *out++ = 1<<5;
 235           out = dump_unary_value(out, len - 33);
 236         }
 237       }
 238       else /* shift < (1<<15)-1 becase of HASH_RECORDS */
 239       {
 240         shift++;                                /* because shift==0 is reserved for EOF */
 241         byte pos_bit = (shift>>11) & (1<<3);
 242         if (len <= 9)
 243           *out++ = (1<<4) | pos_bit | (len-2);
 244         else
 245         {
 246           *out++ = (1<<4) | pos_bit;
 247           out = dump_unary_value(out, len - 9);
 248         }
 249       }
 250       *out++ = (shift>>6) & ~3;                 /* rest of shift fits into 14 bits */
 251       *out++ = shift & 0xff;
 252     }
 253     /* Update the hash-table.  */
 254     head = hash_string(hash_tab, hash, hash_rec, head, &to_delete);
 255     for (uns i=1; i<len; i++)
 256       head = hash_string(hash_tab, hashf(in+i), hash_rec, head, &to_delete);
 257     in += len;
 258     copy_start = in;
 259   }
 260   if (in > in_end)                              /* crop at BOF */
 261     in = in_end;
 262   uns copy_len = in - copy_start;
 263   if (copy_len > 0)
 264     out = flush_copy_command(copy_start==in_start, out, copy_start, copy_len);
 265   *out++ = 17;                                  /* add EOF */
 266   *out++ = 0;
 267   *out++ = 0;
 268   return out - out_start;
 269 }
 270
 271 static inline byte *
 272 read_unary_value(byte *in, uns *val)
 273 {
 274   uns l = 0;
 275   while (!*in++)
 276     l += 255;
 277   l += in[-1];
 278   *val = l;
 279   return in;
 280 }
 281
 282 int
 283 lizzard_decompress(byte *in, byte *out)
 284   /* Requires out being allocated for the decompressed length must be known
 285    * beforehand.  It is desirable to lock the following memory page for
 286    * read-only access to prevent buffer overflow.  Returns the actual
 287    * decompressed length or a negative number when an error has occured.  */
 288 {
 289   byte *out_start = out;
 290   uns expect_copy_command = 1;
 291   uns len;
 292   if (*in > 17)                                 /* short copy command at BOF */
 293   {
 294     len = *in++ - 17;
 295     expect_copy_command = 2;
 296     goto perform_copy_command;
 297   }
 298   while (1)
 299   {
 300     uns c = *in++;
 301     uns pos;
 302     if (c < 0x10)
 303       if (expect_copy_command == 1)
 304       {
 305         if (!c)
 306         {
 307           in = read_unary_value(in, &len);
 308           len += 18;
 309         }
 310         else
 311           len = c + 3;
 312         expect_copy_command = 2;
 313         goto perform_copy_command;
 314       }
 315       else
 316       {
 317         pos = ((c&0xc)<<6) | *in++;
 318         if (expect_copy_command == 2)
 319         {
 320           pos += 1<<11;
 321           len = 3;
 322         }
 323         else
 324           len = 2;
 325         pos++;
 326       }
 327     else if (c < 0x20)
 328     {
 329       pos = (c&0x8)<<11;
 330       len = c&0x7;
 331       if (!len)
 332       {
 333         in = read_unary_value(in, &len);
 334         len += 9;
 335       }
 336       else
 337         len += 2;
 338       pos |= (*in++ & 0xfc)<<6;
 339       pos |= *in++;
 340       if (!pos)                                 /* EOF */
 341         break;
 342       /* do NOT pos++ */
 343     }
 344     else if (c < 0x40)
 345     {
 346       len = c&0x1f;
 347       if (!len)
 348       {
 349         in = read_unary_value(in, &len);
 350         len += 33;
 351       }
 352       else
 353         len += 2;
 354       pos = (*in++ & 0xfc)<<6;
 355       pos |= *in++;
 356       pos++;
 357     }
 358     else /* high bits encode the length */
 359     {
 360       len = ((c&0xe0)>>5) -2 +3;
 361       pos = (c&0x1c)<<6;
 362       pos |= *in++;
 363       pos++;
 364     }
 365     /* take from the sliding window */
 366     if (len <= pos)
 367     {
 368       memcpy(out, out-pos, len);
 369       out += len;
 370     }
 371     else
 372     {                                           /* overlapping */
 373       for (; len-- > 0; out++)
 374         *out = out[-pos];
 375     }
 376     /* extract the copy-bits */
 377     len = in[-2] & 0x3;
 378     if (len)
 379     {
 380       expect_copy_command = 0;
 381       while (len-- > 0)
 382         *out++ = *in++;
 383     }
 384     else
 385       expect_copy_command = 1;
 386     continue;
 387
 388 perform_copy_command:
 389     memcpy(out, in, len);
 390     out += len;
 391     in += len;
 392   }
 393
 394   return out - out_start;
 395 }
 396
 397 /*
 398
 399 Description of the LZO1X format :
 400 =================================
 401
 402 Beginning of file:
 403 ------------------
 404
 405 If the first byte is 00010001, it means probably EOF (empty file), so switch
 406 to the compressed mode.  If it is bigger, subtract 17 and copy this number of
 407 the following characters to the ouput and switch to the compressed mode.  If
 408 it is smaller, go to the copy mode.
 409
 410 Compressed mode :
 411 -----------------
 412
 413 Read the first byte of the sequence and determine the type of bit-encoding by
 414 looking at the most significant bits.  The sequence is always at least 2 bytes
 415 long.  Decode sequences of these types until the EOF or END marker is read.
 416
 417   length L = length of the text taken from the sliding window
 418
 419     If L=0, then count the number Z of the following zero bytes and add Z*255
 420     to the value of the following non-zero byte.  This allows setting L
 421     arbitrarily high.
 422
 423   position p = relative position of the beginning of the text
 424
 425     Exception: 00010001 00000000 00000000 means EOF
 426
 427   copying C = length 1..3 of copied characters or END=0
 428
 429     C following characters will be copied from the compressed text to the
 430     output.  The number CC is always stored in the 2 least significant bits of
 431     the second last byte of the sequence.
 432
 433     If END is read, the algorithm switches to the copy mode.
 434
 435 pattern                                 length          position
 436
 437 0000ppCC                 pppppppp       2               10 bits (*)
 438 0001pLLL L*     ppppppCC pppppppp       3..9 + extend   15 bits + EOF
 439 001LLLLL L*     ppppppCC pppppppp       3..33 + extend  14 bits
 440 01\
 441 10 \
 442 11  \
 443 LLLpppCC                 pppppppp       3..8            11 bits
 444
 445 Copy mode :
 446 -----------
 447
 448 Read the first byte and, if the most significant bits are 0000, perform the
 449 following command, otherwise switch to the compressed mode (and evaluate the
 450 command).
 451
 452 pattern                                 length          position
 453
 454 0000LLLL L*                             4..18 + extend  N/A
 455
 456   Copy L characters from the compressed text to the output.  The overhead for
 457   incompressible strings is only roughly 1/256 + epsilon.
 458
 459 (*) After reading one copy command, switch to the compressed mode with the
 460 following "optimisation": the pattern 0000ppCC expands to length 3 instead of 2
 461 and 2048 is added to the position (now it is slightly more than 11 bits),
 462 because a sequence of length 2 would never be used.
 463
 464 */