lib/lizard.c

   1 /*
   2  *      LiZzaRd -- Fast compression method based on Lempel-Ziv 77
   3  *
   4  *      (c) 2004, Robert Spalek <robert@ucw.cz>
   5  *
   6  *      The file format is based on LZO1X and
   7  *      the compression method is based on zlib.
   8  */
   9
  10 #include "lib/lib.h"
  11 #include "lib/lizzard.h"
  12
  13 #include <string.h>
  14
  15 typedef u16 hash_ptr_t;
  16 struct hash_record {
  17   byte *string;                         // in original text
  18   hash_ptr_t next4, next3;              // 0=end
  19 };
  20
  21 #define HASH_SIZE       ((1<<14) + 27)  // prime, size of hash-table
  22 #define HASH_RECORDS    ((1<<15) - 1)   // maximum number of records in hash-table
  23 #define CHAIN_MAX_TESTS 16              // crop longer collision chains
  24 #define CHAIN_GOOD_MATCH        16      // we already have a good match => end
  25
  26 static inline uns
  27 hashf(byte *string)
  28 {
  29     uns hash;
  30 #ifdef  CPU_ALLOW_UNALIGNED
  31     hash = * (uns *) string;
  32 #elif   CPU_LITTLE_ENDIAN
  33     hash = string[0] | (string[1]<<8) | (string[2]<<16) | (string[3]<<24);
  34 #else
  35     hash = string[3] | (string[2]<<8) | (string[1]<<16) | (string[0]<<24);
  36 #endif
  37     return hash;
  38 }
  39 #ifdef  CPU_LITTLE_ENDIAN
  40 #define MASK_4TH_CHAR   (~(0xff << 24))
  41 #else
  42 #define MASK_4TH_CHAR   (~0xff)
  43 #endif
  44
  45 static inline uns
  46 find_match(uns list_id, hash_ptr_t *hash_tab, struct hash_record *hash_rec, uns hash, byte *string, byte *string_end, byte **best_ptr)
  47   /* We heavily rely on gcc optimisations (e.g. list_id is a constant hence
  48    * some branches of the code will be pruned).
  49    *
  50    * hash_tab[hash] points to the 1st record of the link-list of strings with the
  51    * same hash.  The records are statically stored in circular array hash_rec
  52    * (with 1st entry unused), and the pointers are just 16-bit indices.  In one
  53    * array, we store 2 independent link-lists: with 3 and 4 hashed characters.
  54    *
  55    * Updates are performed more often than searches, so they are lazy: we only
  56    * add new records in front of link-lists and never delete them.  By reusing
  57    * an already allocated space in the circular array we necessarily modify
  58    * records that are already pointed at in the middle of some link-list.  To
  59    * prevent confusion, this function checks 2 invariants: the strings in a
  60    * collision chain are ordered, and the hash of the 1st record in the chain
  61    * must match.  */
  62 {
  63   if (list_id == 3)                             /* Find the 1st record in the collision chain.  */
  64     hash = hash & MASK_4TH_CHAR;
  65   hash %= HASH_SIZE;
  66   if (!hash_tab[hash])
  67     return 0;
  68   struct hash_record *record = hash_rec + hash_tab[hash];
  69   uns hash2 = hashf(record->string);
  70   if (list_id == 3)
  71     hash2 = hash2 & MASK_4TH_CHAR;
  72   hash2 %= HASH_SIZE;
  73   if (hash2 != hash)                            /* Verify the hash-value.  */
  74   {
  75     hash_tab[hash] = 0;                         /* prune */
  76     return 0;
  77   }
  78
  79   uns count = CHAIN_MAX_TESTS;
  80   uns best_len = 0;
  81   byte *last_string = string_end;
  82   hash_ptr_t *last_ptr = NULL;
  83   while (count-- > 0)
  84   {
  85     byte *cmp = record->string;
  86     if (cmp >= last_string)                     /* collision chain is ordered by age */
  87     {
  88       *last_ptr = 0;                            /* prune */
  89       break;
  90     }
  91     if (cmp[0] == string[0] && cmp[2] == string[2])
  92     {
  93       if (cmp[3] == string[3])
  94       {
  95         /* implies cmp[1] == string[1], becase we hash into 14+eps bits */
  96         cmp += 4;
  97         if (*cmp++ == string[4] && *cmp++ == string[5]
  98             && *cmp++ == string[6] && *cmp++ == string[7])
  99         {
 100           byte *str = string + 8;
 101           while (str <= string_end && *cmp++ == *string++);
 102         }
 103       }
 104       else if (list_id == 3)
 105         /* hashing 3 characters implies cmp[1] == string[1]
 106            hashing 4 characters implies cmp[1] != string[1] */
 107         cmp += 4;
 108       else
 109         goto next_collision;
 110       uns len = cmp - record->string - 1;       /* cmp points 2 characters after the last match */
 111       if (len > best_len)
 112       {
 113         best_len = len;
 114         *best_ptr = record->string;
 115         if (best_len >= CHAIN_GOOD_MATCH)       /* optimisation */
 116           break;
 117       }
 118     }
 119 next_collision:
 120     if (list_id == 3)
 121     {
 122       if (!record->next3)
 123         break;
 124       last_ptr = &record->next3;
 125       record = hash_rec + record->next3;
 126     }
 127     else
 128     {
 129       if (!record->next4)
 130         break;
 131       last_ptr = &record->next4;
 132       record = hash_rec + record->next4;
 133     }
 134   }
 135   return best_len;
 136 }
 137
 138 static inline uns
 139 hash_string(byte *string, uns hash, hash_ptr_t *tab3, hash_ptr_t *tab4, struct hash_record *hash_rec, uns head)
 140 {
 141   uns h3 = (hash & MASK_4TH_CHAR) % HASH_SIZE;
 142   uns h4 = hash % HASH_SIZE;
 143   struct hash_record *rec = hash_rec + head;
 144   rec->string = string;
 145   rec->next3 = tab3[h3];
 146   rec->next4 = tab4[h4];
 147   tab3[h3] = head;                              /* add the new record before the link-list */
 148   tab4[h4] = head;
 149   head++;                                       /* circular buffer, reuse old records */
 150   if (head >= HASH_RECORDS)
 151     head = 0;
 152   return head;
 153 }
 154
 155 static inline byte *
 156 dump_unary_value(byte *out, uns l)
 157 {
 158   while (l > 255)
 159   {
 160     l -= 255;
 161     *out++ = 0;
 162   }
 163   *out++ = l;
 164   return out;
 165 }
 166
 167 static byte *
 168 flush_copy_command(uns bof, byte *out, byte *start, uns len)
 169 {
 170   if (bof && len <= 238)
 171     *out++ = len + 17;
 172   else if (len < 4)
 173     /* cannot happen when !!bof */
 174     out[-2] |= len;                             /* invariant: lowest 2 bits 2 bytes back */
 175   else
 176   {
 177     /* leave 2 least significant bits of out[-2] set to 0 */
 178     if (len <= 18)
 179       *out++ = len - 3;
 180     else
 181     {
 182       *out++ = 0;
 183       out = dump_unary_value(out, len - 18);
 184     }
 185   }
 186   while (len-- > 0)
 187     *out++ = *start++;
 188   return out;
 189 }
 190
 191 int
 192 lizzard_compress(byte *in, uns in_len, byte *out)
 193   /* Requires out being allocated for at least in_len * LIZZARD_MAX_PROLONG_FACTOR.
 194    * There must be at least 8 characters allocated after in.
 195    * Returns the actual compressed length. */
 196 {
 197   hash_ptr_t hash_tab3[HASH_SIZE], hash_tab4[HASH_SIZE];
 198   struct hash_record hash_rec[HASH_RECORDS];
 199   byte *in_start = in;
 200   byte *in_end = in + in_len;
 201   byte *out_start = out;
 202   byte *copy_start = in;
 203   uns head = 0;
 204   bzero(hash_tab3, sizeof(hash_tab3));          /* init the hash-table */
 205   bzero(hash_tab4, sizeof(hash_tab4));
 206   while (in < in_end)
 207   {
 208     uns hash = hashf(in);
 209     byte *best;
 210     uns len = find_match(4, hash_tab4, hash_rec, hash, in, in_end, &best);
 211     if (len >= 3)
 212       goto match;
 213     len = find_match(3, hash_tab3, hash_rec, hash, in, in_end, &best);
 214
 215 match_found:
 216     if (len >= 3)
 217       goto match;
 218 #if 0                   // TODO: now, our routine does not detect matches of length 2
 219     if (len == 2 && (in - best->string) < ((1<<10) + 1))
 220       goto match;
 221 #endif
 222 literal:
 223     head = hash_string(in, hash, hash_tab3, hash_tab4, hash_rec, head);
 224     in++;                                       /* add a literal */
 225     continue;
 226
 227 match:
 228     if (in + len > in_end)                      /* crop EOF */
 229     {
 230       len = in_end - in;
 231       if (len < 3)
 232         goto match_found;
 233     }
 234     /* Record the match.  */
 235     uns copy_len = in - copy_start;
 236     uns is_in_copy_mode = copy_start==in_start || copy_len >= 4;
 237     uns shift = in - best - 1;
 238     /* Try to use a 2-byte sequence.  */
 239     if (len == 2)
 240     {
 241       if (is_in_copy_mode || !copy_len)         /* cannot use with 0 copied characters, because this bit pattern is reserved for copy mode */
 242         goto literal;
 243
 244 dump_2sequence:
 245       if (copy_len > 0)
 246         out = flush_copy_command(copy_start==in_start, out, copy_start, copy_len);
 247       *out++ = (shift>>6) & ~3;                 /* shift fits into 10 bits */
 248       *out++ = shift & 0xff;
 249     }
 250     else if (len == 3 && is_in_copy_mode && shift >= (1<<11) && shift < (1<<11 + 1<<10))
 251     {
 252       /* optimisation for length-3 matches after a copy command */
 253       shift -= 1<<11;
 254       goto dump_2sequence;
 255     }
 256     /* now, len >= 3 */
 257     else if (shift < (1<<11) && len <= 8)
 258     {
 259       shift |= (len-3 + 2)<<11;
 260       goto dump_2sequence;                      /* shift has 11 bits and contains also len */
 261     }
 262     /* We have to use a 3-byte sequence.  */
 263     else if (len == 3 && is_in_copy_mode)
 264       /* avoid 3-sequence compressed to 3 sequence if it can simply be appended */
 265       goto literal;
 266     else
 267     {
 268       if (copy_len > 0)
 269         out = flush_copy_command(copy_start==in_start, out, copy_start, copy_len);
 270       if (shift < (1<<14))
 271       {
 272         if (len <= 33)
 273           *out++ = (1<<5) | (len-2);
 274         else
 275         {
 276           *out++ = 1<<5;
 277           out = dump_unary_value(out, len - 33);
 278         }
 279       }
 280       else /* shift < (1<<15)-1 becase of HASH_RECORDS */
 281       {
 282         shift++;                                /* because shift==0 is reserved for EOF */
 283         byte pos_bit = (shift>>11) & (1<<3);
 284         if (len <= 9)
 285           *out++ = (1<<4) | pos_bit | (len-2);
 286         else
 287         {
 288           *out++ = (1<<4) | pos_bit;
 289           out = dump_unary_value(out, len - 9);
 290         }
 291       }
 292       *out++ = (shift>>6) & ~3;                 /* rest of shift fits into 14 bits */
 293       *out++ = shift & 0xff;
 294     }
 295     /* Update the hash-table.  */
 296     head = hash_string(in, hash, hash_tab3, hash_tab4, hash_rec, head);
 297     for (uns i=1; i<len; i++)
 298       head = hash_string(in+i, hashf(in+i), hash_tab3, hash_tab4, hash_rec, head);
 299     in += len;
 300     copy_start = in;
 301   }
 302   if (in > in_end)                              /* crop at BOF */
 303     in = in_end;
 304   uns copy_len = in - copy_start;
 305   if (copy_len > 0)
 306     out = flush_copy_command(copy_start==in_start, out, copy_start, copy_len);
 307   *out++ = 17;                                  /* add EOF */
 308   *out++ = 0;
 309   *out++ = 0;
 310   return out - out_start;
 311 }
 312
 313 static inline byte *
 314 read_unary_value(byte *in, uns *val)
 315 {
 316   uns l = 0;
 317   while (!*in++)
 318     l += 255;
 319   l += in[-1];
 320   *val = l;
 321   return in;
 322 }
 323
 324 int
 325 lizzard_decompress(byte *in, byte *out)
 326   /* Requires out being allocated for the decompressed length must be known
 327    * beforehand.  It is desirable to lock the following memory page for
 328    * read-only access to prevent buffer overflow.  Returns the actual
 329    * decompressed length or a negative number when an error has occured.  */
 330 {
 331   byte *out_start = out;
 332   uns expect_copy_command = 1;
 333   uns len;
 334   if (*in > 17)                                 /* short copy command at BOF */
 335   {
 336     len = *in++ - 17;
 337     expect_copy_command = 2;
 338     goto perform_copy_command;
 339   }
 340   while (1)
 341   {
 342     uns c = *in++;
 343     uns pos;
 344     if (c < 0x10)
 345       if (expect_copy_command == 1)
 346       {
 347         if (!c)
 348           in = read_unary_value(in, &len);
 349         else
 350           len = c;
 351         expect_copy_command = 2;
 352         goto perform_copy_command;
 353       }
 354       else
 355       {
 356         pos = (c&0xc)<<6 | *in++;
 357         if (expect_copy_command == 2)
 358         {
 359           pos += 1<<11;
 360           len = 3;
 361         }
 362         else
 363           len = 2;
 364       }
 365     else if (c < 0x20)
 366     {
 367       pos = (c&0x8)<<11;
 368       len = c&0x7;
 369       if (len)
 370       {
 371         in = read_unary_value(in, &len);
 372         len += 9;
 373       }
 374       else
 375         len += 2;
 376       pos |= (*in++ & 0xfc)<<6;
 377       pos |= *in++;
 378       if (!pos)                                 /* EOF */
 379         break;
 380       else                                      /* shift it back */
 381         pos--;
 382     }
 383     else if (c < 0x40)
 384     {
 385       len = c&0x1f;
 386       if (len)
 387       {
 388         in = read_unary_value(in, &len);
 389         len += 33;
 390       }
 391       else
 392         len += 2;
 393       pos = (*in++ & 0xfc)<<6;
 394       pos |= *in++;
 395     }
 396     else /* high bits encode the length */
 397     {
 398       len = (c&0xe)>>5 -2 +3;
 399       pos = (c&0x1c)<<6;
 400       pos |= *in++;
 401     }
 402     /* take from the sliding window */
 403     memcpy(out, in-1-pos, len);                 //FIXME: overlapping
 404     out += len;
 405     /* extract the copy-bits */
 406     len = in[-2] & 0x3;
 407     if (len)
 408       expect_copy_command = 0;                  /* and fall-thru */
 409     else
 410     {
 411       expect_copy_command = 1;
 412       continue;
 413     }
 414
 415 perform_copy_command:
 416     memcpy(out, in, len);
 417     out += len;
 418     in += len;
 419   }
 420
 421   return out - out_start;
 422 }
 423
 424 /*
 425
 426 Description of the LZO1X format :
 427 =================================
 428
 429 Beginning of file:
 430 ------------------
 431
 432 If the first byte is 00010001, it means probably EOF (empty file), so switch
 433 to the compressed mode.  If it is bigger, subtract 17 and copy this number of
 434 the following characters to the ouput and switch to the compressed mode.  If
 435 it is smaller, go to the copy mode.
 436
 437 Compressed mode :
 438 -----------------
 439
 440 Read the first byte of the sequence and determine the type of bit-encoding by
 441 looking at the most significant bits.  The sequence is always at least 2 bytes
 442 long.  Decode sequences of these types until the EOF or END marker is read.
 443
 444   length L = length of the text taken from the sliding window
 445
 446     If L=0, then count the number Z of the following zero bytes and add Z*255
 447     to the value of the following non-zero byte.  This allows setting L
 448     arbitrarily high.
 449
 450   position p = relative position of the beginning of the text
 451
 452     Exception: 00010001 00000000 00000000 means EOF
 453
 454   copying C = length 1..3 of copied characters or END=0
 455
 456     C following characters will be copied from the compressed text to the
 457     output.  The number CC is always stored in the 2 least significant bits of
 458     the second last byte of the sequence.
 459
 460     If END is read, the algorithm switches to the copy mode.
 461
 462 pattern                                 length          position
 463
 464 0000ppCC                 pppppppp       2               10 bits (*)
 465 0001pLLL L*     ppppppCC pppppppp       3..9 + extend   15 bits + EOF
 466 001LLLLL L*     ppppppCC pppppppp       3..33 + extend  14 bits
 467 01\
 468 10 \
 469 11  \
 470 LLLpppCC                 pppppppp       3..8            11 bits
 471
 472 Copy mode :
 473 -----------
 474
 475 Read the first byte and, if the most significant bits are 0000, perform the
 476 following command, otherwise switch to the compressed mode (and evaluate the
 477 command).
 478
 479 pattern                                 length          position
 480
 481 0000LLLL L*                             4..18 + extend  N/A
 482
 483   Copy L characters from the compressed text to the output.  The overhead for
 484   incompressible strings is only roughly 1/256 + epsilon.
 485
 486 (*) After reading one copy command, switch to the compressed mode with the
 487 following "optimisation": the pattern 0000ppCC expands to length 3 instead of 2
 488 and 2048 is added to the position (now it is slightly more than 11 bits),
 489 because a sequence of length 2 would never be used.
 490
 491 */