ucw/lizard.h

   1 /*
   2  *      LiZaRd -- Fast compression method based on Lempel-Ziv 77
   3  *
   4  *      (c) 2004, Robert Spalek <robert@ucw.cz>
   5  *
   6  *      This software may be freely distributed and used according to the terms
   7  *      of the GNU Lesser General Public License.
   8  */
   9
  10 #ifndef _UCW_LIZARD_H
  11 #define _UCW_LIZARD_H
  12
  13 #ifdef CONFIG_UCW_CLEAN_ABI     /* Rename the functions declared in this header to ucw_-prefixed names */
  14 #define adler32_update ucw_adler32_update
  15 #define lizard_alloc ucw_lizard_alloc
  16 #define lizard_compress ucw_lizard_compress
  17 #define lizard_decompress ucw_lizard_decompress
  18 #define lizard_decompress_safe ucw_lizard_decompress_safe
  19 #define lizard_free ucw_lizard_free
  20 #endif  /* CONFIG_UCW_CLEAN_ABI */
  21
  22 /***
  23  * [[basic]]
  24  * Basic application
  25  * -----------------
  26  **/
  27
  28 /**
  29  * The compression routine needs the input buffer to be 8 bytes longer than the
  30  * input data, because it does not check the input bounds all the time.
  31  **/
  32 #define LIZARD_NEEDS_CHARS      8
  33
  34 #define LIZARD_MAX_MULTIPLY     23./22  /* floating-point expression, not parenthesized */
  35 #define LIZARD_MAX_ADD          4
  36   /* In the worst case, the compressed file will not be longer than its
  37    * original length * 23/22 + 4.
  38    *
  39    * The additive constant is for EOF and the header of the file.
  40    *
  41    * The multiplicative constant comes from a 19-byte incompressible string
  42    * followed by a 3-sequence that can be compressed into a 2-byte link.  This
  43    * breaks the copy-mode and it needs to be restarted with a new header.  The
  44    * total length is 2(header) + 19(string) + 2(link) = 23.
  45    */
  46
  47 /**
  48  * The compressed data will not be longer than `LIZARD_MAX_LEN(input_length)`.
  49  * Note that `LIZARD_MAX_LEN(length) > length` (this is not a problem of the algorithm,
  50  * every lossless compression algorithm must have an input for which it produces a larger
  51  * output). The expression has a floating-point value (the multiplier is `23./22`).
  52  *
  53  * Use this to compute the size of the @out parameter of @lizard_compress().
  54  **/
  55 #define LIZARD_MAX_LEN(LENGTH) ((LENGTH) * LIZARD_MAX_MULTIPLY + LIZARD_MAX_ADD)
  56
  57 /* lizard.c */
  58
  59 /**
  60  * Compress data provided in @in.
  61  * The input buffer must be at least `@in_len + <<def_LIZARD_NEEDS_CHARS,LIZARD_NEEDS_CHARS>>`
  62  * long (the compression algorithm does not check the bounds all the time).
  63  *
  64  * The output will be stored in @out. The @out buffer must be at least <<def_LIZARD_MAX_LEN,`LIZARD_MAX_LEN(@in_len)`>>
  65  * bytes long for the output to fit in for sure.
  66  *
  67  * The function returns the number of bytes actually needed (the size of the output).
  68  *
  69  * Use @lizard_decompress() to get the original data.
  70  **/
  71 int lizard_compress(const byte *in, uint in_len, byte *out);
  72
  73 /**
  74  * Decompress data previously compressed by @lizard_compress().
  75  * Input is taken from @in and the result is stored in @out.
  76  * The size of the output is returned.
  77  *
  78  * Note that you need to know the maximal possible size of the output in
  79  * advance, so that you can allocate enough memory for @out.
  80  *
  81  * See also <<safe,safe decompression>>.
  82  **/
  83 int lizard_decompress(const byte *in, byte *out);
  84
  85 /* lizard-safe.c */
  86
  87 /***
  88  * [[safe]]
  89  * Safe decompression
  90  * ------------------
  91  *
  92  * You can use safe decompression when you want to make sure you got the
  93  * length right, or when you want to reuse the buffer for output.
  94  ***/
  95
  96 struct lizard_buffer;   /** Type of the output buffer for @lizard_decompress_safe(). **/
  97
  98 struct lizard_buffer *lizard_alloc(void);       /** Allocate a new <<struct_lizard_buffer,`lizard_buffer`>>. **/
  99 /**
 100  * Release the memory used by a <<struct_lizard_buffer,`lizard_buffer`>>.
 101  * This also frees the data stored in it (the result of the
 102  * @lizard_decompress_safe() call that used this buffer).
 103  **/
 104 void lizard_free(struct lizard_buffer *buf);
 105
 106 /**
 107  * This one acts much like @lizard_decompress(). The difference is that it
 108  * checks the data to be of correct length (therefore it will not
 109  * crash on invalid data).
 110  *
 111  * It decompresses data provided by @in. The @buf is used to get the
 112  * memory for output (you get one by @lizard_alloc()).
 113  *
 114  * The pointer to the decompressed data is returned. To free it, free the
 115  * buffer by @lizard_free().
 116  *
 117  * In the case of error, NULL is returned. In that case, `errno` is
 118  * set either to `EINVAL` (@expected_length does not match) or to
 119  * `EFAULT` (a segfault has been caught while decompressing -- it
 120  * probably means @expected_length was set way too low). Both cases
 121  * suggest either a wrongly computed length or data corruption.
 122  *
 123  * The @buf argument may be reused for multiple decompresses. However,
 124  * the data will be overwritten by the next call.
 125  *
 126  * Beware this function is not thread-safe and is not even reentrant
 127  * (because of internal segfault handling).
 128  **/
 129 byte *lizard_decompress_safe(const byte *in, struct lizard_buffer *buf, uint expected_length);
 130
 131 /* adler32.c */
 132
 133 /***
 134  * [[adler]]
 135  * Adler-32 checksum
 136  * -----------------
 137  *
 138  * This is here because it is commonly used to check data compressed by LiZaRd.
 139  * However, it could also belong to <<hash,hashing routines>>.
 140  ***/
 141
 142 /**
 143  * Update the Adler-32 checksum with more data.
 144  * @adler is the old value, @ptr points to @len bytes of data to update with.
 145  * The updated checksum is returned.
 146  **/
 147 uint adler32_update(uint adler, const byte *ptr, uint len);
 148
 149 /** Compute the Adler-32 checksum of the @len bytes at @buf in a single call. **/
 150 static inline uint adler32(const byte *buf, uint len)
 151 {
 152   const uint initial = 1;       /* value passed as the "old" checksum when starting from scratch */
 153
 154   return adler32_update(initial, buf, len);
 155 }
 156
 157 #endif