From fdb438738b41d339e87aaa0c93620a45660ffa5d Mon Sep 17 00:00:00 2001
From: Michal Vaner <vorner@ucw.cz>
Date: Thu, 27 Nov 2008 17:19:17 +0100
Subject: [PATCH] ucw docs: The lizard compression algorithm

---
 ucw/doc/Makefile     |  2 +-
 ucw/doc/compress.txt | 15 +++++++
 ucw/doc/hash.txt     | 15 +++++++
 ucw/doc/index.txt    |  3 +-
 ucw/lizard.h         | 96 ++++++++++++++++++++++++++++++++++++++++----
 5 files changed, 121 insertions(+), 10 deletions(-)
 create mode 100644 ucw/doc/compress.txt

diff --git a/ucw/doc/Makefile b/ucw/doc/Makefile
index c14ff1ec..ecb22d5c 100644
--- a/ucw/doc/Makefile
+++ b/ucw/doc/Makefile
@@ -2,7 +2,7 @@
 
 DIRS+=ucw/doc
 
-UCW_DOCS=fastbuf index config configure install basecode hash docsys conf mempool eltpool mainloop generic growbuf unaligned lists chartype unicode prime binsearch heap binheap
+UCW_DOCS=fastbuf index config configure install basecode hash docsys conf mempool eltpool mainloop generic growbuf unaligned lists chartype unicode prime binsearch heap binheap compress
 UCW_INDEX=$(o)/ucw/doc/def_index.html
 UCW_DOCS_HTML=$(addprefix $(o)/ucw/doc/,$(addsuffix .html,$(UCW_DOCS)))
 
diff --git a/ucw/doc/compress.txt b/ucw/doc/compress.txt
new file mode 100644
index 00000000..7e950997
--- /dev/null
+++ b/ucw/doc/compress.txt
@@ -0,0 +1,15 @@
+Compression
+===========
+
+The library contains a compression routine, called LiZaRd.  It is
+a modified Lempel-Ziv 77 method with a slightly worse compression ratio,
+but with faster compression and decompression.
+
+// TODO Meaning of the abbreviation
+// TODO Actual numbers how fast it is?
+
+- <<basic,Basic application>>
+- <<safe,Safe decompression>>
+- <<adler,Adler-32 checksum>>
+
+!!ucw/lizard.h
diff --git a/ucw/doc/hash.txt b/ucw/doc/hash.txt
index bcdbb7e3..3e01b941 100644
--- a/ucw/doc/hash.txt
+++ b/ucw/doc/hash.txt
@@ -13,6 +13,9 @@ There are non-cryptographic hashes as well.
 - <<sha1:sha1_hmac(),SHA1_HMAC>>
 - <<usage,Common usage>>
 
+<<checksum,Checksums>>:
+- <<compress:adler,Adler-32>>
+
 <<nocrypto,Non-cryptographic ones>>:
 
 - <<strhash,String & block hashes>>
@@ -63,10 +66,22 @@ SHA1 has the same interface, so the same two ways apply.
 
 See also <<string:mem_to_hex()>>.
 
+[[checksum]]
+Checksums
+---------
+
+Their purpose is checking against random data changes, hardware
+failures and the like. They are not meant to protect against deliberate attacks.
+
+The <<compress:adler,Adler-32 checksum>> is documented in the
+<<compress:,compression chapter>>.
+
 [[nocrypto]]
 Non-cryptographic hashes
 ------------------------
 
+They are usually used to identify values in hash tables.
+
 All these functions expect to be moduled by the size of a hash table.
 The size should be a prime number (it gives better distribution).
 
diff --git a/ucw/doc/index.txt b/ucw/doc/index.txt
index ab7dce96..69823cd1 100644
--- a/ucw/doc/index.txt
+++ b/ucw/doc/index.txt
@@ -30,6 +30,7 @@ Modules
 - <<binsearch:,Binary search>>
 - <<heap:,Binary heaps>>
 - <<binheap:,Binomial heaps>>
+- <<compress:,Compression>>
 
 Other features
 --------------
@@ -69,8 +70,6 @@ Yet undocumented modules
   * `ipaccess.h`
 - Prefetching of memory
   * `prefetch.h`
-- Compression
-  * `lizard.h`
 - Caches
   * `qache.h`
 - Threads
diff --git a/ucw/lizard.h b/ucw/lizard.h
index 547f181d..f4520725 100644
--- a/ucw/lizard.h
+++ b/ucw/lizard.h
@@ -10,9 +10,18 @@
 #ifndef _UCW_LIZARD_H
 #define _UCW_LIZARD_H
 
+/***
+ * [[basic]]
+ * Basic application
+ * -----------------
+ **/
+
+/**
+ * The compression routine needs input buffer 8 characters longer, because it
+ * does not check the input bounds all the time.
+ **/
 #define	LIZARD_NEEDS_CHARS	8
-  /* The compression routine needs input buffer 8 characters longer, because it
-   * does not check the input bounds all the time.  */
+
 #define	LIZARD_MAX_MULTIPLY	23./22
 #define	LIZARD_MAX_ADD		4
   /* In the worst case, the compressed file will not be longer than its
@@ -26,22 +35,95 @@
    * total length is 2(header) + 19(string) + 2(link) = 23.
    */
 
+/**
+ * The compressed data will not be longer than `LIZARD_MAX_LEN(input_length)`.
+ * Note that `LIZARD_MAX_LEN(length) > length` (this is not a problem of the algorithm,
+ * every lossless compression algorithm must have an input for which it produces a larger
+ * output).
+ *
+ * Use this to compute the size of the @out parameter of @lizard_compress().
+ **/
+#define LIZARD_MAX_LEN(LENGTH) ((LENGTH) * LIZARD_MAX_MULTIPLY + LIZARD_MAX_ADD)
+
 /* lizard.c */
+
+/**
+ * Compress data provided in @in.
+ * The input buffer must be at least `@in_len + <<def_LIZARD_NEEDS_CHARS,LIZARD_NEEDS_CHARS>>`
+ * long (the compression algorithm does not check the bounds all the time).
+ *
+ * The output will be stored in @out. The @out buffer must be at least <<def_LIZARD_MAX_LEN,`LIZARD_MAX_LEN(@in_len)`>>
+ * bytes long for the output to fit in for sure.
+ *
+ * The function returns number of bytes actually needed (the size of output).
+ *
+ * Use @lizard_decompress() to get the original data.
+ **/
 int lizard_compress(const byte *in, uns in_len, byte *out);
+
+/**
+ * Decompress data previously compressed by @lizard_compress().
+ * Input is taken from @in and the result stored in @out.
+ * The size of output is returned.
+ *
+ * Note that you need to know the maximal possible size of the output to
+ * allocate enough memory.
+ *
+ * See also <<safe,safe decompression>>.
+ **/
 int lizard_decompress(const byte *in, byte *out);
 
 /* lizard-safe.c */
-struct lizard_buffer;
 
-struct lizard_buffer *lizard_alloc(void);
-void lizard_free(struct lizard_buffer *buf);
+/***
+ * [[safe]]
+ * Safe decompression
+ * ------------------
+ *
+ * You can use safe decompression, when you want to make sure you got the
+ * length right and when you want to reuse the buffer for output.
+ ***/
+
+struct lizard_buffer;	/** Type of the output buffer for @lizard_decompress_safe(). **/
+
+struct lizard_buffer *lizard_alloc(void);	/** Get me a new <<struct_lizard_buffer,`lizard_buffer`>>. **/
+void lizard_free(struct lizard_buffer *buf);	/** Return memory used by a <<struct_lizard_buffer,`lizard_buffer`>>. **/
+
+/**
+ * Decompress data previously compressed by @lizard_compress().
+ * Input is taken from @in. @buf is used to store the output.
+ * You need to provide the length of the uncompressed data in @expected_length.
+ *
+ * The pointer to data is returned.
+ *
+ * If an error occurs, NULL is returned and `errno` is set.
+ * `EINVAL` means the actual length does not match @expected_length.
+ * `EFAULT` means a segfault was encountered while decompressing (probably @expected_length was way too low).
+ **/
 byte *lizard_decompress_safe(const byte *in, struct lizard_buffer *buf, uns expected_length);
 
 /* adler32.c */
+
+/***
+ * [[adler]]
+ * Adler-32 checksum
+ * -----------------
+ *
+ * This is here because it is commonly used to check data compressed by LiZaRd.
+ * However, it could also belong to <<hash,hashing routines>>.
+ ***/
+
+/**
+ * Update the Adler-32 checksum with more data.
+ * @adler is the old value, @ptr points to @len bytes of data to update with.
+ * Result is returned.
+ **/
 uns adler32_update(uns adler, const byte *ptr, uns len);
 
-static inline uns
-adler32(const byte *buf, uns len)
+/**
+ * Compute the Adler-32 checksum of a block of data.
+ **/
+static inline uns adler32(const byte *buf, uns len)
 {
   return adler32_update(1, buf, len);
 }
-- 
2.47.3