From 0bdd9c0d8b85dc8c4f53f9dbaff300a6834d7724 Mon Sep 17 00:00:00 2001
From: Martin Mares <mj@ucw.cz>
Date: Sat, 2 Feb 2008 18:29:08 +0100
Subject: [PATCH] More bit tricks.

---
 PLAN         |  1 +
 macros.tex   |  2 +-
 notation.tex |  2 ++
 ram.tex      | 62 +++++++++++++++++++++++++++++++---------------------
 4 files changed, 41 insertions(+), 26 deletions(-)

diff --git a/PLAN b/PLAN
index 961ace3..6be7bd8 100644
--- a/PLAN
+++ b/PLAN
@@ -54,6 +54,7 @@ TODO:
 - mention in-place radix-sorting?
 - ranking of permutations on general sets, relationship with integer sorting
 - reference to mixed Boruvka-Jarnik
+- bit tricks: reference to HAKMEM
 
 Notation:
 
diff --git a/macros.tex b/macros.tex
index 1738889..90ecc2f 100644
--- a/macros.tex
+++ b/macros.tex
@@ -46,7 +46,7 @@
 % Bit strings
 \def\0{{\bf 0}}
 \def\1{{\bf 1}}
-\def\(#1){\left<#1\right>}
+\def\(#1){\mathord{\left<#1\right>}}
 
 \def\shl{\mathbin{<\!<}}
 \def\shr{\mathbin{>\!>}}
diff --git a/notation.tex b/notation.tex
index 076d1c1..efe482e 100644
--- a/notation.tex
+++ b/notation.tex
@@ -44,7 +44,9 @@
 \n{$W$}{word size of the RAM \[wordsize]}
 \n{$\(x)$}{number~$x\in{\bb N}$ written in binary \[bitnota]}
 \n{$\(x)_b$}{$\(x)$ zero-padded to exactly $b$ bits \[bitnota]}
+\n{$x[i]$}{the value of the $i$-th bit of the number~$x$ \[bitnota]}
 \n{$\sigma^k$}{the string~$\sigma$ repeated $k$~times \[bitnota]}
+\n{$\0$, $\1$}{bits in a~bit string \[bitnota]}
 \n{$\equiv$}{congruence modulo a~given number}
 }
 
diff --git a/ram.tex b/ram.tex
index 97a6020..831d5f0 100644
--- a/ram.tex
+++ b/ram.tex
@@ -132,7 +132,7 @@ set of the Word-RAM. This corresponds to the usage in recent algorithmic literat
 although the authors rarely mention the details. In some cases, a~non-uniform variant
 of the Word-RAM is considered as well (e.g., in~\cite{hagerup:dd}):
 
-\defn
+\defn\id{nonuniform}%
 A~Word-RAM is called \df{weakly non-uniform,} if it is equipped with $\O(1)$-time
 access to a~constant number of word-sized constants, which depend only on the word
 size. These are called \df{native constants} and they are available in fixed memory
@@ -378,29 +378,35 @@ We will use boldface letters for vectors. The components of a~vector~${\bf x}$
 will be denoted as $x_0,\ldots,x_{d-1}$.
 
 \notan{Bit strings}\id{bitnota}%
-We will work with binary representations of natural numbers as strings over the
+We will work with binary representations of natural numbers by strings over the
 alphabet $\{\0,\1\}$: we will use $\(x)$ for the number~$x$ written in binary,
-$\(x)_b$ for the same padded to exactly $b$ bits by adding zeroes at the beginning.
-Strings will be denoted by Greek letters and the usual conventions will be utilized
-for string operations: $\alpha\beta$ for concatenation and $\alpha^k$ for the
-string~$\alpha$ repeated $k$~times. When the meaning is clear from the context,
+$\(x)_b$ for the same padded to exactly $b$ bits by adding zeroes at the beginning
+and $x[k]$ for the value of the $k$-th bit of~$x$.
+The usual conventions for operations on strings will be utilized: When $s$
+and~$t$ are strings, we write $st$ for their concatenation and
+$s^k$ for the string~$s$ repeated $k$~times.
+When the meaning is clear from the context,
 we will use $x$ and $\(x)$ interchangeably to avoid outbreak of symbols.
 
 \defn
 The \df{bitwise encoding} of a~vector ${\bf x}=(x_0,\ldots,x_{d-1})$ of~$b$-bit numbers
-is an~integer $\0\(x_{d-1})_b\0\(x_{d-2})_b\0\ldots\0\(x_0)_b = \sum_i 2^{(b+1)i}\cdot x_i$.
+is an~integer~$x$ such that $\(x)=\0\(x_{d-1})_b\0\(x_{d-2})_b\0\ldots\0\(x_0)_b = \sum_i 2^{(b+1)i}\cdot x_i$.
+(We have interspersed the elements with \df{separator bits.})
 
 \para
 If we wish for the whole vector to fit in a~single word, we need $(b+1)d\le W$.
 By using multiple-precision arithmetics, we can encode all vectors satisfying $bd=\O(W)$.
 We will now describe how to translate simple vector manipulations to $\O(1)$ RAM operations
-on their codes.
+on their codes. As we are interested in asymptotic complexity only, we prefer clarity
+of the algorithms over saving instructions. Among other things, we freely use calculations
+on words of size $\O(bd)$, assuming that the Multiple-precision lemma comes to save us
+when necessary.
 
-\FIXME{Mention multi-prec. intermediates}
-\FIXME{Mention isolation of fields?}
-\FIXME{Fix notation and add it to the table}
-\FIXME{Masking}
-\FIXME{Optimality}
+\para
+First of all, let us observe that we can use $\band$ and $\bor$ with suitable constants
+to write zeroes or ones to an~arbitrary set of bit positions in constant time. These operations
+are usually called \df{bit masking}. Also, an~element of a~vector can be extracted or replaced
+using masking and shifts.
 
 \newdimen\slotwd
 \def\setslot#1{\setbox0=#1\slotwd=\wd0}
@@ -410,7 +416,7 @@ on their codes.
 
 \def\alik#1{%
 \medskip
-\halign{\hskip 0.2\hsize\hfil $ ##$&\hbox to 0.6\hsize{${}##$ \hss}\cr
+\halign{\hskip 0.15\hsize\hfil $ ##$&\hbox to 0.6\hsize{${}##$ \hss}\cr
 #1}
 \medskip
 }
@@ -419,14 +425,14 @@ on their codes.
 
 \>$\<Replicate>(\alpha)$ --- creates a~vector $(\alpha,\ldots,\alpha)$:
 
-\alik{\alpha*(\0^b\1)^d \cr}
+\alik{\<Replicate>(\alpha)=\alpha\cdot(\0^b\1)^d. \cr}
 
 \>$\<Sum>(x)$ --- calculates the sum of the elements of~${\bf x}$, assuming that
 it fits in~$b$ bits:
 
-\alik{x \bmod \1^{b+1} \cr}
+\alik{\<Sum>(x) = x \bmod \1^{b+1}. \cr}
 
-This works because when taken modulo~$\1^{b+1}$, the number $2^{b+1}=\1\0^{b+1}$
+This works because when we work modulo~$\1^{b+1}$, the number $2^{b+1}=\1\0^{b+1}$
 is congruent to~1 and thus $x = \sum_i 2^{(b+1)i}\cdot x_i \equiv \sum_i 1^i\cdot x_i \equiv \sum_i x_i$.
 As the result should fit in $b$~bits, the modulo cannot change it.
 
@@ -464,8 +470,7 @@ i.e., a~vector ${\bf z}$ such that $z_i=1$ iff $x_i<y_i$:
 -~ \0 \[y_{d-1}] \0 \[y_{d-2}] \[\cdots] \0 \[y_1] \0 \[y_0]  \cr
 }
 
-We replace the separator zeroes in~$x$ by ones (this is just an~$\bor$ with
-a~suitable constant) and subtract~$y$. These ones change back to zeroes exactly at
+We replace the separator zeroes in~$x$ by ones and subtract~$y$. These ones change back to zeroes exactly at
 the positions where $x_i<y_i$ and they stop carries from propagating, so the
 fields do not interact with each other. It only remains to shift the separator
 bits to the right positions, negate them and zero out all other bits.
@@ -477,7 +482,7 @@ assuming that the result fits in~$b$ bits:
 \<Sum>(\<Cmp>(x,\<Replicate>(\alpha))) \cr
 }
 
-\>$\<Insert>(x,\alpha)$ --- insert~$\alpha$ to a~sorted vector $\bf x$:
+\>$\<Insert>(x,\alpha)$ --- insert~$\alpha$ into a~sorted vector $\bf x$:
 
 Calculate $k = \<Rank>(x,\alpha)$ first, then insert~$\alpha$ as the $k$-th
 field of~$\bf x$ using masking operations.
@@ -498,7 +503,7 @@ to either zero or one.
 
 \>$\<Unpack>_\varphi(\alpha)$ --- like \<Unpack>, but changes the order of the
 bits according to a~fixed permutation~$\varphi$: The $i$-th element of the
-resulting vector is equal to the $\pi(i)$-th bit of~$\alpha$.
+resulting vector is equal to~$\alpha[\pi(i)]$.
 
 Implemented as above, but with mask~$y=(2^{\pi(b-1)},\ldots,2^{\pi(0)})$.
 
@@ -526,8 +531,6 @@ We use the technique based on multiplication instead, which does not need
 the separators. (Alternatively, we can observe that $\1^b$ is the only case
 affected, so we can handle it separately.)
 
-\FIXME{Note constants}
-
 \para
 We can use the above tricks to perform interesting operations on individual
 numbers in constant time, too. Let us assume for a~while that we are
@@ -547,7 +550,7 @@ to a~fixed permutation~$\pi$.
 Perform $\<Unpack>_\pi$ and \<Pack> back.
 
 \>$\<LSB>(\alpha)$ --- find the least significant bit of~$\alpha$,
-i.e., the minimum~$i$ such that the $i$-th bit of~$\alpha$ is one.
+i.e., the minimum~$i$ such that $\alpha[i]=1$.
 
 By a~combination of subtraction with $\bxor$, we create a~number
 which contain ones exactly at positions below $\<LSB>(\alpha)$:
@@ -577,7 +580,7 @@ The following algorithm shows the details.
 
 \algn{LSB in linear workspace}
 
-\algo
+\algo\id{lsb}
 \algin A~$w$-bit number~$\alpha$.
 \:$b\=\lceil\sqrt{w}\rceil$. \cmt{size of a~block}
 \:$x\=(\alpha \band (\0\1^b)^b) \bor (\alpha \band (\1\0^b)^b)$.
@@ -594,4 +597,13 @@ relocate the bits we have overwritten.}
 \algout $\<LSB>(\alpha) = bp+q$.
 \endalgo
 
+\rem
+We have used a~plenty of constants which depend on the format of the vectors.
+Either we can write non-uniform programs (see \ref{nonuniform}) and use native constants,
+or we can observe that all such constants can be easily manufactured. For example,
+$(\0^b\1)^d = \1^{bd} / \1^{b+1} = (2^{bd}-1)/(2^{b+1}-1)$. The only exceptions
+are the~$w$ and~$b$ in the LSB algorithm \ref{lsb}, which we are unable to produce
+in constant time.
+
+
 \endpart
-- 
2.39.5