From 615570c5ce32fed70ace0e1fc6d28af2b72f31d5 Mon Sep 17 00:00:00 2001
From: Martin Mares <mj@ucw.cz>
Date: Sat, 2 Feb 2008 18:51:18 +0100
Subject: [PATCH] More bits on bits.

---
 macros.tex   |  1 +
 notation.tex |  3 +++
 ram.tex      | 49 ++++++++++++++++++++++++++++++-------------------
 3 files changed, 34 insertions(+), 19 deletions(-)
diff --git a/macros.tex b/macros.tex
index 90ecc2f..25df2b3 100644
--- a/macros.tex
+++ b/macros.tex
@@ -48,6 +48,7 @@
 \def\1{{\bf 1}}
 \def\(#1){\mathord{\left<#1\right>}}
 
+% Bitwise operations
 \def\shl{\mathbin{<\!<}}
 \def\shr{\mathbin{>\!>}}
 \def\bop#1{\mathbin{\hbox{\sc #1}}}
diff --git a/notation.tex b/notation.tex
index efe482e..77f174d 100644
--- a/notation.tex
+++ b/notation.tex
@@ -48,6 +48,9 @@
 \n{$\sigma^k$}{the string~$\sigma$ repeated $k$~times \[bitnota]}
 \n{$\0$, $\1$}{bits in a~bit string \[bitnota]}
 \n{$\equiv$}{congruence modulo a~given number}
+\n{$\<LSB>(x)$}{the position of the lowest bit set in~$x$ \[lsbmsb]}
+\n{$\<MSB>(x)$}{the position of the highest bit set in~$x$ \[lsbmsb]}
+\n{$\bf x$}{a~vector with elements $x_1,\ldots,x_d$; $x$ is its bitwise encoding \[vecnota]}
 }
 
 %--------------------------------------------------------------------------------
diff --git a/ram.tex b/ram.tex
index 831d5f0..aca5518 100644
--- a/ram.tex
+++ b/ram.tex
@@ -373,10 +373,6 @@ calculation of the most significant bit set in a~word. We will show below that i
 can be done in constant time, but we have to be careful to avoid division instructions.
 \qed
 
-\notan{Vectors}
-We will use boldface letters for vectors. The components of a~vector~${\bf x}$
-will be denoted as $x_0,\ldots,x_{d-1}$.
-
 \notan{Bit strings}\id{bitnota}%
 We will work with binary representations of natural numbers by strings over the
 alphabet $\{\0,\1\}$: we will use $\(x)$ for the number~$x$ written in binary,
@@ -393,6 +389,11 @@ The \df{bitwise encoding} of a~vector ${\bf x}=(x_0,\ldots,x_{d-1})$ of~$b$-bit
 is an~integer~$x$ such that $\(x)=\0\(x_{d-1})_b\0\(x_{d-2})_b\0\ldots\0\(x_0)_b = \sum_i 2^{(b+1)i}\cdot x_i$.
 (We have interspersed the elements with \df{separator bits.})
 
+\notan{Vectors}\id{vecnota}%
+We will use boldface letters for vectors and the same letters in normal type
+for their encodings. The elements of a~vector~${\bf x}$ will be denoted as
+$x_0,\ldots,x_{d-1}$.
+
 \para
 If we wish for the whole vector to fit in a~single word, we need $(b+1)d\le W$.
 By using multiple-precision arithmetics, we can encode all vectors satisfying $bd=\O(W)$.
@@ -462,24 +463,29 @@ This way, we even get the vector of all partial sums:
 $s_k=\sum_{i=0}^{k-1}x_i$, $r_k=\sum_{i=k}^{d-1}x_i$.
 
 \>$\<Cmp>(x,y)$ --- element-wise comparison of~vectors ${\bf x}$ and~${\bf y}$,
-i.e., a~vector ${\bf z}$ such that $z_i=1$ iff $x_i<y_i$:
+i.e., a~vector ${\bf z}$ such that $z_i=1$ iff $x_i<y_i$.
+
+We replace the separator zeroes in~$x$ by ones and subtract~$y$. These ones
+change back to zeroes exactly at the positions where $x_i<y_i$ and they stop
+carries from propagating, so the fields do not interact with each other:
 
 \setslot{\vbox{\hbox{~$x_{d-1}$}\hbox{~$y_{d-1}$}}}
+\def\9{\rack{\0}{\hss ?\hss}}
 \alik{
    \1 \[x_{d-1}] \1 \[x_{d-2}] \[\cdots] \1 \[x_1] \1 \[x_0]  \cr
 -~ \0 \[y_{d-1}] \0 \[y_{d-2}] \[\cdots] \0 \[y_1] \0 \[y_0]  \cr
+\rule
+   \9 \[\ldots]  \9 \[\ldots]  \[\cdots] \9 \[\ldots] \9 \[\ldots] \cr
 }
 
-We replace the separator zeroes in~$x$ by ones and subtract~$y$. These ones change back to zeroes exactly at
-the positions where $x_i<y_i$ and they stop carries from propagating, so the
-fields do not interact with each other. It only remains to shift the separator
-bits to the right positions, negate them and zero out all other bits.
+It only remains to shift the separator bits to the right positions, negate them
+and zero out all other bits.
 
 \>$\<Rank>(x,\alpha)$ --- return the number of elements of~${\bf x}$ which are less than~$\alpha$,
 assuming that the result fits in~$b$ bits:
 
 \alik{
-\<Sum>(\<Cmp>(x,\<Replicate>(\alpha))) \cr
+\<Rank>(x,\alpha) = \<Sum>(\<Cmp>(x,\<Replicate>(\alpha))). \cr
 }
 
 \>$\<Insert>(x,\alpha)$ --- insert~$\alpha$ into a~sorted vector $\bf x$:
@@ -487,8 +493,9 @@ assuming that the result fits in~$b$ bits:
 Calculate $k = \<Rank>(x,\alpha)$ first, then insert~$\alpha$ as the $k$-th
 field of~$\bf x$ using masking operations.
 
-\>$\<Unpack>(\alpha)$ --- create a~vector whose components are the bits of~$\(\alpha)_b$.
-In other words, it inserts $\0^b$ between every two bits of~$\alpha$.
+\>$\<Unpack>(\alpha)$ --- create a~vector whose elements are the bits of~$\(\alpha)_d$.
+In other words, we insert blocks~$\0^b$ between the bits of~$\alpha$. Assuming that $b\ge d$,
+we can do it as follows:
 
 \algo
 \:$x\=\<Replicate>(\alpha)$.
@@ -538,7 +545,7 @@ operating on $b$-bit numbers and the word size is at least~$b^2$.
 This enables us to make use of intermediate vectors with $b$~elements
 of $b$~bits each.
 
-\algn{Integer operations in quadratic workspace}
+\algn{Integer operations in quadratic workspace}\id{lsbmsb}
 
 \>$\<Weight>(\alpha)$ --- compute the Hamming weight of~$\alpha$, i.e., the number of ones in~$\(\alpha)$.
 
@@ -571,11 +578,14 @@ and subtract the result from~$b-1$.
 
 \rem
 As noted by Brodnik~\cite{brodnik:lsb} and others, the space requirements of
-the \<LSB> operation can be reduced to linear. We split the input to roughly
-$\sqrt{b}$ blocks of roughly $\sqrt{b}$ bits each. Then we find which blocks are
-non-zero and identify the lowest non-zero block (this is \<LSB> of a~number
-whose bits correspond to the blocks). Finally we calculate the \<LSB> of this
-block. The same trick of course works for the \<MSB> as well.
+the \<LSB> operation can be reduced to linear. We split the input to $\sqrt{b}$
+blocks of $\sqrt{b}$ bits each. Then we determine which blocks are non-zero and
+identify the lowest such block (this is a~\<LSB> of a~number whose bits
+correspond to the blocks). Finally we calculate the \<LSB> of this block. In
+both calls to \<LSB,> we have a $\sqrt{b}$-bit number in a~$b$-bit word, so we
+can use the previous algorithm. The same trick of course works for finding the
+\<MSB> as well.
+
 The following algorithm shows the details.
 
 \algn{LSB in linear workspace}
@@ -583,7 +593,8 @@ The following algorithm shows the details.
 \algo\id{lsb}
 \algin A~$w$-bit number~$\alpha$.
 \:$b\=\lceil\sqrt{w}\rceil$. \cmt{size of a~block}
-\:$x\=(\alpha \band (\0\1^b)^b) \bor (\alpha \band (\1\0^b)^b)$.
+\:$\ell\=b$. \cmt{the number of blocks is the same}
+\:$x\=(\alpha \band (\0\1^b)^\ell) \bor (\alpha \band (\1\0^b)^\ell)$.
 \hfill\break
 \cmt{$x_i\ne 0$ iff the $i$-th block is non-zero}%
 \foot{Why is this so complicated? It is tempting to take $\alpha$ itself as a~code of this vector,
-- 
2.39.5