From 049b863bee2992aed6230ac377dee53c4a08cc85 Mon Sep 17 00:00:00 2001
From: Martin Mares <mj@ucw.cz>
Date: Sun, 3 Feb 2008 21:00:31 +0100
Subject: [PATCH] More RAM fixes.

---
 ram.tex | 59 +++++++++++++++++++++++++++++----------------------------
 1 file changed, 30 insertions(+), 29 deletions(-)
diff --git a/ram.tex b/ram.tex
index d178ec2..fad390c 100644
--- a/ram.tex
+++ b/ram.tex
@@ -382,8 +382,8 @@ can be done in constant time, but we have to be careful to avoid division instru
 \notan{Bit strings}\id{bitnota}%
 We will work with binary representations of natural numbers by strings over the
 alphabet $\{\0,\1\}$: we will use $\(x)$ for the number~$x$ written in binary,
-$\(x)_b$ for the same padded to exactly $b$ bits by adding zeroes at the beginning
-and $x[k]$ for the value of the $k$-th bit of~$x$.
+$\(x)_b$ for the same padded to exactly $b$ bits by adding leading zeroes,
+and $x[k]$ for the value of the $k$-th bit of~$x$ (with a~numbering of bits such that $2^k[k]=1$).
 The usual conventions for operations on strings will be utilized: When $s$
 and~$t$ are strings, we write $st$ for their concatenation and
 $s^k$ for the string~$s$ repeated $k$~times.
@@ -392,18 +392,19 @@ we will use $x$ and $\(x)$ interchangeably to avoid outbreak of symbols.
 
 \defn
 The \df{bitwise encoding} of a~vector ${\bf x}=(x_0,\ldots,x_{d-1})$ of~$b$-bit numbers
-is an~integer~$x$ such that $\(x)=\0\(x_{d-1})_b\0\(x_{d-2})_b\0\ldots\0\(x_0)_b = \sum_i 2^{(b+1)i}\cdot x_i$.
-(We have interspersed the elements with \df{separator bits.})
+is an~integer~$x$ such that $\(x)=\(x_{d-1})_b\0\(x_{d-2})_b\0\ldots\0\(x_0)_b$, i.e.,
+$x = \sum_i 2^{(b+1)i}\cdot x_i$. (We have interspersed the elements with \df{separator bits.})
 
 \notan{Vectors}\id{vecnota}%
 We will use boldface letters for vectors and the same letters in normal type
-for their encodings. The elements of a~vector~${\bf x}$ will be denoted as
+for their encodings. The elements of a~vector~${\bf x}$ will be written as
 $x_0,\ldots,x_{d-1}$.
 
 \para
-If we wish for the whole vector to fit in a~single word, we need $(b+1)d\le W$.
+If we want to fit the whole vector in a~single word, the parameters $b$ and~$d$ must satisty
+the condition $(b+1)d\le W$.
 By using multiple-precision arithmetics, we can encode all vectors satisfying $bd=\O(W)$.
-We will now describe how to translate simple vector manipulations to $\O(1)$ RAM operations
+We will now describe how to translate simple vector manipulations to sequences of $\O(1)$ RAM operations
 on their codes. As we are interested in asymptotic complexity only, we prefer clarity
 of the algorithms over saving instructions. Among other things, we freely use calculations
 on words of size $\O(bd)$, assuming that the Multiple-precision lemma comes to save us
@@ -411,9 +412,9 @@ when necessary.
 
 \para
 First of all, let us observe that we can use $\band$ and $\bor$ with suitable constants
-to write zeroes or ones to an~arbitrary set of bit positions in constant time. These operations
-are usually called \df{bit masking}. Also, an~element of a~vector can be extracted or replaced
-using masking and shifts.
+to write zeroes or ones to an~arbitrary set of bit positions at once. These operations
+are usually called \df{bit masking}. Also, any element of a~vector can be extracted or
+replaced by a~different value in $\O(1)$ time by masking and shifts.
 
 \newdimen\slotwd
 \def\setslot#1{\setbox0=#1\slotwd=\wd0}
@@ -437,13 +438,13 @@ using masking and shifts.
 \alik{\<Replicate>(\alpha)=\alpha\cdot(\0^b\1)^d. \cr}
 
 \:$\<Sum>(x)$ --- calculates the sum of the elements of~${\bf x}$, assuming that
-it fits in~$b$ bits:
+the result fits in $b$~bits:
 
 \alik{\<Sum>(x) = x \bmod \1^{b+1}. \cr}
 
 This works because when we work modulo~$\1^{b+1}$, the number $2^{b+1}=\1\0^{b+1}$
 is congruent to~1 and thus $x = \sum_i 2^{(b+1)i}\cdot x_i \equiv \sum_i 1^i\cdot x_i \equiv \sum_i x_i$.
-As the result should fit in $b$~bits, the modulo cannot change it.
+As the result should fit in $b$~bits, the modulo makes no difference.
 
 If we want to avoid division, we can use double-precision multiplication instead:
 
@@ -471,7 +472,7 @@ This way, we even get the vector of all partial sums:
 $s_k=\sum_{i=0}^{k-1}x_i$, $r_k=\sum_{i=k}^{d-1}x_i$.
 
 \:$\<Cmp>(x,y)$ --- element-wise comparison of~vectors ${\bf x}$ and~${\bf y}$,
-i.e., a~vector ${\bf z}$ such that $z_i=1$ iff $x_i<y_i$.
+i.e., a~vector ${\bf z}$ such that $z_i=1$ if $x_i<y_i$ and $z_i=0$ otherwise.
 
 We replace the separator zeroes in~$x$ by ones and subtract~$y$. These ones
 change back to zeroes exactly at the positions where $x_i<y_i$ and they stop
@@ -487,7 +488,7 @@ carries from propagating, so the fields do not interact with each other:
 }
 
 It only remains to shift the separator bits to the right positions, negate them
-and zero out all other bits.
+and mask out all other bits.
 
 \:$\<Rank>(x,\alpha)$ --- return the number of elements of~${\bf x}$ which are less than~$\alpha$,
 assuming that the result fits in~$b$ bits:
@@ -499,10 +500,10 @@ assuming that the result fits in~$b$ bits:
 \:$\<Insert>(x,\alpha)$ --- insert~$\alpha$ into a~sorted vector $\bf x$:
 
 Calculate $k = \<Rank>(x,\alpha)$ first, then insert~$\alpha$ as the $k$-th
-field of~$\bf x$ using masking operations.
+field of~$\bf x$ using masking operations and shifts.
 
 \:$\<Unpack>(\alpha)$ --- create a~vector whose elements are the bits of~$\(\alpha)_d$.
-In other words, we insert blocks~$\0^b$ between the bits of~$\alpha$. Assuming that $b\ge d$,
+In other words, insert blocks~$\0^b$ between the bits of~$\alpha$. Assuming that $b\ge d$,
 we can do it as follows:
 
 \algo
@@ -516,18 +517,18 @@ Let us observe that $z_i$ is either zero or equal to~$y_i$ depending on the valu
 of the $i$-th bit of the number~$\alpha$. Comparing it with~$y_i$ normalizes it
 to either zero or one.
 
-\:$\<Unpack>_\varphi(\alpha)$ --- like \<Unpack>, but changes the order of the
-bits according to a~fixed permutation~$\varphi$: The $i$-th element of the
+\:$\<Unpack>_\pi(\alpha)$ --- like \<Unpack>, but changes the order of the
+bits according to a~fixed permutation~$\pi$: The $i$-th element of the
 resulting vector is equal to~$\alpha[\pi(i)]$.
 
-Implemented as above, but with mask~$y=(2^{\pi(b-1)},\ldots,2^{\pi(0)})$.
+Implemented as above, but with a~mask $y=(2^{\pi(b-1)},\ldots,2^{\pi(0)})$.
 
 \:$\<Pack>(x)$ --- the inverse of \<Unpack>: given a~vector of zeroes and ones,
 it produces a~number whose bits are the elements of the vector (in other words,
 it crosses out the $\0^b$ blocks).
 
 We interpret the~$x$ as an~encoding of a~vector with elements one bit shorter
-and sum these elements. For example, when $n=4$ and~$b=4$:
+and we sum these elements. For example, when $n=4$ and~$b=4$:
 
 \setslot{\hbox{$x_3$}}
 \def\z{\[\0]}
@@ -541,8 +542,8 @@ and sum these elements. For example, when $n=4$ and~$b=4$:
 
 However, this ``reformatting'' does not produce a~correct encoding of a~vector,
 because the separator zeroes are missing. For this reason, the implementation
-of~\<Sum> by modulo does not work correctly (it produces $\1^b$ instead of $\0^b$).
-We use the technique based on multiplication instead, which does not need
+of~\<Sum> using modulo does not work correctly (it produces $\0^b$ instead of $\1^b$).
+We therefore use the technique based on multiplication instead, which does not need
 the separators. (Alternatively, we can observe that $\1^b$ is the only case
 affected, so we can handle it separately.)
 
@@ -572,7 +573,7 @@ Perform $\<Unpack>_\pi$ and \<Pack> back.
 i.e., the smallest~$i$ such that $\alpha[i]=1$.
 
 By a~combination of subtraction with $\bxor$, we create a~number
-which contain ones exactly at positions below $\<LSB>(\alpha)$:
+which contains ones exactly at the position of $\<LSB>(\alpha)$ and below:
 
 \alik{
 \alpha&=			\9\9\9\9\9\1\0\0\0\0\cr
@@ -580,12 +581,12 @@ which contain ones exactly at positions below $\<LSB>(\alpha)$:
 \alpha\bxor(\alpha-1)&=		\0\9\9\9\0\1\1\1\1\1\cr
 }
 
-Then calculate the \<Weight> of the result.
+Then we calculate the \<Weight> of the result and subtract~1.
 
-\:$\<MSB>(\alpha)$ --- find the most significant bit (the position
-of the highest bit set in~$\alpha$).
+\:$\<MSB>(\alpha)$ --- find the most significant bit of~$\alpha$ (the position
+of the highest bit set).
 
-Reverse the bits of the number first by~\<Permute>, then apply \<LSB>
+Reverse the bits of the number~$\alpha$ first by calling \<Permute>, then apply \<LSB>
 and subtract the result from~$b-1$.
 
 \endlist
@@ -606,11 +607,11 @@ The following algorithm shows the details.
 
 \algo\id{lsb}
 \algin A~$w$-bit number~$\alpha$.
-\:$b\=\lceil\sqrt{w}\rceil$. \cmt{size of a~block}
+\:$b\=\lceil\sqrt{w}\,\rceil$. \cmt{size of a~block}
 \:$\ell\=b$. \cmt{the number of blocks is the same}
 \:$x\=(\alpha \band (\0\1^b)^\ell) \bor (\alpha \band (\1\0^b)^\ell)$.
 \hfill\break
-\cmt{$x_i\ne 0$ iff the $i$-th block is non-zero}%
+\cmt{encoding of a~vector~${\bf x}$ such that $x_i\ne 0$ iff the $i$-th block is non-zero}%
 \foot{Why is this so complicated? It is tempting to take $\alpha$ itself as a~code of this vector,
 but we unfortunately need the separator bits between elements, so we create them and
 relocate the bits we have overwritten.}
-- 
2.39.5