From 22844d1c9fa8e94db9206bd4f55e4d34de3de9b0 Mon Sep 17 00:00:00 2001
From: Martin Mares <mj@ucw.cz>
Date: Fri, 1 Feb 2008 13:50:56 +0100
Subject: [PATCH] Intro on RAM data structures.

---
 biblio.bib | 59 +++++++++++++++++++++++++++++++++++++++++++++++++++++-
 ram.tex    | 55 +++++++++++++++++++++++++++++++++++++++++++++++++-
 2 files changed, 112 insertions(+), 2 deletions(-)

diff --git a/biblio.bib b/biblio.bib
index 2adbf87..0ea8521 100644
--- a/biblio.bib
+++ b/biblio.bib
@@ -36,7 +36,7 @@
    year = "1990"
 }
 
-@article{boas77,
+@article{ boas:vebt,
   author    = {Peter van Emde Boas},
   title     = {Preserving Order in a Forest in Less Than Logarithmic Time
                and Linear Space.},
@@ -551,3 +551,60 @@ inproceedings{ pettie:minirand,
   publisher = {Intel Corp.},
   xxx = {available online at http://www.intel.com/products/processor/manuals/index.htm}
 }
+
+@article{ hagerup:dd,
+  title={{Deterministic Dictionaries}},
+  author={Hagerup, T. and Miltersen, P.B. and Pagh, R.},
+  journal={J. Algorithms},
+  volume={41},
+  number={1},
+  pages={69--85},
+  year={2001}
+}
+
+@article{ fredman:sst,
+  title={{Storing a Sparse Table with 0 (1) Worst Case Access Time}},
+  author={Fredman, M.L. and Koml{\'o}s, J. and Szemer{\'e}di, E.},
+  journal={Journal of the ACM (JACM)},
+  volume={31},
+  number={3},
+  pages={538--544},
+  year={1984},
+  publisher={ACM Press New York, NY, USA}
+}
+
+@book{ motwani:randalg,
+  title={{Randomized Algorithms}},
+  author={Motwani, R. and Raghavan, P.},
+  year={1995},
+  publisher={Cambridge University Press}
+}
+
+@article{ fw:fusion,
+  title={{Surpassing the information theoretic bound with fusion trees}},
+  author={Fredman, M.L. and Willard, D.E.},
+  journal={Journal of Computer and System Sciences},
+  volume={47},
+  number={3},
+  pages={424--436},
+  year={1993},
+  publisher={Academic Press, Inc. Orlando, FL, USA}
+}
+
+@article{ han:detsort,
+  title={{Deterministic sorting in O (nlog log n) time and linear space}},
+  author={Han, Y.},
+  journal={Proceedings of the thiry-fourth annual ACM symposium on Theory of computing},
+  pages={602--608},
+  year={2002},
+  publisher={ACM Press New York, NY, USA}
+}
+
+@article{ hanthor:randsort,
+  title={{Integer Sorting in 0 (n sqrt (log log n)) Expected Time and Linear Space}},
+  author={Han, Y. and Thorup, M.},
+  journal={Proceedings of the 43rd Symposium on Foundations of Computer Science},
+  pages={135--144},
+  year={2002},
+  publisher={IEEE Computer Society Washington, DC, USA}
+}
diff --git a/ram.tex b/ram.tex
index aa01664..e62452e 100644
--- a/ram.tex
+++ b/ram.tex
@@ -128,7 +128,15 @@ ${\rm AC}^0$-RAM where necessary.
 When speaking of the \df{RAM,} we implicitly mean the version with numbers limited
 by a~specified word size of $W$~bits, unit cost of operations and memory cells and the instruction
 set of the Word-RAM. This corresponds to the usage in recent algorithmic literature,
-although the authors rarely mention the details.
+although the authors rarely mention the details. In some cases, a~non-uniform variant
+of the Word-RAM is considered as well (e.g., in~\cite{hagerup:dd}):
+
+\defn
+A~Word-RAM is called \df{weakly non-uniform,} if it is equipped with $\O(1)$-time
+access to a~constant number of word-sized constants, which depend only on the word
+size. These are called \df{native constants} and they are available in fixed memory
+cells when the program starts. (By analogy with the high-level programming languages,
+these constants can be thought of as computed at ``compile time.'')
 
 \para
 The \df{Pointer Machine (PM)} also does not have any well established definition. The
@@ -212,6 +220,14 @@ are finite and there is only a~polynomial number of cells allocated during execu
 of the program, $\O(\log N)$-bit integers suffice.
 \qed
 
+\para
+There are also \df{randomized} versions of both machines. These are equipped
+with an~additional instruction for generating a~single random bit. The standard
+techniques of design and analysis of randomized algorithms then apply (see for
+example Motwani and Raghavan~\cite{motwani:randalg}).
+
+\FIXME{Consult sources. Does it make more sense to generate random words at once on the RAM?}
+
 \rem
 There is one more interesting machine: the \df{Immutable Pointer Machine} (see
 the description of LISP machines in \cite{benamram:pm}). It differs from the
@@ -282,6 +298,43 @@ scanning all~$n$ buckets takes $\O(n+m)$ time.
 
 %--------------------------------------------------------------------------------
 
+\section{Data structures on the RAM}
+
+There is a~lot of data structures designed specifically for the RAM, taking
+advantage of both indexing and arithmetic. In many cases, they surpass the known
+lower bounds for the same problem on the~PM and often achieve constant time
+per operation, at least when either the magnitude of the values or the size of
+the data structure are suitably bounded.
+
+A~classical result of this type are the trees of van Emde Boas~\cite{boas:vebt},
+which represent a~subset of the integers $\{0,\ldots,U-1\}$, allowing insertion,
+deletion and order operations (minimum, maximum, successor etc.) in time $\O(\log\log U)$,
+regardless of the size of the subset. If we plug this structure in the Jarn\'\i{}k's
+algorithm (\ref{jarnik}), replacing the heap, we immediately get an~algorithm
+for finding the MST in integer-weighted graphs in time $\O(m\log\log w_{max})$,
+where $w_{max}$ is the maximum weight. We will show later that it is even
+possible to achieve linear time complexity for arbitrary integer weights.
+
+A~real breakthrough has been made by Fredman and Willard, who introduced
+the Fusion trees~\cite{fw:fusion} which again perform membership and predecessor
+operation on a~set of integers, but this time with complexity $\O(\log_W n)$
+per operation on a~Word-RAM with $W$-bit words. This of course assumes that
+each element of the set fits in a~single word. As $W$ is at least~$\log n$,
+the operations take $\O(\log n/\log\log n)$ and we are able to sort integers
+in time~$o(n\log n)$. This was a~beginning of a~long sequence of faster and
+faster integer sorting algorithms, culminating with the work by Thorup and Han.
+They have improved the time complexity to $\O(n\log\log n)$ deterministically~\cite{han:detsort}
+and expected $\O(n\sqrt{\log\log n})$ for randomized algorithms~\cite{hanthor:randsort},
+both in linear space.
+
+Despite the recent progress, the corner-stone of most RAM data structures
+is still the representation of data structures by integers introduced by Fredman
+and Willard and it will also form a~basis for the rest of this chapter.
+
+\FIXME{Add more history.}
+
+%--------------------------------------------------------------------------------
+
 \section{Bits and vectors}
 
 \endpart
-- 
2.39.5