From 22844d1c9fa8e94db9206bd4f55e4d34de3de9b0 Mon Sep 17 00:00:00 2001 From: Martin Mares Date: Fri, 1 Feb 2008 13:50:56 +0100 Subject: [PATCH] Intro on RAM data structures. --- biblio.bib | 59 +++++++++++++++++++++++++++++++++++++++++++++++++++++- ram.tex | 55 +++++++++++++++++++++++++++++++++++++++++++++++++- 2 files changed, 112 insertions(+), 2 deletions(-) diff --git a/biblio.bib b/biblio.bib index 2adbf87..0ea8521 100644 --- a/biblio.bib +++ b/biblio.bib @@ -36,7 +36,7 @@ year = "1990" } -@article{boas77, +@article{ boas:vebt, author = {Peter van Emde Boas}, title = {Preserving Order in a Forest in Less Than Logarithmic Time and Linear Space.}, @@ -551,3 +551,60 @@ inproceedings{ pettie:minirand, publisher = {Intel Corp.}, xxx = {available online at http://www.intel.com/products/processor/manuals/index.htm} } + +@article{ hagerup:dd, + title={{Deterministic Dictionaries}}, + author={Hagerup, T. and Miltersen, P.B. and Pagh, R.}, + journal={J. Algorithms}, + volume={41}, + number={1}, + pages={69--85}, + year={2001} +} + +@article{ fredman:sst, + title={{Storing a Sparse Table with 0 (1) Worst Case Access Time}}, + author={Fredman, M.L. and Koml{\'o}s, J. and Szemer{\'e}di, E.}, + journal={Journal of the ACM (JACM)}, + volume={31}, + number={3}, + pages={538--544}, + year={1984}, + publisher={ACM Press New York, NY, USA} +} + +@book{ motwani:randalg, + title={{Randomized Algorithms}}, + author={Motwani, R. and Raghavan, P.}, + year={1995}, + publisher={Cambridge University Press} +} + +@article{ fw:fusion, + title={{Surpassing the information theoretic bound with fusion trees}}, + author={Fredman, M.L. and Willard, D.E.}, + journal={Journal of Computer and System Sciences}, + volume={47}, + number={3}, + pages={424--436}, + year={1993}, + publisher={Academic Press, Inc. Orlando, FL, USA} +} + +@article{ han:detsort, + title={{Deterministic sorting in O (nlog log n) time and linear space}}, + author={Han, Y.}, + journal={Proceedings of the thiry-fourth annual ACM symposium on Theory of computing}, + pages={602--608}, + year={2002}, + publisher={ACM Press New York, NY, USA} +} + +@article{ hanthor:randsort, + title={{Integer Sorting in 0 (n sqrt (log log n)) Expected Time and Linear Space}}, + author={Han, Y. and Thorup, M.}, + journal={Proceedings of the 43rd Symposium on Foundations of Computer Science}, + pages={135--144}, + year={2002}, + publisher={IEEE Computer Society Washington, DC, USA} +} diff --git a/ram.tex b/ram.tex index aa01664..e62452e 100644 --- a/ram.tex +++ b/ram.tex @@ -128,7 +128,15 @@ ${\rm AC}^0$-RAM where necessary. When speaking of the \df{RAM,} we implicitly mean the version with numbers limited by a~specified word size of $W$~bits, unit cost of operations and memory cells and the instruction set of the Word-RAM. This corresponds to the usage in recent algorithmic literature, -although the authors rarely mention the details. +although the authors rarely mention the details. In some cases, a~non-uniform variant +of the Word-RAM is considered as well (e.g., in~\cite{hagerup:dd}): + +\defn +A~Word-RAM is called \df{weakly non-uniform,} if it is equipped with $\O(1)$-time +access to a~constant number of word-sized constants, which depend only on the word +size. These are called \df{native constants} and they are available in fixed memory +cells when the program starts. (By analogy with the high-level programming languages, +these constants can be thought of as computed at ``compile time.'') \para The \df{Pointer Machine (PM)} also does not have any well established definition. The @@ -212,6 +220,14 @@ are finite and there is only a~polynomial number of cells allocated during execu of the program, $\O(\log N)$-bit integers suffice. \qed +\para +There are also \df{randomized} versions of both machines. These are equipped +with an~additional instruction for generating a~single random bit. The standard +techniques of design and analysis of randomized algorithms then apply (see for +example Motwani and Raghavan~\cite{motwani:randalg}). + +\FIXME{Consult sources. Does it make more sense to generate random words at once on the RAM?} + \rem There is one more interesting machine: the \df{Immutable Pointer Machine} (see the description of LISP machines in \cite{benamram:pm}). It differs from the @@ -282,6 +298,43 @@ scanning all~$n$ buckets takes $\O(n+m)$ time. %-------------------------------------------------------------------------------- +\section{Data structures on the RAM} + +There is a~lot of data structures designed specifically for the RAM, taking +advantage of both indexing and arithmetic. In many cases, they surpass the known +lower bounds for the same problem on the~PM and often achieve constant time +per operation, at least when either the magnitude of the values or the size of +the data structure are suitably bounded. + +A~classical result of this type are the trees of van Emde Boas~\cite{boas:vebt}, +which represent a~subset of the integers $\{0,\ldots,U-1\}$, allowing insertion, +deletion and order operations (minimum, maximum, successor etc.) in time $\O(\log\log U)$, +regardless of the size of the subset. If we plug this structure in the Jarn\'\i{}k's +algorithm (\ref{jarnik}), replacing the heap, we immediately get an~algorithm +for finding the MST in integer-weighted graphs in time $\O(m\log\log w_{max})$, +where $w_{max}$ is the maximum weight. We will show later that it is even +possible to achieve linear time complexity for arbitrary integer weights. + +A~real breakthrough has been made by Fredman and Willard, who introduced +the Fusion trees~\cite{fw:fusion} which again perform membership and predecessor +operation on a~set of integers, but this time with complexity $\O(\log_W n)$ +per operation on a~Word-RAM with $W$-bit words. This of course assumes that +each element of the set fits in a~single word. As $W$ is at least~$\log n$, +the operations take $\O(\log n/\log\log n)$ and we are able to sort integers +in time~$o(n\log n)$. This was a~beginning of a~long sequence of faster and +faster integer sorting algorithms, culminating with the work by Thorup and Han. +They have improved the time complexity to $\O(n\log\log n)$ deterministically~\cite{han:detsort} +and expected $\O(n\sqrt{\log\log n})$ for randomized algorithms~\cite{hanthor:randsort}, +both in linear space. + +Despite the recent progress, the corner-stone of most RAM data structures +is still the representation of data structures by integers introduced by Fredman +and Willard and it will also form a~basis for the rest of this chapter. + +\FIXME{Add more history.} + +%-------------------------------------------------------------------------------- + \section{Bits and vectors} \endpart -- 2.39.2