From 9d944002861dfbc3e35320ea196f611a1e030282 Mon Sep 17 00:00:00 2001
From: Martin Mares
Date: Tue, 29 Jan 2008 21:14:36 +0100
Subject: [PATCH] Introduction to models of computation.

---
 biblio.bib   |  24 +++++++++-
 mst.tex      |   2 +
 notation.tex | 123 +++++++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 147 insertions(+), 2 deletions(-)

diff --git a/biblio.bib b/biblio.bib
index f8eb292..852cc0e 100644
--- a/biblio.bib
+++ b/biblio.bib
@@ -48,7 +48,7 @@
 bibsource = {DBLP, http://dblp.uni-trier.de}
 }

-@inproceedings{thorup03ac0,
+@inproceedings{ thorup:aczero,
 author = {Mikkel Thorup},
 title = {On AC0 implementations of fusion trees and atomic heaps},
 booktitle = {SODA '03: Proceedings of the fourteenth annual ACM-SIAM symposium on Discrete algorithms},
@@ -60,7 +60,7 @@
 address = {Philadelphia, PA, USA},
 }

-@article{ benamram95what,
+@article{ benamram:pm,
 author = "Ben-Amram",
 title = "What is a ``Pointer Machine''?",
 journal = "SIGACTN: SIGACT News (ACM Special Interest Group on Automata and Computability Theory)",
@@ -504,3 +504,23 @@ inproceedings{ pettie:minirand,
 pages={115--132},
 year={1976}
 }
+
+@book{ knuth:fundalg,
+  author = {Donald E. Knuth},
+  title = {The art of computer programming, volume 1 (3rd ed.): fundamental algorithms},
+  year = {1997},
+  isbn = {0-201-89683-4},
+  publisher = {Addison Wesley Longman Publishing Co., Inc.},
+  address = {Redwood City, CA, USA},
+}
+
+@inproceedings{ hagerup:wordram,
+  author = {Torben Hagerup},
+  title = {{Sorting and Searching on the Word RAM}},
+  booktitle = {STACS '98: Proceedings of the 15th Annual Symposium on Theoretical Aspects of Computer Science},
+  year = {1998},
+  isbn = {3-540-64230-7},
+  pages = {366--398},
+  publisher = {Springer-Verlag},
+  address = {London, UK},
+}
diff --git a/mst.tex b/mst.tex
index 6e2b17c..7014cfa 100644
--- a/mst.tex
+++ b/mst.tex
@@ -1121,4 +1121,6 @@ Eisner's tutorial \cite{eisner:tutorial}
 % unify use of n(G) vs. n
 % Euclidean MST
 % Some algorithms (most notably Fredman-Tarjan) do not need flattening
+% more references to RAM
+
 \endpart
diff --git a/notation.tex b/notation.tex
index 3c63ac2..7095226 100644
--- a/notation.tex
+++ b/notation.tex
@@ -43,6 +43,8 @@
 \n{$\beta(m,n)$}{$\beta(m,n) := \min\{i: \log^{(i)}n < m/n \}$ \[itjarthm]}
 }

+%--------------------------------------------------------------------------------
+
 \section{Multigraphs and contractions}

 Since the formalism of multigraphs is not fixed in the literature, we will
@@ -83,4 +85,125 @@
 E' &= \{ \{m(x),m(y)\} ; xy\in E \land m(x)\ne m(y) \},\cr
 m(x) &= \cases{v_e & \hbox{for $v=x,y,$}\cr v & \hbox{otherwise.}} \cr
 }$$
+
+%--------------------------------------------------------------------------------

\section{Models of computation}

Traditionally, computer scientists use a~variety of computational models
as the formalism in which their algorithms are stated. If we were studying
NP-completeness, we could safely assume that all these models are equivalent,
possibly up to a~polynomial slowdown, which is negligible. In our case, the
differences between good and not-so-good algorithms are on a~much smaller
scale, so we need to replace the yardstick with a~micrometer and define our
computation model carefully.

We would like to keep the formalism close to the reality of contemporary
computers. This rules out Turing machines and similar sequentially addressed
models, but even the remaining models are subtly different. For example, some of them
allow indexing of arrays in constant time, while others lack such an operation,
so arrays have to be emulated with pointer structures, requiring $\Omega(\log n)$
time to access a~single element of an~$n$-element array. It is hard to say which
way is superior --- most ``real'' computers have instructions for constant-time
indexing, but on the other hand it seems to be physically impossible to fulfil
this promise for an~arbitrary amount of memory: at the level of logical
gates, the depth of any indexing circuit is logarithmic in the size of the memory.

In recent decades, most researchers in the area of combinatorial algorithms
have worked with two computational models: the Random Access Machine and the Pointer
Machine. The former is closer to the programmer's view of a~computer,
the latter is a~little more restricted and ``asymptotically safe.''
We will follow this practice and study our algorithms in both models.

\para
The \df{Random Access Machines (RAMs)} are a~family of computational models
which share the following properties. (For one of the possible formal
definitions, see~\cite{knuth:fundalg}.)

The \df{memory} of the machine is represented by an~array of \df{memory cells}
addressed by non-negative integers, each of them containing a~single integer.
The \df{program} is a~sequence of \df{instructions} of two basic kinds: calculation
instructions and control instructions.

\df{Calculation instructions} have two source arguments and one destination
argument. Each argument is either an~immediate constant (not available
as a~destination), a~directly addressed memory cell (specified by its number)
or an~indirectly addressed memory cell (whose address is stored in a~directly
addressed memory cell).

\df{Control instructions} include branches (jumps to a~specific instruction of
the program), conditional branches (e.g., jump if two arguments, specified as
in the calculation instructions, are equal) and an~instruction
to halt the program.

At the beginning of the computation, the memory contains the input data
in specified memory cells and undefined values everywhere else.
Then the program is executed one instruction at a~time. When it halts,
specified memory cells are interpreted as the program's output.
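\para
To make the definition concrete, the following sketch shows how such a~machine
could be simulated in~C. It is an~illustration of ours, not part of any formal
definition: the memory size is fixed, immediate constants are omitted, only
a~handful of operations is provided and indirect addressing is allowed in the
first source argument only.

#include <stdio.h>

/* A toy RAM interpreter. Memory cells hold integers; a calculation
 * instruction combines two source cells into a destination cell, and the
 * flag `ind' makes the first source indirectly addressed (its address is
 * then read from cell a). */

enum op { ADD, SUB, JEQ, HALT };

struct insn { enum op op; int a, b, dst, ind; };

long mem[1024];                         /* memory cells 0..1023 */

void run(const struct insn *prog)
{
    for (int pc = 0; ; pc++) {          /* one instruction at a time */
        struct insn i = prog[pc];
        long x = i.ind ? mem[mem[i.a]] : mem[i.a];
        switch (i.op) {
        case ADD:  mem[i.dst] = x + mem[i.b]; break;
        case SUB:  mem[i.dst] = x - mem[i.b]; break;
        case JEQ:  if (x == mem[i.b]) pc = i.dst - 1; break;  /* branch */
        case HALT: return;
        }
    }
}

int main(void)
{
    mem[0] = 10; mem[1] = 32;           /* input in specified cells */
    struct insn prog[] = {
        { ADD, 0, 1, 2, 0 },            /* mem[2] = mem[0] + mem[1] */
        { HALT, 0, 0, 0, 0 }
    };
    run(prog);
    printf("%ld\n", mem[2]);            /* output read from cell 2 */
    return 0;
}

The sketch deliberately leaves the range of the integers in the cells
unrestricted; as we discuss next, this is exactly one of the points where
the members of the RAM family differ.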
\para
In the description of the RAM family, we have omitted several properties
on~purpose, because different members of the family define them differently.
The differences are: the size of the numbers we can calculate with, the time
complexity of a~single instruction, the memory complexity of a~single memory
cell and the repertoire of operations available in calculation instructions.

If we impose no limits on the magnitude of the numbers and we assume that
arithmetic and logical operations work on them in constant time, we get
a~very powerful parallel computer --- we can emulate an~arbitrary number
of processors using arithmetics and suddenly almost everything can be
computed in constant time, modulo encoding and decoding of the input and output.
Such models are unrealistic and there are two basic ways to avoid this
behaviour:

\numlist\ndotted
\:Keep unlimited numbers, but increase the cost of instructions: each instruction
  consumes time proportional to the number of bits of the numbers it processes,
  including memory addresses. Similarly, memory consumption is measured in bits,
  counting not only the values, but also the addresses of the respective memory
  cells.
\:Place a~limit on the size of the numbers. The limit must not be constant, since
  such machines would be able to address only a~constant amount of memory.
  On the other hand, we are interested in polynomial-time algorithms only, so
  $\Theta(\log n)$-bit numbers, where~$n$ is the size of the input, should be sufficient.
  Then we can keep the cost of instructions and memory cells constant.
\endlist

In the second case, the limit on the number of bits in a~single cell is usually
called the \df{word size} of the machine; see Hagerup \cite{hagerup:wordram}
for a~survey of RAMs parametrized by the word size.

Both restrictions avoid the problems of unbounded parallelism. The first
choice is theoretically cleaner, but it makes the calculations of time and
space complexity somewhat tedious. What is more, these calculations usually result in both
complexities being exactly $\Theta(\log n)$ times higher than with the second
choice. (For example, summing $n$~input numbers of $\Theta(\log n)$ bits each
takes $\Theta(n)$ time with unit-cost instructions, but $\Theta(n\log n)$ time
when every instruction pays for the bits it processes.) This does not hold in
general (consider a~program which uses many
small numbers and $\O(1)$ large ones), but it is true for the algorithms we are
interested in. Therefore we will always assume that the operations have unit
cost and we will make sure that all numbers are limited either to $\O(\log n)$ bits
or to the size of the numbers on the algorithm's input, whichever is bigger.

As for the choice of RAM operations, the following three variants are usually
considered:

\itemize\ibull
\:\df{Word-RAM} --- allows the ``C-language operators'', i.e., addition,
  subtraction, multiplication, division, remainder, bitwise {\sc and,} {\sc or,} exclusive
  {\sc or} ({\sc xor}), negation ({\sc not}) and bitwise shifts.
\:\df{${\rm AC}^0$-RAM} --- allows all operations from the class ${\rm AC}^0$, i.e.,
  those computable by constant-depth polynomial-size boolean circuits with unlimited
  fan-in and fan-out. This includes all operations of the Word-RAM except for multiplication,
  division and remainders, and also other operations that are not C~operators, such as
  reversing the order of bits in a~word. (It does not, however, include the Hamming
  weight, i.e., the number of bits set in a~given number: already its lowest bit is
  the parity of the input, which constant-depth polynomial-size circuits cannot compute.)
\:Both restrictions at once.
\endlist

As shown by Thorup in \cite{thorup:aczero}, for the usual purposes the first two choices
are equivalent, while the third one is strictly weaker. We will therefore use the
Word-RAM instruction set, mentioning the differences of the ${\rm AC}^0$-RAM where necessary.
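\para
To illustrate the role of multiplication, let us return to the Hamming weight.
On a~Word-RAM with 64-bit words it can be computed by a~constant number of
instructions using the folklore ``parallel counting'' trick, sketched below
in~C. (The constants are specific to the 64-bit word size; for a~general word
size~$w$, this style of counting needs $\O(\log w)$ operations.)

#include <stdint.h>

/* Hamming weight of a 64-bit word: the word is treated as a vector of
 * 2-bit, then 4-bit, then 8-bit fields whose bit counts are computed in
 * parallel; the final multiplication sums the eight byte counts into the
 * topmost byte. */
unsigned hamming_weight(uint64_t x)
{
    x = x - ((x >> 1) & 0x5555555555555555ULL);         /* 2-bit counts */
    x = (x & 0x3333333333333333ULL) + ((x >> 2) & 0x3333333333333333ULL);
    x = (x + (x >> 4)) & 0x0f0f0f0f0f0f0f0fULL;         /* 8-bit counts */
    return (x * 0x0101010101010101ULL) >> 56;           /* sum the bytes */
}

On the ${\rm AC}^0$-RAM, the final multiplication can be replaced by three more
shift-and-add steps, so for a~fixed word size both instruction sets handle this
example equally well, which is a~small instance of the equivalence mentioned above.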
\nota
When speaking of the \df{RAM model,} we will implicitly mean the version with limited numbers,
unit cost of operations and memory cells, and the instruction set of the Word-RAM.
This corresponds to the usage in recent algorithmic literature, although
authors rarely state these details explicitly.

\endpart
--
2.39.2