notation.tex

   1 \ifx\endpart\undefined
   2 \input macros.tex
   3 \fi
   4
   5 \chapter{Notation}
   6
   7 {\obeylines\parskip=0pt
   8 \def\n#1#2{\>\hbox to 6em{#1 \dotfill} #2}
   9 \def\[#1]{[\ref{#1}]}
  10 \n{$\bb R$}{the set of all real numbers}
  11 \n{$\bb N$}{the set of all natural numbers, including 0}
  12 \n{${\bb N}^+$}{the set of all positive integers}
  13 \n{$T[u,v]$}{the path in a tree~$T$ joining vertices $u$ and $v$ \[heavy]}
  14 \n{$T[e]$}{the path in a tree~$T$ joining the endpoints of an~edge~$e$ \[heavy]}
  15 \n{$A\symdiff B$}{symetric difference of sets: $(A\setminus B) \cup (B\setminus A)$}
  16 \n{$G-e$}{graph $G$ with edge $e$ removed}
  17 \n{$G+e$}{graph $G$ with edge $e$ added}
  18 \n{$w(e)$}{weight of an edge $e$}
  19 \n{$V(G)$}{set of vertices of a graph~$G$}
  20 \n{$E(G)$}{set of edges of a graph~$G$}
  21 \n{$n(G)$}{number of vertices of a graph~$G$, that is $\vert V(G)\vert$}
  22 \n{$m(G)$}{number of edges of a graph~$G$, that is $\vert E(G)\vert$}
  23 \n{$V,E,n,m$}{when used without $(G)$, they refer to the input of the current algorithm}
  24 \n{$\delta_G(U)$}{all edges connecting $U\subset V(G)$ with $V(G)\setminus U$; we usually omit the~$G$}
  25 \n{$\delta_G(v)$}{the edges of a one-vertex cut, i.e., $\delta_G(\{v\})$}
  26 \n{MST}{minimum spanning tree \[mstdef]}
  27 \n{MSF}{minimum spanning forest \[mstdef]}
  28 \n{$\mst(G)$}{the unique minimum spanning tree of a graph~$G$ \[mstnota]}
  29 \n{$X \choose k$}{a set of all $k$-element subsets of a set~$X$}
  30 \n{$G/e$}{multigraph contraction \[contract]}
  31 \n{$G.e$}{simple graph contraction \[simpcont]}
  32 \n{$\alpha(n)$}{the inverse Ackermann's function}
  33 \n{$f[X]$}{function applied to a set: $f[X]:=\{ f(x) ; x\in X \}$}
  34 \n{$f[e]$}{as edges are two-element sets, $f[e]$ maps both endpoints of an edge~$e$}
  35 \n{$\varrho({\cal C})$}{edge density of a graph class~$\cal C$ \[density]}
  36 \n{$\deg_G(v)$}{degree of vertex~$v$ in graph~$G$; we omit $G$ if it is clear from context}
  37 \n{${\bb E}X$}{expected value of a~random variable~$X$}
  38 \n{${\rm Pr}[\varphi]$}{probability that a predicate~$\varphi$ is true}
  39 \n{$\log n$}{a binary logarithm of the number~$n$}
  40 \n{$f^{(i)}$}{function~$f$ iterated $i$~times: $f^{(0)}(x):=x$, $f^{(i+1)}(x):=f(f^{(i)}(x))$}
  41 \n{$2\tower n$}{the tower function (iterated exponential): $2\tower 0:=1$, $2\tower (n+1):=2^{2\tower n}$}
  42 \n{$\log^* n$}{the iterated logarithm: $\log^*n := \min\{i: \log^{(i)}n < 1\}$; the inverse of~$2\tower n$}
  43 \n{$\beta(m,n)$}{$\beta(m,n) := \min\{i: \log^{(i)}n < m/n \}$ \[itjarthm]}
  44 }
  45
  46 %--------------------------------------------------------------------------------
  47
  48 \section{Multigraphs and contractions}
  49
  50 Since the formalism of multigraphs is not fixed in the literature, we will
  51 better define it carefully, following \cite{diestel:gt}:
  52
  53 \defn A~\df{multigraph} is an ordered triple $(V,E,M)$, where $V$~is the
  54 set of vertices, $E$~is the set of edges, taken as abstract objects disjoint
  55 with the vertices, and $M$ is a mapping $E\mapsto V \cup {V \choose 2}$
  56 which assigns to each edge either a pair of vertices or a single vertex
  57 (if the edge is a loop).
  58
  59 \proclaim{Notation}%
  60 When the meaning is clear from the context, we use our notation originally
  61 defined for graphs even for multigraphs. For example, $xy\in E(G)$ becomes a
  62 shorthand for $\exists e\in E(G)$ such that $M(G)(e) = \{x,y\}$. Also, we
  63 consider multigraphs with no multiple edges nor loops and simple graphs to be
  64 the same objects, although they formally differ.
  65
  66 \defn\id{contract}%
  67 Let $G=(V,E,M)$ be a multigraph and $e=xy$ its edge. \df{(Multigraph) contraction of~$G$ along~$e$}
  68 produces a multigraph $G/e=(V',E',M')$ such that:
  69 $$\eqalign{
  70 V' &= (V(G) \setminus \{x,y\}) \cup \{v_e\},\quad\hbox{where $v_e$ is a new vertex,}\cr
  71 E' &= E(G) - \{e\},\cr
  72 M'(f) &= \{ m(v) ; v\in M(f) \} \quad\hbox{for every $f=\in E'$, and}\cr
  73 m(x) &= \cases{v_e & \hbox{for $v=x,y,$}\cr v & \hbox{otherwise.}} \cr
  74 }$$
  75
  76 Sometimes we need contraction for simple graphs as well. It corresponds to performing
  77 the multigraph contraction, unifying parallel edges and deleting loops.
  78
  79 \defn\id{simpcont}%
  80 Let $G=(V,E)$ a simple graph and $e=xy$ its edge. \df{(Simple graph) contraction of~$G$ along~$e$}
  81 produces a graph $G.e=(V',E')$ such that:
  82 $$\eqalign{
  83 V' &= (V(G) \setminus \{x,y\}) \cup \{v_e\},\quad\hbox{where $v_e$ is a new vertex,}\cr
  84 E' &= \{ \{m(x),m(y)\} ; xy\in E \land m(x)\ne m(y) \},\cr
  85 m(x) &= \cases{v_e & \hbox{for $v=x,y,$}\cr v & \hbox{otherwise.}} \cr
  86 }$$
  87
  88 %--------------------------------------------------------------------------------
  89
  90 \section{Models of computation}
  91
  92 Traditionally, computer scientists use a~variety of computational models
  93 for a~formalism in which their algorithms are stated. If we were studying
  94 NP-completeness, we could safely assume that all the models are equivalent,
  95 possibly up to polynomial slowdown, which is negligible. In our case, the
  96 differences between good and not-so-good algorithms are on a~much smaller
  97 scale, so we need to replace yardsticks with a~micrometer and define our
  98 computation model carefully.
  99
 100 We would like to keep the formalism close enough to the reality of the contemporary
 101 computers. This rules out Turing machines and similar sequentially addressed
 102 models, but even the remaining models are subtly different. For example, some of them
 103 allow indexing of arrays in constant time, while the others have no such operation
 104 and arrays have to be emulated with pointer structures, requiring $\Omega(\log n)$
 105 time to access a~single element of an~$n$-element array. It is hard to say which
 106 way is superior --- most ``real'' computers have instructions for constant-time
 107 indexing, but on the other hand it seems to be physically impossible to fulfil
 108 this promise with an~arbitrary size of memory. Indeed, at the level of logical
 109 gates, the depth of the actual indexing circuit is logarithmic.
 110
 111 In recent decades, most researchers in the area of combinatorial algorithms
 112 consider two computational models: the Random Access Machine and the Pointer
 113 Machine. The former one is closer to the programmer's view of a~computer,
 114 the latter one is a~little more restricted and ``asymptotically safe.''
 115 We will follow this practice and study our algorithms in both models.
 116
 117 \para
 118 The \df{Random Access Machines (RAMs)} are a~family of computational models
 119 which share the following properties. (For one of the possible formal
 120 definitions, see~\cite{knuth:fundalg}.)
 121
 122 The \df{memory} of the model is represented by an~array of \df{memory cells}
 123 addressed by non-negative integers, each of them containing a~single integer.
 124 The \df{program} is a~sequence of \df{instructions} of two basic kinds: calculation
 125 instructions and control instructions.
 126
 127 \df{Calculation instructions} have two source arguments and one destination
 128 argument, the arguments being either immediate constants (not available
 129 as destination), a~directly addressed memory cell (specified by its number)
 130 or an~indirectly addressed memory cell (its address is stored in a~directly
 131 addressed memory cell).
 132
 133 \df{Control instructions} include branches (to a~specific instruction in
 134 the program), conditional branches (jump if two arguments specified as
 135 in the calculation instructions are equal and so on) and an~instruction
 136 to halt the program.
 137
 138 At the beginning of the computation, the memory contains the input data
 139 in specified memory cells and undefined values in all other cells.
 140 Then the program is executed one instruction at a~time. When it stops,
 141 some specified memory cells are interpreted as the program's output.
 142
 143 \para
 144 In the description of the RAM family, we have omitted several properties
 145 on~purpose, because different members of the family define them differently.
 146 The differences are: the size of the numbers we can calculate with, the time
 147 complexity of a~single instruction, the memory complexity of a~single memory
 148 cell and the repertoire of operations available in calculation instructions.
 149
 150 If we impose no limits on the magnitude of the numbers and we assume that
 151 arithmetic and logical operations work on them in constant time, we get
 152 a~very powerful parallel computer --- we can emulate an~arbitrary number
 153 of processors using arithmetics and suddenly almost everything can be
 154 computed in constant time, modulo encoding and decoding of input and output.
 155 Such models are unrealistic and there are two basic possibilities how to
 156 avoid them:
 157
 158 \numlist\ndotted
 159 \:Keep unlimited numbers, but increase cost of instructions: each instruction
 160   consumes time proportional to the number of bits of the numbers it processes,
 161   including memory addresses. Similarly, memory consumption is measured in bits,
 162   counting not only the values, but also the addresses of the respective memory
 163   cells.
 164 \:Place a~limit on the size of the numbers. It must not be constant, since
 165   such machines would be able to address only a~constant amount of memory.
 166   On the other hand, we are interested in polynomial-time algorithms only, so
 167   $\Theta(\log n)$-bit numbers, where~$n$ is the size of the input, should be sufficient.
 168   Then we can keep the cost of instructions and memory cells constant.
 169 \endlist
 170
 171 \FIXME{Mention the word size parameter and cite \cite{hagerup:wordram}}
 172
 173 Both restrictions avoid the problems of unbounded parallelism. The first
 174 choice is theoretically cleaner, but it makes the calculations of time and
 175 space complexity somewhat tedious. What more, these calculations usually result in both
 176 complexities being exactly $\Theta(\log n)$ times higher that with the second
 177 choice. This does not hold in general (consider a~program which uses many
 178 small numbers and $\O(1)$ large ones), but it is true for the algorithms we are
 179 interested in. Therefore we will always assume that the operations have unit
 180 cost and we make sure that all numbers are limited either by $\O(\log n)$ bits
 181 or by the size of numbers on the algorithm's input, whatever is bigger.
 182
 183 As for the choice of RAM operations, the following three variants are usually
 184 considered:
 185
 186 \itemize\ibull
 187 \:\df{Word-RAM} --- allows the ``C-language operators'', i.e., addition,
 188   subtraction, multiplication, division, remainder, bitwise {\sc and,} {\sc or,} exclusive
 189   {\sc or} ({\sc xor}) and negation ({\sc not}).
 190 \:\df{${\rm AC}^0$-RAM} --- allows all operations from the class ${\rm AC}^0$, i.e.,
 191   those computable by constant-depth polynomial-size boolean circuits with unlimited
 192   fan-in and fan-out. This includes all operations of the Word-RAM except for multiplication,
 193   division and remainders, and also many other operations like computing the Hamming
 194   weight (number of bits set in a~given number).
 195 \:Both restrictions at once.
 196 \endlist
 197
 198 As shown by Thorup in \cite{thorup:aczero}, for the usual purposes the first two choices
 199 are equivalent, while the third one is strictly weaker. We will therefore use the
 200 Word-RAM instruction set, mentioning differences of ${\rm AC}^0$-RAM where necessary.
 201
 202 \nota
 203 When speaking of the \df{RAM model,} we implicitly mean the version with limited numbers,
 204 unit cost of operations and memory cells and the instruction set of the Word-RAM.
 205 This corresponds to the usage in recent algorithmic literature, although the
 206 authors rarely mention the details.
 207
 208
 209 \endpart