From 976fa1a1ee96a8f4c97f9d1370d14473c08e33d4 Mon Sep 17 00:00:00 2001 From: Martin Mares Date: Mon, 17 Mar 2008 16:24:42 +0100 Subject: [PATCH] Karger's sampling lemma. --- PLAN | 1 + adv.tex | 69 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ biblio.bib | 22 ++++++++++++++++- mst.tex | 4 ++-- 4 files changed, 93 insertions(+), 3 deletions(-) diff --git a/PLAN b/PLAN index 2a6ae25..5554906 100644 --- a/PLAN +++ b/PLAN @@ -59,6 +59,7 @@ Spanning trees: - bounded expansion classes? - restricted cases and arborescences - mention parallel algorithms (see remarks in Karger) +- Pettie's paper on random bits Models: diff --git a/adv.tex b/adv.tex index 65d2029..b932458 100644 --- a/adv.tex +++ b/adv.tex @@ -1052,6 +1052,75 @@ is still open even for the RAM. \section{A~randomized algorithm}\id{randmst}% +When we analysed the contractive Bor\o{u}vka's algorithm in Section~\ref{contalg}, +we observed that while the number of vertices per phase decreases exponentially, +the number of edges can remain asymptotically the same. Karger, Klein and Tarjan +\cite{karger:randomized} have overcome this problem by combining the Bor\o{u}vka's +algorithm with filtering based on random sampling. This leads to a~randomized +algorithm which runs in expected linear time. + +The principle of the filtering is simple: Let us consider any spanning tree~$T$ +of the input graph~$G$. Each edge of~$G$ that is $T$-heavy is the heaviest edge +of some cycle, so by the Red lemma (\ref{redlemma}) it cannot participate in +the MST of~$G$. We can therefore discard all $T$-heavy edges and continue with +finding the MST on the reduced graph. Not all choices of~$T$ are equally good, +but the following lemma shows that when we take~$T$ as a~MST of a~randomly selected +subgraph, only a~small expected number of edges remain. + +\rem +The random sampling will sometimes select a~disconnected subgraph, +so we will not assume connectedness in this section. As we already noted +(Remark \ref{disconn}), with a~little bit of care our algorithms and theorems +keep working. We also extend the notion of light and heavy edges with respect +to a~tree to forests: When an~edge~$e$ connects two vertices lying in the same +tree~$T$ of a~forest~$F$, it is $F$-heavy iff it is $T$-heavy (similarly +for $F$-light). Edges connecting two different trees are always considered +$F$-light. Again, a~spanning forest~$F$ is minimum iff there are no $F$-light +edges. + +\lemman{Random sampling, Karger \cite{karger:sampling}} +Let $H$~be a~subgraph of~$G$ obtained by including each edge independently +with probability~$p$ and $F$~the minimum spanning forest of~$H$. Then the +expected number of $F$-nonheavy edges in~$G$ is at most $n/p$. + +\proof +Let us observe that we can obtain the forest~$F$ by combining the Kruskal's algorithm +(\ref{kruskal}) with the random process producing~$H$ from~$G$. We sort all edges of~$G$ +by their weights and start with an~empty forest~$F$. For each edge, we first +flip a~biased coin (it gives heads with probability~$p$) and if it comes up +tails, we discard the edge. Otherwise we perform a~single step of the Kruskal's +algoritm: We check if~$F+e$ contains a~cycle. If it does, we discard~$e$, otherwise +we add~$e$ to~$F$. At the end, we have produced the subgraph~$H$ and its MSF~$F$. + +We can modify the algorithm by swapping the check for cycles with flipping of +the coin: +\algo +\:If $F+e$ contains a~cycle, we immediately discard~$e$ (we can flip +the coin, but we need not to, because the edge will be discarded regardless of +the outcome). We note that~$e$ is $F$-heavy with respect to both the +current~$F$ and the final MSF. +\:If $F+e$ is acyclic, we flip the coin: +\::If it comes up heads, we add~$e$ to~$F$. In this case, $e$~is neither $F$-light + nor $F$-heavy. +\::If it comes up tails, we discard~$e$. Such edges are $F$-light. +\endalgo + +The number of $F$-nonheavy edges is therefore equal to the number of the coin +flips in step~2 of this algorithm. We also know that the algorithm stops before +it adds $n$~edges to~$F$. Therefore it flips at most as many coins as a~simple +random process which repeatedly flips until it gets~$n$ heads. As waiting for +every occurence of heads takes expected time~$1/p$, waiting for~$n$ heads +must take $n/p$. This is the bound we wanted to achieve. +\qed + +\rem +We could also use a~slightly different formulation of the sampling lemma +suggested by Chan \cite{chan:backward}. It changes the selection of the subgraph~$H$ +to choosing an~$mp$-edge subset of~$E(G)$ uniformly at random. The proof is then +a~straightforward application of the backward analysis method. We however prefered +the Karger's original version, because generating a~random subset of a~given size +cannot be generally performed in bounded worst-case time. + %-------------------------------------------------------------------------------- \section{Special cases and related problems} diff --git a/biblio.bib b/biblio.bib index 10335d4..0e406ad 100644 --- a/biblio.bib +++ b/biblio.bib @@ -178,13 +178,33 @@ @article { karger:randomized, author = "D. R. Karger and P. N. Klein and R. E. Tarjan", - title = "{Linear expected-time algorithms for connectivity problems}", + title = "{A Randomized Linear-Time Algorithm to Find Minimum Spanning Trees}", journal = jacm, volume = "42", + number = "2", pages = "321--328", year = "1995" } +@inproceedings { karger:sampling, + title={Random sampling in matroids, with applications to graph connectivity and minimum spanning trees}, + author={Karger, D.R.}, + booktitle={Proceedings of the 34th Annual Symposium on Foundations of Computer Science}, + year={1993}, + pages={84--93} +} + +@article{ chan:backward, + title={{Backwards analysis of the Karger-Klein-Tarjan algorithm for minimum spanning trees}}, + author={Chan, T.M.}, + journal={Information Processing Letters}, + volume={67}, + number={6}, + pages={303--304}, + year={1998}, + publisher={Elsevier} +} + @article { frederickson:online, author = "Greg N. Frederickson", title = "{Data structures for on-line updating of minimum spanning trees}", diff --git a/mst.tex b/mst.tex index 50e7bd2..b79d0c8 100644 --- a/mst.tex +++ b/mst.tex @@ -255,7 +255,7 @@ in~$T_{min}$ joining these vertices must cross~$C$ at least once). Exchanging $e$ for $e'$ in $T_{min}$ yields an even lighter spanning tree since $w(e)