From 499fe23d88f3a4ba3675e6db8b4585f122cddd96 Mon Sep 17 00:00:00 2001 From: Martin Mares Date: Wed, 20 Feb 2008 18:05:10 +0100 Subject: [PATCH] Ranking of permutations. --- PLAN | 4 ++ biblio.bib | 18 +++++- notation.tex | 7 ++- ram.tex | 2 +- rank.tex | 154 ++++++++++++++++++++++++++++++++++++++++++++++++++- 5 files changed, 177 insertions(+), 8 deletions(-) diff --git a/PLAN b/PLAN index 69cf1c4..1f8807d 100644 --- a/PLAN +++ b/PLAN @@ -57,6 +57,10 @@ TODO: - reference to mixed Boruvka-Jarnik - bit tricks: reference to HAKMEM +Ranking: + +- the general perspective: is it only a technical trick? + Notation: - \O(...) as a set? diff --git a/biblio.bib b/biblio.bib index ba96577..9be243d 100644 --- a/biblio.bib +++ b/biblio.bib @@ -2,7 +2,7 @@ author = "Michael A. Bender and Martin Farach-Colton", title = "The {LCA} Problem Revisited", booktitle = "Latin American Theoretical {INformatics}", - pages = "88-94", + pages = "88--94", year = "2000", url = "citeseer.ist.psu.edu/bender00lca.html" } @@ -10,7 +10,7 @@ author = "Greg N. Frederickson", title = "Ambivalent Data Structures for Dynamic 2-Edge-Connectivity and $k$ Smallest Spanning Trees", booktitle = "{IEEE} Symposium on Foundations of Computer Science", - pages = "632-641", + pages = "632--641", year = "1991", url = "citeseer.ist.psu.edu/frederickson91ambivalent.html" } @@ -203,6 +203,18 @@ year = "2004" } +@inproceedings{ mm:rank, + author = "Martin Mare\v{s} and Milan Straka", + title = "Linear-Time Ranking of Permutations", + booktitle = "Algorithms --- ESA 2007: 15th Annual European Symposium", + location = "Eilat, Israel, October 8--10, 2007", + volume = "4698", + series = "Lecture Notes in Computer Science", + pages = "187--193", + publisher = "Springer", + year = "2007", +} + @article{ ft:fibonacci, author = {Michael L. Fredman and Robert Endre Tarjan}, title = {Fibonacci heaps and their uses in improved network optimization algorithms}, @@ -448,7 +460,7 @@ inproceedings{ pettie:minirand, @inproceedings{ katriel:cycle, author = "I. Katriel and P. Sanders and J. Tr{\"a}ff", title = "A practical minimum spanning tree algorithm using the cycle property", - booktitle = "11th European Symposium on Algorithms(ESA)", + booktitle = "11th European Symposium on Algorithms (ESA)", number = "2832", series = "LNCS", pages = "679--690", diff --git a/notation.tex b/notation.tex index a9f1fa2..f51e746 100644 --- a/notation.tex +++ b/notation.tex @@ -44,7 +44,8 @@ \n{$W$}{word size of the RAM \[wordsize]} \n{$\(x)$}{number~$x\in{\bb N}$ written in binary \[bitnota]} \n{$\(x)_b$}{$\(x)$ zero-padded to exactly $b$ bits \[bitnota]} -\n{$x[i]$}{the value of the $i$-th bit of the number~$x$ \[bitnota]} +\n{$x[i]$}{when $x$ is a~number: the value of the $i$-th bit of~$x$ \[bitnota]} +\n{$\pi[i]$}{when $\pi$ is a~sequence: the $i$-th element of~$x$, starting with $x[1]$ \[brackets]} \n{$\sigma^k$}{the string~$\sigma$ repeated $k$~times \[bitnota]} \n{$\0$, $\1$}{bits in a~bit string \[bitnota]} \n{$\equiv$}{congruence modulo a~given number} @@ -57,8 +58,12 @@ \n{$\bxor$}{bitwise non-equivalence: $(x\bxor y)[i]=1$ iff $x[i]\ne y[i]$} \n{$x \shl n$}{bitwise shift of~$x$ by $n$~positions to the left: $x\shl n = x\cdot 2^n$} \n{$x \shr n$}{bitwise shift of~$x$ by $n$~positions to the right: $x\shr n = \lfloor x/2^n \rfloor$} +\n{$\prec$}{an~arbitrary linear order} \n{$R_{C,\prec}(x)$}{the rank of~$x$ in a~set~$C$ ordered by~$\prec$ \[rankdef]} \n{$R^{-1}_{C,\prec}(i)$}{the unrank of~$i$: the $i$-th smallest element of a~set~$C$ ordered by~$\prec$ \[rankdef]} +\n{$[n]$}{the set $\{1,2,\ldots,n\}$ \[pranksect]} +\n{$L(\pi,A)$}{lexicographic ranking function for permutations on a~set~$A\subseteq{\bb N}$ \[brackets]} +\n{$L^{-1}(i,A)$}{lexicographic unranking function, the inverse of~$L$ \[brackets]} } %-------------------------------------------------------------------------------- diff --git a/ram.tex b/ram.tex index 2442df3..699d682 100644 --- a/ram.tex +++ b/ram.tex @@ -430,7 +430,7 @@ replaced by a~different value in $\O(1)$ time by masking and shifts. \medskip } -\algn{Operations on vectors with $d$~elements of $b$~bits each} +\algn{Operations on vectors with $d$~elements of $b$~bits each}\id{vecops} \itemize\ibull diff --git a/rank.tex b/rank.tex index 365fa94..1bbd3c0 100644 --- a/rank.tex +++ b/rank.tex @@ -27,8 +27,6 @@ is the number~$i$ written in binary and padded to~$k$ digits (i.e., $\(i)_k$ in notation of Section~\ref{bitsect}). Obviously, $R(x)$ is the integer whose binary representation is~$x$. -\FIXME{Uses of ranks.} - \para In this chapter, we will investigate how to compute the ranking and unranking functions on different sets. Usually, we will make use of the fact that the ranks @@ -36,8 +34,158 @@ functions on different sets. Usually, we will make use of the fact that the rank use the integers of a~similar magnitude to represent non-trivial data structures. This will allow us to design efficient algorithms on the RAM. -\FIXME{In the whole chapter, assume RAM.} +\FIXME{We will assume RAM in the whole chapter.} \section{Ranking of permutations} +\id{pranksect} + +One of the most common ranking problems is ranking of permutations on the set~$[n]=\{1,2,\ldots,n\}$. +This is frequently used to create arrays indexed by permutations: for example in Ruskey's algorithm +for finding Hamilton cycles in Cayley graphs (see~\cite{ruskey:ham} and \cite{ruskey:hce}) +or when exploring state spaces of combinatorial puzzles like the Loyd's Fifteen \cite{ss:fifteen}. +Many other applications are surveyed by Critani et al.~in~\cite{critani:rau} and in +most cases, the time complexity of the whole algorithm is limited by the efficiency +of the (un)ranking functions. + +In most cases, the permutations are ranked according to their lexicographic order. +In fact, an~arbitrary order is often sufficient if the ranks are used solely +for indexing of arrays, but the lexicographic order has an~additional advantage +of a~nice structure allowing many additional operations on permutations to be +performed directly on their ranks. + +Na\"\i{}ve algorithms for lexicographic ranking require time $\Theta(n^2)$ in the +worst case \cite{reingold:catp} and even on average~\cite{liehe:raulow}. +This can be easily improved to $O(n\log n)$ by using either a binary search +tree to calculate inversions, or by a divide-and-conquer technique, or by clever +use of modular arithmetic (all three algorithms are described in +\cite{knuth:sas}). Myrvold and Ruskey \cite{myrvold:rank} mention further +improvements to $O(n\log n/\log \log n)$ by using the RAM data structures of Dietz +\cite{dietz:oal}. + +Linear time complexity was reached by Myrvold and Ruskey \cite{myrvold:rank} +for a~non-lexicographic order, which is defined locally by the history of the +data structure --- in fact, they introduce a linear-time unranking algorithm +first and then they derive an inverse algorithm without describing the order +explicitly. However, they leave the problem of lexicographic ranking open. + +We will describe a~general procedure which, when combined with suitable +RAM data structures, yields a~linear-time algorithm for lexicographic +(un)ranking. + +\nota\id{brackets}% +We will view permutations on a~ordered set~$A\subseteq {\bb N}$ as ordered $\vert A\vert$-tuples +(in other words, arrays) containing every element of~$A$ exactly once. We will +use square brackets for indexing of these tuples: $\pi=(\pi[1],\ldots,\pi[\vert A\vert])$. +The corresponding lexicographic ranking and unranking functions will be denoted by~$L(\pi,A)$ +and $L^{-1}(i,A)$ respectively. + +\obs +Let us first observe that permutations have a simple recursive structure. +If we fix the first element $\pi[1]$ of a~permutation~$\pi$ on the set~$[n]$, the +elements $\pi[2], \ldots, \pi[n]$ form a~permutation on $[n]-\{\pi[1]\} = \{1,\ldots,\pi[1]-1,\pi[1]+1,\ldots,n\}$. +The lexicographic order of two permutations $\pi$ and~$\pi'$ on the original set is then determined +by $\pi[1]$ and $\pi'[1]$ and only if these elements are equal, it is decided +by the lexicographic comparison of permutations $(\pi[2],\ldots,\pi[n])$ and +$(\pi'[2],\ldots,\pi'[n])$. Therefore the rank of $\pi$ is $(\pi[1]-1)\cdot +(n-1)!$ plus the rank of $(\pi[2],\ldots,\pi[n])$. + +This gives us a~reduction from (un)ranking of permutations on $[n]$ to (un)ranking +of permutations on a $(n-1)$-element set, which suggests a straightforward +algorithm, but unfortunately this set is different from $[n-1]$ and it even +depends on the value of~$\pi[1]$. We could renumber the elements to get $[n-1]$, +but it would require linear time per iteration. Instead, we generalize the +problem to permutations on subsets of $[n]$. For a permutation $\pi$ on a~set +$A\subseteq [n]$ of size~$m$, similar reasoning gives a~simple formula: +$$ +L((\pi[1],\ldots,\pi[m]),A) = R_A(\pi[1]) \cdot (m-1)! + +L((\pi[2],\ldots,\pi[m]), A\setminus\{\pi[1]\}), +$$ +which uses the ranking function~$R_A$ for~$A$. This formula leads to the +following algorithms (a~similar formulation can be found for example in~\cite{knuth:sas}): + +\alg $\(\pi,i,n,A)$: compute the rank of a~permutation $\pi[i\ldots n]$ on~$A$. +\algo +\:If $i\ge n$, return~0. +\:$a\=R_A(\pi[i])$. +\:$b\=\(\pi,i+1,n,A \setminus \{\pi[i]\})$. +\:Return $a\cdot(n-i)! + b$. +\endalgo + +\>We can call $\(\pi,1,n,[n])$ for ranking on~$[n]$, i.e., to calculate +$L(\pi,[n])$. + +\alg $\(j,i,n,A)$: Fill $\pi[i,\ldots,n]$ with the $j$-th permutation on~$A$. +\algo +\:If $i>n$, return immediately. +\:$a\=R^{-1}_A(\lfloor j/(n-i)! \rfloor)$. +\:$\pi\=\(j\bmod (n-i)!,i+1,n,A\setminus \{a\})$. +\:$\pi[i]\=a$. +\endalgo + +\>We can call $\(j,1,n,[n])$ for the unranking problem on~$[n]$, i.e., to get $L^{-1}(j,[n])$. + +\para +The most time-consuming parts of the above algorithms are of course operations +on the set~$A$. If we use a~data structure of a~known time complexity, the complexity +of the whole algorithm is easy to calculate: + +\lemma\id{ranklemma}% +Suppose that there is a~data structure maintaining a~subset of~$[n]$ under insertion +and deletion of single elements and ranking and unranking of elements, all in time +at most $t(n)$ per operation. Then lexicographic ranking and unranking of permutations +can be performed in time $\O(n\cdot t(n))$. + +\proof +Let us analyse the above algorithms. The depth of the recursion is~$n$ and in each +nested invokation of the recursive procedure we perform a~constant number of operations. +All of them are either trivial or factorials (which can be precomputed in~$\O(n)$ time) +or operations on the data structure. +\qed + +\example +If store~$A$ in an~ordinary array, we have insertion and deletion in constant time, +but ranking and unranking in~$\O(n)$, so $t(n)=\O(n)$ and the algorithm is quadratic. +Binary search trees give $t(n)=\O(\log n)$. The data structure of Dietz \cite{dietz:oal} +improves it to $t(n)=O(\log n/\log \log n)$. In fact, all these variants are equivalent +to the classical algorithms based on inversion vectors, because at the time of processing~$\pi[i]$, +the value of $R_A(\pi[i])$ is exactly the number of elements forming inversions with~$\pi[i]$. + +If we relax the requirements on the data structure to allow ordering of elements +dependent on the history of the structure (i.e., on the sequence of deletes performed +so far), we can implement all three operations in time $\O(1)$. We store +the values in an array and we also keep an inverse permutation. Ranking is done by direct +indexing, unranking by indexing of the inverse permutation and deleting by swapping +the element with the last element. We can observe that although it no longer +gives the lexicographic order, the unranking function is still the inverse of the +ranking function (the sequence of deletes from~$A$ is the same in both functions), +so this leads to $\O(n)$-time ranking and unranking in a~non-lexicographic order. +This order is the same as the one used by Myrvold and Ruskey in~\cite{myrvold:rank}. + +However, for our purposes we need to keep the same order on~$A$ over the whole +course of the algorithm. We will make use of the representation of vectors +by integers on the RAM as developed in Section~\ref{bitsect}, but first of +all, we will note that the ranks are large numbers, so the word size of the machine +has to be large as well for them to fit: + +\obs +$\log n! = \Theta(n\log n)$, therefore the word size~$W$ must be~$\Omega(n\log n)$. + +\proof +We have $n^n \ge n! \ge \lfloor n/2\rfloor^{\lfloor n/2\rfloor}$, so $n\log n \ge \log n! \ge \lfloor n/2\rfloor\cdot\log \lfloor n/2\rfloor$. +\qed + +\thmn{Ranking of permutations \cite{mm:rank}} When we order the permutations +on the set~$[n]$ lexicographically, both ranking and unranking can be performed on the +RAM in time~$\O(n)$. + +\proof +We will store the elements of the set~$A$ as a~sorted vector. Each element has +$\O(\log n)$ bits, so the whole vector requires $\O(n\log n)$ bits, which by the +above observation fits in a~constant number of machine words. We know from +Algorithm~\ref{vecops} that ranks can be calculated in constant time in such +vectors and that insertions and deletions can be translated to ranks and +masking. Unranking, that is indexing of the vector, is masking alone. +So we can apply the previous Lemma~\ref{ranklemma} with $t(n)=\O(1)$. +\qed \endpart -- 2.39.5