From 499fe23d88f3a4ba3675e6db8b4585f122cddd96 Mon Sep 17 00:00:00 2001
From: Martin Mares <mj@ucw.cz>
Date: Wed, 20 Feb 2008 18:05:10 +0100
Subject: [PATCH] Ranking of permutations.

---
 PLAN         |   4 ++
 biblio.bib   |  18 +++++-
 notation.tex |   7 ++-
 ram.tex      |   2 +-
 rank.tex     | 154 ++++++++++++++++++++++++++++++++++++++++++++++++++-
 5 files changed, 177 insertions(+), 8 deletions(-)

diff --git a/PLAN b/PLAN
index 69cf1c4..1f8807d 100644
--- a/PLAN
+++ b/PLAN
@@ -57,6 +57,10 @@ TODO:
 - reference to mixed Boruvka-Jarnik
 - bit tricks: reference to HAKMEM
 
+Ranking:
+
+- the general perspective: is it only a technical trick?
+
 Notation:
 
 - \O(...) as a set?
diff --git a/biblio.bib b/biblio.bib
index ba96577..9be243d 100644
--- a/biblio.bib
+++ b/biblio.bib
@@ -2,7 +2,7 @@
     author = "Michael A. Bender and Martin Farach-Colton",
     title = "The {LCA} Problem Revisited",
     booktitle = "Latin American Theoretical {INformatics}",
-    pages = "88-94",
+    pages = "88--94",
     year = "2000",
     url = "citeseer.ist.psu.edu/bender00lca.html" }
 
@@ -10,7 +10,7 @@
     author = "Greg N. Frederickson",
     title = "Ambivalent Data Structures for Dynamic 2-Edge-Connectivity and $k$ Smallest Spanning Trees",
     booktitle = "{IEEE} Symposium on Foundations of Computer Science",
-    pages = "632-641",
+    pages = "632--641",
     year = "1991",
     url = "citeseer.ist.psu.edu/frederickson91ambivalent.html" }
 
@@ -203,6 +203,18 @@
    year = "2004"
 }
 
+@inproceedings{ mm:rank,
+  author = "Martin Mare\v{s} and Milan Straka",
+  title = "Linear-Time Ranking of Permutations",
+  booktitle = "Algorithms --- ESA 2007: 15th Annual European Symposium",
+  location = "Eilat, Israel, October 8--10, 2007",
+  volume = "4698",
+  series = "Lecture Notes in Computer Science",
+  pages = "187--193",
+  publisher = "Springer",
+  year = "2007",
+}
+
 @article{ ft:fibonacci,
  author = {Michael L. Fredman and Robert Endre Tarjan},
  title = {Fibonacci heaps and their uses in improved network optimization algorithms},
@@ -448,7 +460,7 @@ inproceedings{ pettie:minirand,
 @inproceedings{ katriel:cycle,
   author = "I. Katriel and P. Sanders and J. Tr{\"a}ff",
   title = "A practical minimum spanning tree algorithm using the cycle property",
-  booktitle = "11th European Symposium on Algorithms(ESA)",
+  booktitle = "11th European Symposium on Algorithms (ESA)",
   number = "2832",
   series = "LNCS",
   pages = "679--690",
diff --git a/notation.tex b/notation.tex
index a9f1fa2..f51e746 100644
--- a/notation.tex
+++ b/notation.tex
@@ -44,7 +44,8 @@
 \n{$W$}{word size of the RAM \[wordsize]}
 \n{$\(x)$}{number~$x\in{\bb N}$ written in binary \[bitnota]}
 \n{$\(x)_b$}{$\(x)$ zero-padded to exactly $b$ bits \[bitnota]}
-\n{$x[i]$}{the value of the $i$-th bit of the number~$x$ \[bitnota]}
+\n{$x[i]$}{when $x$ is a~number: the value of the $i$-th bit of~$x$ \[bitnota]}
+\n{$\pi[i]$}{when $\pi$ is a~sequence: the $i$-th element of~$x$, starting with $x[1]$ \[brackets]}
 \n{$\sigma^k$}{the string~$\sigma$ repeated $k$~times \[bitnota]}
 \n{$\0$, $\1$}{bits in a~bit string \[bitnota]}
 \n{$\equiv$}{congruence modulo a~given number}
@@ -57,8 +58,12 @@
 \n{$\bxor$}{bitwise non-equivalence: $(x\bxor y)[i]=1$ iff $x[i]\ne y[i]$}
 \n{$x \shl n$}{bitwise shift of~$x$ by $n$~positions to the left: $x\shl n = x\cdot 2^n$}
 \n{$x \shr n$}{bitwise shift of~$x$ by $n$~positions to the right: $x\shr n = \lfloor x/2^n \rfloor$}
+\n{$\prec$}{an~arbitrary linear order}
 \n{$R_{C,\prec}(x)$}{the rank of~$x$ in a~set~$C$ ordered by~$\prec$ \[rankdef]}
 \n{$R^{-1}_{C,\prec}(i)$}{the unrank of~$i$: the $i$-th smallest element of a~set~$C$ ordered by~$\prec$ \[rankdef]}
+\n{$[n]$}{the set $\{1,2,\ldots,n\}$ \[pranksect]}
+\n{$L(\pi,A)$}{lexicographic ranking function for permutations on a~set~$A\subseteq{\bb N}$ \[brackets]}
+\n{$L^{-1}(i,A)$}{lexicographic unranking function, the inverse of~$L$ \[brackets]}
 }
 
 %--------------------------------------------------------------------------------
diff --git a/ram.tex b/ram.tex
index 2442df3..699d682 100644
--- a/ram.tex
+++ b/ram.tex
@@ -430,7 +430,7 @@ replaced by a~different value in $\O(1)$ time by masking and shifts.
 \medskip
 }
 
-\algn{Operations on vectors with $d$~elements of $b$~bits each}
+\algn{Operations on vectors with $d$~elements of $b$~bits each}\id{vecops}
 
 \itemize\ibull
 
diff --git a/rank.tex b/rank.tex
index 365fa94..1bbd3c0 100644
--- a/rank.tex
+++ b/rank.tex
@@ -27,8 +27,6 @@ is the number~$i$ written in binary and padded to~$k$ digits (i.e., $\(i)_k$ in
 notation of Section~\ref{bitsect}). Obviously, $R(x)$ is the integer whose binary
 representation is~$x$.
 
-\FIXME{Uses of ranks.}
-
 \para
 In this chapter, we will investigate how to compute the ranking and unranking
 functions on different sets. Usually, we will make use of the fact that the ranks
@@ -36,8 +34,158 @@ functions on different sets. Usually, we will make use of the fact that the rank
 use the integers of a~similar magnitude to represent non-trivial data structures.
 This will allow us to design efficient algorithms on the RAM.
 
-\FIXME{In the whole chapter, assume RAM.}
+\FIXME{We will assume RAM in the whole chapter.}
 
 \section{Ranking of permutations}
+\id{pranksect}
+
+One of the most common ranking problems is ranking of permutations on the set~$[n]=\{1,2,\ldots,n\}$.
+This is frequently used to create arrays indexed by permutations: for example in Ruskey's algorithm
+for finding Hamilton cycles in Cayley graphs (see~\cite{ruskey:ham} and \cite{ruskey:hce})
+or when exploring state spaces of combinatorial puzzles like the Loyd's Fifteen \cite{ss:fifteen}.
+Many other applications are surveyed by Critani et al.~in~\cite{critani:rau} and in
+most cases, the time complexity of the whole algorithm is limited by the efficiency
+of the (un)ranking functions.
+
+In most cases, the permutations are ranked according to their lexicographic order.
+In fact, an~arbitrary order is often sufficient if the ranks are used solely
+for indexing of arrays, but the lexicographic order has an~additional advantage
+of a~nice structure allowing many additional operations on permutations to be
+performed directly on their ranks.
+
+Na\"\i{}ve algorithms for lexicographic ranking require time $\Theta(n^2)$ in the
+worst case \cite{reingold:catp} and even on average~\cite{liehe:raulow}.
+This can be easily improved to $O(n\log n)$ by using either a binary search
+tree to calculate inversions, or by a divide-and-conquer technique, or by clever
+use of modular arithmetic (all three algorithms are described in
+\cite{knuth:sas}). Myrvold and Ruskey \cite{myrvold:rank} mention further
+improvements to $O(n\log n/\log \log n)$ by using the RAM data structures of Dietz
+\cite{dietz:oal}.
+
+Linear time complexity was reached by Myrvold and Ruskey \cite{myrvold:rank}
+for a~non-lexicographic order, which is defined locally by the history of the
+data structure --- in fact, they introduce a linear-time unranking algorithm
+first and then they derive an inverse algorithm without describing the order
+explicitly. However, they leave the problem of lexicographic ranking open.
+
+We will describe a~general procedure which, when combined with suitable
+RAM data structures, yields a~linear-time algorithm for lexicographic
+(un)ranking.
+
+\nota\id{brackets}%
+We will view permutations on a~ordered set~$A\subseteq {\bb N}$ as ordered $\vert A\vert$-tuples
+(in other words, arrays) containing every element of~$A$ exactly once. We will
+use square brackets for indexing of these tuples: $\pi=(\pi[1],\ldots,\pi[\vert A\vert])$.
+The corresponding lexicographic ranking and unranking functions will be denoted by~$L(\pi,A)$
+and $L^{-1}(i,A)$ respectively.
+
+\obs
+Let us first observe that permutations have a simple recursive structure.
+If we fix the first element $\pi[1]$ of a~permutation~$\pi$ on the set~$[n]$, the
+elements $\pi[2], \ldots, \pi[n]$ form a~permutation on $[n]-\{\pi[1]\} = \{1,\ldots,\pi[1]-1,\pi[1]+1,\ldots,n\}$.
+The lexicographic order of two permutations $\pi$ and~$\pi'$ on the original set is then determined
+by $\pi[1]$ and $\pi'[1]$ and only if these elements are equal, it is decided
+by the lexicographic comparison of permutations $(\pi[2],\ldots,\pi[n])$ and
+$(\pi'[2],\ldots,\pi'[n])$. Therefore the rank of $\pi$ is $(\pi[1]-1)\cdot
+(n-1)!$ plus the rank of $(\pi[2],\ldots,\pi[n])$.
+
+This gives us a~reduction from (un)ranking of permutations on $[n]$ to (un)ranking
+of permutations on a $(n-1)$-element set, which suggests a straightforward
+algorithm, but unfortunately this set is different from $[n-1]$ and it even
+depends on the value of~$\pi[1]$. We could renumber the elements to get $[n-1]$,
+but it would require linear time per iteration. Instead, we generalize the
+problem to permutations on subsets of $[n]$. For a permutation $\pi$ on a~set
+$A\subseteq [n]$ of size~$m$, similar reasoning gives a~simple formula:
+$$
+L((\pi[1],\ldots,\pi[m]),A) = R_A(\pi[1]) \cdot (m-1)! +
+L((\pi[2],\ldots,\pi[m]), A\setminus\{\pi[1]\}),
+$$
+which uses the ranking function~$R_A$ for~$A$. This formula leads to the
+following algorithms (a~similar formulation can be found for example in~\cite{knuth:sas}):
+
+\alg $\<Rank>(\pi,i,n,A)$: compute the rank of a~permutation $\pi[i\ldots n]$ on~$A$.
+\algo
+\:If $i\ge n$, return~0.
+\:$a\=R_A(\pi[i])$.
+\:$b\=\<Rank>(\pi,i+1,n,A \setminus \{\pi[i]\})$.
+\:Return $a\cdot(n-i)! + b$.
+\endalgo
+
+\>We can call $\<Rank>(\pi,1,n,[n])$ for ranking on~$[n]$, i.e., to calculate
+$L(\pi,[n])$.
+
+\alg $\<Unrank>(j,i,n,A)$: Fill $\pi[i,\ldots,n]$ with the $j$-th permutation on~$A$.
+\algo
+\:If $i>n$, return immediately.
+\:$a\=R^{-1}_A(\lfloor j/(n-i)! \rfloor)$.
+\:$\pi\=\<Unrank>(j\bmod (n-i)!,i+1,n,A\setminus \{a\})$.
+\:$\pi[i]\=a$.
+\endalgo
+
+\>We can call $\<Unrank>(j,1,n,[n])$ for the unranking problem on~$[n]$, i.e., to get $L^{-1}(j,[n])$.
+
+\para
+The most time-consuming parts of the above algorithms are of course operations
+on the set~$A$. If we use a~data structure of a~known time complexity, the complexity
+of the whole algorithm is easy to calculate:
+
+\lemma\id{ranklemma}%
+Suppose that there is a~data structure maintaining a~subset of~$[n]$ under insertion
+and deletion of single elements and ranking and unranking of elements, all in time
+at most $t(n)$ per operation. Then lexicographic ranking and unranking of permutations
+can be performed in time $\O(n\cdot t(n))$.
+
+\proof
+Let us analyse the above algorithms. The depth of the recursion is~$n$ and in each
+nested invokation of the recursive procedure we perform a~constant number of operations.
+All of them are either trivial or factorials (which can be precomputed in~$\O(n)$ time)
+or operations on the data structure.
+\qed
+
+\example
+If store~$A$ in an~ordinary array, we have insertion and deletion in constant time,
+but ranking and unranking in~$\O(n)$, so $t(n)=\O(n)$ and the algorithm is quadratic.
+Binary search trees give $t(n)=\O(\log n)$. The data structure of Dietz \cite{dietz:oal}
+improves it to $t(n)=O(\log n/\log \log n)$. In fact, all these variants are equivalent
+to the classical algorithms based on inversion vectors, because at the time of processing~$\pi[i]$,
+the value of $R_A(\pi[i])$ is exactly the number of elements forming inversions with~$\pi[i]$.
+
+If we relax the requirements on the data structure to allow ordering of elements
+dependent on the history of the structure (i.e., on the sequence of deletes performed
+so far), we can implement all three operations in time $\O(1)$. We store
+the values in an array and we also keep an inverse permutation. Ranking is done by direct
+indexing, unranking by indexing of the inverse permutation and deleting by swapping
+the element with the last element. We can observe that although it no longer
+gives the lexicographic order, the unranking function is still the inverse of the
+ranking function (the sequence of deletes from~$A$ is the same in both functions),
+so this leads to $\O(n)$-time ranking and unranking in a~non-lexicographic order.
+This order is the same as the one used by Myrvold and Ruskey in~\cite{myrvold:rank}.
+
+However, for our purposes we need to keep the same order on~$A$ over the whole
+course of the algorithm. We will make use of the representation of vectors
+by integers on the RAM as developed in Section~\ref{bitsect}, but first of
+all, we will note that the ranks are large numbers, so the word size of the machine
+has to be large as well for them to fit:
+
+\obs
+$\log n! = \Theta(n\log n)$, therefore the word size~$W$ must be~$\Omega(n\log n)$.
+
+\proof
+We have $n^n \ge n! \ge \lfloor n/2\rfloor^{\lfloor n/2\rfloor}$, so $n\log n \ge \log n! \ge \lfloor n/2\rfloor\cdot\log \lfloor n/2\rfloor$.
+\qed
+
+\thmn{Ranking of permutations \cite{mm:rank}} When we order the permutations
+on the set~$[n]$ lexicographically, both ranking and unranking can be performed on the
+RAM in time~$\O(n)$.
+
+\proof
+We will store the elements of the set~$A$ as a~sorted vector. Each element has
+$\O(\log n)$ bits, so the whole vector requires $\O(n\log n)$ bits, which by the
+above observation fits in a~constant number of machine words. We know from
+Algorithm~\ref{vecops} that ranks can be calculated in constant time in such
+vectors and that insertions and deletions can be translated to ranks and
+masking. Unranking, that is indexing of the vector, is masking alone.
+So we can apply the previous Lemma~\ref{ranklemma} with $t(n)=\O(1)$.
+\qed
 
 \endpart
-- 
2.39.5