From e7ed482d45fbd6e5729884f3798000f63133a0df Mon Sep 17 00:00:00 2001
From: Martin Mares <mj@ucw.cz>
Date: Wed, 20 Feb 2008 22:48:35 +0100
Subject: [PATCH] Ranking of k-permutations.

---
 macros.tex   |   1 +
 notation.tex |   1 +
 rank.tex     | 100 ++++++++++++++++++++++++++++++++++++++++++++++++---
 3 files changed, 97 insertions(+), 5 deletions(-)
diff --git a/macros.tex b/macros.tex
index 2d13cb4..4a0fd1e 100644
--- a/macros.tex
+++ b/macros.tex
@@ -145,6 +145,7 @@
 % Other fonts
 \font\chapfont=csssdc17 scaled \magstep1
 \font\secfont=csb14
+\font\secitfont=csbxti14
 
 %%% FIXME
 \footline={\hss\twelverm\folio\hss}
diff --git a/notation.tex b/notation.tex
index 6e34746..6c2a7d7 100644
--- a/notation.tex
+++ b/notation.tex
@@ -64,6 +64,7 @@
 \n{$[n]$}{the set $\{1,2,\ldots,n\}$ \[pranksect]}
 \n{$L(\pi,A)$}{lexicographic ranking function for permutations on a~set~$A\subseteq{\bb N}$ \[brackets]}
 \n{$L^{-1}(i,A)$}{lexicographic unranking function, the inverse of~$L$ \[brackets]}
+\n{$n^{\underline k}$}{the $k$-th falling factorial power: $n\cdot(n-1)\cdot\ldots\cdot(n-k+1)$ \[kpranksect]}
 }
 
 %--------------------------------------------------------------------------------
diff --git a/rank.tex b/rank.tex
index ede335f..6a9cbd8 100644
--- a/rank.tex
+++ b/rank.tex
@@ -29,12 +29,16 @@ representation is~$x$.
 
 \para
 In this chapter, we will investigate how to compute the ranking and unranking
-functions on different sets. Usually, we will make use of the fact that the ranks
-(and hence the input and output of our algorithm) are large numbers, so we can
-use the integers of a~similar magnitude to represent non-trivial data structures.
-This will allow us to design efficient algorithms on the RAM.
+functions on different sets efficiently. Usually, we will make use of the fact
+that the ranks (and hence the input and output of our algorithm) are large
+numbers, so we can use the integers of a~similar magnitude to represent non-trivial
+data structures.
 
-\FIXME{We will assume RAM in the whole chapter.}
+\para
+Until the end of the chapter, we will always assume that we are working on the
+Random Access Machine.
+
+%--------------------------------------------------------------------------------
 
 \section{Ranking of permutations}
 \id{pranksect}
@@ -104,6 +108,7 @@ which uses the ranking function~$R_A$ for~$A$. This formula leads to the
 following algorithms (a~similar formulation can be found for example in~\cite{knuth:sas}):
 
 \alg $\<Rank>(\pi,i,n,A)$: compute the rank of a~permutation $\pi[i\ldots n]$ on~$A$.
+\id{rankalg}
 \algo
 \:If $i\ge n$, return~0.
 \:$a\=R_A(\pi[i])$.
@@ -115,6 +120,7 @@ following algorithms (a~similar formulation can be found for example in~\cite{kn
 $L(\pi,[n])$.
 
 \alg $\<Unrank>(j,i,n,A)$: Fill $\pi[i,\ldots,n]$ with the $j$-th permutation on~$A$.
+\id{unrankalg}
 \algo
 \:If $i>n$, return immediately.
 \:$a\=R^{-1}_A(\lfloor j/(n-i)! \rfloor)$.
@@ -188,4 +194,88 @@ masking. Unranking, that is indexing of the vector, is masking alone.
 So we can apply the previous Lemma~\ref{ranklemma} with $t(n)=\O(1)$.
 \qed
 
+%--------------------------------------------------------------------------------
+
+\section{Ranking of {\secitfont k\/}-permutations}
+\id{kpranksect}
+
+The technique from the previous section can be also generalized to ranking of
+\df{$k$-permutations,} that is of ordered $k$-tuples drawn from the set~$[n]$,
+again in lexicographic order. There are $n^{\underline k} = n\cdot(n-1)\cdot\ldots\cdot(n-k+1)$
+such $k$-permutations and they have a~recursive structure similar to the one of
+the permutations. We will therefore use the same recursive scheme as before
+(algorithms \ref{rankalg} and \ref{unrankalg}), but we will modify the first step of both algorithms
+to stop after the first~$k$ iterations. We will also replace the number $(n-i)!$
+of permutations on the remaining elements by the number of $(k-i)$-permutations on the same elements,
+i.e., $(n-i)^{\underline{k-i}}$. As $(n-i)^{\underline{k-i}} = (n-i) \cdot (n-i-1)^{\underline{k-i-1}}$,
+we can precalculate all these numbers in linear time.
+
+Unfortunately, the ranks of $k$-permutations can be much smaller, so we can no
+longer rely on the same data structure fitting in a constant number of word-sized integers.
+For example, if $k=1$, the ranks are $\O(\log n)$-bit numbers, but the data
+structure still requires $\Theta(n\log n)$ bits.
+
+We do a minor side step by remembering the complement of~$A$ instead, that is
+the set of the at most~$k$ elements we have already seen. We will call this set~$H$
+(because it describes the ``holes'' in~$A$). Let us prove that $\Omega(k\log n)$ bits
+are needed to represent the rank, so the vector representation of~$H$ fits in
+a~constant number of words.
+
+\lemma
+The number of $k$-permutations on~$[n]$ is $2^{\Omega(k\log n)}$.
+
+\proof
+We already know that there $n^{\underline k}$ such $k$-permutations. If $k\le n/2$,
+then every term in the product is $n/2$ or more, so $\log n^{\underline k} \ge
+k\cdot (\log n - 1)$. If $k\ge n/2$, then $n^{\underline k} \ge n^{\underline{\smash{n/2}}}$
+and $\log n^{\underline k} \ge (n/2)(\log n - 1) \ge (k/2)(\log n - 1)$.
+\qed
+
+\para
+It remains to show how to translate the operations on~$A$ to operations on~$H$,
+again stored as a~sorted vector~${\bf h}$. Insertion to~$A$ correspond to
+deletion in~$H$ and vice versa. The rank of any~$x\in[n]$ in~$A$ is $x$ minus
+the number of holes which are smaller than~$x$, therefore $R_A(x)=x-R_H(x)$.
+To calculate $R_H(x)$, we can again use the vector operation \<Rank> from Algorithm \ref{vecops},
+this time on the vector~$\bf h$.
+
+The only operation we cannot translate directly is unranking in~$A$. We will
+therefore define an~auxiliary vector~$\bf b$ of the same size as~$\bf h$,
+such that $b_i=h_i-i$ (this is the number of elements of~$A$ smaller
+than~$h_i$). Now, if we want to find the $r$-th smallest element of~$A$, we find
+the largest $i$ such that $b_i<r$ (the rank of~$r$ in~$\bf b$) and we return
+$h_i+1+r-b_i$. This works because there is no hole in~$A$ between the element
+$h_i+1$, which has rank~$b_i$, and the desired element of~rank~$r$.
+
+\example
+If $A=\{2,5,6\}$ and $n=8$, then ${\bf h}=(1,3,4,7,8)$ and ${\bf b}
+= (0,1,1,3,3)$. When we want to calculate $R^{-1}_A(2)$, we find $i=3$ and we
+get $g_3+1+2-b_3 = 4+1+2-1 = 6$.
+
+\para
+The vector~$\bf b$ can be updated in constant time whenever an~element is
+inserted to~$\bf h$. It is sufficient to shift the fields apart (we know
+that the position of the new element in~$\bf b$ is the same as in~$\bf h$),
+set the new value using masking operations and decrease all higher fields
+by one in parallel by using a~single subtraction. Updates after deletions
+from~$\bf h$ are analogous.
+
+\FIXME{State the algorithms properly.}
+
+We have replaced all operations on~$A$ by the corresponding operations on the
+modified data structure, each of which works again in constant time. Therefore
+we have just proven the following theorem, which brings this section to
+a~happy ending:
+
+\thmn{Ranking of $k$-permutations \cite{mm:rank}}
+When we order the $k$-per\-mu\-ta\-tions on the set~$[n]$ lexicographically, both
+ranking and unranking can be performed on the RAM in time~$\O(k)$.
+
+\proof
+Modify Algorithms \ref{rankalg} and \ref{unrankalg} as described in this section.
+The modified data structure performs all required operations on the set~$A$ in
+constant time. The depth of the recursion is $\O(k)$ and we spend $\O(1)$ time
+at every level. The time bound follows.
+\qed
+
 \endpart
-- 
2.39.5