From 136af12f0e36799f53107c35311faf704c232f9a Mon Sep 17 00:00:00 2001 From: Martin Mares Date: Fri, 1 Feb 2008 15:27:14 +0100 Subject: [PATCH] More RAM bits. --- biblio.bib | 19 ++++++++++++++++++- ram.tex | 46 +++++++++++++++++++++++++++++++++++++++++++++- 2 files changed, 63 insertions(+), 2 deletions(-) diff --git a/biblio.bib b/biblio.bib index 0ea8521..5f7c413 100644 --- a/biblio.bib +++ b/biblio.bib @@ -507,13 +507,22 @@ inproceedings{ pettie:minirand, @book{ knuth:fundalg, author = {Donald E. Knuth}, - title = {The art of computer programming, volume 1 (3rd ed.): fundamental algorithms}, + title = {The Art of Computer Programming, Volume 1 (3rd ed.): Fundamental Algorithms}, year = {1997}, isbn = {0-201-89683-4}, publisher = {Addison Wesley Longman Publishing Co., Inc.}, address = {Redwood City, CA, USA}, } +@book{ knuth:seminalg, + author = {Donald E. Knuth}, + title = {The Art of Computer Programming, Volume 2 (3rd ed.): Seminumerical algorithms}, + year = {1997}, + isbn = {978-0-201-89684-8}, + publisher = {Addison Wesley Longman Publishing Co., Inc.}, + address = {Redwood City, CA, USA}, +} + @inproceedings{ hagerup:wordram, author = {Torben Hagerup}, title = {{Sorting and Searching on the Word RAM}}, @@ -608,3 +617,11 @@ inproceedings{ pettie:minirand, year={2002}, publisher={IEEE Computer Society Washington, DC, USA} } + +@article{ ito:newrap, + title={{Efficient Initial Approximation and Fast Converging Methods for Division and Square Root}}, + author={Ito, M. and Takagi, N. and Yajima, S.}, + journal={Proc. 12th IEEE Symp. Computer Arithmetic}, + pages={2--9}, + year={1995} +} diff --git a/ram.tex b/ram.tex index e62452e..fb3bd54 100644 --- a/ram.tex +++ b/ram.tex @@ -88,6 +88,7 @@ avoid this behavior: On the other hand, we are interested in polynomial-time algorithms only, so $\Theta(\log N)$-bit numbers should be sufficient. In practice, we pick~$w$ to be the larger of $\Theta(\log N)$ and the size of integers used in the algorithm's input and output. + We will call an integer stored in a~single memory cell a~\df{machine word.} \endlist Both restrictions easily avoid the problems of unbounded parallelism. The first @@ -301,7 +302,7 @@ scanning all~$n$ buckets takes $\O(n+m)$ time. \section{Data structures on the RAM} There is a~lot of data structures designed specifically for the RAM, taking -advantage of both indexing and arithmetic. In many cases, they surpass the known +advantage of both indexing and arithmetics. In many cases, they surpass the known lower bounds for the same problem on the~PM and often achieve constant time per operation, at least when either the magnitude of the values or the size of the data structure are suitably bounded. @@ -337,4 +338,47 @@ and Willard and it will also form a~basis for the rest of this chapter. \section{Bits and vectors} +In this rather technical section, we will show how RAM can be used as a~vector +computer to operate in parallel on multiple elements, as long as these elements +fit in a~single machine word. On the first sight this might seem useless, as we +cannot require large word sizes, but surprisingly often the elements are small +enough relative to the size of the algorithm's input and thus also relative to +the minimum possible word size. Also, as the following lemma shows, we can +easily emulate constant-times longer words: + +\lemman{Multiple-precision calculations} +Given a~RAM with $W$-bit words, we can emulate all calculation and control +instructions of a~RAM with word size $kW$ in time depending only on the~$k$. +(This is usually called \df{multiple-precision arithmetics.}) + +\proof +We split each word of the ``big'' machine to $W'=W/2$-bit blocks and store these +blocks in $2k$ consecutive memory cells. Addition, subtraction, comparison, +bitwise logical operations can be performed block-by-block. Shifts by a~multiple +of~$W'$ are trivial, otherwise we can combine each block of the result from +shifted versions of two original blocks. +To multiply two numbers, we can use the elementary school algorithm using the $W'$-bit +blocks as digits in base $2^{W'}$ --- the product of any two blocks fits +in a~single word. + +Division is harder, but Newton-Raphson iteration (see~\cite{ito:newrap}) +converges to the quotient in a~constant number of iterations, each of them +involving $\O(1)$ multiple-precision additions and multiplications. A~good +starting approximation can be obtained by dividing the two most-significant +(non-zero) blocks of both numbers. + +Another approach to division is using the improved elementary school algorithm as described +by Knuth in~\cite{knuth:seminalg}. It uses $\O(k^2)$ steps, but the steps involve +calculation of the most significant bit set in a~word. We will show below that it +can be done in constant time without using division. +\qed + +\nota +We will use boldface letters for vectors. The components of a~vector~${\bf x}$ +will be denoted as $x_1,\ldots,x_n$. + +\defn +The \df{canonical representation} of a~vector $(x_1,\ldots,x_n)$ of~$b$-bit numbers +is an~integer XXXXXXXXXXXXXXX + \endpart -- 2.39.5