1 # Perl module for sending queries to Sherlock search servers and parsing answers
3 # (c) 2002 Martin Mares <mj@ucw.cz>
5 # This software may be freely distributed and used according to the terms
6 # of the GNU Lesser General Public License.
10 Sherlock::Query -- Communication with Sherlock Search Servers
14 This perl library offers a simple interface for connecting to Sherlock
15 search servers, sending queries or control commands and parsing the
18 First of all, you have to use
20 my $conn = Sherlock::Query->new('server:port');
22 to create a new connection object (unconnected yet). Then you can call
24 my $res = $conn->command('command');
26 to establish the connection, send a given command to the search server
27 and gather the results (see below) or, if you want to send a normal query,
29 my $res = $conn->query('"simple" OR "query"');
31 which does the same as C<< $conn->command(...) >>, but it also parses the
32 results to a representation convenient for handling in Perl programs
35 Currently, you can use a single connection to send only a single command or query.
39 The I<raw answer> of the search server (i.e., the lines it has returned) is always
40 available as C<< $conn->{RAW} >> as a list of strings, each representing a single
43 Parsed results of queries are stored in a more complicated way, but before
44 explaining it, let's mention a couple of axioms: Any search server I<object>
45 (header, footer, a single document of answer) is always stored as a hash keyed
46 by attribute names. Ordinary single-valued attributes are stored as strings,
47 multi-valued attributes as (references to) arrays of strings. When an object
48 contains sub-objects, they are stored as references to other hashes, possibly
49 encapsulated within a list if there can be multiple such objects. Most objects
50 have an extra attribute C<RAW> containing the original description of the
51 object, a sub-list of C<< $conn->{RAW} >>.
53 The parsed answer consists of three parts (please follow F<doc/search> to
54 get a better picture of what the server answers): header C<< $conn->{HEADER} >>
55 (an object, as described above), footer C<< $conn->{FOOTER} >> (object) and document
56 cards C<< $conn->{CARDS} >> (a list of objects).
58 The I<header> contains all the standard header attributes and also C<< $hdr->{D} >>
59 which is a list of sub-objects, each corresponding to a single database and
60 containing per-database attributes like C<W> (word list).
62 The I<footer> is pretty straightforward and it just contains what you'd
65 Each I<card> contains the usual document attributes (see F<doc/objects> for
66 a list) plus C<< $card->{U} >> which is a list of sub-objects corresponding
67 to URLs of the document and containing per-URL attributes like C<U> (URL),
68 C<s> (original size) and C<T> (content type).
70 When in doubt, call the C<print> method which will print the whole contents
71 of the connection object. It's actually a much more general (but pretty
72 simple, thanks to Perl being a very introspective language) routine
73 usable for dumping any acyclic Perl data structure composed of strings,
74 hashes, arrays and references to them. You can access this general routine
75 by calling C<< $conn->format(sub { print $_[0]; }, $what) >> which dumps C<$what>,
76 passing each piece of output to the given subroutine as its first argument.
80 A good example of use of this module is the C<query> utility and
81 of course the example front-end (F<front-end/query.cgi>).
85 Martin Mares <mj@ucw.cz>
89 package Sherlock::Query;
96 sub do_parse_tree($$$$);
100 my $class = shift @_;
101 my $server = shift @_;
110 my ($q,$string) = @_;
114 my $sock = IO::Socket::INET->new(PeerAddr => $q->{SERVER}, Proto => 'tcp')
115 or return "-900 Cannot connect to search server: $!";
116 print $sock $string, "\n";
121 $stat =~ /^[+-]/ or return "-901 Reply parse error";
132 if (!defined $block) {
134 push @{$q->{RAW}}, $block;
177 our $footer_syntax = {
182 my ($q,$string) = @_;
184 # Send the query and gather results
185 my $stat = $q->command($string);
186 my @raw = @{$q->{RAW}};
188 # Split results to header, cards and footer
189 $q->{HEADER} = { RAW => [] };
190 if (@raw) { $q->{HEADER}{RAW} = shift @raw; }
191 elsif (!$stat) { return "-902 Incomplete reply"; }
192 $q->{FOOTER} = { RAW => [] };
193 if (@raw && $raw[@raw-1]->[0] =~ /^\+/) {
194 $q->{FOOTER}{RAW} = pop @raw;
198 push @{$q->{CARDS}}, { RAW => shift @raw };
202 parse_tree($q->{HEADER}, $hdr_syntax);
203 foreach my $c (@{$q->{CARDS}}) {
204 parse_tree($c, $card_syntax);
206 parse_tree($q->{FOOTER}, $footer_syntax);
213 my $syntax = shift @_;
214 do_parse_tree($tree->{RAW}, 0, $tree, $syntax);
217 sub do_parse_tree($$$$) {
220 my $cooked = shift @_;
221 my $syntax = shift @_;
224 $raw->[$i] =~ /^(.)(.*)/;
225 if (!defined($syntax->{$1}) && !defined($syntax->{''})) { return $i; }
226 if (ref $syntax->{$1} eq "ARRAY") {
227 push @{$cooked->{$1}}, $2;
229 } elsif (ref $syntax->{$1} eq "HASH") {
231 push @{$cooked->{$1}}, $block;
232 $i = do_parse_tree($raw, $i, $block, $syntax->{$1});
234 $cooked->{$1} = $2 if !defined($cooked->{$1});
241 sub format_tree($$$) {
242 my ($func, $a, $indent) = @_;
243 if (ref $a eq "ARRAY") {
244 if (@{$a} == 0) { &$func("[]\n"); }
247 foreach my $k (@{$a}) {
249 format_tree($func, $k, "$indent\t");
251 &$func($indent . "]\n");
255 foreach my $k (sort keys %{$a}) {
256 &$func("$indent\t$k => ");
257 format_tree($func, $a->{$k}, "$indent\t");
259 &$func($indent . "}\n");
260 } elsif (defined $a) {
268 my ($q, $func, $what) = @_;
269 format_tree($func, $what, "");
274 format_tree(sub { print $_[0]; }, $q, "");