1 # Perl module for sending queries to Sherlock search servers and parsing answers
3 # (c) 2002--2003 Martin Mares <mj@ucw.cz>
5 # This software may be freely distributed and used according to the terms
6 # of the GNU Lesser General Public License.
10 Sherlock::Query -- Communication with Sherlock Search Servers
14 This perl library offers a simple interface for connecting to Sherlock
15 search servers, sending queries or control commands and parsing the
18 First of all, you have to use
20 my $conn = new Sherlock::Query('server:port');
22 to create a new connection object (not yet connected). Then you can call
24 my $res = $conn->command('command');
26 to establish the connection, send a given command to the search server
27 and gather the results (see below) or, if you want to send a normal query,
29 my $res = $conn->query('"simple" OR "query"');
31 which does the same as C<< $conn->command(...) >>, but it also parses the
32 results to a representation convenient for handling in Perl programs
35 Currently, you can use a single connection to send only a single command or query.
39 The I<raw answer> of the search server (i.e., the lines it has returned) is always
40 available as C<< $conn->{RAW} >> as a list of strings, each representing a single
43 Parsed results of queries are stored in a more complicated way, but before
44 explaining it, let's mention a couple of axioms: Any search server I<object>
45 (header, footer, a single document of answer) is always stored as a hash keyed
46 by attribute names. Ordinary single-valued attributes are stored as strings,
47 multi-valued attributes as (references to) arrays of strings. When an object
48 contains sub-objects, they are stored as references to other hashes, possibly
49 encapsulated within a list if there can be multiple such objects. Most objects
50 have an extra attribute C<RAW> containing the original description of the
51 object, a sub-list of C<< $conn->{RAW} >>.
53 The parsed answer consists of three parts (please follow F<doc/search> to
54 get a better picture of what the server answers): header C<< $conn->{HEADER} >>
55 (an object, as described above), footer C<< $conn->{FOOTER} >> (object) and document
56 cards C<< $conn->{CARDS} >> (a list of objects).
58 The I<header> contains all the standard header attributes and also C<< $hdr->{D} >>
59 which is a list of sub-objects, each corresponding to a single database and
60 containing per-database attributes like C<W> (word list).
62 The I<footer> is pretty straightforward and it just contains what you'd
65 Each I<card> contains the usual document attributes (see F<doc/objects> for
66 a list) plus C<< $card->{U} >> which is a list of sub-objects corresponding
67 to URLs of the document and containing per-URL attributes like C<U> (URL),
68 C<s> (original size) and C<T> (content type).
70 When in doubt, call the C<print> method which will print the whole contents
71 of the connection object. It's actually a much more general (but pretty
72 simple, thanks to Perl being a very introspective language) routine
73 usable for dumping any acyclic Perl data structure composed of strings,
74 hashes, arrays and references to them. You can access this general routine
75 by calling C<< $conn->format(sub { print $_[0]; }, $what) >> which dumps C<$what> and
76 calls the given subroutine for each chunk of output, passing the text as its first argument.
80 A good example of use of this module is the C<query> utility and
81 of course the example front-end (F<front-end/query.cgi>).
85 Martin Mares <mj@ucw.cz>
89 package Sherlock::Query;
96 sub do_parse_tree($$$$);
100 my $class = shift @_;
101 my $server = shift @_;
110 my ($q,$string) = @_;
114 my $sock = IO::Socket::INET->new(PeerAddr => $q->{SERVER}, Proto => 'tcp')
115 or return "-900 Cannot connect to search server: $!";
116 print $sock $string, "\n";
120 $stat = "-903 Incomplete reply" if !defined $stat;
122 $stat =~ /^[+-]/ or return "-901 Reply parse error";
133 if (!defined $block) {
135 push @{$q->{RAW}}, $block;
164 our $footer_syntax = {
168 my ($q,$string) = @_;
170 # Send the query and gather results
171 my $stat = $q->command($string);
172 my @raw = @{$q->{RAW}};
174 # Split results to header, cards and footer
175 $q->{HEADER} = { RAW => [] };
176 if (@raw) { $q->{HEADER}{RAW} = shift @raw; }
177 elsif (!$stat) { return "-902 Incomplete reply"; }
178 $q->{FOOTER} = { RAW => [] };
179 if (@raw && $raw[@raw-1]->[0] =~ /^\+/) {
180 $q->{FOOTER}{RAW} = pop @raw;
184 push @{$q->{CARDS}}, { RAW => shift @raw };
188 parse_tree($q->{HEADER}, $hdr_syntax);
189 foreach my $c (@{$q->{CARDS}}) {
190 parse_tree($c, $card_syntax);
192 parse_tree($q->{FOOTER}, $footer_syntax);
199 my $syntax = shift @_;
200 do_parse_tree($tree->{RAW}, 0, $tree, $syntax);
203 sub do_parse_tree($$$$) {
206 my $cooked = shift @_;
207 my $syntax = shift @_;
210 $raw->[$i] =~ /^([^(]|\(.)(.*)/;
213 } elsif (!defined($syntax->{$1})) {
214 $cooked->{$1} = $2 if !defined($cooked->{$1});
216 } elsif (ref $syntax->{$1} eq "ARRAY") {
217 push @{$cooked->{$1}}, $2;
219 } elsif (ref $syntax->{$1} eq "HASH") {
221 push @{$cooked->{$1}}, $block;
222 $i = do_parse_tree($raw, $i+1, $block, $syntax->{$1});
228 sub format_tree($$$) {
229 my ($func, $a, $indent) = @_;
230 if (ref $a eq "ARRAY") {
231 if (@{$a} == 0) { &$func("[]\n"); }
234 foreach my $k (@{$a}) {
236 format_tree($func, $k, "$indent\t");
238 &$func($indent . "]\n");
242 foreach my $k (sort keys %{$a}) {
243 &$func("$indent\t$k => ");
244 format_tree($func, $a->{$k}, "$indent\t");
246 &$func($indent . "}\n");
247 } elsif (defined $a) {
255 my ($q, $func, $what) = @_;
256 format_tree($func, $what, "");
261 format_tree(sub { print $_[0]; }, $q, "");