1    #!/usr/bin/perl -w
2    use strict;
3    $|++;
4
5    ## config
6
7    my @URL = qw(http://www.stonehenge.Xcom/);
8
9    sub OK_TO_FOLLOW {
10     my $uri = shift;       # URI object, known to be http only
11     for ($uri->host) {
12       return 0 unless /\.stonehenge\.Xcom$/i;
13     }
14     for ($uri->query) {
15       return 0 if defined $_ and length;
16      
17     for ($uri->path) {
18       return 0 if /^\/(cgi|fors|-)/;
19       return 0 if /col\d\d|index/;
20       return 0 if /Pictures/;
21       return 0 unless /(\.html?|\/)$/;
22     }
23     return 1;
24   }
25
26   ## end config
27
28   use WWW::Robot;
29   use LWP::UserAgent;
30   use CGI::Pretty qw(-no_debug :html);
31   use HTML::Entities;
32
33   my %description;
34   my %keywords;
35   my %keyword_caps;
36
37   my $robot = WWW::Robot->new
38     (
39      NAME => 'MetaBot',
40      VERSION => '0.15',
41      EMAIL => 'merlyn@stonehenge.Xcom',
42      USERAGENT => LWP::UserAgent->new,
43      CHECK_MIME_TYPES => 0,
44      ## VERBOSE => 1,
45      );
46
47   $robot->env_proxy;
48
49   $robot->addHook
50     ("follow-url-test" => sub {
51        my ($robot, $hook, $url) = @_;
52        return 0 unless $url->scheme eq 'http';
53        OK_TO_FOLLOW($url);
54      });
55   $robot->addHook
56     ("invoke-on-contents" => sub {
57        my ($robot, $hook, $url, $response, $structure) = @_;
58        my %meta = map {
59          my $header = $response->header("X-Meta-$_");
60          defined $header ? ($_, $header) : ();
61        } qw(Description Keywords);
62        return unless %meta;
63        if (exists $meta{Description}) {
64          $_ = $meta{Description};
65          tr/ \t\n/ /s;
66          $description{$url} = $_;
67        }
68        if (exists $meta{Keywords}) {
69          for (split /,/, $meta{Keywords}) {
70            s/^\s+//;
71            s/\s+$//;
72            $keywords{lc $_}{$url}++;
73            $keyword_caps{lc $_} = $_;
74          }
75        }
76      });
77   $robot->run(@URL);
78
79   my %seen_letter;
80
81   print
82     table({ Cellspacing => 0, Cellpadding => 10, Border => 2 },
83           do {
84             my %letters;
85             @letters{map /^([a-z])/, keys %keywords} = ();
86             %letters ?
87               Tr(td({Colspan => 3},
88                     p("Jump to:",
89                       map a({Href => "#index_$_"}, uc $_), 
sort keys %letters)))
90                 : 0;
91           },
92           map {
93             my $key = $_;
94             my @value =
95               map {
96                 my $url = $_;
97                 my $text = exists $description{$url} ?
98                   $description{$url} : "(no description provided)";
99
100                [a({Href => encode_entities($url)}, 
                               encode_entities($url)),
101                 encode_entities($text),
102                ];
103              } sort keys %{$keywords{$key}};
104            my $key_text = $keyword_caps{$key};
105            if ($key =~ /^([a-z])/ and not $seen_letter{$1}++ ) {
106              $key_text = a({ Name => "index_$1" }, $key_text);
107            }
108
109            map {
110              Tr(($_ > 0 ? () : td({Rowspan => scalar @value}, 
$key_text)),
111                 td($value[$_]));
112              } 0..$#value;
113          } sort keys %keywords
114         );