1 #!/usr/bin/perl -w 2 use strict; 3 $|++; 4 5 ## config 6 7 my @URL = qw(http://www.stonehenge.Xcom/); 8 9 sub OK_TO_FOLLOW { 10 my $uri = shift; # URI object, known to be http only 11 for ($uri->host) { 12 return 0 unless /\.stonehenge\.Xcom$/i; 13 } 14 for ($uri->query) { 15 return 0 if defined $_ and length; 16 17 for ($uri->path) { 18 return 0 if /^\/(cgi|fors|-)/; 19 return 0 if /col\d\d|index/; 20 return 0 if /Pictures/; 21 return 0 unless /(\.html?|\/)$/; 22 } 23 return 1; 24 } 25 26 ## end config 27 28 use WWW::Robot; 29 use LWP::UserAgent; 30 use CGI::Pretty qw(-no_debug :html); 31 use HTML::Entities; 32 33 my %description; 34 my %keywords; 35 my %keyword_caps; 36 37 my $robot = WWW::Robot->new 38 ( 39 NAME => 'MetaBot', 40 VERSION => '0.15', 41 EMAIL => 'merlyn@stonehenge.Xcom', 42 USERAGENT => LWP::UserAgent->new, 43 CHECK_MIME_TYPES => 0, 44 ## VERBOSE => 1, 45 ); 46 47 $robot->env_proxy; 48 49 $robot->addHook 50 ("follow-url-test" => sub { 51 my ($robot, $hook, $url) = @_; 52 return 0 unless $url->scheme eq 'http'; 53 OK_TO_FOLLOW($url); 54 }); 55 $robot->addHook 56 ("invoke-on-contents" => sub { 57 my ($robot, $hook, $url, $response, $structure) = @_; 58 my %meta = map { 59 my $header = $response->header("X-Meta-$_"); 60 defined $header ? ($_, $header) : (); 61 } qw(Description Keywords); 62 return unless %meta; 63 if (exists $meta{Description}) { 64 $_ = $meta{Description}; 65 tr/ \t\n/ /s; 66 $description{$url} = $_; 67 } 68 if (exists $meta{Keywords}) { 69 for (split /,/, $meta{Keywords}) { 70 s/^\s+//; 71 s/\s+$//; 72 $keywords{lc $_}{$url}++; 73 $keyword_caps{lc $_} = $_; 74 } 75 } 76 }); 77 $robot->run(@URL); 78 79 my %seen_letter; 80 81 print 82 table({ Cellspacing => 0, Cellpadding => 10, Border => 2 }, 83 do { 84 my %letters; 85 @letters{map /^([a-z])/, keys %keywords} = (); 86 %letters ? 87 Tr(td({Colspan => 3}, 88 p("Jump to:", 89 map a({Href => "#index_$_"}, uc $_), sort keys %letters))) 90 : 0; 91 }, 92 map { 93 my $key = $_; 94 my @value = 95 map { 96 my $url = $_; 97 my $text = exists $description{$url} ? 98 $description{$url} : "(no description provided)"; 99 100 [a({Href => encode_entities($url)}, encode_entities($url)), 101 encode_entities($text), 102 ]; 103 } sort keys %{$keywords{$key}}; 104 my $key_text = $keyword_caps{$key}; 105 if ($key =~ /^([a-z])/ and not $seen_letter{$1}++ ) { 106 $key_text = a({ Name => "index_$1" }, $key_text); 107 } 108 109 map { 110 Tr(($_ > 0 ? () : td({Rowspan => scalar @value}, $key_text)), 111 td($value[$_])); 112 } 0..$#value; 113 } sort keys %keywords 114 );