1     #!/usr/bin/perl -w
2     use strict;
3     $|=1;
4     
5     use LWP::UserAgent;
6     use HTTP::Cookies;
7     use HTTP::Request::Common;
8     use HTML::LinkExtor;
9     
10    my %LINKS                 # subset of %HTML::Tagset::linkElements
11    (
12     'applet'  => ['archive', 'codebase', 'code'],
13     'bgsound' => ['src'],
14     'body'    => ['background'],
15     'embed'   => ['src'],
16     'frame'   => ['src'],
17     'iframe'  => ['src'],
18     'ilayer'  => ['background'],
19     'img'     => ['src', 'lowsrc'],
20     'input'   => ['src'],
21     'layer'   => ['background', 'src'],
22     ## 'link'    => ['href'], ## durn, some of these are stylesheets
23     'script'  => ['src'],
24     'table'   => ['background'],
25     'td'      => ['background'],
26     'th'      => ['background'],
27     'tr'      => ['background'],
28    );
29    
30    my $ua = LWP::UserAgent->new;
31    $ua->env_proxy;
32    $ua->agent("dltime/1.00 ".$ua->agent); # identify ourselves
33    $ua->cookie_jar(HTTP::Cookies->new); # capture cookies if needed
34    
35    report($_) for @ARGV;
36    
37    exit 0;
38    
39    sub report {
40      my $start = shift;
41    
42      my @todo = ["", $start];
43      my %done;
44    
45      while (@todo) {
46        my ($refer, $url) = @{shift @todo};
47        next if exists $done{$url};
48    
49        my $request = GET $url, [referer => $refer];
50        my $response = $ua->simple_request($request);
51    
52        if ($response->is_success) {
53          $done{$url} = length (my $content = $response->content);
54    
55          next if $response->content_type ne "text/html";
56    
57          my $base = $response->base; # relative URLs measured 
                                        # relative to here
58          my $p = HTML::LinkExtor->new(undef, $base) or die;
59          $p->parse($content);
60          $p->eof;
61          for my $link ($p->links) {
62            my ($tag, %attr) = @$link;
63            if ($LINKS{$tag}) {
64              for (@{$LINKS{$tag}}) {
65                next unless exists $attr{$_};
66                next unless length (my $a = $attr{$_});
67                ## print "$base $tag $_ => $a\n"; ## debug
68                push @todo, [$base, $a];
69              }
70            }
71          }
72          
73        } elsif ($response->is_redirect) {
74          $done{$url} = length $response->content; # this counts
75          my $location = $response->header('location') or next;
76          push @todo, [$url, $location]; # but get this too
77        } elsif ($response->is_error) {
78          print "$url ERROR: ", $response->status_line, "\n";
79        }
80    
81      }                             # end of outer loop
82    
83      {
84        my $total = 0;
85    
86        print "$start =>\n";
87        for my $url (sort { $done{$b} <=> $done{$a} } keys %done) {
88          $total += $done{$url};
89          printf "  %10d  %s\n", $done{$url}, $url;
90        }
91        printf "  %10d TOTAL\n", $total;
92        printf "  %10.0f seconds at 28.8\n\n", $total/2000;
93      }
94    
95    }