1  #!/home/merlyn/bin/perl -w
2  $|++;
3  use strict;
4  use File::Basename;
5  use File::Copy;
6  use File::Find;
7  use File::Path;
8  use IO::File;
9  
10 my $TOP = "/home/merlyn/Html/merlyn/WebTechniques";
11 my $TOP_URL = "http://www.stonehenge.com/merlyn/WebTechniques";
12 my $OUT = "/home/merlyn/Web/Reltree/Out";
13 
14 sub WITHIN {
15   $_[0] =~ m{^\Qhttp://www.stonehenge.com/merlyn/WebTechniques/\E }xs;
16 }
17 
18 ## "use MyParser;" ##
19 BEGIN {
20   package MyParser;
21   use HTML::Parser;
22   use HTML::Entities ();
23   use URI::URL;
24 
25   use vars qw(@ISA);
26   @ISA = qw(HTML::Parser);
27 
28   sub new {
29     my $pack = shift;
30     my $self = $pack->SUPER::new;
31     @{$self}{qw(__base __out __within)} = @_;
32     $self;
33   }
34 
35   sub declaration {
36     my $self = shift;
37     my ($decl) = @_;
38     $self->{__out}->print("<!$decl>");
39   }
40 
41   sub start {
42     my $self = shift;
43     my ($tag, $attr, $attrseq, $origtext) = @_;
44     my $out = $self->{__out};
45     $out->print("<$tag");
46     for (keys %$attr) {
47       $out->print(" $_=\"");
48       my $val = $attr->{$_};
49       if ("$tag $_" =~ /^(a href|img src)$/) {
50         $val = url($val)->abs($self->{__base},1);
51         if ($self->{__within}->($val)) {
52           $val = $val->rel($self->{__base});
53         }
54       }
55       $out->print(HTML::Entities::encode($val, '<>&"'));
56       $out->print('"');
57     }
58     $out->print(">");
59   }
60 
61   sub end {
62     my $self = shift;
63     my ($tag) = @_;
64     $self->{__out}->print("</$tag>");
65   }
66 
67   sub text {
68     my $self = shift;
69     my ($text) = @_;
70     $self->{__out}->print("$text");
71   }
72 
73   sub comment {
74     my $self = shift;
75     my ($comment) = @_;
76     $self->{__out}->print("<!-- $comment -->");
77   }
78 
79 }
80 ## end "use MyParser;" ##
81   
82 find(\&translate, "$TOP/./");
83 
84 sub translate {
85   return unless -f;
86   (my $rel_name = $File::Find::name) =~ s{ .*/\./ }{}xs;
87   my $src_url = "$TOP_URL/$rel_name";
88   my $out_file = "$OUT/$rel_name";
89   mkpath dirname($out_file), 1;
90   print "... $out_file\n";
91   if (/\.html$/) {
92     my $out = IO::File->new(">$out_file")
93       or die "Cannot create $out_file: $!";
94     MyParser->new($src_url,$out,\&WITHIN)->parse_file($_);
95   } else {
96     copy $_, $out_file;
97   }
98 }