"Programming With Perl"
by  Randal Schwartz
Web Techniques,  February 1998

Web Techniques grants permission to use these listings and code for private or
commercial use provided that credit to Web Techniques and the author is
maintained within the comments of the source. For questions, contact
editors@web-techniques.com.       

This file consists of one listing that accompany the article " Programming With Perl", 
published in the February 1998 issue of Web Techniques:


 

LISTING ONE



1  #!/home/merlyn/bin/perl -w
2  $|++;
3  use strict;
4  use File::Basename;
5  use File::Copy;
6  use File::Find;
7  use File::Path;
8  use IO::File;
9  
10 my $TOP = "/home/merlyn/Html/merlyn/WebTechniques";
11 my $TOP_URL = "http://www.stonehenge.com/merlyn/WebTechniques";
12 my $OUT = "/home/merlyn/Web/Reltree/Out";
13 
14 sub WITHIN {
15   $_[0] =~ m{^\Qhttp://www.stonehenge.com/merlyn/WebTechniques/\E }xs;
16 }
17 
18 ## "use MyParser;" ##
19 BEGIN {
20   package MyParser;
21   use HTML::Parser;
22   use HTML::Entities ();
23   use URI::URL;
24 
25   use vars qw(@ISA);
26   @ISA = qw(HTML::Parser);
27 
28   sub new {
29     my $pack = shift;
30     my $self = $pack->SUPER::new;
31     @{$self}{qw(__base __out __within)} = @_;
32     $self;
33   }
34 
35   sub declaration {
36     my $self = shift;
37     my ($decl) = @_;
38     $self->{__out}->print("<!$decl>");
39   }
40 
41   sub start {
42     my $self = shift;
43     my ($tag, $attr, $attrseq, $origtext) = @_;
44     my $out = $self->{__out};
45     $out->print("<$tag");
46     for (keys %$attr) {
47       $out->print(" $_=\"");
48       my $val = $attr->{$_};
49       if ("$tag $_" =~ /^(a href|img src)$/) {
50         $val = url($val)->abs($self->{__base},1);
51         if ($self->{__within}->($val)) {
52           $val = $val->rel($self->{__base});
53         }
54       }
55       $out->print(HTML::Entities::encode($val, '<>&"'));
56       $out->print('"');
57     }
58     $out->print(">");
59   }
60 
61   sub end {
62     my $self = shift;
63     my ($tag) = @_;
64     $self->{__out}->print("</$tag>");
65   }
66 
67   sub text {
68     my $self = shift;
69     my ($text) = @_;
70     $self->{__out}->print("$text");
71   }
72 
73   sub comment {
74     my $self = shift;
75     my ($comment) = @_;
76     $self->{__out}->print("<!-- $comment -->");
77   }
78 
79 }
80 ## end "use MyParser;" ##
81   
82 find(\&translate, "$TOP/./");
83 
84 sub translate {
85   return unless -f;
86   (my $rel_name = $File::Find::name) =~ s{ .*/\./ }{}xs;
87   my $src_url = "$TOP_URL/$rel_name";
88   my $out_file = "$OUT/$rel_name";
89   mkpath dirname($out_file), 1;
90   print "... $out_file\n";
91   if (/\.html$/) {
92     my $out = IO::File->new(">$out_file")
93       or die "Cannot create $out_file: $!";
94     MyParser->new($src_url,$out,\&WITHIN)->parse_file($_);
95   } else {
96     copy $_, $out_file;
97   }
98 }