"Programming in Perl"
by Randal L. Schwartz
Web Techniques, January 1997

Web Techniques grants permission to use these listings for private or 
commercial use provided that credit to Web Techniques and the author is 
maintained within the comments of the source. For questions, contact
editors@web-techniques.com.

[LISTING ONE]

1     #!/usr/bin/perl -Tw
2     use strict;
3     print STDERR
4       join (" ",
5             map "[$_]",
6             scalar localtime,
7             "404 ERROR",
8             map { $ENV{$_} || "-" }
9             qw(REDIRECT_URL REMOTE_HOST HTTP_REFERER)), 
10      "\n";
11    my $red_url = $ENV{REDIRECT_URL} || "?unknown?"; 
12    eval {
13      if ($red_url =~ /^\/~.*/s) { 
14        my $html = $red_url;
15        $html =~ s/[\x00-\x20"<&>"\x80-\xff]/&\#@{[ord$&]}\;/g; 
16        my $tp_html = "http://www.teleport.com$html";
17        print <<DQ;
18    Content-type: text/html
19    Status: 404 Not Found
20    
21    <HEAD><TITLE>File Not found</TITLE></HEAD> 
22    <BODY><H1>File Not found</H1>
23    The requested URL $html was not found on this server.<P>
24    Perhaps you were looking for something at Teleport's web-server, 
25    such as <A HREF="$tp_html">$tp_html</a>?
26    </BODY>
27    DQ
28        exit 0;
29      }
30    };
31    print <<"DQ";
32    Content-type: text/html
33    Status: 404 Not Found
34    
35    <HEAD><TITLE>File Not found</TITLE></HEAD> 
36    <BODY><H1>File Not found</H1>
37    The requested URL $red_url was not found on this server.<P>
38    Try looking at the <a href="http://www.stonehenge.com/">home page</a>. 
39    <!-- This is a custom message. -->
40    </BODY>
41    DQ


[LISTING TWO] 

1     #!/usr/bin/perl
2     use strict;
3     $|=1;
4     
5     use LWP::Simple;
6     
7     my $LOGDIR = "/home/merlyn/Logs"; 
8     @ARGV = (
9              (map { "gunzip <$_|" } <$LOGDIR/error_log.*[0-9].gz>), 
10             <$LOGDIR/error_log.*[0-9]>,
11             "$LOGDIR/error_log", 
12            );
13    
14    my %seen;
15    while (<>) {
16      next unless /404 ERROR/;
17      s/^\[//;
18      s/\]\s*$//;
19      my @fields = split /\] \[/;
20      my ($time, $wanted, $ref) = @fields[0,2,4];
21      next unless $ref =~ /^http:/; # solid HTTP fetch 
22      next if $ref =~ /\?/;         # no CGI searches 
23      next if $seen{$ref}++;        # once only
24      print "[$time $wanted $ref]\n"; 
25      my $content = get $ref;
26      unless (defined $content) {
27        print "... content not available\n"; 
28        next;
29      }
30      my @stonehenge = $content =~ /(\S*stonehenge\S*)/mig; 
31      if (@stonehenge) {
32        print map "  hit: $_\n", @stonehenge;
33        print map "  mailto: $_\n", $content =~ /(\S*mailto:\S*)/mg; 
34      }
35    }