import java.util.*;

/**
 * Rewrites an HTML token stream as a simplified page on standard output.
 *
 * <p>Only a small set of inline/paragraph tags is passed through; tables are
 * flattened with line breaks and spacing, images are replaced by a textual
 * placeholder (linked to the image unless already inside an anchor), and
 * script bodies are dropped.
 *
 * <p>NOTE(review): tokens are presumably delivered to {@link #doElement} by
 * the {@code AHParse} superclass while {@code processURL} runs; that class is
 * not visible here, so confirm against it.
 */
public class WebParse extends AHParse {
    /** True until the document header has been written. */
    boolean first=true;
    /** True while between a SCRIPT open tag and its close tag; text is suppressed. */
    boolean inscript=false;
    /** True while inside an anchor that was passed through to the output. */
    boolean inanchor=false;
    /** The page being clipped; used for the title and the BASE HREF. */
    static String page;
    /** Image base names (keys) whose IMG tags are silently dropped. */
    static Properties imageIgnore = new Properties();

    /** Characters that end a tag name: whitespace of any kind, or the closing bracket. */
    private static final String TAG_NAME_DELIMITERS = " \t\r\n\f>";

    /** Scheme prefix identifying links that are dropped instead of passed through. */
    private static final String JAVASCRIPT_SCHEME = "JAVASCRIPT";

    /**
     * True when the most recent anchor open tag was dropped (javascript link),
     * so that its closing tag can be dropped as well and the output stays balanced.
     */
    private boolean anchorDropped=false;

    /** Writes the document header, including a BASE element pointing at {@link #page}. */
    public void doHead() {
        System.out.println("<HTML><HEAD><TITLE>Clip from " + page+"</TITLE>");
        System.out.println("<BASE HREF=\"" + page + "\">");
        System.out.println("</HEAD><BODY>");
    }

    /** Writes the document footer. */
    public void doEnd() {
        System.out.println("</BODY></HTML>");
    }

    /** Returns true for characters that may be part of an attribute name. */
    private static boolean isNameChar(char c) {
        return Character.isLetterOrDigit(c) || c == '-' || c == '_';
    }

    /**
     * Finds {@code tag} in {@code token}, ignoring case, at a position where it
     * is not the tail of a longer attribute name (so {@code SRC=} does not
     * match inside {@code DATA-SRC=}).
     *
     * @return the index of the match, or -1 if there is none
     */
    private static int indexOfAttribute(String token, String tag) {
        int last = token.length() - tag.length();
        for (int i = 0; i <= last; i++) {
            if (!token.regionMatches(true, i, tag, 0, tag.length())) {
                continue;
            }
            if (i == 0 || !isNameChar(token.charAt(i - 1))) {
                return i;
            }
        }
        return -1;
    }

    /**
     * Extracts the value of an attribute from a raw tag.
     *
     * <p>The search runs directly on {@code token} (case-insensitively) rather
     * than on an upper-cased copy: upper-casing can change the string length,
     * which would make indices from the copy invalid for the original.
     *
     * @param token  the raw tag text, e.g. {@code <IMG SRC="a.gif">}
     * @param tag    the attribute name including the trailing {@code =},
     *               e.g. {@code "SRC="}; matched case-insensitively
     * @param defval value returned when the attribute is absent
     * @return the attribute value without surrounding quotes; {@code defval}
     *         when the attribute is not present; the empty string when the
     *         attribute name is the last thing in the token; the rest of the
     *         token when a quoted value is never closed
     */
    private String extractAttribute(String token,String tag,String defval) {
        int n = indexOfAttribute(token, tag);
        if (n == -1) {
            return defval;
        }
        n += tag.length();
        if (n >= token.length()) {
            return ""; // "NAME=" ends the token: present but empty
        }
        char quote = token.charAt(n);
        int end;
        if (quote == '"' || quote == '\'') {
            n++;
            end = token.indexOf(quote, n);
            if (end == -1) {
                return token.substring(n); // unterminated quote: technically an error
            }
        } else {
            // Unquoted value runs up to the next whitespace or the closing bracket.
            end = n;
            while (end < token.length()
                    && token.charAt(end) != '>'
                    && !Character.isWhitespace(token.charAt(end))) {
                end++;
            }
        }
        return token.substring(n, end);
    }

    /**
     * Handles one token of the input: either a tag (starts with {@code <}) or
     * a run of text. A {@code null} token marks the end of input and causes
     * the footer to be written.
     *
     * @param token the tag or text, or {@code null} at end of input
     * @return always {@code true}
     *         (NOTE(review): the meaning of the return value is defined by the
     *         superclass, which is not visible here)
     */
    public boolean doElement(String token) {
        if (first) {
            // Emit the header even for an empty document so the output is well formed.
            doHead();
            first=false;
        }
        if (token==null) {
            doEnd();
            return true;
        }
        if (token.length()==0) {
            return true; // nothing to emit
        }
        if (token.charAt(0)!='<') {
            // text
            if (!inscript) System.out.println(token);
            return true;
        }

        // tag: upper-case with a fixed locale so names compare reliably
        // (a Turkish default locale would turn "img" into "\u0130MG").
        String utoken=token.toUpperCase(Locale.ROOT);
        StringTokenizer names = new StringTokenizer(utoken.substring(1),
                                                    TAG_NAME_DELIMITERS);
        if (!names.hasMoreTokens()) {
            return true; // "<>" or "< >": no tag name, nothing to pass through
        }
        String tag = names.nextToken();

        if (tag.equals("A")) {
            String hrefurl=extractAttribute(token,"HREF=","");
            if (hrefurl.length()>JAVASCRIPT_SCHEME.length() &&
                hrefurl.regionMatches(true,0,JAVASCRIPT_SCHEME,0,
                                      JAVASCRIPT_SCHEME.length())) {
                anchorDropped=true;
                return true; // ignore javascript links
            }
            anchorDropped=false;
            inanchor=true;
        }
        if (tag.equals("/A")) {
            inanchor=false;
            if (anchorDropped) {
                // The matching open tag was dropped; drop the close tag too.
                anchorDropped=false;
                return true;
            }
        }
        if (tag.equals("TABLE")) System.out.println("<BR>");
        if (tag.equals("/TD")) System.out.println("&nbsp;&nbsp;");
        if (tag.equals("/TR")) System.out.println("<BR>");
        if (tag.equals("IMG")) {
            doImage(token);
            return true;
        }
        if (tag.equals("SCRIPT")) inscript=true;
        if (tag.equals("/SCRIPT")) inscript=false;

        // Open and close forms of the pass-through tags are treated alike.
        String name = tag.charAt(0)=='/' ? tag.substring(1) : tag;
        boolean pass = name.equals("B") || name.equals("I") || name.equals("U")
            || name.equals("PRE") || name.equals("P") || name.equals("BR")
            || name.equals("A");
        if (pass) System.out.println(token);
        return true;
    }

    /**
     * Replaces an IMG tag by a textual placeholder built from its ALT text
     * (or the image's base file name when ALT is missing or empty). Outside an
     * anchor the placeholder links to the image itself. Images without a SRC,
     * or whose base name is a key of {@link #imageIgnore}, produce no output.
     */
    private void doImage(String token) {
        String src=extractAttribute(token,"SRC=","");
        if (src.equals("")) return;
        int slash=src.lastIndexOf('/');
        String srcbase = slash==-1 ? src : src.substring(slash+1);
        if (imageIgnore.containsKey(srcbase)) return;
        String alt=extractAttribute(token,"ALT=",srcbase);
        if (alt.equals("")) alt=srcbase; // real ALT=""
        String label = "&lt;&lt;&lt;Image: " + alt + "&gt;&gt;&gt;";
        if (inanchor) {
            System.out.println(label);
        } else {
            System.out.println("<A HREF=\"" + src + "\">" + label + "</A>");
        }
    }

    /**
     * Command-line entry point.
     *
     * @param args {@code args[0]} is the page to process; any further
     *             arguments are image base names to ignore
     * @throws Exception whatever {@code processURL} propagates
     */
    public static void main(String [] args) throws Exception {
        if (args.length==0) {
            System.err.println(
                "Usage: java WebParse <url> [image-name-to-ignore ...]");
            System.exit(1);
        }
        page=args[0];
        for (int n=1;n<args.length;n++) {
            imageIgnore.put(args[n],"y");
        }
        new WebParse().processURL(page);
    }
}