import java.io.*;
import java.net.*;
import java.util.*;

/**
 * Minimal streaming HTML tokenizer.
 *
 * <p>Each call to {@link #parse(InputStream)} returns the next token from the
 * stream: either a complete tag (from {@code <} through the closing
 * {@code >}, with {@code <!-- ... -->} comments kept as a single token) or the
 * run of text up to the next {@code <}. Bytes are widened to {@code char}
 * one-for-one, so the input is effectively treated as ISO-8859-1.
 *
 * <p>An instance keeps a one-character lookahead between calls, so a single
 * instance must only be fed one stream at a time.
 */
public class AHParse {
    /** Value of {@link #lastchar} when no lookahead character has been read yet. */
    private static final int NO_LOOKAHEAD = -2;

    /** Value returned by {@link InputStream#read()} at end of stream. */
    private static final int EOF = -1;

    /**
     * One-character lookahead carried from one {@code parse} call to the next:
     * the first character of the next token, {@code -1} once end of stream has
     * been seen, or {@code -2} when nothing has been read ahead.
     */
    int lastchar = NO_LOOKAHEAD;

    /** Buffer holding the token built by the most recent {@code parse} call. */
    StringBuffer current;

    /**
     * Reads the next token from the stream.
     *
     * @param is stream to read from; must be the same stream on every call
     *           until {@code null} is returned
     * @return the next tag or text run, or {@code null} at end of stream
     * @throws IOException if reading from {@code is} fails
     */
    public String parse(InputStream is) throws IOException {
        current = new StringBuffer();
        int c = lastchar;
        if (c == NO_LOOKAHEAD) {
            c = is.read();
        }
        if (c < 0) {
            return null;
        }
        if (c == '<') {
            return parseTag(is);
        }
        return parseText(is, c);
    }

    /** Collects text starting at {@code c} up to (not including) the next '<' or end of stream. */
    private String parseText(InputStream is, int c) throws IOException {
        while (c != EOF && c != '<') {
            current.append((char) c);
            c = is.read();
        }
        // Either the '<' that starts the next tag, or EOF.
        lastchar = c;
        return current.toString();
    }

    /**
     * Collects a tag whose opening '<' has already been consumed. A tag that is
     * cut off by end of stream is returned as far as it was read.
     */
    private String parseTag(InputStream is) throws IOException {
        current.append('<');
        int c = is.read();

        // Detect the "<!--" comment opener one character at a time so that
        // "<!" and "<!-" followed by something else stay ordinary tags.
        boolean comment = false;
        if (c == '!') {
            current.append('!');
            c = is.read();
            if (c == '-') {
                current.append('-');
                c = is.read();
                if (c == '-') {
                    comment = true;
                    current.append('-');
                    c = is.read();
                }
            }
        }

        // Number of consecutive '-' characters immediately before c. The two
        // dashes of the opener are deliberately not counted.
        int dashes = 0;
        while (c != EOF) {
            // A plain tag ends at the first '>'; a comment only at a '>' that is
            // preceded by at least two dashes ("-->", "--->", ...).
            if (c == '>' && (!comment || dashes >= 2)) {
                current.append('>');
                lastchar = is.read();
                return current.toString();
            }
            current.append((char) c);
            dashes = (c == '-') ? dashes + 1 : 0;
            c = is.read();
        }

        // Stream ended inside the tag: hand back what was read, without
        // appending the EOF marker itself.
        lastchar = EOF;
        return current.toString();
    }

    /**
     * Opens the given URL, tokenizes its content and passes every token to
     * {@link #doElement(String)}. After the last token, {@code doElement} is
     * called once more with {@code null} to signal end of input. Processing
     * stops early if {@code doElement} returns {@code false}.
     *
     * @param urlstring URL to read
     * @throws MalformedURLException if {@code urlstring} is not a valid URL
     * @throws IOException if the URL cannot be opened or read
     */
    public void processURL(String urlstring) throws MalformedURLException, IOException {
        URL url = new URL(urlstring);
        // Drop any lookahead left over from a previously processed stream.
        lastchar = NO_LOOKAHEAD;
        // parse() reads one byte at a time, so buffer the network stream.
        InputStream is = new BufferedInputStream(url.openStream());
        try {
            String token;
            do {
                token = parse(is);
                // pass null to doElement to indicate EOF
                if (!doElement(token)) {
                    break;
                }
            } while (token != null);
        } finally {
            is.close();
        }
    }

    /**
     * Handles one token. This default implementation prints the token followed
     * by a {@code ###} separator line; override in a subclass for real work.
     *
     * @param token the token text, or {@code null} at end of input
     * @return {@code true} to keep going, {@code false} to stop processing
     */
    public boolean doElement(String token) {
        if (token == null) {
            return true;
        }
        System.out.println(token);
        System.out.println("###");
        return true; // keep going
    }

    /**
     * Command-line entry point: tokenizes the URL given as the first argument.
     *
     * @param args command-line arguments; {@code args[0]} is the URL
     * @throws Exception if the URL is malformed or cannot be read
     */
    public static void main(String args[]) throws Exception {
        if (args.length < 1) {
            System.err.println("usage: java AHParse <url>");
            System.exit(1);
        }
        AHParse parser = new AHParse();
        parser.processURL(args[0]);
    }
}