DOM-based TagStripper

import org.apache.xerces.parsers.*;
import org.apache.xerces.dom.*;
import org.w3c.dom.*;
import org.w3c.dom.traversal.*;
import org.xml.sax.SAXException;
import java.io.IOException;


/**
 * Command-line utility that prints the character data of XML documents
 * with the markup removed.
 *
 * <p>Each command-line argument is parsed into a DOM tree with the Xerces
 * {@code DOMParser}; the text nodes below the root element are then written
 * to {@code System.out} in the order the node iterator returns them.</p>
 */
public class DOMTagStripper {

  /**
   * Strips the tags from every document named on the command line.
   *
   * <p>A document that cannot be read or parsed is reported on
   * {@code System.err} and skipped; processing continues with the
   * next argument.</p>
   *
   * @param args the documents to process, each passed unchanged to
   *             {@code DOMParser.parse(String)}
   */
  public static void main(String[] args) {

    if (args.length == 0) {
      System.err.println("Usage: java DOMTagStripper document...");
      return;
    }

    // One parser instance is reused for all of the documents.
    DOMParser parser = new DOMParser();

    for (int i = 0; i < args.length; i++) {
      try {
        // Read the entire document into memory
        parser.parse(args[i]);
        printText(parser.getDocument());
      }
      catch (SAXException e) {
        // Name the offending input so failures are attributable
        // when several documents are given.
        System.err.println(args[i] + ": " + e);
      }
      catch (IOException e) {
        System.err.println(args[i] + ": " + e);
      }

    }

  } // end main

  /**
   * Writes the value of every text node under the document's root
   * element to {@code System.out}, without adding any separators.
   *
   * @param doc a parsed document whose implementation supports the
   *            DOM Level 2 Traversal module
   */
  private static void printText(Document doc) {

    // Cast to the standard traversal interface rather than to a
    // parser-specific implementation class.
    DocumentTraversal traversal = (DocumentTraversal) doc;

    // NOTE(review): SHOW_TEXT does not include SHOW_CDATA_SECTION, so
    // CDATA section content is presumably skipped if the parser builds
    // CDATASection nodes -- confirm whether that is intended.
    // The final argument (true) requests entity reference expansion.
    NodeIterator iterator = traversal.createNodeIterator(
     doc.getDocumentElement(), NodeFilter.SHOW_TEXT, null, true
    );

    Node node;
    while ((node = iterator.nextNode()) != null) {
      System.out.print(node.getNodeValue());
    }

  }

}

Previous | Next | Top | Cafe con Leche

Copyright 2000-2004 Elliotte Rusty Harold
elharo@metalab.unc.edu
Last Modified March 15, 2000