JAXP Example

import javax.xml.parsers.*;
import org.w3c.dom.*;
import org.xml.sax.*;
import java.io.IOException;
import java.util.StringTokenizer;


/**
 * Command-line utility that counts the whitespace-separated words found in
 * the character data of XML documents, using a JAXP DOM parser.
 *
 * <p>Usage: <code>java JAXPWordCount doc1.xml [doc2.xml ...]</code>. Each
 * argument is handed to {@link DocumentBuilder#parse(String)}; for every
 * document that parses successfully a line of the form
 * <code>N words</code> is written to standard output. Parse and I/O
 * failures are reported on standard error and do not stop the processing
 * of the remaining arguments.
 */
public class JAXPWordCount {

  /**
   * Parses each document named in <code>args</code> and prints its word
   * count.
   *
   * @param args system identifiers (file names or URIs) of the XML
   *             documents to count
   */
  public static void main(String[] args) {

    try {
      DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
      // Fold CDATA sections into the adjacent text nodes so that a word
      // straddling a text/CDATA boundary is counted once rather than twice.
      factory.setCoalescing(true);
      // NOTE(review): the factory is otherwise left at its defaults. If the
      // input documents can come from an untrusted source, consider
      // restricting DTD / external-entity processing -- confirm against
      // how this tool is deployed.
      DocumentBuilder parser = factory.newDocumentBuilder();

      for (int i = 0; i < args.length; i++) {
        try {
          // Read the entire document into memory
          Document d = parser.parse(args[i]);
          int numWords = countWordsInNode(d);
          System.out.println(numWords + " words");
        }
        catch (SAXException e) {
          // Malformed document: report it and move on to the next argument
          System.err.println(e);
        }
        catch (IOException e) {
          // Unreadable document: report it and move on to the next argument
          System.err.println(e);
        }
      } // end for
    } // end try
    catch (ParserConfigurationException e) {
      System.err.println(
       "No parser supporting JAXP could be found in the local class path.");
    }

  } // end main

  /**
   * Recursively counts the words in the character data of a DOM subtree.
   *
   * <p>Both text nodes and CDATA section nodes contribute to the count.
   * Other node types (comments, processing instructions, and so on)
   * contribute nothing themselves, and attribute values are never visited
   * because attributes are not child nodes. Each text or CDATA node is
   * tokenized on its own, so in a tree that has not been coalesced a word
   * split across two adjacent nodes is counted once per node.
   *
   * @param node root of the subtree to examine; <code>null</code> is
   *             treated as an empty subtree
   * @return the total number of whitespace-separated words in the subtree
   */
  public static int countWordsInNode(Node node) {

    if (node == null) return 0;

    int numWords = 0;

    // Walk the sibling chain directly; a node without children simply has
    // a null first child, so no separate hasChildNodes() test is needed.
    for (Node child = node.getFirstChild();
         child != null;
         child = child.getNextSibling()) {
      numWords += countWordsInNode(child);
    }

    // CDATA sections hold character data just like ordinary text nodes,
    // but a non-coalescing parser reports them with their own node type.
    int type = node.getNodeType();
    if (type == Node.TEXT_NODE || type == Node.CDATA_SECTION_NODE) {
      numWords += countWordsInString(node.getNodeValue());
    }

    return numWords;

  }

  /**
   * Counts the whitespace-separated tokens in a string.
   *
   * @param s the text to examine; may be <code>null</code>
   * @return the number of tokens, or 0 when <code>s</code> is
   *         <code>null</code> or trims to the empty string
   */
  private static int countWordsInString(String s) {

    if (s == null) return 0;
    s = s.trim();
    if (s.length() == 0) return 0;

    StringTokenizer st = new StringTokenizer(s);
    return st.countTokens();

  }

}
% java JAXPWordCount hotcop.xml
16 words

Previous | Next | Top | Cafe con Leche

Copyright 2000-2002 Elliotte Rusty Harold
elharo@metalab.unc.edu
Last Modified April 4, 2002