SAX-based XMLPrettyPrinter


import org.xml.sax.*;
import org.xml.sax.helpers.*;
import java.io.*;


/**
 * A SAX1 {@link DocumentHandler} that writes the events it receives back out
 * as indented XML: one start-tag, end-tag, processing instruction or run of
 * text per line, indented two spaces per nesting level.
 *
 * <p>Text is collected in a buffer and written on its own line when the next
 * markup event arrives. Line breaks inside text become spaces, surrounding
 * whitespace is trimmed, and whitespace-only text is dropped, so the output
 * is laid out for reading rather than being a byte-faithful copy of the
 * input.</p>
 *
 * <p>Indentation width and line terminator are fixed constants; making them
 * configurable, and word-wrapping long text, are left as exercises.</p>
 */
public class XMLPrettyPrinter2 implements DocumentHandler {

  /** Line terminator written after every line of output. */
  private static final String LINE_BREAK = "\r\n";

  /** Number of spaces written per level of element nesting. */
  private static final int INDENT_WIDTH = 2;

  /** Destination for the pretty-printed document. */
  protected Writer out;

  /** Current depth in the element hierarchy; 0 at the document level. */
  protected int depth = 0;

  /** Escaped character data seen since the last markup event. */
  private StringBuffer textBuffer = new StringBuffer();

  /**
   * Creates a pretty printer that writes to the given writer.
   *
   * @param out the writer that receives the formatted XML
   */
  public XMLPrettyPrinter2(Writer out) {
    this.out = out;
  }

  /**
   * Creates a pretty printer that writes UTF-8 encoded text to the given
   * stream.
   *
   * @param out the stream that receives the formatted XML
   * @throws IllegalStateException if the VM does not support UTF-8
   */
  public XMLPrettyPrinter2(OutputStream out) {
    try {
      this.out = new OutputStreamWriter(out, "UTF-8");
    }
    catch (UnsupportedEncodingException e) {
      // Fail here rather than leave this.out null and hit a
      // NullPointerException on the first write.
      throw new IllegalStateException(
       "Something is seriously wrong."
       + " Your VM does not support UTF-8 encoding!", e);
    }
  }

  /** Does nothing; location information is not used. */
  @Override
  public void setDocumentLocator(Locator locator) {}

  /**
   * Resets the printer's state and writes the XML declaration.
   *
   * @throws SAXException wrapping any IOException raised by the writer
   */
  @Override
  public void startDocument() throws SAXException {

    // Reset all per-document state so the instance can be reused, even
    // if the previous parse was abandoned part-way through.
    depth = 0;
    textBuffer.setLength(0);
    try {
      out.write("<?xml version=\"1.0\" encoding=\"UTF-8\"?>" + LINE_BREAK);
    }
    catch (IOException e) {
      throw new SAXException(e);
    }

  }

  /**
   * Flushes the underlying writer.
   *
   * @throws SAXException wrapping any IOException raised by the writer
   */
  @Override
  public void endDocument() throws SAXException {
    try {
      out.flush();
    }
    catch (IOException e) {
      throw new SAXException(e);
    }
  }

  /**
   * Writes any pending text, then the start-tag (including its attributes)
   * on a line of its own, and increases the nesting depth.
   *
   * @param name the element name
   * @param atts the element's attributes; may be empty or null
   * @throws SAXException wrapping any IOException raised by the writer
   */
  @Override
  public void startElement(String name, AttributeList atts)
   throws SAXException {

    try {
      flushTextBuffer();
      indent();
      StringBuffer tag = new StringBuffer("<").append(name);
      if (atts != null) {
        for (int i = 0; i < atts.getLength(); i++) {
          tag.append(' ').append(atts.getName(i)).append("=\"");
          tag.append(escapeAttributeValue(atts.getValue(i)));
          tag.append('"');
        }
      }
      tag.append('>').append(LINE_BREAK);
      out.write(tag.toString());
      depth++;
    }
    catch (IOException e) {
      throw new SAXException(e);
    }

  }

  /**
   * Writes any pending text, decreases the nesting depth, and writes the
   * end-tag on a line of its own.
   *
   * @param name the element name
   * @throws SAXException wrapping any IOException raised by the writer
   */
  @Override
  public void endElement(String name) throws SAXException {

    try {
      flushTextBuffer();
      if (depth > 0) {  // never indent by a negative amount
        depth--;
      }
      indent();
      out.write("</" + name + ">" + LINE_BREAK);
    }
    catch (IOException e) {
      throw new SAXException(e);
    }
  }

  /**
   * Writes the buffered text, trimmed of surrounding whitespace, on an
   * indented line of its own and empties the buffer. Nothing is written
   * when the buffer holds only whitespace, so the whitespace between
   * elements of the input does not turn into blank lines.
   *
   * @throws IOException if the writer fails
   */
  protected void flushTextBuffer() throws IOException {

    if (textBuffer.length() == 0) {
      return;
    }
    String text = textBuffer.toString().trim();
    textBuffer.setLength(0);
    if (text.length() > 0) {
      indent();
      out.write(text);
      out.write(LINE_BREAK);
    }

  }

  /**
   * Appends character data to the text buffer. Carriage returns and line
   * feeds become spaces, and {@code &}, {@code <} and {@code >} are
   * replaced by entity references. Escaping {@code >} keeps a literal
   * {@code ]]>} in the text from producing ill-formed output.
   *
   * @param text   the array holding the characters
   * @param start  index of the first character to use
   * @param length number of characters to use
   */
  @Override
  public void characters(char[] text, int start, int length)
   throws SAXException {
    int end = start + length;
    for (int i = start; i < end; i++) {
      switch (text[i]) {
        case '\r':
        case '\n':
          textBuffer.append(' ');
          break;
        case '&':
          textBuffer.append("&amp;");
          break;
        case '<':
          textBuffer.append("&lt;");
          break;
        case '>':
          textBuffer.append("&gt;");
          break;
        default:
          textBuffer.append(text[i]);
      }
    }
  }

  /** Does nothing; ignorable white space is discarded. */
  @Override
  public void ignorableWhitespace(char[] text, int start, int length)
   throws SAXException {
    // ignore ignorable white space
  }

  /**
   * Writes any pending text, then the processing instruction on a line of
   * its own. The separating space is omitted when there is no data.
   *
   * @param target the processing instruction target
   * @param data   the processing instruction data; may be null or empty
   * @throws SAXException wrapping any IOException raised by the writer
   */
  @Override
  public void processingInstruction(String target, String data)
   throws SAXException {
    try {
      flushTextBuffer();
      indent();
      if (data == null || data.length() == 0) {
        out.write("<?" + target + "?>" + LINE_BREAK);
      }
      else {
        out.write("<?" + target + " " + data + "?>" + LINE_BREAK);
      }
    }
    catch (IOException e) {
      throw new SAXException(e);
    }
  }

  /**
   * Writes the indentation for the current depth: {@code INDENT_WIDTH}
   * spaces per level.
   *
   * @throws IOException if the writer fails
   */
  protected void indent() throws IOException {

    int spaces = depth * INDENT_WIDTH;
    for (int i = 0; i < spaces; i++) {
      out.write(' ');
    }
  }

  /**
   * Escapes an attribute value for use between double quotes. Tabs, line
   * feeds and carriage returns are written as character references so a
   * parser reading the output does not normalize them to spaces.
   *
   * @param value the raw attribute value; null is treated as empty
   * @return the escaped value
   */
  private static String escapeAttributeValue(String value) {
    if (value == null) {
      return "";
    }
    StringBuffer escaped = new StringBuffer(value.length() + 16);
    for (int i = 0; i < value.length(); i++) {
      char c = value.charAt(i);
      switch (c) {
        case '&':
          escaped.append("&amp;");
          break;
        case '<':
          escaped.append("&lt;");
          break;
        case '"':
          escaped.append("&quot;");
          break;
        case '\t':
          escaped.append("&#x9;");
          break;
        case '\n':
          escaped.append("&#xA;");
          break;
        case '\r':
          escaped.append("&#xD;");
          break;
        default:
          escaped.append(c);
      }
    }
    return escaped.toString();
  }


  /**
   * Pretty-prints each document named on the command line to System.out.
   * Diagnostics go to System.err so they never mix with the XML output.
   * (This method could easily have been put in a separate class.)
   *
   * @param args URLs (system IDs) of the documents to print
   */
  public static void main(String[] args) {

    Parser parser;
    try {
      parser = ParserFactory.makeParser();
    }
    catch (Exception e) {
      // fall back on Xerces parser by name
      try {
        parser = ParserFactory.makeParser(
         "org.apache.xerces.parsers.SAXParser");
      }
      catch (Exception ee) {
        System.err.println("Couldn't locate a SAX parser");
        return;
      }
    }

    if (args.length == 0) {
      System.err.println(
       "Usage: java XMLPrettyPrinter2 URL1 URL2...");
      return;
    }

    // Install the Document Handler
    parser.setDocumentHandler(new XMLPrettyPrinter2(System.out));

    // start parsing...
    for (int i = 0; i < args.length; i++) {

      try {
        parser.parse(args[i]);
      }
      catch (SAXParseException e) { // well-formedness error
        System.err.println(args[i] + " is not well formed.");
        System.err.println(e.getMessage()
         + " at line " + e.getLineNumber()
         + ", column " + e.getColumnNumber());
      }
      catch (SAXException e) { // some other kind of error
        System.err.println(e.getMessage());
      }
      catch (IOException e) {
        System.err.println("Could not report on " + args[i]
         + " because of the IOException " + e);
      }

    }

  }

}

Previous | Next | Top | Cafe con Leche

Copyright 2000 Elliotte Rusty Harold
elharo@metalab.unc.edu
Last Modified February 15, 2000