// An XLink Spider Utility


import org.xml.sax.*;
import org.xml.sax.helpers.*;
import java.io.*;
import java.util.*;


/**
 * Collects the values of every <code>xlink:href</code> attribute found in
 * an XML document, using the SAX1 <code>Parser</code> and
 * <code>HandlerBase</code> API.
 *
 * <p>Attributes are matched on the literal name <code>xlink:href</code>.
 * Namespace declarations are not consulted, so a document that binds the
 * XLink namespace to a prefix other than <code>xlink</code> is not
 * recognized.</p>
 */
public class XLinkSpider extends HandlerBase {

  /** Attribute values gathered so far, in the order they were reported. */
  private final Vector uris = new Vector();

  /**
   * Parses the document at the given system identifier and returns the
   * <code>xlink:href</code> values it contains, in the order the parser
   * reported the elements carrying them.
   *
   * @param systemId the system identifier (URI) of the document to parse
   * @return an Enumeration of String attribute values; it has no elements
   *         if the document contains no <code>xlink:href</code> attribute
   * @throws SAXException if no SAX parser can be created, or if the
   *         parser reports an error while parsing
   * @throws IOException if the parser reports an I/O failure
   */
  public static Enumeration listURIs(String systemId) 
   throws SAXException, IOException {
    
    Parser parser = createParser();
      
    // Install the Document Handler   
    XLinkSpider spider = new XLinkSpider();   
    parser.setDocumentHandler(spider);
    parser.parse(systemId);
    return spider.uris.elements();
      
  }

  /**
   * Locates a SAX1 parser, trying in order: the class named by the
   * org.xml.sax.parser system property, the Apache Xerces parser by
   * name, and finally the platform's default JAXP parser.
   */
  private static Parser createParser() throws SAXException {
    
    try {
      return ParserFactory.makeParser();
    }
    catch (Exception ignored) {
      // no usable org.xml.sax.parser property; try the next option
    }
    
    try {
      return ParserFactory.makeParser(
       "org.apache.xerces.parsers.SAXParser");
    }
    catch (Exception ignored) {
      // Xerces is not on the class path; try the next option
    }
    
    // Last resort: the JAXP default parser. Without this step the
    // utility cannot run unless the system property is set or Xerces
    // is installed. Fully qualified names are used so that no extra
    // import is required.
    try {
      return javax.xml.parsers.SAXParserFactory.newInstance()
       .newSAXParser().getParser();
    }
    catch (Exception e) {
      throw new SAXException("No SAX parser could be located", e);
    }
    
  }

  /**
   * Records the value of the element's <code>xlink:href</code> attribute,
   * if it has one.
   *
   * @param name the element name as reported by the parser (not used)
   * @param attributes the attributes attached to the element
   * @throws SAXException declared by the overridden method; this
   *         implementation does not throw it itself
   */
  public void startElement(String name, AttributeList attributes)
   throws SAXException {
    
     // This isn't really compliant since it doesn't pay
     // attention to namespaces
     String uri = attributes.getValue("xlink:href");
     if (uri != null) uris.addElement(uri);
    
  }
  

  /**
   * Command line entry point. Each argument is parsed in turn and its
   * <code>xlink:href</code> values are printed to standard output, one
   * per line. A failure on one argument is reported on standard error
   * and does not prevent the remaining arguments from being processed.
   *
   * @param args the system identifiers (URLs) of the documents to scan
   */
  public static void main(String[] args) {
    
    if (args.length == 0) {
      System.out.println("Usage: java XLinkSpider URL1 URL2..."); 
      return;  // nothing to parse
    } 
      
    // start parsing... 
    for (int i = 0; i < args.length; i++) {
      
      try {
        Enumeration uris = listURIs(args[i]);
        while (uris.hasMoreElements()) {
          String s = (String) uris.nextElement();
          System.out.println(s);
        }
      }
      catch (Exception e) {
        System.err.println(e);
        e.printStackTrace(); 
      }
      
    } // end for
  
  } // end main

} // end XLinkSpider

// Previous | Next | Top | Cafe con Leche
//
// Copyright 2000 Elliotte Rusty Harold
// elharo@metalab.unc.edu
// Last Modified February 15, 2000