wikipedia

Support Wikipedia

Monday, January 4, 2010

XML parsing in Java and Groovy

XML is everywhere. Somebody once said that XML is like violence: if it doesn't work for you, then you are not using enough of it! Like it or not, you have to deal with XML everywhere. To say that XML handling in Java is not easy is an understatement, especially when you have to deal with SAX, DOM parsers, etc. There are some libraries out there, such as XOM and XStream, which make the job a little easier. But what if you want to read an XML config file into a Java object?

XStream has a fairly simple way of doing that; an example is shown below.

One other option is to look at the many JVM-based languages like Groovy, Scala, JRuby, etc. They all make XML handling very easy. Let's see how easy it is in Groovy.
Groovy has two APIs for dealing with XML: XmlParser and XmlSlurper.

Groovy lets you easily cut straight to the data you need instead of going through each node and fetching its children one by one.
Using a scripting language trades away much of that complexity, and since it runs on the same JVM you don't lose any performance either. A call to Groovy functionality can be embedded in Java or invoked through a shell. That way any JVM scripting language can be used.

The example here uses a simple XML data file (Cars.xml), shown below.

<records>
<carList>
      <car name='HSV Maloo' make='Holden' year='2006'>
        <country>Australia</country>
        <record type='speed'>Production Pickup Truck with speed of 271kph</record>
      </car>
      <car name='P50' make='Peel' year='1962'>
        <country>Isle of Man</country>
        <record type='size'>Smallest Street-Legal Car at 99cm wide and 59 kg in weight</record>
      </car>
      <car name='Royale' make='Bugatti' year='1931'>
        <country>France</country>
        <record type='price'>Most Valuable Car at $15 million</record>
      </car>
</carList>
</records>

The data structure to represent the XML data is defined in
Records.java.

package org.xml.example;

import java.util.List;

/**
 * Object model for the {@code <records>} document in Cars.xml: a list of
 * {@link Car} entries, each carrying a {@link Car.Country} and a
 * {@link Car.Record}.
 */
public class Records {
 /**
  * Cars taken from the {@code <carList>} element. Stays {@code null} until
  * something outside this class assigns it; there is no setter here.
  */
 private List<Car> carList = null;

 /** One {@code <car>} element: name, make and year plus its country and record. */
 public static class Car {
  String name;
  String make;
  String year;
  Country country;
  Record record;

  /** The {@code <record>} child of a car: its {@code type} and descriptive text. */
  public static class Record {
   String type;
   String info;

   @Override
   public String toString() {
    return "Record type:" + type + ", info:" + info;
   }
  }


  /** The {@code <country>} child of a car. */
  public static class Country {
   String name;

   @Override
   public String toString() {
    return "Country: " + name;
   }
  }

  @Override
  public String toString() {
   return "name: " + name + ", make:" + make + ", year:" + year + " "
   + country + " " + record;
  }
 }

 /**
  * Returns the list of cars exactly as stored.
  *
  * @return the internal list, or {@code null} if it was never assigned
  */
 public List<Car> getCars() {
  return carList;
 }

 /**
  * Renders every car on its own line, each terminated by the platform line
  * separator.
  *
  * @return one line per car, or an empty string when no list has been assigned
  */
 @Override
 public String toString() {
  if (carList == null) {
   // Nothing assigned yet; iterating would throw a NullPointerException.
   return "";
  }
  String lineSeparator = System.getProperty("line.separator");
  StringBuilder sb = new StringBuilder();
  for (Car car : carList) {
   // append(Object) prints "null" for a null entry instead of throwing.
   sb.append(car);
   sb.append(lineSeparator);
  }
  return sb.toString();
 }
}

The main program is XMLParseExample.java.

package org.xml.example;

import groovy.lang.Binding;
import groovy.util.GroovyScriptEngine;

import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;

import com.thoughtworks.xstream.XStream;

public class XMLParseExample {

    public static void main(String[] args) throws FileNotFoundException {
//        parseXMLinGroovy();
        parseXMLinXstream();
    }

    private static void parseXMLinXstream() throws FileNotFoundException {
        XStream xstream = new XStream();
        
        xstream.aliasType("records", Records.class);
        xstream.aliasType("car", Records.Car.class);
        xstream.useAttributeFor(Records.Car.class, "name");
        xstream.useAttributeFor(Records.Car.class, "make");
        xstream.useAttributeFor(Records.Car.class, "year");
        xstream.aliasType("country", Records.Car.Country.class);
        xstream.aliasType("record", Records.Car.Record.class);
        xstream.useAttributeFor(Records.Car.Record.class, "type");
                
        Records records = (Records)xstream.fromXML(new FileReader("/home/csrinivasan/Documents/Nigeria/Cars.xml"));
        String xml = xstream.toXML(records);
        System.err.println(xml);
    }

    private static void parseXMLinGroovy()  {
        String[] roots = new String[] { "./scripts/" };
        GroovyScriptEngine gse;
        try {
            gse = new GroovyScriptEngine(roots);
            Binding binding = new Binding();
            gse.run("ParseXML.groovy", binding);
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}

The Groovy script is ParseXML.groovy.

import org.xml.example.*;
import org.xml.example.Records.Car.*

/**
 * Builds a Records object from Cars.xml using Groovy's XmlParser and prints it.
 */
class ParseRecords {

  /**
   * Loads Cars.xml through the system class loader, copies every <car>
   * element into a Records.Car and prints the resulting Records.
   *
   * @throws FileNotFoundException if Cars.xml is not found by the class loader
   */
  def parseRecords() {
    def resource = ClassLoader.getSystemClassLoader().getResource("Cars.xml")
    if (resource == null) {
      throw new FileNotFoundException("Cars.xml not found on the class path")
    }
    def root = new XmlParser().parseText(resource.text)

    Records recs = new Records()
    // Create the list once so cars from every <carList> element are kept.
    recs.carList = new ArrayList()
    root.carList.each { listNode ->
      listNode.car.each { carNode ->
        Records.Car car = new Records.Car()
        car.name = carNode.attribute("name")
        car.make = carNode.attribute("make")
        car.year = carNode.attribute("year")

        car.country = new Records.Car.Country()
        car.country.name = carNode.country.text()

        car.record = new Records.Car.Record()
        // With several <record> children the last one's type wins.
        carNode.record.each { recordNode ->
          car.record.type = recordNode.attribute("type")
        }
        car.record.info = carNode.record.text()

        recs.carList.add(car)
      }
    }
    println recs
  }
}

// Script entry point: parse Cars.xml and print the records.
new ParseRecords().parseRecords()