读取XML最快、最优的方法

我有以下XML文件

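  <?xml version="1.0"?>
  <book>
    <person>
      <first>Kiran</first>
      <last>Pai</last>
      <age>22</age>
    </person>
    <person>
      <first>Bill</first>
      <last>Gates</last>
      <age>46</age>
    </person>
    <person>
      <first>Steve</first>
      <last>Jobs</last>
      <age>40</age>
    </person>
  </book>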

现在,从XML文件读取数据的Java程序如下所示。

 import java.io.File;
 import java.io.IOException;
 import javax.xml.parsers.DocumentBuilder;
 import javax.xml.parsers.DocumentBuilderFactory;
 import javax.xml.parsers.ParserConfigurationException;
 import org.w3c.dom.Document;
 import org.w3c.dom.Element;
 import org.w3c.dom.Node;
 import org.w3c.dom.NodeList;
 import org.xml.sax.SAXException;
 import org.xml.sax.SAXParseException;

 /**
  * Loads {@code book.xml} from the working directory into a DOM tree and prints
  * the root element name, the number of {@code person} elements, and the first
  * name, last name and age of every person.
  */
 public class ReadAndPrintXMLFile {

     /**
      * Parses {@code book.xml} and writes the report to standard output.
      *
      * <p>Parse errors are reported with their line number and system id; other
      * failures (I/O, parser configuration) print a stack trace. A person that
      * lacks one of the expected child elements is printed with an empty value
      * instead of aborting the whole listing.
      *
      * @param argv command-line arguments (unused)
      */
     public static void main(String argv[]) {
         try {
             DocumentBuilderFactory docBuilderFactory = DocumentBuilderFactory.newInstance();
             DocumentBuilder docBuilder = docBuilderFactory.newDocumentBuilder();
             Document doc = docBuilder.parse(new File("book.xml"));

             // Merge adjacent text nodes so each element's text is a single node.
             doc.getDocumentElement().normalize();
             System.out.println("Root element of the doc is "
                     + doc.getDocumentElement().getNodeName());

             NodeList listOfPersons = doc.getElementsByTagName("person");
             int totalPersons = listOfPersons.getLength();
             System.out.println("Total no of people : " + totalPersons);

             for (int s = 0; s < totalPersons; s++) {
                 Node personNode = listOfPersons.item(s);
                 if (personNode.getNodeType() != Node.ELEMENT_NODE) {
                     continue;
                 }
                 Element person = (Element) personNode;
                 System.out.println("First Name : " + childText(person, "first"));
                 System.out.println("Last Name : " + childText(person, "last"));
                 System.out.println("Age : " + childText(person, "age"));
             }
         } catch (SAXParseException err) {
             System.out.println("** Parsing error" + ", line " + err.getLineNumber()
                     + ", uri " + err.getSystemId());
             System.out.println(" " + err.getMessage());
         } catch (SAXException e) {
             // Prefer the wrapped cause when the parser supplied one.
             Exception x = e.getException();
             ((x == null) ? e : x).printStackTrace();
         } catch (ParserConfigurationException | IOException e) {
             e.printStackTrace();
         }
     }

     /**
      * Returns the trimmed text of the first descendant element with the given tag.
      *
      * @param parent  element to search below
      * @param tagName tag name of the wanted element
      * @return the trimmed text content, or an empty string when no such element exists
      */
     private static String childText(Element parent, String tagName) {
         Node match = parent.getElementsByTagName(tagName).item(0);
         if (match == null) {
             return "";
         }
         return match.getTextContent().trim();
     }
 }

结果是……

 Root element of the doc is book Total no of people : 3 First Name : Kiran Last Name : Pai Age : 22 First Name : Bill Last Name : Gates Age : 46 First Name : Steve Last Name : Jobs Age : 40 

现在我的问题是:有没有其他更快的方法来读取这个XML?我正在寻找最快的方式,请给出建议!

下面给出ReadAndPrintXMLFileWithStAX。将它与gontard的答案中给出的ReadAndPrintXMLFileWithSAX进行比较时,StAX方法更快。我的测试是在Mac版JDK 1.7.0_07上将示例代码运行500000次。

 ReadAndPrintXMLFileWithStAX: 103 seconds ReadAndPrintXMLFileWithSAX: 125 seconds 

ReadAndPrintXMLFileWithStAX(使用Java SE 7)

下面是使用XMLStreamReader而不是XMLEventReader的更优化的StAX(JSR-173)示例。

 import java.io.FileInputStream;
 import java.io.InputStream;
 import javax.xml.stream.XMLInputFactory;
 import javax.xml.stream.XMLStreamReader;

 /**
  * Reads {@code book.xml} with the cursor-based StAX API ({@link XMLStreamReader})
  * and prints every person's first name, last name and age, followed by the
  * number of {@code person} elements seen.
  */
 public class ReadAndPrintXMLFileWithStAX {

     /**
      * Streams through {@code book.xml} in the working directory and prints the report.
      *
      * @param argv command-line arguments (unused)
      * @throws Exception if the file cannot be opened or is not well-formed XML
      */
     public static void main(String argv[]) throws Exception {
         XMLInputFactory inputFactory = XMLInputFactory.newInstance();
         int persons = 0;

         // try-with-resources closes the file even when parsing fails.
         try (InputStream in = new FileInputStream("book.xml")) {
             XMLStreamReader streamReader = inputFactory.createXMLStreamReader(in);
             try {
                 streamReader.nextTag(); // Advance to "book" element
                 streamReader.nextTag(); // Advance to "person" element

                 while (streamReader.hasNext()) {
                     if (streamReader.isStartElement()) {
                         switch (streamReader.getLocalName()) {
                             case "first":
                                 System.out.print("First Name : ");
                                 System.out.println(streamReader.getElementText());
                                 break;
                             case "last":
                                 System.out.print("Last Name : ");
                                 System.out.println(streamReader.getElementText());
                                 break;
                             case "age":
                                 System.out.print("Age : ");
                                 System.out.println(streamReader.getElementText());
                                 break;
                             case "person":
                                 persons++;
                                 break;
                             default:
                                 break;
                         }
                     }
                     // getElementText() leaves the cursor on the end tag, so this
                     // single advance never skips a start tag.
                     streamReader.next();
                 }
             } finally {
                 // XMLStreamReader is not AutoCloseable; release it explicitly.
                 streamReader.close();
             }
         }

         System.out.print(persons);
         System.out.println(" persons");
     }
 }

输出

 First Name : Kiran Last Name : Pai Age : 22 First Name : Bill Last Name : Gates Age : 46 First Name : Steve Last Name : Jobs Age : 40 3 persons 

如果性能对你来说很重要,你应该优先选择SAX或StAX(http://en.wikipedia.org/wiki/StAX),而不是DOM。

使用DOM时,首先要把整个XML文件解析为对象模型,然后您再对这个模型进行查询。因此,您的算法实际上要处理两遍。

使用SAX时,解析过程中会调用一些回调(startDocument、endElement……)。SAX是基于事件的,即推模型。

使用StAX时,由您来控制解析过程:您将光标从一个元素移动到另一个元素。这是拉模型。

对于一个包含32910000人的文件,我将我的SAX版本与另一个使用StAX的回答(Blaise Doughan)进行了比较。我删除了所有System.out.println语句。我的程序花了106秒读完整个文件,另一个花了94秒。我认为SAX较慢,因为即使回调什么都不做,所有回调也都会被调用(推模型);而使用StAX时,光标只在“感兴趣的”元素上移动(拉模型)。

例如,使用java 7:

 import java.io.File;
 import javax.xml.parsers.SAXParser;
 import javax.xml.parsers.SAXParserFactory;
 import org.xml.sax.Attributes;
 import org.xml.sax.SAXException;
 import org.xml.sax.helpers.DefaultHandler;

 /**
  * Reads {@code book.xml} with the event-driven SAX API and prints every person's
  * first name, last name and age, followed by the number of {@code person} elements.
  */
 public class ReadAndPrintXMLFileWithSax {

     /**
      * Parses {@code book.xml} in the working directory with a {@link BookHandler}.
      *
      * @param args command-line arguments (unused)
      * @throws Exception if the parser cannot be created or the file cannot be parsed
      */
     public static void main(String[] args) throws Exception {
         SAXParserFactory fabrique = SAXParserFactory.newInstance();
         SAXParser parser = fabrique.newSAXParser();

         File file = new File("book.xml");
         BookHandler handler = new BookHandler();
         parser.parse(file, handler);
     }

     /**
      * SAX handler that prints one labelled line per {@code first}, {@code last}
      * and {@code age} element and counts {@code person} elements.
      */
     public static class BookHandler extends DefaultHandler {

         /** Number of {@code person} start tags seen so far. */
         private int count = 0;

         /**
          * Output line being assembled for the element currently open, or
          * {@code null} when the parser is not inside a printed element.
          */
         private StringBuilder buffer;

         /** Counts persons and starts a labelled output line for printed elements. */
         @Override
         public void startElement(String uri, String localName, String qName,
                 Attributes attributes) throws SAXException {
             switch (qName) {
                 case "person":
                     count++;
                     break;
                 case "first":
                     buffer = new StringBuilder("First Name : ");
                     break;
                 case "last":
                     buffer = new StringBuilder("Last Name : ");
                     break;
                 case "age":
                     buffer = new StringBuilder("Age : ");
                     break;
                 default:
                     break;
             }
         }

         /** Appends character data to the open output line, if there is one. */
         @Override
         public void characters(char[] ch, int start, int length) throws SAXException {
             if (buffer != null) {
                 // Append the slice directly; no intermediate String per callback.
                 buffer.append(ch, start, length);
             }
         }

         /** Prints the finished line when a printed element closes. */
         @Override
         public void endElement(String uri, String localName, String qName)
                 throws SAXException {
             switch (qName) {
                 case "first":
                 case "last":
                 case "age":
                     System.out.println(buffer.toString());
                     // Stop collecting so whitespace between elements is not
                     // appended to a line that has already been printed.
                     buffer = null;
                     break;
                 default:
                     break;
             }
         }

         /** Prints the total number of persons once the document ends. */
         @Override
         public void endDocument() throws SAXException {
             System.out.println(count + " persons");
         }
     }
 }

一个Stax例子

 import java.io.FileInputStream;
 import java.io.FileNotFoundException;
 import java.io.IOException;
 import java.io.InputStream;
 import javax.xml.stream.XMLEventReader;
 import javax.xml.stream.XMLInputFactory;
 import javax.xml.stream.XMLStreamException;
 import javax.xml.stream.events.StartElement;
 import javax.xml.stream.events.XMLEvent;

 /**
  * Reads a book XML file with the iterator-style StAX API ({@link XMLEventReader})
  * and prints the text of every {@code first}, {@code last} and {@code age} element.
  */
 public class ReadAndPrintXMLFile {

     /** File read when no path is given on the command line. */
     private static final String DEFAULT_INPUT_FILE = "c:/source/book.xml";

     /**
      * Prints one labelled line per {@code first}, {@code last} and {@code age} element.
      *
      * <p>The element text is read with {@link XMLEventReader#getElementText()}, which
      * stops on the matching end tag. No event after the end tag is consumed, so the
      * next start tag is still seen when elements are not separated by whitespace.
      *
      * @param argv optional; {@code argv[0]} is the XML file to read, defaulting to
      *             {@code c:/source/book.xml}
      */
     public static void main(String argv[]) {
         String inputFile = (argv != null && argv.length > 0) ? argv[0] : DEFAULT_INPUT_FILE;
         XMLInputFactory inputFactory = XMLInputFactory.newInstance();

         // try-with-resources closes the file even when parsing fails.
         try (InputStream in = new FileInputStream(inputFile)) {
             XMLEventReader eventReader = inputFactory.createXMLEventReader(in);
             try {
                 while (eventReader.hasNext()) {
                     XMLEvent event = eventReader.nextEvent();
                     if (!event.isStartElement()) {
                         continue;
                     }
                     StartElement startElement = event.asStartElement();
                     String label = labelFor(startElement.getName().getLocalPart());
                     if (label != null) {
                         System.out.println(label + eventReader.getElementText());
                     }
                 }
             } finally {
                 // XMLEventReader is not AutoCloseable; release it explicitly.
                 eventReader.close();
             }
         } catch (FileNotFoundException e) {
             System.out.println("File not Found: " + inputFile);
         } catch (IOException | XMLStreamException e) {
             e.printStackTrace();
         }
     }

     /**
      * Maps an element name to the label printed in front of its text.
      *
      * @param localName local part of the element name
      * @return the label, or {@code null} when the element is not printed
      */
     private static String labelFor(String localName) {
         if ("first".equals(localName)) {
             return "First Name : ";
         }
         if ("last".equals(localName)) {
             return "Last Name : ";
         }
         if ("age".equals(localName)) {
             return "Age : ";
         }
         return null;
     }
 }

输出:

 First Name : Kiran Last Name : Pai Age : 22 First Name : Bill Last Name : Gates Age : 46 First Name : Steve Last Name : Jobs Age : 40