kannanrbk
kannanrbk

Reputation: 7134

XML parse node value as string

My XML file looks like the one below. I want to get the text content of each value node, as shown in the expected output further down.

<property regex=".*" xpath=".*">
     <value>
          127.0.0.1
     </value>
<property regex=".*" xpath=".*">
<value>

</value>
</property>

I want to get the text in the order in which it appears in the file. Here is my Java code:

// Load the XML file (parseDocument is a helper that is not shown here).
Document doc = parseDocument("properties.xml");
// Every <property> element in the document, at any nesting depth, in document order.
NodeList properties = doc.getElementsByTagName("property");
for( int i = 0 , len = properties.getLength() ; i < len ; i++) {
     Element property = (Element)properties.item(i);
     // How can I proceed from here to read the text of the <value> child?
}

Output Expected :

 Node 1 : 127.0.0.1

Any suggestions are welcome.

Upvotes: 1

Views: 2735

Answers (4)

RGO
RGO

Reputation: 4727

You could solve this problem in another way using XSLT. Here is the Java code:

/**
 * Applies the XSLT style sheet to the XML file and writes the transformed
 * text to standard output.
 *
 * @param args unused
 * @throws TransformerException if the style sheet cannot be compiled or the
 *         transformation fails
 */
public static void main(String args[]) throws TransformerException{
    final File xmlFile = new File("/path/to/xml");
    final File xslFile = new File("/path/to/xsl");

    // Compile the style sheet once; the resulting Transformer does the actual work.
    Transformer transformer =
            TransformerFactory.newInstance().newTransformer(new StreamSource(xslFile));

    // Stream the input document through the transformer straight to the console.
    transformer.transform(new StreamSource(xmlFile), new StreamResult(System.out));
}

and here is the style sheet file:

<?xml version="1.0" encoding="ISO-8859-1"?>

<!--
    Prints one line per <property> element: its position among its sibling
    <property> elements, a space, and the whitespace-normalized text of its
    <value> child.
-->
<xsl:stylesheet version="1.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform">

    <xsl:output method="text" omit-xml-declaration="yes" />

    <!-- Entry point: process every <property> that is a child of the document element. -->
    <xsl:template match="/">
        <xsl:apply-templates select="*/property"/>
    </xsl:template>

    <xsl:template match="property">
        <xsl:number /> 
        <xsl:text> </xsl:text>
        <!--
            Only the <value> children are processed. Selecting node() here would
            also emit the text of unrelated siblings such as <anythingElse>.
        -->
        <xsl:apply-templates select="value" />
        <xsl:text>&#xa;</xsl:text>
    </xsl:template>

    <!-- normalize-space() trims the text and collapses internal runs of whitespace. -->
    <xsl:template match="value">
        <xsl:value-of select="normalize-space(.)" />
    </xsl:template>

</xsl:stylesheet>

When applied to this input:

  <root>
  <property regex=".*" xpath=".*">
       <value>
            127.0.0.1
       </value>
       <anythingElse>Text here</anythingElse>
  </property>
  <property regex=".*" xpath=".*">
  <value>
       val <![CDATA[
       <Valve className="org.tomcat.AccessLogValve" exclude="PASSWORD,pwd,pWord,ticket" enabled="true" serviceName="zohocrm" logDir="../logs" fileName="access" format="URI,&quot;PARAM&quot;,&quot;REFERRER&quot;,TIME_TAKEN,BYTES_OUT,STATUS,TIMESTAMP,METHOD,SESSION_ID,REMOTE_IP,&quot;INTERNAL_IP&quot;,&quot;USER_AGENT&quot;,PROTOCOL,SERVER_NAME,SERVER_PORT,BYTES_IN,ZUID,TICKET_DIGEST,THREAD_ID,REQ_ID"/>
       ]]> test
  </value>
  </property>
  </root>

The following output is produced:

1 127.0.0.1
2 val <Valve className="org.tomcat.AccessLogValve" exclude="PASSWORD,pwd,pWord,ticket" enabled="true" serviceName="zohocrm" logDir="../logs" fileName="access" format="URI,&quot;PARAM&quot;,&quot;REFERRER&quot;,TIME_TAKEN,BYTES_OUT,STATUS,TIMESTAMP,METHOD,SESSION_ID,REMOTE_IP,&quot;INTERNAL_IP&quot;,&quot;USER_AGENT&quot;,PROTOCOL,SERVER_NAME,SERVER_PORT,BYTES_IN,ZUID,TICKET_DIGEST,THREAD_ID,REQ_ID"/> test

Upvotes: 0

Andriy Kuba
Andriy Kuba

Reputation: 8263

Read the node values one by one:

    /**
     * Parses the given XML text and prints one line per {@code value} element
     * in the form {@code Node <n>: <text>}, where {@code n} starts at 1 and
     * {@code text} is the trimmed concatenation of the element's own text and
     * CDATA children.
     *
     * @param xml the XML document as a string
     * @throws Exception if the XML cannot be parsed
     */
    private static void printValues(String xml) throws Exception {
        Element element = parseXml(xml);

        NodeList values = element.getElementsByTagName("value");
        for (int i = 0; i < values.getLength(); i++) {
            NodeList vals = values.item(i).getChildNodes();

            StringBuilder value = new StringBuilder();
            for (int j = 0; j < vals.getLength(); j++) {
                Node part = vals.item(j);
                short type = part.getNodeType();
                // Only text and CDATA carry the value. getNodeValue() is null for
                // child elements (which would append the literal "null") and
                // returns the comment text for comment nodes.
                if (type == Node.TEXT_NODE || type == Node.CDATA_SECTION_NODE) {
                    value.append(part.getNodeValue());
                }
            }

            // Numbering is one-based to match the "Node 1" output asked for in the question.
            System.out.println("Node " + (i + 1) + ": " + value.toString().trim());
        }
    }

/**
 * Parses XML held in a string and returns its normalized document element.
 *
 * The text is handed to the parser as a character stream. Re-encoding it to
 * UTF-8 bytes would let an {@code encoding="..."} attribute in the XML prolog
 * make the parser decode those bytes with the wrong charset and corrupt
 * non-ASCII characters.
 *
 * NOTE(review): the factory is used with its default configuration, so DTDs
 * and external entities are not disabled here; harden it before parsing
 * untrusted input.
 *
 * @param source the XML document as a string
 * @return the document element, after {@code normalize()} has been called on it
 * @throws Exception if a parser cannot be created or the XML is not well-formed
 */
public static Element parseXml(String source) throws Exception{
    DocumentBuilderFactory dbFactory = DocumentBuilderFactory.newInstance();
    DocumentBuilder dBuilder = dbFactory.newDocumentBuilder();
    Document doc = dBuilder.parse(new InputSource(new java.io.StringReader(source)));
    Element element = doc.getDocumentElement();
    // Merge adjacent text nodes so callers see one text node per run of text.
    element.normalize();

    return element;
}

Upvotes: 0

toe
toe

Reputation: 121

The following method finds all property elements in the document and collects their child elements named value, without using XPath.

  /**
   * Collects, in document order, every {@code value} element that is a direct
   * child of a {@code property} element anywhere in the document.
   *
   * @param document the parsed XML document
   * @return the matching elements; empty if there are none
   */
  private static List<Element> getValueElements(Document document) {
    List<Element> values = new ArrayList<Element>();
    NodeList properties = document.getElementsByTagName("property");
    int propertyCount = properties.getLength();
    for (int p = 0; p < propertyCount; p++) {
      Node candidate = properties.item(p);
      if (candidate instanceof Element) {
        // Look at direct children only, so a value nested deeper is not
        // attributed to this property.
        NodeList kids = candidate.getChildNodes();
        int kidCount = kids.getLength();
        for (int k = 0; k < kidCount; k++) {
          Node kid = kids.item(k);
          boolean isValueElement = kid instanceof Element && "value".equals(kid.getNodeName());
          if (isValueElement) {
            values.add((Element) kid);
          }
        }
      }
    }
    return values;
  }

But you can do the same in a more elegant way using the XPath expression //property/value:

/**
 * Collects every {@code value} element whose parent is a {@code property}
 * element, using the XPath expression {@code //property/value}.
 *
 * @param document the parsed XML document
 * @return the matching elements; empty if there are none
 * @throws XPathExpressionException if the expression cannot be compiled or evaluated
 */
private static List<Element> getValueElementsUsingXpath(Document document) throws XPathExpressionException {
  // "//property/value" matches at any depth, so nested property elements are covered too.
  NodeList matches = (NodeList) XPathFactory.newInstance()
      .newXPath()
      .compile("//property/value")
      .evaluate(document, XPathConstants.NODESET);

  List<Element> values = new ArrayList<Element>();
  for (int index = 0, count = matches.getLength(); index < count; index++) {
    Node match = matches.item(index);
    if (match instanceof Element) {
      values.add((Element) match);
    }
  }
  return values;
}

You can use the method above like this:

  /**
   * Prints every {@code value} element found in {@code properties.xml} as
   * {@code Node <n>: <text>}, numbering from 1.
   *
   * @param args unused
   * @throws Exception if the document cannot be loaded
   */
  public static void main(String[] args) throws Exception {
    // getValueElementsUsingXpath(...) can be used here instead of getValueElements(...).
    List<Element> valueElements = getValueElements(parseDocument("properties.xml"));

    for (int index = 0; index < valueElements.size(); index++) {
      String text = formatValueElement(valueElements.get(index));
      System.out.println("Node " + (index + 1) + ": " + text);
    }
  }

  /**
   * Builds a single-line string from the text and CDATA children of the given
   * element: each piece is trimmed, empty pieces are dropped, and the rest are
   * joined with one space. Child elements and other node types are ignored.
   *
   * @param element the element whose direct text content is formatted
   * @return the joined text, or an empty string if there is none
   */
  private static String formatValueElement(Element element) {
    StringBuilder result = new StringBuilder();

    NodeList children = element.getChildNodes();
    for (int i = 0, len = children.getLength(); i < len; i++) {
      Node child = children.item(i);

      short type = child.getNodeType();
      if (type != Node.TEXT_NODE && type != Node.CDATA_SECTION_NODE) {
        continue;
      }

      String childText = child.getTextContent().trim();
      if (childText.isEmpty()) {
        continue;
      }

      // Only non-empty pieces are appended, so a non-empty buffer means a
      // separator is needed before the next piece.
      if (result.length() > 0) {
        result.append(" ");
      }
      result.append(childText);
    }

    return result.toString();
  }

I tested it with the following two XML inputs, since your XML lacks a closing </property> tag.

Here is the first one (I added extra elements, to show that they are not found):

  <rootNode>
  <property regex=".*" xpath=".*">
       <value>
            127.0.0.1
       </value>
       <anythingElse>Text here</anythingElse>
  </property>
  <anythingElse>Text here</anythingElse>
  <property regex=".*" xpath=".*">
  <value>
       val <![CDATA[
       <Valve className="org.tomcat.AccessLogValve" exclude="PASSWORD,pwd,pWord,ticket" enabled="true" serviceName="zohocrm" logDir="../logs" fileName="access" format="URI,&quot;PARAM&quot;,&quot;REFERRER&quot;,TIME_TAKEN,BYTES_OUT,STATUS,TIMESTAMP,METHOD,SESSION_ID,REMOTE_IP,&quot;INTERNAL_IP&quot;,&quot;USER_AGENT&quot;,PROTOCOL,SERVER_NAME,SERVER_PORT,BYTES_IN,ZUID,TICKET_DIGEST,THREAD_ID,REQ_ID"/>
       ]]> test
  </value>
  </property>
  </rootNode>

The second one has nested property elements (I added the missing element at the end):

  <property regex=".*" xpath=".*">
      <value>
          127.0.0.1
      </value>
      <property regex=".*" xpath=".*">
      <value>
          val <![CDATA[
          <Valve className="org.tomcat.AccessLogValve" exclude="PASSWORD,pwd,pWord,ticket" enabled="true" serviceName="zohocrm" logDir="../logs" fileName="access" format="URI,&quot;PARAM&quot;,&quot;REFERRER&quot;,TIME_TAKEN,BYTES_OUT,STATUS,TIMESTAMP,METHOD,SESSION_ID,REMOTE_IP,&quot;INTERNAL_IP&quot;,&quot;USER_AGENT&quot;,PROTOCOL,SERVER_NAME,SERVER_PORT,BYTES_IN,ZUID,TICKET_DIGEST,THREAD_ID,REQ_ID"/>
          ]]> test
      </value>
      </property>
  </property>

Upvotes: 3

Martin Honnen
Martin Honnen

Reputation: 167436

// Load the document and list every <property> element in document order.
Document doc = parseDocument("properties.xml");
NodeList properties = doc.getElementsByTagName("property");
int propertyCount = properties.getLength();
for (int index = 0; index < propertyCount; index++) {
     Element property = (Element) properties.item(index);
     // item(0) is the first <value> descendant, or null when the property has none.
     Element firstValue = (Element) property.getElementsByTagName("value").item(0);
     if (firstValue == null) {
        continue;
     }
     // getTextContent() returns the text as written in the file, surrounding white space included.
     System.out.println("Node " + (index + 1) + ": " + firstValue.getTextContent());
}

should help to access the contents of the element. Note that you additionally might need or might want to strip leading and trailing white space if you want the exact result you posted.

Upvotes: 0

Related Questions