Reputation: 24998
<?xml version="1.0" encoding="UTF-8" ?>
<!DOCTYPE people SYSTEM "validator.dtd">
<people>
<student>
<name>John</name>
<course>Computer Technology</course>
<semester>6</semester>
<scheme>E</scheme>
</student>
<student>
<name>Foo</name>
<course>Industrial Electronics</course>
<semester>6</semester>
<scheme>E</scheme>
</student>
</people>
This is my XML document. I have made a tree representation of this.
Is this tree representation correct?
Upvotes: 1
Views: 108
Reputation: 61128
You can get a pretty good idea by just printing a DOM tree:
/**
 * Parses a small in-memory XML document into a DOM tree and prints every
 * node under the root element via {@code printNodes}.
 *
 * @param args command-line arguments (unused)
 * @throws UnsupportedEncodingException declared for backward compatibility
 * @throws IOException if the parser fails to read the input stream
 * @throws ParserConfigurationException if no DocumentBuilder can be created
 * @throws SAXException if the XML is not well-formed
 */
public static void main(String[] args) throws UnsupportedEncodingException, IOException, ParserConfigurationException, SAXException {
    final String xml = "<?xml version=\"1.0\" encoding=\"UTF-8\" ?>"
            + "<people>"
            + " <!-- a comment -->"
            + " <student>"
            + " <name>John</name>"
            + " <!-- a comment -->"
            + " <course>Computer Technology</course>"
            + " <semester>6</semester>"
            + " <scheme>E</scheme>"
            + " </student>"
            + ""
            + " <student>"
            + " <name>Foo</name>"
            + " <course>Industrial Electronics</course>"
            + " <semester>6</semester>"
            + " <scheme>E</scheme>"
            + " </student>"
            + "</people>";
    // The XML declaration above announces UTF-8, so encode the bytes as UTF-8
    // explicitly instead of relying on the platform default charset.
    final byte[] xmlBytes = xml.getBytes(java.nio.charset.StandardCharsets.UTF_8);
    final Document document = DocumentBuilderFactory.newInstance()
            .newDocumentBuilder()
            .parse(new ByteArrayInputStream(xmlBytes));
    // Start at the root element (<people>) with no indentation.
    printNodes(document.getDocumentElement(), 0);
}
/**
 * Prints {@code node} and, for element nodes, all of its descendants to
 * standard output, one line per node, indented with one tab per level.
 *
 * <p>Element nodes are announced with their name and then recursed into.
 * Comment nodes are printed as comments. Every other node type is printed
 * under the "Text node" label.
 *
 * @param node  the DOM node to print
 * @param depth the nesting level of {@code node}; controls the indentation
 */
private static void printNodes(final Node node, final int depth) {
    // One tab character per nesting level.
    final StringBuilder tabs = new StringBuilder();
    for (int remaining = depth; remaining > 0; --remaining) {
        tabs.append("\t");
    }
    final String indent = tabs.toString();

    final short type = node.getNodeType();
    if (type != Node.ELEMENT_NODE) {
        // Leaf case: anything that is not a comment is reported as text.
        final String label = (type == Node.COMMENT_NODE) ? "Comment node: \"" : "Text node: \"";
        System.out.println(indent + label + node.getTextContent() + "\"");
        return;
    }

    System.out.println(indent + "Going into " + node.getNodeName());
    final NodeList children = node.getChildNodes();
    int index = 0;
    while (index < children.getLength()) {
        printNodes(children.item(index), depth + 1);
        index++;
    }
}
The output of this is:
Going into people
Text node: " "
Comment node: " a comment "
Text node: " "
Going into student
Text node: " "
Going into name
Text node: "John"
Text node: " "
Comment node: " a comment "
Text node: " "
Going into course
Text node: "Computer Technology"
Text node: " "
Going into semester
Text node: "6"
Text node: " "
Going into scheme
Text node: "E"
Text node: " "
Text node: " "
Going into student
Text node: " "
Going into name
Text node: "Foo"
Text node: " "
Going into course
Text node: "Industrial Electronics"
Text node: " "
Going into semester
Text node: "6"
Text node: " "
Going into scheme
Text node: "E"
Text node: " "
As you can see there are text nodes everywhere, in between the visible nodes. This is because you could, in theory, have text around a child node - for example
<student>
some random text
<course>Computer</course>
some more text
</student>
So the DOM tree needs to take this into account. If the XML were not pretty-printed but written as a single line with no whitespace between the tags, those whitespace-only text nodes would not appear at all.
Fiddle around with the document and see what impact it has on the output.
Upvotes: 3