Reputation: 39
I have a well-formed XML file that I want to parse in order to retrieve its `line` elements,
but I keep getting an empty `XmlNodeList` (`blocks`). What am I doing wrong?
Thanks!
// Load the XML file and select every `block` element under /document/page.
XmlDocument doc = new XmlDocument();
doc.Load("file.xml");
// This is the query that comes back empty.
XmlNodeList blocks = doc.DocumentElement.SelectNodes("/document/page/block");
foreach (XmlNode block in blocks)
{
    //keep iterating over `par` elements in `text`
}
file.xml:
<document xmlns="http://www.abbyy.com/FineReader_xml/FineReader10-schema-v1.xml" version="1.0" producer="ABBYY FineReader Engine 11" languages="" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://www.abbyy.com/FineReader_xml/FineReader10-schema-v1.xml http://www.abbyy.com/FineReader_xml/FineReader10-schema-v1.xml">
<page width="1043" height="653" resolution="300" originalCoords="1">
<block blockType="Text" blockName="" l="43" t="27" r="736" b="147"><region><rect l="641" t="27" r="735" b="28"/><rect l="520" t="28" r="735" b="29"/><rect l="399" t="29" r="735" b="30"/><rect l="277" t="30" r="735" b="31"/><rect l="156" t="31" r="735" b="32"/><rect l="43" t="32" r="735" b="83"/><rect l="43" t="83" r="736" b="86"/><rect l="44" t="86" r="736" b="142"/><rect l="44" t="142" r="643" b="143"/><rect l="44" t="143" r="521" b="144"/><rect l="44" t="144" r="400" b="145"/><rect l="44" t="145" r="279" b="146"/><rect l="44" t="146" r="157" b="147"/></region>
<region>
</region>
<text>
<par lineSpacing="816">
<line baseline="58" l="314" t="28" r="734" b="55">
<formatting lang="EnglishUnitedStates">Information priveid</formatting>
</line>
<line baseline="92" l="377" t="61" r="673" b="89">
<formatting lang="EnglishUnitedStates">Canyouread this</formatting>
</line>
</par>
<par>
<line baseline="146" l="45" t="110" r="679" b="146">
<formatting lang="EnglishUnitedStates"> This can not be happening?</formatting>
</line>
</par>
</text>
</block>
</page>
</document>
Upvotes: 0
Views: 105
Reputation: 34421
Try LINQ to XML (`XDocument`).
The first version below uses anonymous types:
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Xml;
using System.Xml.Linq;
namespace ConsoleApplication16
{
    class Program
    {
        // Path of the XML file to parse.
        const string FILENAME = @"c:\temp\test.xml";

        /// <summary>
        /// Loads <see cref="FILENAME"/> and projects every <c>par</c> element, together
        /// with its <c>line</c> children, into a list of anonymous objects.
        /// </summary>
        /// <param name="args">Command-line arguments (unused).</param>
        static void Main(string[] args)
        {
            XDocument doc = XDocument.Load(FILENAME);

            // Root is the document element even when a comment, DOCTYPE or processing
            // instruction precedes it; casting FirstNode to XElement throws in that case.
            XNamespace ns = doc.Root.Name.Namespace;

            var pars = doc.Descendants(ns + "par").Select(x => new {
                // Nullable cast: yields null when the attribute is absent.
                lineSpacing = (int?)x.Attribute("lineSpacing"),
                lines = x.Elements(ns + "line").Select(y => {
                    // Look the child up once; a line without a `formatting` element
                    // yields null text and null lang instead of a NullReferenceException.
                    XElement formattingElement = y.Element(ns + "formatting");
                    return new {
                        baseline = (int)y.Attribute("baseline"),
                        l = (int)y.Attribute("l"),
                        t = (int)y.Attribute("t"),
                        r = (int)y.Attribute("r"),
                        b = (int)y.Attribute("b"),
                        formatting = (string)formattingElement,
                        lang = formattingElement == null
                            ? null
                            : (string)formattingElement.Attribute("lang")
                    };
                }).ToList()
            }).ToList();
        }
    }
}
The second version below uses class structures instead:
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Xml;
using System.Xml.Linq;
namespace ConsoleApplication16
{
    class Program
    {
        // Path of the XML file to parse.
        const string FILENAME = @"c:\temp\test.xml";

        /// <summary>
        /// Loads <see cref="FILENAME"/> and stores one <see cref="Document"/> per
        /// <c>par</c> element in <see cref="Document.documents"/>.
        /// </summary>
        /// <param name="args">Command-line arguments (unused).</param>
        static void Main(string[] args)
        {
            XDocument doc = XDocument.Load(FILENAME);

            // Root is the document element even when a comment, DOCTYPE or processing
            // instruction precedes it; casting FirstNode to XElement throws in that case.
            XNamespace ns = doc.Root.Name.Namespace;

            Document.documents = doc.Descendants(ns + "par").Select(x => new Document() {
                // Nullable cast: yields null when the attribute is absent.
                lineSpacing = (int?)x.Attribute("lineSpacing"),
                lines = x.Elements(ns + "line").Select(y => {
                    // Look the child up once; a line without a `formatting` element
                    // yields null text and null lang instead of a NullReferenceException.
                    XElement formattingElement = y.Element(ns + "formatting");
                    return new Line() {
                        baseline = (int)y.Attribute("baseline"),
                        l = (int)y.Attribute("l"),
                        t = (int)y.Attribute("t"),
                        r = (int)y.Attribute("r"),
                        b = (int)y.Attribute("b"),
                        formatting = (string)formattingElement,
                        lang = formattingElement == null
                            ? null
                            : (string)formattingElement.Attribute("lang")
                    };
                }).ToList()
            }).ToList();
        }
    }

    /// <summary>
    /// Data read from one <c>par</c> element of the XML file.
    /// </summary>
    public class Document
    {
        /// <summary>All parsed <c>par</c> elements; replaced by <c>Program.Main</c>.</summary>
        public static List<Document> documents = new List<Document>();

        /// <summary>Value of the <c>lineSpacing</c> attribute, or null when it is absent.</summary>
        public int? lineSpacing { get; set; }

        /// <summary>The <c>line</c> children of this <c>par</c> element.</summary>
        public List<Line> lines { get; set; }
    }

    /// <summary>
    /// Data read from one <c>line</c> element of the XML file.
    /// </summary>
    public class Line
    {
        /// <summary>Value of the <c>baseline</c> attribute.</summary>
        public int baseline { get; set; }

        // NOTE(review): l/t/r/b are presumably the left/top/right/bottom edges of the
        // line's bounding box - confirm against the ABBYY FineReader schema.

        /// <summary>Value of the <c>l</c> attribute.</summary>
        public int l { get; set; }

        /// <summary>Value of the <c>t</c> attribute.</summary>
        public int t { get; set; }

        /// <summary>Value of the <c>r</c> attribute.</summary>
        public int r { get; set; }

        /// <summary>Value of the <c>b</c> attribute.</summary>
        public int b { get; set; }

        /// <summary>Text of the first <c>formatting</c> child, or null when there is none.</summary>
        public string formatting { get; set; }

        /// <summary>The <c>lang</c> attribute of the first <c>formatting</c> child, or null.</summary>
        public string lang { get; set; }
    }
}
Upvotes: 0
Reputation: 556
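It's because your `<document>` element declares a default namespace, so every element in the file belongs to that namespace.
An XPath name without a prefix only matches elements that are in no namespace, which is why your query returns nothing.
Register a prefix for the namespace and use it in the query: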
// Bind the prefix "nsp" to the document's namespace URI so the XPath query can
// address the namespaced `page` and `block` elements.
XmlNamespaceManager namespaces = new XmlNamespaceManager(doc.NameTable);
namespaces.AddNamespace(
    "nsp",
    "http://www.abbyy.com/FineReader_xml/FineReader10-schema-v1.xml");

XmlNodeList blocks = doc.SelectNodes("//nsp:page/nsp:block", namespaces);
Or, if possible, remove the namespace from your document and use your original code.
Upvotes: 2