DrCoolZic
DrCoolZic

Reputation: 39

Parsing XML in C# with variable children structure

I need to parse an XML file with the following structure: EDITED

<?xml version="1.0" encoding="windows-1252" ?>
<TABLE>
    <COMPO>
          <alim_code> 1000 </alim_code>
          <const_code> 56700 </const_code>
          <teneur> 0 </teneur>
          <min missing=" " />
          <max missing=" " />
          <code_confiance> D </code_confiance>
          <source_code missing="." />
    </COMPO>
    <COMPO>
          <alim_code> 1000 </alim_code>
          <const_code> 60000 </const_code>
          <teneur> 37,5 </teneur>
          <min>         37,3 </min>
          <max>         37,6 </max>
          <code_confiance> D </code_confiance>
          <source_code> 38 </source_code>
    </COMPO>
</TABLE>

As you can see, several fields are described differently depending on whether the value is known or missing. I have tried to use ReadXml() on a DataSet, but it does not seem to work on this "variable structure". It looks like the solution is probably to use XDocument and LINQ, but I am not familiar with LINQ and I did not manage to write working code.

I would appreciate it if someone could show me code to parse and print (or, even better, add to a database) the content of this kind of XML file.

Upvotes: 0

Views: 356

Answers (3)

DrCoolZic
DrCoolZic

Reputation: 39

Working solution based on Ehsan-Sajjad proposal :)

/// <summary>
/// Maps the <c>min</c> element, which carries either text content or a
/// <c>missing</c> attribute.
/// </summary>
[XmlRoot("min")]
public class Min
{
    /// <summary>Content of the <c>missing</c> attribute; null when the element has a value.</summary>
    [XmlAttribute("missing")]
    public string Missing { get; set; }

    /// <summary>Text content of the element; null when the element has none.</summary>
    [XmlText]
    public string Value { get; set; }
}

/// <summary>
/// Maps the <c>max</c> element, which carries either text content or a
/// <c>missing</c> attribute.
/// </summary>
[XmlRoot("max")]
public class Max
{
    /// <summary>Content of the <c>missing</c> attribute; null when the element has a value.</summary>
    [XmlAttribute("missing")]
    public string Missing { get; set; }

    /// <summary>Text content of the element; null when the element has none.</summary>
    [XmlText]
    public string Value { get; set; }
}

/// <summary>
/// Maps the <c>source_code</c> element, which carries either text content or a
/// <c>missing</c> attribute.
/// </summary>
[XmlRoot("source_code")]
public class Source_code
{
    /// <summary>Content of the <c>missing</c> attribute; null when the element has a value.</summary>
    [XmlAttribute("missing")]
    public string Missing { get; set; }

    /// <summary>Text content of the element; null when the element has none.</summary>
    [XmlText]
    public string Value { get; set; }
}

/// <summary>
/// Maps one <c>COMPO</c> element. Plain child elements are kept as strings;
/// <c>min</c>, <c>max</c> and <c>source_code</c> use wrapper types because they
/// may carry a <c>missing</c> attribute instead of a value.
/// </summary>
[XmlRoot("COMPO")]
public class COMPO
{
    /// <summary>Text of the <c>alim_code</c> child element.</summary>
    [XmlElement("alim_code")]
    public string Alim_code { get; set; }

    /// <summary>Text of the <c>const_code</c> child element.</summary>
    [XmlElement("const_code")]
    public string Const_code { get; set; }

    /// <summary>Text of the <c>teneur</c> child element.</summary>
    [XmlElement("teneur")]
    public string Teneur { get; set; }

    /// <summary>The <c>min</c> child element.</summary>
    [XmlElement("min")]
    public Min Min { get; set; }

    /// <summary>The <c>max</c> child element.</summary>
    [XmlElement("max")]
    public Max Max { get; set; }

    /// <summary>Text of the <c>code_confiance</c> child element.</summary>
    [XmlElement("code_confiance")]
    public string Code_confiance { get; set; }

    /// <summary>The <c>source_code</c> child element.</summary>
    [XmlElement("source_code")]
    public Source_code Source_code { get; set; }
}

/// <summary>
/// Maps the <c>TABLE</c> root element, which holds a flat sequence of
/// <c>COMPO</c> children.
/// </summary>
[XmlRoot("TABLE")]
public class TABLE
{
    /// <summary>Every <c>COMPO</c> child element, in document order.</summary>
    [XmlElement("COMPO")]
    public List<COMPO> COMPO { get; set; }
}

/// <summary>
/// Reads <c>test.xml</c> into a string and deserializes it into a <see cref="TABLE"/>.
/// The deserialized object is only stored in a local variable; nothing is returned.
/// </summary>
private void ReadCompoWithSerializer()
{
    // Load the whole document as text first, then hand it to the serializer.
    string content = File.ReadAllText("test.xml");
    var deserializer = new XmlSerializer(typeof(TABLE));

    TABLE result = null;
    using (StringReader input = new StringReader(content))
    {
        object root = deserializer.Deserialize(input);
        result = (TABLE)root;
    }
}

Inside the Min/Max/Source_code objects, Value contains the value when one is present (in that case Missing is null); otherwise Value is null (in that case Missing contains a string).


Working solution based on jdweng proposal

/// <summary>
/// Flat record for one <c>COMPO</c> entry; every field is stored as a string
/// and named after the XML element it is read from.
/// </summary>
public class Compo
{
    /// <summary>Text of the <c>alim_code</c> element.</summary>
    public string alim_code { get; set; }

    /// <summary>Text of the <c>const_code</c> element.</summary>
    public string const_code { get; set; }

    /// <summary>Text of the <c>teneur</c> element.</summary>
    public string teneur { get; set; }

    /// <summary>Text of the <c>min</c> element.</summary>
    public string min { get; set; }

    /// <summary>Text of the <c>max</c> element.</summary>
    public string max { get; set; }

    /// <summary>Text of the <c>code_confiance</c> element.</summary>
    public string code_confiance { get; set; }

    /// <summary>Text of the <c>source_code</c> element.</summary>
    public string source_code { get; set; }
}

/// <summary>
/// Loads <c>test.xml</c> and projects every <c>COMPO</c> element onto a <see cref="Compo"/>.
/// </summary>
/// <remarks>
/// <c>min</c>, <c>max</c> and <c>source_code</c> are set to null when their element
/// carries a <c>missing</c> attribute or is absent altogether. The resulting list is
/// only stored in a local variable; nothing is returned.
/// </remarks>
private void ReadCompoWithLinq()
{
    const string FILENAME = "test.xml";
    XDocument doc = XDocument.Load(FILENAME);

    List<Compo> compos = doc.Descendants("COMPO").Select(x => new Compo()
    {
        alim_code = (string)x.Element("alim_code"),
        const_code = (string)x.Element("const_code"),
        teneur = (string)x.Element("teneur"),
        min = ValueUnlessMissing(x, "min"),
        max = ValueUnlessMissing(x, "max"),
        code_confiance = (string)x.Element("code_confiance"),
        source_code = ValueUnlessMissing(x, "source_code"),
    }).ToList();
}

/// <summary>
/// Returns the text of the named child element, or null when the child is absent
/// or flagged with a <c>missing</c> attribute.
/// </summary>
/// <param name="parent">Element whose child is looked up.</param>
/// <param name="name">Local name of the child element.</param>
private static string ValueUnlessMissing(XElement parent, string name)
{
    // Element() returns null when there is no such child; dereferencing it
    // directly would throw a NullReferenceException.
    XElement child = parent.Element(name);
    if (child == null || child.Attribute("missing") != null)
    {
        return null;
    }

    return (string)child;
}

The min, max and source_code properties contain the string value when one is provided; when the XML element carries the "missing" attribute instead, the property is null.

Upvotes: -1

jdweng
jdweng

Reputation: 34421

I would use LINQ to XML. Some items can be null, and you need to handle them properly. See the code below.

using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Xml;
using System.Xml.Linq;
using System.Globalization;

namespace ConsoleApplication1
{
    /// <summary>
    /// Loads the sample XML file and converts every <c>COMPO</c> element into a
    /// typed <see cref="Compo"/>.
    /// </summary>
    class Program
    {
        const string FILENAME = @"c:\temp\test.xml";

        // The data writes decimals with a comma ("37,5"). Under the invariant
        // culture the comma is a group separator, so "37,5" would be read as 375.
        // A dedicated format with ',' as the decimal separator avoids that.
        static readonly NumberFormatInfo DecimalCommaFormat = new NumberFormatInfo
        {
            NumberDecimalSeparator = ",",
            NumberGroupSeparator = "."
        };

        // Group separators are deliberately not allowed when parsing decimals.
        const NumberStyles DecimalStyle =
            NumberStyles.AllowLeadingWhite | NumberStyles.AllowTrailingWhite |
            NumberStyles.AllowLeadingSign | NumberStyles.AllowDecimalPoint;

        static void Main(string[] args)
        {
            XDocument doc = XDocument.Load(FILENAME);
            List<Compo> compos = ParseCompos(doc);
        }

        /// <summary>
        /// Converts every <c>COMPO</c> descendant of <paramref name="doc"/> into a <see cref="Compo"/>.
        /// </summary>
        /// <param name="doc">Document to read from.</param>
        /// <returns>One entry per <c>COMPO</c> element, in document order.</returns>
        /// <exception cref="FormatException">A non-empty numeric field cannot be parsed.</exception>
        internal static List<Compo> ParseCompos(XDocument doc)
        {
            return doc.Descendants("COMPO").Select(x => new Compo() {
                alim_code = (int?)x.Element("alim_code"),
                const_code = (int?)x.Element("const_code"),
                teneur = ParseDecimal(x.Element("teneur")),
                // min and max are read from their own elements, not from teneur.
                min = ParseDecimal(x.Element("min")),
                max = ParseDecimal(x.Element("max")),
                code_confiance = (string)x.Element("code_confiance"),
                source_code = ParseInt(x.Element("source_code"))
            }).ToList();
        }

        /// <summary>
        /// Parses a comma-decimal number; returns null when the element is absent or has no content.
        /// </summary>
        static decimal? ParseDecimal(XElement element)
        {
            string text = (string)element;
            if (string.IsNullOrWhiteSpace(text))
            {
                return null;
            }

            return decimal.Parse(text, DecimalStyle, DecimalCommaFormat);
        }

        /// <summary>
        /// Parses an integer; returns null when the element is absent or has no content.
        /// </summary>
        static int? ParseInt(XElement element)
        {
            string text = (string)element;
            if (string.IsNullOrWhiteSpace(text))
            {
                return null;
            }

            // NumberStyles.Integer tolerates surrounding whitespace and a leading sign.
            return int.Parse(text, NumberStyles.Integer, CultureInfo.InvariantCulture);
        }
    }

    /// <summary>
    /// Typed view of one <c>COMPO</c> element; a null property means the
    /// corresponding element was absent or empty.
    /// </summary>
    public class Compo
    {
        public int? alim_code {get; set;}
        public int? const_code {get; set;}
        public decimal? teneur {get; set;}
        public decimal? min {get; set;}
        public decimal? max {get; set;}
        public string code_confiance {get; set;}
        public int? source_code { get; set; }
    }
}

Upvotes: 1

Ehsan Sajjad
Ehsan Sajjad

Reputation: 62488

LINQ will not be helpful here. What you can do is create DTO classes for your XML, based on a sample document that contains all the attributes and nodes that can appear in it.

and then deserialize the XML into that type.

Generated from the XML above, your classes would look something like the following:

/// <summary>
/// Maps the <c>min</c> element, which carries either text content
/// (<c>&lt;min&gt; 37,3 &lt;/min&gt;</c>) or a <c>missing</c> attribute.
/// </summary>
[XmlRoot(ElementName="min")]
public class Min {
    /// <summary>Content of the <c>missing</c> attribute; null when the attribute is absent.</summary>
    [XmlAttribute(AttributeName="missing")]
    public string Missing { get; set; }

    /// <summary>
    /// Text content of the element; null when the element is empty. Without this
    /// member the serializer would drop the value of a populated element.
    /// </summary>
    [XmlText]
    public string Value { get; set; }
}

/// <summary>
/// Maps the <c>max</c> element, which carries either text content
/// (<c>&lt;max&gt; 37,6 &lt;/max&gt;</c>) or a <c>missing</c> attribute.
/// </summary>
[XmlRoot(ElementName="max")]
public class Max {
    /// <summary>Content of the <c>missing</c> attribute; null when the attribute is absent.</summary>
    [XmlAttribute(AttributeName="missing")]
    public string Missing { get; set; }

    /// <summary>
    /// Text content of the element; null when the element is empty. Without this
    /// member the serializer would drop the value of a populated element.
    /// </summary>
    [XmlText]
    public string Value { get; set; }
}

/// <summary>
/// Maps the <c>source_code</c> element, which carries either text content
/// (<c>&lt;source_code&gt; 38 &lt;/source_code&gt;</c>) or a <c>missing</c> attribute.
/// </summary>
[XmlRoot(ElementName="source_code")]
public class Source_code {
    /// <summary>Content of the <c>missing</c> attribute; null when the attribute is absent.</summary>
    [XmlAttribute(AttributeName="missing")]
    public string Missing { get; set; }

    /// <summary>
    /// Text content of the element; null when the element is empty. Without this
    /// member the serializer would drop the value of a populated element.
    /// </summary>
    [XmlText]
    public string Value { get; set; }
}

/// <summary>
/// Maps one <c>COMPO</c> element. Plain child elements are kept as strings;
/// <c>min</c>, <c>max</c> and <c>source_code</c> are mapped to their own types.
/// </summary>
[XmlRoot("COMPO")]
public class COMPO
{
    /// <summary>Text of the <c>alim_code</c> child element.</summary>
    [XmlElement("alim_code")]
    public string Alim_code { get; set; }

    /// <summary>Text of the <c>const_code</c> child element.</summary>
    [XmlElement("const_code")]
    public string Const_code { get; set; }

    /// <summary>Text of the <c>teneur</c> child element.</summary>
    [XmlElement("teneur")]
    public string Teneur { get; set; }

    /// <summary>The <c>min</c> child element.</summary>
    [XmlElement("min")]
    public Min Min { get; set; }

    /// <summary>The <c>max</c> child element.</summary>
    [XmlElement("max")]
    public Max Max { get; set; }

    /// <summary>Text of the <c>code_confiance</c> child element.</summary>
    [XmlElement("code_confiance")]
    public string Code_confiance { get; set; }

    /// <summary>The <c>source_code</c> child element.</summary>
    [XmlElement("source_code")]
    public Source_code Source_code { get; set; }
}

Now you can use the `XmlSerializer` class to deserialize it:

// The sample document's root element is <TABLE>. A serializer built from
// typeof(List<COMPO>) alone expects a root named "ArrayOfCOMPO" and rejects the
// document, so the root name is overridden explicitly.
// Serializers created through this constructor overload are not cached by the
// framework; create the instance once and reuse it.
XmlSerializer serializer = new XmlSerializer(typeof(List<COMPO>), new XmlRootAttribute("TABLE"));
List<COMPO> compos = null;
using (var reader = new StringReader(xml))
{
   compos = (List<COMPO>)serializer.Deserialize(reader);
}

EDIT:

In that case add another type for Table which would be :

/// <summary>
/// Maps the <c>TABLE</c> root element, which holds a flat sequence of
/// <c>COMPO</c> children.
/// </summary>
// XML names are case-sensitive: the document root is <TABLE>, so the root name
// must be spelled in upper case or deserialization fails.
[XmlRoot(ElementName="TABLE")]
public class Table {
    /// <summary>Every <c>COMPO</c> child element, in document order.</summary>
    [XmlElement(ElementName="COMPO")]
    public List<COMPO> COMPO { get; set; }
}

and now adjust the deserialization code accordingly:

// Deserialize the whole document into the Table root type.
XmlSerializer serializer = new XmlSerializer(typeof(Table));
Table table = null;
using (var reader = new StringReader(xml))
{
   // Assign to the variable declared above.
   table = (Table)serializer.Deserialize(reader);
}

Upvotes: 3

Related Questions