CiucaS
CiucaS

Reputation: 2128

Google DiffMatchPatch to NodeView

I have been struggling for some time with an XML comparison feature. I have to compare two XML documents for differences and then display them as a tree view.

I've done some research and found the Google Diff Match Patch library, which looks like exactly what I need for the comparison part.

https://github.com/google/diff-match-patch

I've implemented it as following

/// <summary>
/// Diffs two texts by chaining three base-class calls: <c>diff_linesToChars</c>,
/// <c>diff_main</c> and <c>diff_charsToLines</c>.
/// </summary>
/// <param name="text1">First text handed to <c>diff_linesToChars</c>.</param>
/// <param name="text2">Second text handed to <c>diff_linesToChars</c>.</param>
/// <returns>
/// The list produced by <c>diff_main</c>, after <c>diff_charsToLines</c> has been applied to it.
/// </returns>
public List<Diff> DiffLineMode(string text1, string text2)
{
    // Slots 0 and 1 hold the two encoded texts, slot 2 the line table.
    // NOTE(review): presumably each distinct line is mapped to a single character,
    // as in the diff-match-patch line-mode recipe - confirm against the library.
    var encoded = base.diff_linesToChars(text1, text2);
    string encodedText1 = encoded[0].ToString();
    string encodedText2 = encoded[1].ToString();
    var lineTable = (IList<string>)encoded[2];

    // The last argument (false) is passed through to diff_main unchanged.
    var result = base.diff_main(encodedText1, encodedText2, false);

    // Rewrites the diff entries in place using the line table.
    base.diff_charsToLines(result, lineTable);

    return result;
}

Now my big challenge which I would need some help with is getting this difference into a treeview like object.

I've done a tree view class in which I parse both my Xml files used in comparison

/// <summary>
/// One node of the tree built from an XML document.
/// </summary>
public class TreeViewModel
{
    /// <summary>Numeric identifier of the node.</summary>
    public long ID { get; set; }

    /// <summary>Name of the XML element this node was built from.</summary>
    public string NodeName { get; set; }

    /// <summary>Absolute path of the element inside its document.</summary>
    public string AbsolutPath { get; set; }

    /// <summary>Text value of the element.</summary>
    public string Value { get; set; }

    /// <summary>Raw XML of the element.</summary>
    public string Xml { get; set; }

    /// <summary>
    /// Diff operation attached to the node; defaults to 2.
    /// NOTE(review): in the diff output this is meant to mirror, 2 marks unchanged text,
    /// 0 removed text and 1 added text - confirm the same encoding is intended here.
    /// </summary>
    public int Operation { get; set; } = 2;

    /// <summary>Child nodes; not initialized by this class, so it is null until assigned.</summary>
    public List<TreeViewModel> Children { get; set; }

    /// <summary>Element attributes as name/value pairs; null until assigned.</summary>
    public Dictionary<string, string> Attributes { get; set; }
}

What I would like to do now is make sure the Operation of each TreeView node matches the operation reported by DiffMatchPatch. My problem is that DiffMatchPatch compares the XML as plain strings and does not treat it as a node tree at all. As a result, for these two simple XML book files:

<?xml version="1.0"?>
<catalog>
   <book id="bk101">
      <author>Gambardella, Matthew</author>
      <title>XML Developer's Guide</title>
      <genre>Computer</genre>
      <price>44.95</price>
      <publish_date>2000-10-01</publish_date>
      <description>An in-depth look at creating applications 
      with XML.</description>
   </book>
   <book id="bk112">
      <author>Ralls, Kim</author>
      <title>Midnight Rain Update but should not reflect</title>
      <genre>Fantasy</genre>
      <price>5.95</price>
      <publish_date>2000-12-16</publish_date>
      <description>A former architect battles corporate zombies, 
      an evil sorceress, and her own childhood to become queen 
      of the world.</description>
   </book>
   <book id="bk999">
      <author>O'Brien, Tim</author>
      <title>Microsoft .NET: The Programming Bible</title>
      <genre>Computer</genre>
      <price>36.95</price>
      <publish_date>2000-12-09</publish_date>
      <description>Microsoft's .NET initiative is explored in 
      detail in this deep programmer's reference.</description>
   </book>  
   <book id="bk998">
      <author>Chris Tucker</author>
      <title>This Is my Churck</title>
      <genre>Music</genre>
      <price>49.99</price>
      <publish_date>2021-11-03</publish_date>
      <description>This is a custom description from Sebi Ciuca</description>
      <coauthor>Sebi</coauthor>
   </book>    
   <book id="bk103">
      <author>Corets, Eva</author>
      <title>Maeve Ascendant</title>
      <genre>Fantasy</genre>
      <price>5.95</price>
      <publish_date>2000-11-17</publish_date>
      <description>After the collapse of a nanotechnology 
      society in England, the young survivors lay the 
      foundation for a new society.</description>
   </book>
</catalog>

and

<?xml version="1.0"?>
<catalog>
   <book id="bk101">
      <author>Gambardella, Matthew</author>
      <title>XML Developer's Guide</title>
      <genre>Computer</genre>
      <price>44.95</price>
      <publish_date>2000-10-01</publish_date>
      <description>An in-depth look at creating applications 
      with XML.</description>
   </book>
   <book id="bk102">
      <author>Ralls, Kim</author>
      <title>Midnight Rain</title>
      <genre>Fantasy</genre>
      <price>5.95</price>
      <publish_date>2000-12-16</publish_date>
      <description>A former architect battles corporate zombies, 
      an evil sorceress, and her own childhood to become queen 
      of the world.</description>
   </book>
   <book id="bk103">
      <author>Corets, Eva</author>
      <title>Maeve Ascendant</title>
      <genre>Fantasy</genre>
      <price>5.95</price>
      <publish_date>2000-11-17</publish_date>
      <description>After the collapse of a nanotechnology 
      society in England, the young survivors lay the 
      foundation for a new society.</description>
   </book>
   <book id="bk997">
      <author>Corets, Eva</author>
      <title>Maeve Ascendant Part II</title>
      <genre>Fantasy</genre>
      <price>15.95</price>
      <publish_date>2022-01-17</publish_date>
      <description>After one collapse of a nanotechnology 
      society in England, the young survivors lay the 
      foundation for a new society.</description>
   </book>
</catalog>

it will output this

 "difference": [
    {
      "operation": 2,
      "text": "<catalog>\r\n  <book id=\"bk101\">\r\n    <author>Gambardella, Matthew</author>\r\n    <title>XML Developer's Guide</title>\r\n    <genre>Computer</genre>\r\n    <price>44.95</price>\r\n    <publish_date>2000-10-01</publish_date>\r\n    <description>An in-depth look at creating applications \r\n      with XML.</description>\r\n  </book>\r\n"
    },
    {
      "operation": 0,
      "text": "  <book id=\"bk112\">"
    },
    {
      "operation": 1,
      "text": "  <book id=\"bk102\">"
    },
    {
      "operation": 2,
      "text": "    <author>Ralls, Kim</author>\r\n    <genre>Fantasy</genre>\r\n    <price>5.95</price>\r\n    <publish_date>2000-12-16</publish_date>\r\n    <description>A former architect battles corporate zombies, \r\n      an evil sorceress, and her own childhood to become queen \r\n      of the world.</description>\r\n  </book>\r\n"
    },
    {
      "operation": 0,
      "text": "  <book id=\"bk999\">    <author>O'Brien, Tim</author>    <title>Microsoft .NET: The Programming Bible</title>    <genre>Computer</genre>    <price>36.95</price>    <publish_date>2000-12-09</publish_date>    <description>Microsoft's .NET initiative is explored in       detail in this deep programmer's reference.</description>  </book>  <book id=\"bk998\">    <author>Chris Tucker</author>    <title>This Is my Churck</title>    <genre>Music</genre>    <price>49.99</price>    <publish_date>2021-11-03</publish_date>    <description>This is a custom description from Sebi Ciuca</description>    <coauthor>Sebi</coauthor>  </book>"
    },
    {
      "operation": 2,
      "text": "  <book id=\"bk103\">\r\n    <author>Corets, Eva</author>\r\n    <title>Maeve Ascendant</title>\r\n    <genre>Fantasy</genre>\r\n    <price>5.95</price>\r\n    <publish_date>2000-11-17</publish_date>\r\n    <description>After the collapse of a nanotechnology \r\n"
    },
    {
      "operation": 1,
      "text": "      society in England, the young survivors lay the       foundation for a new society.</description>  </book>  <book id=\"bk997\">    <author>Corets, Eva</author>    <title>Maeve Ascendant Part II</title>    <genre>Fantasy</genre>    <price>15.95</price>    <publish_date>2022-01-17</publish_date>    <description>After one collapse of a nanotechnology "
    },
    {
      "operation": 2,
      "text": "      society in England, the young survivors lay the \r\n      foundation for a new society.</description>\r\n  </book>\r\n</catalog>"
    }
  ]

which looks something like this in a more "readable" view

enter image description here

Now, as you can see, if two or more nodes differ, they are "put" together into one "whole difference", which makes it very tricky for me to parse the result into a treeview-like object.

Here you can also see that for bk997, which is a whole new book but with almost the same description as bk103, it will consider the "added" text as beginning in the description of bk103 and ending before the description of bk997. I would like to translate this into the tree so that both nodes are marked as changed, but I don't know what I can use to match a specific TreeView node with a part or the whole of a difference.

Hope I made myself clear. I start to feel I started very wrong with GoogleDiffMatchPatch for what I need, but at this point it would be even harder for me to create a custom xml comparer based on nodes.

EDIT : Added also the Parsing from XML To TreeView

 /// <summary>
 /// Builds the tree for an XML document by parsing its root element, which receives ID 1.
 /// </summary>
 /// <param name="xmlDocument">Document whose <c>Root</c> element is parsed.</param>
 /// <returns>The tree node produced by <c>ParseNode</c> for the root element.</returns>
 public TreeViewModel ToTreeView(XDocument xmlDocument)
     => ParseNode(xmlDocument.Root, 1);

  /// <summary>
  /// Recursively converts an XML element and its descendant elements into a
  /// <see cref="TreeViewModel"/> subtree.
  /// </summary>
  /// <param name="node">Element to convert.</param>
  /// <param name="parentId">
  /// ID assigned to the node created for <paramref name="node"/> (despite the name, it is
  /// this node's own ID). Children receive <c>parentId * 10 + index</c>.
  /// </param>
  /// <returns>
  /// The node for <paramref name="node"/>. Elements with child elements get a populated
  /// <c>Children</c> list and an empty <c>Value</c>; leaf elements keep their text in
  /// <c>Value</c> and leave <c>Children</c> unassigned.
  /// </returns>
  private TreeViewModel ParseNode(XElement node, int parentId)
  {
      var hasChildElements = node.HasElements;

      var treeNode = new TreeViewModel
      {
          ID = parentId,
          // A container element carries no value of its own; XElement.Value would
          // return the concatenated text of all descendants.
          Value = hasChildElements ? string.Empty : node.Value,
          AbsolutPath = node.GetAbsoluteXPath(),
          NodeName = node.Name.ToString(),
          Attributes = node.GetAttributes(),
          Xml = node.ToString()
      };

      if (!hasChildElements)
      {
          return treeNode;
      }

      var children = new List<TreeViewModel>();
      var index = 0;
      foreach (var element in node.Elements())
      {
          // NOTE(review): this ID scheme can collide once an element has more than ten
          // children and can overflow int on deep trees - confirm whether IDs must be unique.
          children.Add(ParseNode(element, parentId * 10 + index));
          index++;
      }

      treeNode.Children = children;
      return treeNode;
  }

Added git repository : https://github.com/SebiCiuca/DiffMatchPatchToNodeView

Upvotes: 2

Views: 560

Answers (1)

jdweng
jdweng

Reputation: 34421

See if the following helps:

using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Xml;
using System.Xml.Serialization;

namespace ConsoleApplication11
{
    /// <summary>
    /// Console entry point that builds a <see cref="TreeViewModel"/> from two fixed XML files.
    /// </summary>
    class Program
    {
        // Paths of the two catalog files handed to the model.
        const string FirstCatalogPath = @"c:\temp\test.xml";
        const string SecondCatalogPath = @"c:\temp\test1.xml";

        /// <summary>Constructs the model; the result is not used further.</summary>
        static void Main(string[] args)
        {
            var model = new TreeViewModel(FirstCatalogPath, SecondCatalogPath);
        }
    }
    /// <summary>
    /// Loads two catalog XML files and indexes the books of each by their <c>id</c>.
    /// </summary>
    public class TreeViewModel
    {
        public string AbsolutPath { get; set; }
        public string Value { get; set; }
        public int Operation { get; set; }

        /// <summary>Catalog deserialized from the first file.</summary>
        public Catalog catalog1 { get;set;}

        /// <summary>Catalog deserialized from the second file.</summary>
        public Catalog catalog2 { get;set;}

        /// <summary>Books of <see cref="catalog1"/> keyed by id (first book wins on duplicate ids).</summary>
        public Dictionary<string, Book> catalog1Dict { get; set; }

        /// <summary>Books of <see cref="catalog2"/> keyed by id (first book wins on duplicate ids).</summary>
        public Dictionary<string, Book> catalog2Dict { get; set; }

        /// <summary>
        /// Deserializes both files and builds the per-id lookup dictionaries.
        /// </summary>
        /// <param name="filename1">Path of the first catalog XML file.</param>
        /// <param name="filename2">Path of the second catalog XML file.</param>
        public TreeViewModel(string filename1, string filename2)
        {
            XmlSerializer serializer = new XmlSerializer(typeof(Catalog));

            catalog1 = LoadCatalog(serializer, filename1);
            catalog1Dict = IndexById(catalog1);

            catalog2 = LoadCatalog(serializer, filename2);
            catalog2Dict = IndexById(catalog2);
        }

        // Deserializes one catalog file; the reader is disposed so the file handle is
        // released as soon as the document has been read.
        private static Catalog LoadCatalog(XmlSerializer serializer, string filename)
        {
            using (XmlReader reader = XmlReader.Create(filename))
            {
                return (Catalog)serializer.Deserialize(reader);
            }
        }

        // Groups the books by id and keeps the first book of each group.
        private static Dictionary<string, Book> IndexById(Catalog catalog)
        {
            return catalog.books.GroupBy(x => x.id).ToDictionary(x => x.Key, y => y.FirstOrDefault());
        }
    }
    /// <summary>
    /// Maps the <c>catalog</c> root element; each <c>book</c> child element becomes one
    /// entry of <see cref="books"/>.
    /// </summary>
    [XmlRoot(ElementName = "catalog")]
    public class Catalog
    {
        /// <summary>Books read from the repeated <c>book</c> elements.</summary>
        [XmlElement(ElementName = "book")]
        public List<Book> books { get; set; }
    }
    /// <summary>
    /// Maps one <c>book</c> element: <c>id</c> is read from an XML attribute, every other
    /// property from a child element of the same name.
    /// </summary>
    public class Book
    {
        /// <summary>Book identifier, taken from the <c>id</c> attribute.</summary>
        [XmlAttribute]
        public string id { get; set; }

        public string author { get; set; }

        public string title { get; set; }

        public string genre { get; set; }

        public decimal price { get; set; }

        public DateTime publish_date { get; set; }

        public string description { get; set; }
    }
}

Upvotes: -1

Related Questions