Raja
Raja

Reputation: 57

Get unique Attributes from XML using VBA

I have the below XML. I need to get unique attributes & Tags from the XML. Could someone guide me in fetching only the unique attributes from XML.Thanks in advance!

<Elements>
<Details>
    <Name>ABCD</Name>
    <Address>1D23ABC</Address>
</Details>  
<Dept num = "12S3" >
    <Deptname>ITS</Deptname>
    <ID>A12S3</ID>
    <ID1>A12W3</ID1>
</Dept> 
    <Dept num = "123" >
    <Deptname>IT1</Deptname>
    <ID>A1231</ID>
    <ID1>A1213</ID1>
</Dept> 

My output should be as below

Elements
Details    Name
           Address

Dept       Num
           Deptname
           ID
           ID1

Below is the VBA code I am using:

Set mainnode = oXMLFile.SelectNodes("//Elements")

 For Each node In mainnode
  Dim child As Object
    i = 0
    For Each child In node.ChildNodes
            Worksheets("sheet1").Range("C" & i + 1).Value = child.BaseName
            Dim kiddo As Object
                For Each kiddo In child.ChildNodes
                Debug.Print kiddo.BaseName
             Worksheets("sheet1").Range("D" & i + 1).Value =                                                     
                kiddo.BaseName
                i = i + 1
                Next kiddo
            Next child
            Next node

Current Output:

Elements
Details    Name
           Address

Dept       Num
           Deptname
           ID
           ID1
Dept       Num
           Deptname
           ID
           ID1

Upvotes: 0

Views: 369

Answers (1)

QHarr
QHarr

Reputation: 84465

VERSION 1 regex:

With a regex (not generally recommended for working with XML/HTML)

Option Explicit
Public Sub testing()
    Dim xmlDoc As New MSXML2.DOMDocument60
    Set xmlDoc = New MSXML2.DOMDocument60
    xmlDoc.Load "C:\Users\User\Desktop\Test.xml"
    Dim arr()  As String, dict As Object, key As Variant, i As Long
    arr = Split(GetTags(xmlDoc.XML), "##"): Set dict = CreateObject("Scripting.Dictionary")
    For i = LBound(arr) To UBound(arr)
        dict(Replace(Replace(arr(i), Chr$(60), vbNullString), Chr$(62), vbNullString)) = 1
    Next i
    ActiveSheet.Range("A1").Resize(dict.Count, 1) = Application.WorksheetFunction.Transpose(dict.keys)
End Sub

Public Function GetTags(ByVal xmlString As String) As Variant
    Dim arr() As String, i As Long, matches As Object, re As Object
    Set re = CreateObject("VBScript.RegExp")
    With re
        .Global = True
        .MultiLine = True
        .IgnoreCase = False
        .Pattern = "<([^\/].*?)>"

        If .test(xmlString) Then
            Set matches = .Execute(xmlString)

            ReDim arr(0 To matches.Count - 1)
            For i = LBound(arr) To UBound(arr)
                arr(i) = matches(i)
            Next i
        Else
            arr(i) = xmlString
        End If
        GetTags = Join(arr, "##")
    End With
End Function

Regex

regex

Try it


Output:

You can remove/ignore the initial tag covering the doc type.

output


References:

Tools > References > Microsoft XML (your version e.g. 6.0)


VERSION 2 Traversing tree structure:

More robust solution adapting your existing code to use a dictionary of dictionaries so tree structure can be easily written out.

Option Explicit
Public Sub testing()
    Dim xmlDoc As New MSXML2.DOMDocument60, mainNode As Object, Node As Object, dict As Object, r As Long
    Set xmlDoc = New MSXML2.DOMDocument60
    xmlDoc.Load "C:\Users\User\Desktop\Test.xml"
    Set mainNode = xmlDoc.SelectNodes("//Elements"): Set dict = CreateObject("Scripting.Dictionary")

    [B1] = xmlDoc.DocumentElement.nodeName

    For Each Node In mainNode
        Dim child As Object
        For Each child In Node.ChildNodes
            If Not dict.exists(child.BaseName) Then
                dict.Add child.BaseName, CreateObject("Scripting.Dictionary")
            End If
            Dim kiddo As Object
            For Each kiddo In child.ChildNodes
                If Not dict(child.BaseName).exists(kiddo.BaseName) Then
                    dict(child.BaseName).Add kiddo.BaseName, 1
                End If
            Next kiddo
        Next child
    Next Node
    r = 0
    Dim key1 As Variant, key2 As Variant
    For Each key1 In dict.keys
        Worksheets("sheet1").Range("C" & r + 1) = key1
        For Each key2 In dict(key1).keys
             Worksheets("sheet1").Range("D" & r + 1).Value = key2
             r = r + 1
        Next
    Next
End Sub

Upvotes: 1

Related Questions