Reputation: 135
This is the VBA code I'm currently using to import an XML into my workbook (which works just fine):
Sub matomoImportXML()
Dim matomoXML As Workbook
Application.DisplayAlerts = False
Set matomoXML = Workbooks.OpenXML(FileName:=matomo_xml, LoadOption:=xlXmlLoadImportToList)
Application.DisplayAlerts = True
matomoXML.Sheets(1).UsedRange.Copy report.Sheets(matomo_data).Range("A1")
matomoXML.Close False
End Sub
And this is an example of the XML file:
<?xml version="1.0" encoding="UTF-8"?>
<result>
<row>
<label>lp_total_pageviews=1 - my-website.com/please-verify-you-age-to-enter/ - Others</label>
<nb_uniq_events_eventaction>118</nb_uniq_events_eventaction>
<nb_uniq_events_eventcategory>118</nb_uniq_events_eventcategory>
<nb_uniq_corehome_visitip>118</nb_uniq_corehome_visitip>
<level>3</level>
<Events_EventAction>lp_total_pageviews=1</Events_EventAction>
<Events_EventCategory>my-website.com%2Fplease-verify-you-age-to-enter%2F</Events_EventCategory>
<is_summary>1</is_summary>
<CoreHome_VisitIp>Others</CoreHome_VisitIp>
</row>
<row>
<label>lp_total_clicks=1 - my-website.com/please-verify-you-age-to-enter-rs/ - xxxx:1009:b00a:6fd8:d937:5eb2:7563:de56</label>
<nb_uniq_events_eventaction>3</nb_uniq_events_eventaction>
<nb_uniq_events_eventcategory>3</nb_uniq_events_eventcategory>
<nb_uniq_corehome_visitip>3</nb_uniq_corehome_visitip>
<level>3</level>
<Events_EventAction>lp_total_clicks=1</Events_EventAction>
<Events_EventCategory>my-website.com%2Fplease-verify-you-age-to-enter-rs%2F</Events_EventCategory>
<CoreHome_VisitIp>XXXX:1009:b00a:6fd8:d937:5eb2:7563:de56</CoreHome_VisitIp>
</row>
</result>
Now, before I import and copy it to my workbook, I have to loop through the XML and delete the child node <is_summary>some value</is_summary> completely (if it exist).
I've been trying out multiple solutions I found online, but so far unsuccessful.
Any help would be greatly appreciated!
Upvotes: 1
Views: 165
Reputation: 42236
Please, try the next way:
Sub testRemoveXMLNode()
'it needs a reference to 'Microsoft XML, v6.0'
Dim xmlPath As String, XDoc As MSXML2.DOMDocument60, n As MSXML2.IXMLDOMNode
xmlPath = "Your XML file full name" 'use the real name
Set XDoc = New MSXML2.DOMDocument60
XDoc.Load (xmlPath)
Debug.Print XDoc.XML & vbCrLf & "___"
Dim strTag As String: strTag = "is_summary"
For Each n In XDoc.DocumentElement.ChildNodes
recursiveTagSrc n, strTag
Next n
Debug.Print XDoc.XML 'just to see the result
'it can be saved as another XML document or overwrite the original XML file (xDoc.Save xmlPath)...
End Sub
Sub recursiveTagSrc(n As MSXML2.IXMLDOMNode, strTag As String)
Dim Nd As MSXML2.IXMLDOMNode
If n.HasChildNodes Then
For Each Nd In n.ChildNodes
If Nd.HasChildNodes Then recursiveTagSrc Nd, strTag
If Nd.nodeName = strTag Then
n.RemoveChild Nd
End If
Next Nd
End If
End Sub
Upvotes: 2
Reputation: 22167
You need to pre-process XML file via XSLT transformation before loading/importing it.
The XSLT below is using a so called Identity Transform pattern.
One single line <xsl:template match="is_summary"/>
prevents that XML element from being in the output XML file.
XSLT
<?xml version='1.0'?>
<xsl:stylesheet version="1.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
<xsl:output method="xml" omit-xml-declaration="no"
encoding="UTF-8" indent="yes"/>
<xsl:strip-space elements="*"/>
<!--Identity transform-->
<xsl:template match="@*|node()">
<xsl:copy>
<xsl:apply-templates select="@*|node()"/>
</xsl:copy>
</xsl:template>
<xsl:template match="is_summary"/>
</xsl:stylesheet>
VBA
Private Sub Transform(sourceFile, stylesheetFile, resultFile)
Dim source As New MSXML2.DOMDocument60
Dim stylesheet As New MSXML2.DOMDocument60
Dim result As New MSXML2.DOMDocument60
' Load data.
source.async = False
source.Load sourceFile
' Load style sheet.
stylesheet.async = False
stylesheet.Load stylesheetFile
If (source.parseError.ErrorCode <> 0) Then
MsgBox ("Error loading source document: " & source.parseError.reason)
Else
If (stylesheet.parseError.ErrorCode <> 0) Then
MsgBox ("Error loading stylesheet document: " & stylesheet.parseError.reason)
Else
' Do the transform.
source.transformNodeToObject stylesheet, result
result.Save resultFile
End If
End If
End Sub
Upvotes: 4
Reputation: 9948
The following function demonstrates an XMLDOM alternative to @FaneDuru 's working solution by
RemoveChild
method (c.f. section c)instead of traversing all nodes one by one, thus minimizing the checks whether to remove or not.
Side note:
@Yitzhak-Khabinsky 's valid approach doesn't use XMLDOM methods for removal, but XSLT, a special-purpose language designed to transform original XML files (here by removing nodes via identity transform) according to various criteria.
Function cleanXML(ByVal xmlFilename As String, _
ByVal DELNODE As String) _
As MSXML2.DOMDocument60
'a) load src
Dim xDoc As MSXML2.DOMDocument60
Set xDoc = New MSXML2.DOMDocument60
xDoc.async = False
xDoc.Load xmlFilename
'b) get nodelist
Dim nodes As MSXML2.IXMLDOMNodeList
Set nodes = xDoc.SelectNodes("//" & DELNODE)
'c) remove each node nodelist
Dim node As MSXML2.IXMLDOMNode
For Each node In nodes
node.ParentNode.RemoveChild node
Next
'd) return xml as object
Set cleanXML = xDoc
End Function
Example call
Sub ExampleCall()
Dim t As Double: t = Timer
'0) initial definitions
Const DELNODE As String = "is_summary"
Dim fn As String: fn = ThisWorkbook.Path & "\xml\" & "Whatever.xml"
'1) get result with removed DELNODEs
Dim result As MSXML2.DOMDocument60
Set result = cleanXML(fn, DELNODE)
'2) display result
Debug.Print result.xml
Debug.Print Format(Timer - t, "0.0 secs needed")
''3) save result to any destination filename
' Dim destFn As String: destFn = fn ' e.g. overwrite src
' result.Save destFn
End Sub
Upvotes: 2