Paul Green
Paul Green

Reputation: 43

Parse data from online XML feed using VBA - stopped working

we have a little routine here that has run without a hitch in the background for many years which contacts MI5 each morning, reads the XML file on the MI5 website (https://www.mi5.gov.uk/UKThreatLevel/UKThreatLevel.xml), and updates our on-duty staff of the current threat level. This has worked consistently until late last year.

It appears that in late 2024 the UK Government have commissioned Cloudflare security on the MI5 website, and as such when our script attempts to read the online XML, it fails. The page is still accessible in Chrome.

Playing around with different variants of VBA code that I've found around Stack Overflow, the XML HTTP text node has

: responseText : "Just a moment...<meta http-equiv="X-UA-Compatible" con"

and the Status is showing as 403.

The question is, is there a way with VBA using MSXML6 to either get the program to wait out until Cloudflare refreshes with the correct data, or if the status 403 is as a result of the website detecting VBA as a non-standard browser, is there a way of spoofing the header so the website in question thinks it's Chrome/edge, etc.

Any pointers would be appreciated.

I tried the following code which is failing. When I save the XML and parse it locally, it works. When I access the BBC's news XML, it works.

Sub GetThreatLevel()
'On Error Resume Next
Dim strpath As String
Dim dblRate As Double
Dim i As Integer

Dim xmlOBject As MSXML2.DOMDocument60
Dim xmlNode As MSXML2.IXMLDOMNode

Dim intLength As Integer
Set xmlOBject = New MSXML2.DOMDocument60
'website path
strpath = "https://www.mi5.gov.uk/UKThreatLevel/UKThreatLevel.xml"
'strpath = "https://feeds.bbci.co.uk/news/rss.xml"
'strPath = "c:\FOO\Threat.xml"

With xmlOBject
    .async = False
    .resolveExternals = False
    .validateOnParse = False
    .Load (strpath)
End With

'get the query node
intLength = xmlOBject.childNodes.Length - 1
For i = 0 To intLength
If xmlOBject.childNodes.Item(i).BaseName = "rss" Then
Set xmlNode = xmlOBject.childNodes.Item(i)
i = intLength + 1
End If
Next i
'get the result node
intLength = xmlNode.childNodes.Length - 1
For i = 0 To intLength
If xmlNode.childNodes.Item(i).BaseName = "channel" Then
Set xmlNode = xmlNode.childNodes.Item(i)
i = intLength + 1
End If
Next i
intLength = xmlNode.childNodes.Length - 1
For i = 0 To intLength
If xmlNode.childNodes.Item(i).BaseName = "item" Then
Set xmlNode = xmlNode.childNodes.Item(i)
i = intLength + 1
End If
Next i
intLength = xmlNode.childNodes.Length - 1
For i = 0 To intLength
If xmlNode.childNodes.Item(i).BaseName = "description" Then
Set xmlNode = xmlNode.childNodes.Item(i)
i = intLength + 1
End If
Next i

intLength = xmlNode.childNodes.Length - 1

Debug.Print xmlNode.nodeTypedValue
DBSLastCheck = Date
dbslastresult = Nz(xmlNode.nodeTypedValue, "NO VALUE RETURNED")


End Sub

Upvotes: 4

Views: 92

Answers (2)

Gustav
Gustav

Reputation: 55981

This works here with Access 365 64-bit:

Option Compare Database
Option Explicit

' Enums.
'
' HTTP status codes, reduced.
Private Enum HttpStatus
    OK = 200
    BadRequest = 400
    Unauthorized = 401
    Forbidden = 403
End Enum

Public Function ThreatLevel() As String

    ' Operational constants.
    '
    ' Base URL for MI5 UK threat level status.
    Const ServiceUrl        As String = "https://www.mi5.gov.uk/UKThreatLevel/"
    ' File to look up.
    Const Filename          As String = "UKThreatLevel.xml"
    
    ' Function constants.
    '
    ' Async setting.
    Const Async             As Variant = False
    ' XML node and attribute names.
    Const RootNodeName      As String = "rss"
    Const ChannelNodeName   As String = "channel"
    Const ItemNodeName      As String = "item"
    Const TextNodeName      As String = "description"
  
    ' Microsoft XML, v6.0.
    Dim Document            As MSXML2.DOMDocument60
    Dim XmlHttp             As MSXML2.ServerXMLHTTP60
    Dim RootNodeList        As MSXML2.IXMLDOMNodeList
    Dim ChannelNodeList     As MSXML2.IXMLDOMNodeList
    Dim TextNodeList        As MSXML2.IXMLDOMNodeList
    Dim RootNode            As MSXML2.IXMLDOMNode
    Dim ChannelNode         As MSXML2.IXMLDOMNode
    Dim ItemNode            As MSXML2.IXMLDOMNode
    Dim TextNode            As MSXML2.IXMLDOMNode
    Dim Description         As String
    Dim Url                 As String

    Set Document = New MSXML2.DOMDocument60
    Set XmlHttp = New MSXML2.ServerXMLHTTP60
    
    Url = ServiceUrl & Filename
    
    ' Retrieve data.
    XmlHttp.Open "GET", Url, Async
    XmlHttp.send
    
    If XmlHttp.Status = HttpStatus.OK Then
        ' File retrieved successfully.
        Document.loadXML XmlHttp.responseText
    
        Set RootNodeList = Document.getElementsByTagName(RootNodeName)
        ' Find root node.
        For Each RootNode In RootNodeList
            If RootNode.nodeName = RootNodeName Then
                Exit For
            Else
                Set RootNode = Nothing
            End If
        Next

        If Not RootNode Is Nothing Then
            If RootNode.hasChildNodes Then
                ' Find first level Channel node.
                Set ChannelNodeList = RootNode.childNodes
                For Each ChannelNode In ChannelNodeList
                    If ChannelNode.nodeName = ChannelNodeName Then
                        Exit For
                    Else
                        Set ChannelNode = Nothing
                    End If
                Next
            End If
        End If
        
        If Not ChannelNode Is Nothing Then
            If ChannelNode.hasChildNodes Then
                ' Find Item node.
                Set ChannelNodeList = ChannelNode.childNodes
                For Each ItemNode In ChannelNodeList
                    If ItemNode.nodeName = ItemNodeName Then
                        Exit For
                    Else
                        Set ItemNode = Nothing
                    End If
                Next
            End If
        End If
        
        If Not ItemNode Is Nothing Then
            If ItemNode.hasChildNodes Then
                ' Find Text node.
                Set TextNodeList = ItemNode.childNodes
                For Each TextNode In TextNodeList
                    If TextNode.nodeName = TextNodeName Then
                        Description = TextNode.nodeTypedValue
                        Exit For
                    Else
                        Set TextNode = Nothing
                    End If
                Next
            End If
        End If

    End If
    
    ThreatLevel = Description

End Function

Output:

The current national threat level is SUBSTANTIAL. The threat to Northern Ireland from Northern Ireland-related terrorism is SUBSTANTIAL.

Upvotes: 0

Haluk
Haluk

Reputation: 1586

You may try this Excel VBA code which is written and tested on Excel 2010 - 64 Bit

Make sure that Excel file is saved as "*.xlsm" before running the code.

Sub Test()
    Dim objHTTP As Object, strURL As String
    Dim AdoStream As Object, xDoc As Object, myList As Object, iCount As Long, i As Long
    
    Const adTypeText = 2
    Const adSaveCreateOverWrite = 2
    
    Set objHTTP = CreateObject("MSXML2.ServerXMLHTTP")
    strURL = "https://www.mi5.gov.uk/UKThreatLevel/UKThreatLevel.xml"
 
    objHTTP.Open "GET", strURL, False
    objHTTP.send
    
    If objHTTP.Status = 200 Then
        Set AdoStream = CreateObject("ADODB.Stream")
        AdoStream.Type = adTypeText
        AdoStream.Charset = "utf-8"
        AdoStream.Open
        AdoStream.WriteText objHTTP.responseText
        AdoStream.SaveToFile ThisWorkbook.Path & "\rss.xml", adSaveCreateOverWrite
             
        Set xDoc = CreateObject("MSXML2.DOMDocument")
        xDoc.async = False
        xDoc.validateOnParse = False
           
        xDoc.Load ThisWorkbook.Path & "\rss.xml"
         
        Set myList = xDoc.SelectNodes("//item")
        iCount = myList.Length - 1
        
        If myList.Length = 0 Then GoTo SafeExit:
        
        Range("A1:E1") = Array("No", "Title", "Description", "Publish Date", "Subject")
        Range("A2:E" & Rows.Count) = ""
        
        For i = 0 To iCount
            Cells(i + 2, 1) = i + 1
            Cells(i + 2, 2) = myList(i).SelectSingleNode("title").Text
            Cells(i + 2, 3) = myList(i).SelectSingleNode("description").Text
            Cells(i + 2, 4) = myList(i).SelectSingleNode("pubDate").Text
            Cells(i + 2, 5) = myList(i).SelectSingleNode("subject").Text
        Next
        
        Kill ThisWorkbook.Path & "\rss.xml"
        
        Columns("A:A").ColumnWidth = 4
        Columns("B:E").ColumnWidth = 30
    Else
        MsgBox objHTTP.StatusText
    End If
    
SafeExit:
    Set xDoc = Nothing
    Set myList = Nothing
    Set AdoStream = Nothing
    Set objHTTP = Nothing
End Sub

Upvotes: 0

Related Questions