Reputation: 22440
I'm trying to grab a certain portion of information from a webpage using xmlhttp
request. When I execute my script, it throws an error Object Variable Or With---
. But, when I try the same using IE
, I get the content like magic.
The most important thing to notice is that the content I'm expecting to grab are neither javascript encrypted nor generated dynamically. So, I should get them using xhr
. Where I'm going wrong?
Using IE
(working one):
Sub GetText()
Const Url As String = "https://www.baseball-reference.com/boxes/ANA/ANA201806180.shtml"
Dim IE As New InternetExplorer, HTML As HTMLDocument, post As Object
With IE
.Visible = False
.navigate Url
While .Busy = True Or .readyState < 4: DoEvents: Wend
Set HTML = .document
End With
Set post = HTML.querySelectorAll(".section_content")(2)
MsgBox post.innerText
End Sub
Using XHR (not working):
Sub GetText()
Const Url As String = "https://www.baseball-reference.com/boxes/ANA/ANA201806180.shtml"
Dim Http As New XMLHTTP60, HTML As New HTMLDocument, post As Object
With Http
.Open "GET", Url, False
.send
HTML.body.innerHTML = .responseText
End With
Set post = HTML.querySelectorAll(".section_content")(2)
MsgBox post.innerText
End Sub
The selector
I've defined above is flawless.
I could have pasted here the relevant html elements
but they are wrapped within comments
. However, I've provided above the link to that site.
To be clearer: the portion of text I'm interested in looks exactly like below in that webpage.
My question: how can I get the aforementioned textblock (which is shown above within image) using XHR?
Upvotes: 1
Views: 299
Reputation: 84465
Option Explicit
Public Sub GetInfo()
Dim sResponse As String, html As New HTMLDocument
With CreateObject("MSXML2.XMLHTTP")
.Open "GET", "https://www.baseball-reference.com/boxes/ANA/ANA201806180.shtml", False
.send
sResponse = StrConv(.responseBody, vbUnicode)
End With
sResponse = Mid$(sResponse, InStr(1, sResponse, "<!DOCTYPE "))
With html
.body.innerHTML = sResponse
html.body.innerHTML = html.querySelector("#all_9711922514").LastChild.Data
Debug.Print html.querySelector("#div_9711922514").innerText
End With
End Sub
Option Explicit
Public Sub GetInfo()
Dim sResponse As String, html As New HTMLDocument, ele As Object
With CreateObject("MSXML2.XMLHTTP")
.Open "GET", "https://www.baseball-reference.com/boxes/ANA/ANA201806180.shtml", False
.send
sResponse = StrConv(.responseBody, vbUnicode)
End With
sResponse = Mid$(sResponse, InStr(1, sResponse, "<!DOCTYPE "))
With html
.body.innerHTML = sResponse
For Each ele In html.querySelector("#all_9711922514").Children
If ele.NodeType = 8 Then
html.body.innerHTML = ele.Data
Debug.Print html.querySelector("#div_9711922514").innerText
Exit For
End If
Next
End With
End Sub
Option Explicit
Public Sub GetInfo()
Dim sResponse As String, html As New HTMLDocument
With CreateObject("MSXML2.XMLHTTP")
.Open "GET", "https://www.baseball-reference.com/boxes/ANA/ANA201806180.shtml", False
.send
sResponse = StrConv(.responseBody, vbUnicode)
End With
sResponse = Mid$(sResponse, InStr(1, sResponse, "<!DOCTYPE "))
With html
.body.innerHTML = sResponse
Dim s As String
s = .querySelector("div[id=all_1786105919]").outerHTML
s = regexRemove(s, "<([^>]+)>")
Debug.Print Replace$(Replace$(s, "&", "°"), "-->", vbNullString)
End With
End Sub
Public Function regexRemove(ByVal s As String, ByVal pattern As String) As String
Dim regex As Object
Set regex = CreateObject("VBScript.RegExp")
With regex
.Global = True
.MultiLine = True
.IgnoreCase = False
.pattern = pattern
End With
If regex.test(s) Then
regexRemove = regex.Replace(s, vbNullString)
Else
regexRemove = s
End If
End Function
Output:
Upvotes: 1
Reputation: 22440
The solution is plain and simple. All you need to do is kick out the comment signs
from responseText
using Replace()
function or so and then filter them using Html.body.innerHTML
to make them proper html contents
. The rest is as usual.
This is how you can get the content:
Sub GetTextFromComment()
Const URL As String = "https://www.baseball-reference.com/boxes/ANA/ANA201806180.shtml"
Dim Http As New XMLHTTP60, Html As New HTMLDocument, post As Object
With Http
.Open "GET", URL, False
.send
Html.body.innerHTML = Replace(Replace(.responseText, "<!--", ""), "-->", "")
End With
Set post = Html.querySelectorAll(".section_content")(2)
MsgBox post.innerText
End Sub
Upvotes: 3