Reputation: 19
I had some code in excel vba that webscraped hundreds of pages by grabbing all the cells from a table called "Insider Transactions" on particular urls. The following is an example url: https://www.gurufocus.com/stock/HIL/insider
For some reason my code below no longer works. I cannot for the life of me work out why. The class I am trying to grab still seems to be called "normal-table data-table"
I have tried getting rid of the (0) as there appears to be only one table with the class name normal-table data-table now.
Set code is:
Set allCells = doc.body.getElementsByClassName("normal-table data-table")(0).getElementsByTagName("td")
no error messages are given when I run my current code, but it is clear that allCells is not being set to anything because my code doesn't work and allCells.length doesn't return anything. Thanks
Upvotes: 0
Views: 256
Reputation: 84465
XMLHTTP:
Faster than a browser and providing more info is xhr.
The data is provided from an API call. You can scrape the token for this and pass in a subsequent request. A few helper functions to get the token and handle results as well as a json parser to handle json response from API.
This requires installing code for jsonparser from jsonconverter.bas in a standard module called JsonConverter and then going VBE>Tools>References>Add a reference to Microsoft Scripting Runtime.
Option Explicit
Public Sub GetInfo()
Dim json As Object, headers(), ws As Worksheet, i As Long, results()
Dim re As Object, r As Long, c As Long, dict As Object, p As String, token As String, s As String
Set ws = ThisWorkbook.Worksheets("Sheet1")
p = "password_grant_custom\.client"":""(.*?)"""
Set re = CreateObject("VBScript.RegExp")
With CreateObject("MSXML2.XMLHTTP")
.Open "GET", "https://www.gurufocus.com/stock/HIL/insider", False
.send
token = GetToken(re, .responseText, p)
If token = "Not found" Then Exit Sub
.Open "GET", "https://www.gurufocus.com/reader/_api/stocks/NYSE:HIL/insider?page=1&per_page=1000&sort=date%7Cdesc", False
.setRequestHeader "authorization", "Bearer " & token
.send
s = .responseText
Set json = JsonConverter.ParseJson(.responseText)("data")
headers = json(1).keys
ReDim results(1 To json.Count, 1 To UBound(headers) + 1)
For Each dict In json
r = r + 1: c = 1
For i = LBound(headers) To UBound(headers)
If headers(i) <> "ownership_details" Then
results(r, c) = dict(headers(i))
Else
results(r, c) = EmptyDict(dict(headers(i)))
End If
c = c + 1
Next
Next
End With
With ws
.Cells(1, 1).Resize(1, UBound(headers) + 1) = headers
.Cells(2, 1).Resize(UBound(results, 1), UBound(results, 2)) = results
End With
End Sub
Public Function EmptyDict(ByVal dict As Object, Optional r As String, Optional key As Variant) As String
Dim s As String
For Each key In dict
If TypeName(dict(key)) = "Dictionary" Then
r = EmptyDict(dict(key), r, key)
Else
s = IIf(key = "D", "Direct ", key)
r = r & s & " " & dict(key) & Chr$(10)
End If
Next
EmptyDict = r
End Function
Public Function GetToken(ByVal re As Object, ByVal inputString As String, ByVal pattern As String) As String
With re
.Global = True
.pattern = pattern
If .test(inputString) Then ' returns True if the regex pattern can be matched agaist the provided string
GetToken = .Execute(inputString)(0).SubMatches(0)
Else
GetToken = "Not found"
End If
End With
End Function
Sample of output:
Using browser and also setting results to 100 per page:
The following dimisses login message if present.
Option Explicit
Public Sub GetData()
Dim ie As Object, clipboard As Object, ws As Worksheet
Set ws = ThisWorkbook.Worksheets("Sheet1")
Set clipboard = GetObject("New:{1C3B4210-F441-11CE-B9EA-00AA006B1A69}")
Set ie = CreateObject("InternetExplorer.Application")
With ie
.Visible = True
.Navigate2 "https://www.gurufocus.com/stock/HIL/insider"
While .Busy Or .readyState < 4: DoEvents: Wend
With .document
If .querySelectorAll(".login-card").Length > 0 Then
.querySelector(".login-card .el-icon-close").Click
End If
.querySelector(".el-icon-caret-bottom").Click
.querySelector(".aio-popover-item:nth-of-type(6)").Click
End With
While .Busy Or .readyState < 4: DoEvents: Wend
clipboard.SetText .document.querySelector(".data-table").outerHTML
clipboard.PutInClipboard
ws.Range("A1").PasteSpecial
.Quit
End With
End Sub
Upvotes: 2
Reputation: 348
Try
window.addEventListener('load', () => {
let data = document.body.getElementsByClassName("normal-table data-table")[0].getElementsByTagName("td");
// do something with data
})
Instead of the round '(' brackets, it seems fine. But, it could be possible that the data in table is loading after your function runs, so its throwing error.
You can refer to this post
Upvotes: 0