Smith O.
Smith O.

Reputation: 217

How to extract values from nested divs using VBA

I have looked at the solution provided in this link Extract Table from Webpage in Excel using VBA and it was very helpful. But I need to extract the values in the div classes (cscore_score) and not a table Please refer to image below

  1. The URL is: https://www.espncricinfo.com/scores

  2. The div class is: cscore_score

  3. The scores to extract is in nested divs. The sample data for each nested div I want to extract is like Country and Score i.e INDIA and in the next column "416..." into the Excel sheet.

Here's a screenshot of the table structure:

enter image description here

Public Sub GetInfo()
Const URL As String = "https://www.espncricinfo.com/scores"
Dim html As HTMLDocument, hTable As HTMLTable, ws As Worksheet, headers()

Set ws = ThisWorkbook.Worksheets("Sheet1")
Set html = New HTMLDocument
With CreateObject("MSXML2.XMLHTTP")
    .Open "GET", URL, False
    .send
    html.body.innerHTML = .responseText
End With
 Set hDiv = html.querySelector("div.cscore")
Dim ul As Object, div As Object, r As Long, c As Long

r = 1
With ws

    For Each div In hDiv.getElementsByClassName("cscore_link")
        r = r + 1: c = 1
        If r > 3 Then
            For Each ul In div.getElementsByClassName("cscore_score")
                .Cells(r - 2, c) = IIf(c = 2, "'" & div.innerText, div.innerText)
                c = c + 1
            Next
        End If
    Next
End With
End Sub

I would be grateful to receive any help to extract those scores from each div into the sheet.

Upvotes: 1

Views: 1333

Answers (2)

QHarr
QHarr

Reputation: 84465

You could use faster css selectors (using only class is faster than tag/type) which if used as shown below will allow you to also reduce your code complexity and improve performance by having only a single loop. Results can then be stored in an array and written out in one go - again another efficiency gain.

Note I am ensuring scores remain correctly formatted on output by concatenating "'" in front.


If you want scores for same match on same row:

Option Explicit

Public Sub GetData()
    Dim html As HTMLDocument, ws As Worksheet, countries As Object, scores As Object, results(), i As Long, r As Long

    Set ws = ThisWorkbook.Worksheets("Sheet1"): Set html = New HTMLDocument: r = 1

    With CreateObject("MSXML2.XMLHTTP")
        .Open "GET", "https://www.espncricinfo.com/scores", False
        .send
        html.body.innerHTML = .responseText
    End With

    Set countries = html.querySelectorAll(".cscore--watchNotes .cscore_name--long"): Set scores = html.querySelectorAll(".cscore_score ")
    ReDim results(1 To countries.Length / 2, 1 To 4)

    For i = 0 To countries.Length - 1 Step 2
        results(r, 1) = countries.item(i).innerText: results(r, 2) = "'" & scores.item(i).innerText
        results(r, 3) = countries.item(i + 1).innerText: results(r, 4) = "'" & scores.item(i + 1).innerText
        r = r + 1
    Next
    ws.Cells(1, 1).Resize(1, 4) = Array("Home", "Score", "Away", "Score")
    ws.Cells(2, 1).Resize(UBound(results, 1), UBound(results, 2)) = results
End Sub

Scores on different rows for every team:

Option Explicit    
Public Sub GetData()
    Dim html As HTMLDocument, ws As Worksheet, countries As Object, scores As Object, results(), i As Long

    Set ws = ThisWorkbook.Worksheets("Sheet1"): Set html = New HTMLDocument

    With CreateObject("MSXML2.XMLHTTP")
        .Open "GET", "https://www.espncricinfo.com/scores", False
        .send
        html.body.innerHTML = .responseText
    End With

    Set countries = html.querySelectorAll(".cscore--watchNotes .cscore_name--long"): Set scores = html.querySelectorAll(".cscore_score ")
    ReDim results(1 To countries.Length, 1 To 2)
    For i = 0 To countries.Length - 1
        results(i + 1, 1) = countries.item(i).innerText: results(i + 1, 2) = "'" & scores.item(i).innerText
    Next
    ws.Cells(1, 1) = "Country": ws.Cells(1, 2) = "Score"
    ws.Cells(2, 1).Resize(UBound(results, 1), UBound(results, 2)) = results
End Sub

Additional column:

Public Sub GetData()
    Dim html As HTMLDocument, ws As Worksheet, countries As Object, scores As Object
    Dim descs As Object, results(), i As Long, r As Long

    Set ws = ThisWorkbook.Worksheets("Sheet1"): Set html = New HTMLDocument: r = 1

    With CreateObject("MSXML2.XMLHTTP")
        .Open "GET", "https://www.espncricinfo.com/scores", False
        .send
        html.body.innerHTML = .responseText
    End With

    Set countries = html.querySelectorAll(".cscore--watchNotes .cscore_name--long"): Set scores = html.querySelectorAll(".cscore_score ")
    Set descs = html.querySelectorAll(".cscore--watchNotes .cscore_info-overview")
    ReDim results(1 To countries.Length / 2, 1 To 5)

    For i = 0 To countries.Length - 1 Step 2
        results(r, 1) = descs.Item(i / 2).innerText
        results(r, 2) = countries.Item(i).innerText: results(r, 3) = "'" & scores.Item(i).innerText
        results(r, 4) = countries.Item(i + 1).innerText: results(r, 5) = "'" & scores.Item(i + 1).innerText
        r = r + 1
    Next
    ws.Cells(1, 1).Resize(1, 5) = Array("Desc", "Home", "Score", "Away", "Score")
    ws.Cells(2, 1).Resize(UBound(results, 1), UBound(results, 2)) = results
End Sub

Upvotes: 2

Stavros Jon
Stavros Jon

Reputation: 1697

Your request seems to be just fine. Parsing the HTML is where your problem is. You could do something like the following (you can ignore the request part):

Option Explicit

Sub espn()
Dim req As New WinHttpRequest
Dim HTMLDocument As New HTMLDocument
Dim listElement As HTMLUListElement
Dim listItem As HTMLLIElement
Dim sht As Worksheet
Dim i As Long
Dim j As Long

Dim url As String
url = "https://www.espncricinfo.com/scores"
With req
    .Open "GET", url, False
    .send
    HTMLDocument.body.innerHTML = .responseText
End With

Set sht = ThisWorkbook.Worksheets("Name of your worksheet")
i = 2
For Each listElement In HTMLDocument.getElementsByClassName("cscore_competitors")
    j = 1
    For Each listItem In listElement.getElementsByTagName("li")
        sht.Cells(i, j) = listItem.getElementsByClassName("cscore_name cscore_name--long")(0).innerText
        sht.Cells(i, j + 1) = listItem.getElementsByClassName("cscore_score")(0).innerText
        j = j + 2
    Next listItem
    i = i + 1
Next listElement

End Sub

The results would look like so:

enter image description here

Basically each game is represented by a ul (unnumbered list) element which consists of two li elements which contain the info about the names and the score.

Upvotes: 1

Related Questions