Poseidon
Poseidon

Reputation: 45

VBA - WebScraping Get elements for classname for equal classNames

I want to extract two values (numbers and positions) per player which have an equal class name "text". I am currently unable to select the two correct values per player.

My problem is I actually have only the first and the second value in "HTMLnumbers" and "HTMLposition". Otherwise if I select all items for the class "text", the first player gets the value for number and the second player gets the value for position. Thats also not correct.

enter image description here

Option Explicit

Sub erweiterteWerte()

Dim IE As New SHDocVw.InternetExplorer
Dim HTMLdoc As MSHTML.HTMLDocument
Dim HTMLplayerRow As MSHTML.IHTMLElementCollection

Dim i As Integer
Dim j As Integer

Dim HTMLnumbers As Object
Dim HTMLposition As Object

Dim numbers As String
Dim position As String

Dim letzteZeile As Long
Dim aktuelleZeile As Long

IE.Visible = False
IE.Navigate "https://examplexyz.de"

Do While IE.ReadyState <> READYSTATE_COMPLETE
Loop

Application.Wait (Now + TimeValue("0:00:7"))

Set HTMLdoc = IE.Document

Set HTMLplayerRow = HTMLdoc.getElementsByClassName("playerRow")
Set HTMLnumbers = HTMLplayerRow(0).getElementsByClassName("text")

   If Not HTMLnumbers Is Nothing Then
   numbers = HTMLnumbers.Item(0).innerText
   position = HTMLnumbers.Item(1).innerText
    
   Else
     numbers = "no_value"
   End If

Debug.Print numbers
Debug.Print position

IE.Quit

End Sub

Upvotes: 0

Views: 1107

Answers (1)

Tim Williams
Tim Williams

Reputation: 166306

Untested, but to illustrate the basic approach:

Sub erweiterteWerte()

    Dim IE As SHDocVw.InternetExplorer
    Dim HTMLdoc As MSHTML.HTMLDocument
    Dim playerRows As MSHTML.IHTMLElementCollection
    Dim playerBadges As MSHTML.IHTMLElementCollection
    Dim player As Object, badge As Object
    
    Set IE = New SHDocVw.InternetExplorer
    IE.Visible = False
    IE.Navigate "https://play.kickbase.com/transfermarkt/kaufen"
    
    Do While IE.ReadyState <> READYSTATE_COMPLETE
    Loop
    Application.Wait (Now + TimeValue("0:00:07"))
    
    Set HTMLdoc = IE.Document
    Set playerRows = HTMLdoc.getElementsByClassName("playerRow")
    
    For Each player In playerRows
        
        Debug.Print "---------------"
        Debug.Print classText(player, "firstName") & " " & classText(player, "lastName")
        
        Set playerBadges = player.getElementsByClassName("badge")
        For Each badge In playerBadges
            Debug.Print badge.innerText
        Next badge
    
    Next player
    
    IE.Quit

End Sub

'Helper function to get a child (of `obj`) element's text using its className
'  (only handles a single instance but could be extended)
Function classText(obj As Object, classname As String) As String
    Dim els As Object
    Set els = obj.getElementsByClassName(classname)
    If els.Length > 0 Then
        classText = els(0).innerText
    Else
        classText = "[not found]"
    End If
End Function

Upvotes: 2

Related Questions