Reputation: 8230
<div class="game_content">
<div class="game_table">
<article class="league_name">
<span class="flags flags-category flags--uefa-europa-league"></span>
<h6 class="hide-on-small-only hide-on-med-only hide_ipadLand"><a title="International">International</a></h6>
<h3><a href="/en/league-games/1/uefa-europa-league" title="UEFA Europa League">UEFA Europa League</a></h3>
</article> <div class="game_detailsCol">
<div class="col s2 m1 l1">
<span class="red-text"><span class="green-text bold">F</span><span class="grey-text">21:45</span></span>
</div>
<div class="col s3 m4 l5">
<div>
<p class="yellow_card">3</p>
</div>
<a href="/en/teams/Marseille" title="Marseille livescore">Marseille</a>
</div>
<div class="col s1 m1 l1">
<a href="/en/game-info/467162/marseille-atletico-madrid" title="Marseille-Atletico Madrid livescore" class="">
0-3 </a>
</div>
<div class="col s3 m4 l5">
<a href="/en/teams/Atletico+Madrid" title="Atletico Madrid livescore">Atletico Madrid</a>
<div>
<span class="grey-text">0-1</span>
<p class="yellow_card">2</p> </div>
</div>
<div class="col s2 m1 l1">
<i class="icon icon-star " game_id="467162"></i>
<a class="dropdown-button hide-on-small-only hide-on-med-only hide_ipadLand" data-activates="dropdown467162">
<i class="icon icon-more_vert"></i>
</a><ul id="dropdown467162" class="dropdown-content">
<li class="close_dropMenu"><strong>x</strong></li>
<li><a href="/en/h2h/467162/marseille-atletico-madrid" title="Head To Head">Head To Head</a></li>
</ul>
</div>
</div>
Guys,
I'm new in scraping information from websites via VBA so help is needed. i want to loop through div class="game_content" and in each div class="game_table" scrap the below info:
FROM title="UEFA Europa League" GET "UEFA Europa League"
FROM title="Marseille livescore">Marseille GET "Marseille"
FROM title="Marseille-Atletico Madrid livescore" class="">0-3 GET 0-3
FROM title="Atletico Madrid livescore">Atletico Madrid GET Atletico Madrid
FROM <span class="grey-text">0-1</span> GET 0-1
My post includes the FIRST class="game_table FROM class="game_content"
My piece of code:
Sub InternetConnection()
Dim IE As InternetExplorer
Dim HTMLDocument As HTMLDocument
Dim GameList As IHTMLElement
Dim Games As IHTMLElementCollection
Dim Game As IHTMLElement
'Open Internet Explorer
Set IE = New InternetExplorer
IE.Visible = True
IE.Navigate ("https://azscore.com/en/yesterday")
Do While IE.readyState <> READYSTATE_COMPLETE
Application.StatusBar = "Loading..."
Loop
Set HTMLDocument = IE.document
Set GameList = HTMLDocument.getElementsByClassName("game_content")(0)
Set Games = GameList.Children
For Each Game In Games
If Game.className = "game_table" Then
End If
Next
End Sub
Any help will be appreciate.
Upvotes: 0
Views: 188
Reputation: 84465
Required references via tools > references
Note:
Different output as now run on a different day against the website.
Option Explicit
Public Sub PrintMatchInfo()
Application.ScreenUpdating = False
Dim IE As InternetExplorer, doc As HTMLDocument
Set IE = New InternetExplorer
With IE
.Visible = True
.Navigate2 "https://azscore.com/en/yesterday"
While .Busy Or .readyState < 4: DoEvents: Wend
Set doc = .document
End With
Dim games As Object, i As Long, j As Long, counter As Long
counter = 1
Set games = doc.getElementsByClassName("game_table")
For i = 0 To games.Length - 1
For j = 0 To games(i).getElementsByClassName("game_detailsCol").Length - 1
With ThisWorkbook.Worksheets("Sheet2")
.Cells(counter, 1) = Split(games(i).innerText, Chr$(10))(3)
Dim tempArr() As String
tempArr() = Split(GetKeyDetails(ReplacedString(games(i).getElementsByClassName("game_detailsCol")(j).innerText)), Chr$(126))
.Cells(counter, 2).Resize(1, UBound(tempArr) + 1) = tempArr
counter = counter + 1
End With
Next j
Next i
Application.ScreenUpdating = True
End Sub
Public Function ReplacedString(ByVal str As String) As String
Dim regex As Object
Set regex = CreateObject("VBScript.RegExp")
With regex
.Global = True
.MultiLine = True
.IgnoreCase = False
.Pattern = "\s{2,}"
End With
ReplacedString = regex.Replace(str, Chr$(126)) '<== or other unexpected character
End Function
Public Function GetKeyDetails(ByVal inputString As String) As String
Dim regex As Object, tempString As String
Set regex = CreateObject("VBScript.RegExp")
With regex
.Global = True
.MultiLine = True
.IgnoreCase = False
.Pattern = "(^[A-Z](?:\d|[01]\d|2[0-3]):[0-5]\d~)(\d?~?)+" '"^[A-Z](?:\d|[01]\d|2[0-3]):[0-5]\d~(\d~)?(\d~)?|x\~Head To Head"
End With
tempString = regex.Replace(inputString, vbNullString)
Dim tempArr() As String
tempArr = Split(tempString, Chr$(126))
Dim keyDetails(), outputString As String, detail As Long
keyDetails = Array(0, 1, 2, 3)
For detail = LBound(keyDetails) To UBound(keyDetails)
outputString = outputString & Chr$(126) & tempArr(keyDetails(detail))
Next detail
GetKeyDetails = Trim$(outputString)
End Function
Output:
Code:
Option Explicit
Public Sub PrintAllMatchInfo()
Application.ScreenUpdating = False
Dim IE As InternetExplorer, doc As HTMLDocument
Set IE = New InternetExplorer
With IE
.Visible = True
.Navigate2 "https://azscore.com/en/yesterday"
While .Busy Or .readyState < 4: DoEvents: Wend
Set doc = .document
End With
Dim games As Object, i As Long, j As Long, counter As Long
counter = 1
Set games = doc.getElementsByClassName("game_table")
For i = 0 To games.Length - 1
For j = 0 To games(i).getElementsByClassName("game_detailsCol").Length - 1
With ThisWorkbook.Worksheets("Sheet1")
.Cells(counter, 1) = Split(games(i).innerText, Chr$(10))(3)
.Cells(counter, 2) = ReplacedString(games(i).getElementsByClassName("game_detailsCol")(j).innerText)
counter = counter + 1
End With
Next j
Next i
Application.ScreenUpdating = True
End Sub
Public Function ReplacedString(ByVal str As String) As String
Dim regex As Object
Set regex = CreateObject("VBScript.RegExp")
With regex
.Global = True
.MultiLine = True
.IgnoreCase = False
.Pattern = "\s{2,}"
End With
ReplacedString = regex.Replace(str, " ") '<== or other unexpected character
End Function
Output:
Pattern for your follow-up question: "^\w+\/(\d+~)+"
Upvotes: 1