Maria Georgali
Maria Georgali

Reputation: 659

Cannot exclude tags which are inside a <li> tag with vba

I have several pages like the following

https://www.skroutz.gr/s/2195774/Western-Digital-Blue-3-5-1TB-7200rpm.html

I want to extract data such as the price, availability and seller name with VBA. When I try the following:

' Load the product page. NOTE(review): ie is not declared in this snippet;
' it is presumably an InternetExplorer automation object created earlier.
ie.Navigate "https://www.skroutz.gr/s/2195774/Western-Digital-Blue-3-5-1TB-7200rpm.html"

' Keep yielding to the host until the browser is no longer busy and
' ReadyState equals 4.
Do While ie.Busy = True Or ie.ReadyState <> 4: DoEvents: Loop

Dim NodeList As Object, currentItem As Long
Dim outputString As String
' Select every element that carries both the "card" and "js-product-card" classes.
Set NodeList = ie.Document.querySelectorAll(".card.js-product-card")
With ActiveSheet
    For currentItem = 0 To NodeList.Length - 1
        ' innerText is read from the whole matched element, so all of the text
        ' inside each card is appended, not just individual fields.
        outputString = outputString & vbCrLf & NodeList.Item(currentItem).innerText
    Next currentItem
    ' Everything collected is written into the single cell at row 2, column 6.
    .Cells(2, 6) = Trim$(outputString)
End With

I get the whole text of each "li" tag.

How can I exclude all the other data and keep only the fields I want?

Upvotes: 0

Views: 68

Answers (1)

QHarr
QHarr

Reputation: 84465

This uses a loop with scrolling to make the page generate the full list of items, and then targets the specific pieces of information with separate CSS selectors.

Option Explicit

' Opens the product page in Internet Explorer, scrolls until every product
' card exposes a final price (or MAX_WAIT_SEC elapses), then writes one row per
' offer to "Sheet1" of this workbook:
'   column 1 = seller (title attribute of the shop link)
'   column 2 = final price text
'   column 3 = availability text
'
' Requires a project reference to "Microsoft Internet Controls" for the
' early-bound InternetExplorer type.
' Any runtime error is re-raised to the caller after the browser is closed.
Public Sub GetInfo()
    Const MAX_WAIT_SEC As Long = 20
    Const SECONDS_PER_DAY As Single = 86400
    Const PAGE_URL As String = "https://www.skroutz.gr/s/2195774/Western-Digital-Blue-3-5-1TB-7200rpm.html"

    Dim ie As InternetExplorer
    Dim finalPrices As Object, sellers As Object, availability As Object
    Dim products As Object
    Dim startTime As Single, elapsed As Single
    Dim rowCount As Long, i As Long
    Dim errNum As Long, errSrc As String, errDesc As String

    ' Explicit instantiation instead of "Dim ... As New" so the object is
    ' created exactly once, here, and never silently re-created.
    Set ie = New InternetExplorer

    On Error GoTo Failed

    With ie
        .Visible = True
        .Navigate2 PAGE_URL

        Do While .Busy Or .readyState < 4: DoEvents: Loop

        Set products = .document.querySelectorAll(".card.js-product-card")

        ' Timer returns seconds since midnight as a Single, so keep it in a Single.
        startTime = Timer
        Do
            DoEvents
            .document.parentWindow.execScript "window.scrollBy(0, window.innerHeight);", "javascript"

            ' Give the page a moment to render the newly scrolled-in cards
            ' before counting how many prices are present.
            Application.Wait Now + TimeSerial(0, 0, 1)
            Set finalPrices = .document.querySelectorAll(".card.js-product-card span.final-price")

            elapsed = Timer - startTime
            ' Timer wraps to 0 at midnight; compensate so the timeout still fires.
            If elapsed < 0 Then elapsed = elapsed + SECONDS_PER_DAY
            If elapsed > MAX_WAIT_SEC Then Exit Do
        Loop Until finalPrices.Length = products.Length

        Set sellers = .document.querySelectorAll(".card.js-product-card .shop.cf a[title]")
        Set availability = .document.querySelectorAll(".card.js-product-card span.availability")

        ' The three node lists are matched independently and the scroll loop
        ' may have timed out, so only walk the rows all three can supply.
        rowCount = sellers.Length
        If finalPrices.Length < rowCount Then rowCount = finalPrices.Length
        If availability.Length < rowCount Then rowCount = availability.Length

        With ThisWorkbook.Worksheets("Sheet1")
            For i = 0 To rowCount - 1
                ' Read the title attribute explicitly rather than relying on the
                ' element's implicit default member.
                ' NOTE(review): title is presumed to hold the shop name - verify
                ' against the page markup.
                .Cells(i + 1, 1).Value = sellers.item(i).getAttribute("title")
                .Cells(i + 1, 2).Value = finalPrices.item(i).innerText
                .Cells(i + 1, 3).Value = availability.item(i).innerText
            Next i
        End With
    End With

CleanUp:
    ' Always close the browser, even after a failure; ignore errors from Quit
    ' itself (e.g. the user already closed the window).
    On Error Resume Next
    ie.Quit
    Set ie = Nothing
    On Error GoTo 0

    ' Surface the original error to the caller once cleanup is done.
    If errNum <> 0 Then Err.Raise errNum, errSrc, errDesc
    Exit Sub

Failed:
    ' Capture the error details before Resume clears the Err object.
    errNum = Err.Number
    errSrc = Err.Source
    errDesc = Err.Description
    Resume CleanUp
End Sub

Upvotes: 1

Related Questions