kiran mamalwad
kiran mamalwad

Reputation: 159

Data Scraping by Tag and Class

I'm trying to copy data from web-site, I need the all range of sizes,Price,Amenities,Specials, Reserve. I frame below code but I'm NOT able to copy element the below is now working. getting to many errors. Can anybody please look into this?

 Sub gostoreit()

Dim ie As New InternetExplorer, ws As Worksheet
Dim element As IHTMLElement
Set ws = ThisWorkbook.Worksheets("Unit Data")
With ie
    .Visible = True
    .Navigate2 "" & 
"https://www.gostoreit.com/locations/georgia/cumming/go-store-cumming/"

    While .Busy Or .readyState < 4: DoEvents: Wend

    Dim listings As Object, listing As Object, headers(), results(), r 
As Long, c As Long, item As Object
    headers = Array("Size", "promo", "Reguler Price", "Online Price", "Listing Active", "features")
    Set listings = .document.getElementsByTagName("l-main-container")
    ReDim results(1 To listings.Length, 1 To UBound(headers) + 1)
    For Each listing In listings

        r = r + 1

        results(r, 1) = listing.getElementsByClassName("size_txt")(0).innerText 'Size
        results(r, 2) = listing.getElementsByClassName("helpDiscounts ls_discountsTitleSmall")(0).innerText 'promo(example. First Month Free)
        results(r, 3) = listing.getElementsByClassName("wasPrice")(0).innerText 'reguler price
        results(r, 4) = listing.getElementsByClassName("ls_unit_price")(0).innerText 'online price
        results(r, 5) = listing.getElementsByClassName("unitSelectButtonRES isRESBut")(0).innerText ' listing active
        results(r, 6) = listing.getElementsByClassName("tableUnitType _uSpan")(0).innerText ' features


    Next
    ws.Cells(1, 1).Resize(1, UBound(headers) + 1) = headers
    ws.Cells(2, 1).Resize(UBound(results, 1), UBound(results, 2)) = results
    .Quit

End With
End Sub

Upvotes: 3

Views: 110

Answers (2)

QHarr
QHarr

Reputation: 84465

Use the iframe src and then process way we have discussed before (as my preference) i.e. identify rows then dump row html into surrogate HTMLDocument variable to leverage querySelector at more granular level. I've ignored reserve, as this shows no variation and you can auto-populate these with default. If wanted they can easily be added.

Option Explicit

'VBE > Tools > References: Microsoft Internet Controls
Public Sub GetData()
    Dim ie As Object
    Set ie = CreateObject("InternetExplorer.Application")
    With ie
        .Visible = True
        .Navigate2 "https://www.clickandstor.com/CAS_2.5.16/sorter/controller.php?fid=1162&mode=unit-table-p&target=casDiv1&width=100%25&height=100px&js=1&displayId=lsFramer_0&u=https%3A%2F%2Fwww.gostoreit.com%2Flocations%2Fgeorgia%2Fcumming%2Fgo-store-cumming%2F&&v_in=2.5.16&dn=1559990768103&1559990768"

        While .Busy Or .readyState < 4: DoEvents: Wend

        Dim html2 As HTMLDocument, rows As Object, i As Long, results(), headers()
        headers = Array("Size", "Description", "On site price", "Web Price", "Offer")
        Set html2 = New HTMLDocument

        Do
            Set rows = .document.querySelectorAll(".unitRow") '.size_txt")
        Loop While rows.Length = 0
        ReDim results(1 To rows.Length, 1 To UBound(headers) + 1)
        On Error Resume Next
        For i = 1 To rows.Length - 1
            html2.body.innerHTML = rows.item(i).outerHTML
            results(i, 1) = html2.querySelector(".size_txt").innerText
            results(i, 2) = GetDescription(html2.querySelectorAll(".unitMoreHelpTitle, .pop_spacer_li"))
            results(i, 3) = html2.querySelector(".wasPrice").innerText
            results(i, 4) = html2.querySelector(".ls_unit_price").innerText
            results(i, 5) = html2.querySelector(".helpDiscounts").innerText
        Next
        On Error GoTo 0
        .Quit
    End With
    With ThisWorkbook.Worksheets("Sheet1")
        .Cells(1, 1).Resize(1, UBound(headers) + 1) = headers
        .Cells(2, 1).Resize(UBound(results, 1), UBound(results, 2)) = results
    End With
End Sub

Public Function GetDescription(ByVal nodeList As Object)
    Dim i As Long, arr()
    ReDim arr(0 To nodeList.Length - 1)
    For i = 0 To nodeList.Length - 1
        arr(i) = nodeList.item(i).innerText
    Next
    GetDescription = Join$(arr, Chr$(32))
End Function

If you want more verbose method of going via iframe. I choose to navigate on to the src of the iframe but you can use .document.getElementById("lsFramer_0").contentDocument.querySelector syntax to access

Option Explicit

'VBE > Tools > References: Microsoft Internet Controls
Public Sub GetData()
    Dim ie As Object
    Set ie = CreateObject("InternetExplorer.Application")
    With ie
        .Visible = True
        .Navigate2 "https://www.gostoreit.com/locations/georgia/cumming/go-store-cumming/"
         While .Busy Or .readyState < 4: DoEvents: Wend
        .Navigate2 .document.querySelector("#lsFramer_0").src
        While .Busy Or .readyState < 4: DoEvents: Wend

        Dim html2 As HTMLDocument, rows As Object, i As Long, results(), headers()
        headers = Array("Size", "Description", "On site price", "Web Price", "Offer")
        Set html2 = New HTMLDocument

        Do
            Set rows = .document.querySelectorAll(".unitRow") '.size_txt")
        Loop While rows.Length = 0
        ReDim results(1 To rows.Length, 1 To UBound(headers) + 1)
        On Error Resume Next
        For i = 1 To rows.Length - 1
            html2.body.innerHTML = rows.item(i).outerHTML
            results(i, 1) = html2.querySelector(".size_txt").innerText
            results(i, 2) = GetDescription(html2.querySelectorAll(".unitMoreHelpTitle, .pop_spacer_li"))
            results(i, 3) = html2.querySelector(".wasPrice").innerText
            results(i, 4) = html2.querySelector(".ls_unit_price").innerText
            results(i, 5) = html2.querySelector(".helpDiscounts").innerText
        Next
        On Error GoTo 0
        .Quit
    End With
    With ThisWorkbook.Worksheets("Sheet1")
        .Cells(1, 1).Resize(1, UBound(headers) + 1) = headers
        .Cells(2, 1).Resize(UBound(results, 1), UBound(results, 2)) = results
    End With
End Sub

Public Function GetDescription(ByVal nodeList As Object)
    Dim i As Long, arr()
    ReDim arr(0 To nodeList.Length - 1)
    For i = 0 To nodeList.Length - 1
        arr(i) = nodeList.item(i).innerText
    Next
    GetDescription = Join$(arr, Chr$(32))
End Function

Upvotes: 2

Alvaro CC
Alvaro CC

Reputation: 142

Hi , The code I formatted bellow is running fine for me until "ReDim results" line

The problem looks to be that there is not "l-main-container" element at the web page (see picture bellow)

Not main-container

Sub gostoreit()

Dim ie As New InternetExplorer, ws As Worksheet
Dim element As IHTMLElement
Set ws = ThisWorkbook.Worksheets("Unit Data")
With ie
.Visible = True
.Navigate2 "" & "https://www.gostoreit.com/locations/georgia/cumming/go-store-cumming/"
While .Busy Or .readyState < 4: DoEvents: Wend

Dim listings As Object, listing As Object, headers(), results(), r As Long, c As Long, item As Object
headers = Array("Size", "promo", "Reguler Price", "Online Price", "Listing Active", "features")
Set listings = .document.getElementsByTagName("l-main-container")
ReDim results(1 To listings.Length, 1 To UBound(headers) + 1)

For Each listing In listings
  r = r + 1
  results(r, 1) = listing.getElementsByClassName("size_txt")(0).innerText 'Size
  results(r, 2) = listing.getElementsByClassName("helpDiscounts ls_discountsTitleSmall") 
 (0).innerText 'promo(example. First Month Free)
  results(r, 3) = listing.getElementsByClassName("wasPrice")(0).innerText 'reguler price
  results(r, 4) = listing.getElementsByClassName("ls_unit_price")(0).innerText 'online 
  price results
  results(r, 4)(r, 5) = listing.getElementsByClassName("unitSelectButtonRES isRESBut")(0).innerText ' listing active
  results(r, 6) = listing.getElementsByClassName("tableUnitType _uSpan")(0).innerText ' features
Next

ws.Cells(1, 1).Resize(1, UBound(headers) + 1) = headers
ws.Cells(2, 1).Resize(UBound(results, 1), UBound(results, 2)) = results
.Quit
End With
End Sub

Upvotes: 1

Related Questions