Roland Smak Gregoor
Roland Smak Gregoor

Reputation: 1

Scrape html tables from multiple url's

I am trying to scrape the table contents from a serie of url's. I have been working on the code below, which executes the following steps:

  1. Add new worksheet based on value in Range A1:A3 in worksheet "Start" (1, 2, 3, etc)
  2. Create url based on the same value (, 2, 3, etc)
  3. Activate new worksheet
  4. Open URL and scrape table

The following happens:

  1. New worksheets are added (1, 2, 3)
  2. Worksheet("1") contains the table from
  3. Worksheets("2") and following remain empty

URL added - http://cao.szw.nl/index.cfm?fuseaction=app.caoOverzicht&menu_item_id=16534&hoofdmenu_item_id=16507&rubriek_item=392846&rubriek_id=392840&strSorteerWijze=asc&strGesorteerdeKolom=cao_naam&pagenumber=1

Note - the website is fully in Dutch ;)

Where is the mistake?

Sub TableExample()

    Dim IE As Object, doc As Object
    Dim strURL As String
    Dim ws As Worksheet, wsActive As Worksheet
    Dim i As Long, tabno As Long, nextrow As Long
    Dim cell As Range
    Dim MyNames As Range, MyNewSheet As Range
    Dim tbl As Object, rw As Object, cl As Object

    Set ws = Sheets("Start")

    With ws

       Dim rng As Range
       Set rng = .Range("A1:A3")

       For Each cell In rng
            Sheets.Add.Name = cell.Value
            Set wsActive = ThisWorkbook.ActiveSheet
            strURL = "http://xxx&pagenumber=" & cell.Value
            Set IE = CreateObject("InternetExplorer.Application")
            With IE
                '.Visible = True
                .navigate strURL
                Do Until .readyState = 4: DoEvents: Loop
                    Do While .Busy: DoEvents: Loop
                        Set doc = IE.document
                            With wsActive
                                For Each tbl In doc.getElementsByTagName("TABLE")
                                    tabno = tabno + 1
                                    nextrow = nextrow + 1
                                    Set rng = wsActive.Range("B" & nextrow)
                                    rng.Offset(, -1) = "Table " & tabno
                                    For Each rw In tbl.Rows
                                        For Each cl In rw.Cells
                                            rng.Value = cl.outerText
                                            Set rng = rng.Offset(, 1)
                                            i = i + 1
                                        Next cl
                                        nextrow = nextrow + 1
                                        Set rng = rng.Offset(1, -i)
                                        i = 0
                                    Next rw
                                Next tbl
                            End With
            End With
       Next
    End With

    IE.Quit

End Sub

Upvotes: 0

Views: 446

Answers (1)

gembird
gembird

Reputation: 14053

Checked you code and simplyfied it and it works for me. Btw. there is nothing wrong with for-each of table-rows-cells, they are valid objects.

Option Explicit

Sub TableExample()

    Dim IE As Object, doc As Object
    Dim strURL As String
    Dim ws As Worksheet, wsActive As Worksheet
    Dim i As Long, tabno As Long, nextrow As Long
    Dim cell As Range
    Dim MyNames As Range, MyNewSheet As Range
    Dim tbl As Object, rw As Object, cl As Object
    Dim rng As Range

    Set IE = CreateObject("InternetExplorer.Application")

    IE.Visible = True
    Set ws = Sheets("Start")
    Set rng = ws.Range("A1:A3")

    For Each cell In rng
        Sheets.Add.Name = cell.Value
        Set wsActive = ThisWorkbook.ActiveSheet
        strURL = "http://xxx&pagenumber=" & cell.Value
        IE.navigate strURL

        Do Until IE.readyState = 4: DoEvents: Loop

        Set doc = IE.document

        For Each tbl In doc.getElementsByTagName("TABLE")
            tabno = tabno + 1
            nextrow = nextrow + 1
            Set rng = wsActive.Range("B" & nextrow)
            rng.Offset(, -1) = "Table " & tabno
            For Each rw In tbl.Rows
                For Each cl In rw.Cells
                    rng.Value = cl.outerText
                    Set rng = rng.Offset(, 1)
                    i = i + 1
                Next cl
                nextrow = nextrow + 1
                Set rng = rng.Offset(1, -i)
                i = 0
            Next rw
        Next tbl
    Next cell

    IE.Quit

End Sub

enter image description here

Upvotes: 1

Related Questions