Nico Rodriguez
Nico Rodriguez

Reputation: 55

VBA - Scrape HTML table without id

I am trying to get data from a html table with VBA. After selecting a value from a list box, filling a text box and clicking a button the table appears. But the url of the website does not change.

My program does fill the box, select the list box value and click the "search" button, but then I can't get the data from the table.

I need the values of the table's cells at the end of the page. (second < t d >)

Here's the url of the page:

Code:

Sub Info()

Dim enlace As String
Dim id As String
Dim lista
Dim rut As Integer
Dim i As Integer
Dim largo As Integer

largo = Worksheets("Lista").Cells(rows.Count, 1).End(xlUp).Row

id = Worksheets("Lista").Cells(2, 1).Value
lista = Split(id, "-")
rut = lista(0)
enlace = "http://www.cmfchile.cl/institucional/mercados/entidad.php?auth=&send=&mercado=V&rut=" & rut & "&grupo=&tipoentidad=FINRE&vig=VI&row=AAAw+cAAhAABP4MAAz&control=svs&pestania=1"

Set objIE = CreateObject("InternetExplorer.application")
objIE.Visible = False
objIE.Navigate (enlace)
Do
    If objIE.ReadyState = 4 Then
        objIE.Visible = False
        Exit Do
    Else
        DoEvents
        End If
Loop

Dim button_name As String
button_name = "Aportantes"

Set link = objIE.document.getElementsByTagName("A")
For Each Hyperlink In link
If InStr(Hyperlink.innerText, button_name) > 0 Then
    Hyperlink.Click
Exit For
End If
Next

Dim nuevoLink As String
nuevoLink = Hyperlink

objIE.Quit

Set ie = CreateObject("InternetExplorer.application")
ie.Visible = False
ie.Navigate (nuevoLink)
Do
    If ie.ReadyState = 4 Then
        ie.Visible = False
        Exit Do
    Else
        DoEvents
        End If
Loop

Dim sem As String
Dim ano As Integer
sem = "03"
ano = 2018

Dim aportantes As Object
Dim cuotas_emitidas As Object

ie.document.getElementById("semestre").Value = sem
ie.document.getElementById("aa").Value = ano
Set elems = ie.document.getElementsByTagName("input")
For Each e In elems
If (e.getAttribute("value") = "Consultar") Then
    e.Click
    ''HERE IS THE PROBLEM
    Set aportantes = ie.document.getElementsByTagName("table")(1).getElementsByTagName("tr")(0).getElementsByTagName("tr")(1)
    ThisWorkbook.Worksheets("Lista").Cells(i, 4).Value = aportantes
    Set cuotas_emitidas = ie.document.getElementsByTagName("table")(1).getElementsByTagName("tr")(1).getElementsByTagName("tr")(1).innerText
    ThisWorkbook.Worksheets("Lista").Cells(i, 5).Value = cuotas_emitidas
End If
Next e
End Sub

HTML:

<table>
 <tbody>
    <tr>
    <td class="fondoOscuro">2.01.60 TOTAL APORTANTES</td>
    <td>58</td>
  </tr>

  <tr>
    <td class="fondoOscuro">2.01.70 CUOTAS EMITIDAS</td>
    <td>20000000 </td>
  </tr>
  <tr>
    <td class="fondoOscuro">2.01.71 CUOTAS PAGADAS</td>
    <td>7691000</td>

  </tr>
  <tr>
    <td class="fondoOscuro">2.01.72 CUOTAS SUSCRITAS Y NO PAGADAS</td>
    <td>0 </td>
  </tr>
  <tr>
    <td class="fondoOscuro">2.01.73 NUMERO DE CUOTAS CON PROMESA DE SUSCRIPCION Y PAGO</td>
    <td>0  </td>
  </tr>
  <tr>
    <td class="fondoOscuro">2.01.74 NUMERO DE CONTRATOS DE PROMESAS DE SUSCRIPCION Y PAGO</td>
    <td>0</td>
  </tr>
  <tr>
    <td class="fondoOscuro">2.01.75 NUMERO DE PROMITENTES SUSCRIPTORES DE CUOTAS</td>
    <td>0 </td>
  </tr>
  <tr>
    <td class="fondoOscuro">2.01.80 VALOR LIBRO DE LA CUOTA</td>
    <td>1.0059 </td>
  </tr>
</tbody></table>

'

Upvotes: 1

Views: 1896

Answers (2)

QHarr
QHarr

Reputation: 84465

XHR:

You can do the whole thing with XHR and scrape without opening a browser. Change the Activesheet output to the sheet you want to write the table to (WriteTable hTable, 1, ActiveSheet).

Note the arguments for the POST body include:

  1. mm=12 # months
  2. aa=2017 year
  3. rut=9278 rut code

Code:

Public Sub GetTable()
    Dim sResponse As String, hTable As Object, id As String, lista() As String, rut As String
    Dim strBody As String
    id = Worksheets("Lista").Cells(2, 1).Value
    lista = Split(id, "-")
    rut = lista(0)

    strBody = "mm=12&aa=2017&rut=" & rut
    With CreateObject("MSXML2.XMLHTTP")
        .Open "POST", "http://www.cmfchile.cl//institucional/mercados/entidad.php?auth=&send=&mercado=V&rut=9278&grupo=&tipoentidad=FINRE&vig=VI&row=AAAw%20cAAhAABP4MAAz&control=svs&pestania=27", False
        .setRequestHeader "Content-Type", "application/x-www-form-urlencoded"
        .send strBody
        sResponse = StrConv(.responseBody, vbUnicode)
    End With

    sResponse = Mid$(sResponse, InStr(1, sResponse, "<!DOCTYPE "))

    With CreateObject("htmlFile")
        .Write sResponse
        Set hTable = .getElementsByTagName("table")(1)
    End With
    Application.ScreenUpdating = False
    WriteTable hTable, 1, ActiveSheet
    Application.ScreenUpdating = True
End Sub

Public Sub WriteTable(ByVal hTable As Object, Optional ByVal startRow As Long = 1, Optional ByVal ws As Worksheet)
    If ws Is Nothing Then Set ws = ActiveSheet

    Dim tSection As Object, tRow As Object, tCell As Object, tr As Object, td As Object, R As Long, C As Long, tBody As Object
    R = startRow
    With ws
        Set tBody = hTable.getElementsByTagName("tbody")
        For Each tSection In tBody               'HTMLTableSection
            Set tRow = tSection.getElementsByTagName("tr") 'HTMLTableRow
            For Each tr In tRow
                R = R + 1
                Set tCell = tr.getElementsByTagName("td")
                C = 1
                For Each td In tCell             'DispHTMLElementCollection
                    .Cells(R, C).Value = td.innerText 'HTMLTableCell
                    C = C + 1
                Next td
            Next tr
        Next tSection
    End With
End Sub

With browser (using WriteTable sub from above as well)

Option Explicit
Public Sub GetInfo()
    Dim ie As New InternetExplorer, hTable As HTMLTable
    Application.ScreenUpdating = False
    With ie
        .Visible = True
        .navigate "http://www.cmfchile.cl/institucional/mercados/entidad.php?auth=&send=&mercado=V&rut=9278&grupo=&tipoentidad=FINRE&vig=VI&row=AAAw%20cAAhAABP4MAAz&control=svs&pestania=27"
        While .Busy Or .readyState < 4: DoEvents: Wend
        .document.getElementById("aa").Value = 2017
        .document.forms("consulta").submit
        Do
            DoEvents
            On Error Resume Next
            Set hTable = .document.getElementsByTagName("table")(1)
            On Error GoTo 0
        Loop While hTable Is Nothing

        WriteTable hTable, 1, ActiveSheet
    End With
    Application.ScreenUpdating = True
End Sub

Output:

output


References:

HTML Object library via VBE > Tools > References


Adjusting to your code outline but still using pestania = 27

Option Explicit
Public Sub GetInfo()
    Dim ie As New InternetExplorer, hTable As HTMLTable, lista() As String, id As String, rut As String, enlace As String
    Application.ScreenUpdating = False

    id = Worksheets("Lista").Cells(2, 1).Value
    lista = Split(id, "-")
    rut = lista(0)
    enlace = "http://www.cmfchile.cl/institucional/mercados/entidad.php?auth=&send=&mercado=V&rut=" & rut & "&grupo=&tipoentidad=FINRE&vig=VI&row=AAAw+cAAhAABP4MAAz&control=svs&pestania=27"

    With ie
        .Visible = True
        .navigate enlace  '"http://www.cmfchile.cl/institucional/mercados/entidad.php?auth=&send=&mercado=V&rut=9278&grupo=&tipoentidad=FINRE&vig=VI&row=AAAw%20cAAhAABP4MAAz&control=svs&pestania=27"
        While .Busy Or .readyState < 4: DoEvents: Wend
        .document.getElementById("aa").Value = 2017
        .document.forms("consulta").submit
        Do
            DoEvents
            On Error Resume Next
            Set hTable = .document.getElementsByTagName("table")(1)
            On Error GoTo 0
        Loop While hTable Is Nothing

        WriteTable hTable, 1, ActiveSheet
    End With
    Application.ScreenUpdating = True
End Sub

Upvotes: 1

SIM
SIM

Reputation: 22440

You have already got nice answers. The thing is when QHarr decides to come into play, he barely leaves any alternative for others to take a stand. However, the following script will spare you some extra times. I've used IE to get the page source and then applied faster method to manage the rest. I tried to parse the relevant tabular data populated against year 2016. Feel free to change the year as per your requirement.

Sub ScrapeTabularInfo()
    Dim IE As New InternetExplorer, Html As HTMLDocument
    Dim Htmldoc As New HTMLDocument, post As Object, elem As Object
    Dim trow As Object, R&, C&

    With IE
        .Visible = False
        .navigate "http://www.cmfchile.cl/institucional/mercados/entidad.php?auth=&send=&mercado=V&rut=9278&grupo=&tipoentidad=FINRE&vig=VI&row=AAAw%20cAAhAABP4MAAz&control=svs&pestania=27"
        While .Busy Or .readyState < 4: DoEvents: Wend
        Set Html = .document
        Html.querySelector("#aa").innerText = 2016
        Html.querySelector("input[value='Consultar']").Click
        Do: Set post = Html.getElementsByTagName("table")(1): DoEvents: Loop While post Is Nothing
    End With

    Htmldoc.body.innerHTML = Html.DocumentElement.outerHTML

    For Each elem In Htmldoc.getElementsByTagName("table")(1).Rows
        For Each trow In elem.Cells
            C = C + 1: Cells(R + 1, C) = trow.innerText
        Next trow
        C = 0: R = R + 1
    Next elem
    IE.Quit
End Sub

The best approach here is to avail post request which you already have a demo.

Reference to add to the library (considering the fact that you have IE9 or later version for the .querySelector() to work properly):

Microsoft Internet Controls
Microsoft HTML Object Libray

Upvotes: 1

Related Questions