A.Cod
A.Cod

Reputation: 71

VBA: Using Control F and Grabbing Relevant Data from IE Webpage

Goal: Make a VBA Macro that opens up a webpage via IE, loops through entire page, uses the Ctrl+F function to find MULTIPLE keywords, if those keywords are found, locates the row those keywords are in, and grabs a certain number of rows above and below that keyword row location and posts them to an excel sheet to be emailed out.

I have code that goes to the webpage, and uses Ctrl+F to find the keyword. This part works correctly. I don't know how to loop through the whole webpage and do this for multiple keywords. I also am having trouble finding the row location of each keyword 'hit' and posting it to excel (not that skilled with VBA).

Sub Find()

    'create a variable to refer to an IE application, and
    'start up a new copy of IE
    Dim ieApp As New SHDocVw.InternetExplorer
    Dim objectIE As Object

    'make sure you can see
    ieApp.Visible = True

    'go to the website of interest
    ieApp.Navigate "URL HERE"

    'wait for page to finish loading
    Do While ieApp.Busy
    Loop

    'Declare Keywords
    Dim keyword1 As String
    Dim found As Boolean

    keyword1 = "keyword"
    For i = 1 To ie.document.all.Length
        Application.Wait (Now + TimeValue("0:00:02"))
        SendKeys "^f"
        Application.Wait (Now + TimeValue("0:00:01"))
        SendKeys (keyword1)
        Application.Wait (Now + TimeValue("0:00:01"))
        SendKeys ("{ENTER}")
    Next i

End Sub

Upvotes: 1

Views: 3292

Answers (1)

omegastripes
omegastripes

Reputation: 12602

Here is the example, which implements keyword look up in webpage document text nodes, if found - expands the range to the entire table cell, then outputs all matches on to worksheet:

Sub Find()

    sKeyword = "language"
    sUrl = "http://stackoverflow.com/tags"

    Set oList = CreateObject("Scripting.Dictionary")
    With CreateObject("InternetExplorer.Application")

        .Visible = True
        ' Navigating to url
        .Navigate sUrl
        ' Wait for IE ready
        Do While .ReadyState <> 4 Or .Busy
            DoEvents
        Loop
        ' Wait for document complete
        Do While .Document.ReadyState <> "complete"
            DoEvents
        Loop

        '    ' Look up in the specified node - optional
        '    ' Wait for target node created
        '    Do While TypeName(.Document.getElementById("Content")) = "Null" ' replace Content with your Id
        '        DoEvents
        '    Loop
        '    ' Get target node
        '    Set oRoot = .Document.getElementById("Content")

        ' Look up in the entire document
        Set oRoot = .Document.getElementsByTagName("html")(0)
        Set oWalker = .Document.createTreeWalker(oRoot, 4, Null, False) ' NodeFilter.SHOW_TEXT = 4
        Set oNode = oWalker.currentNode
        Do
            Select Case True
                Case IsNull(oNode.NodeValue)
                Case oNode.NodeValue = ""
                Case InStr(oNode.NodeValue, sKeyword) = 0
                Case Else
                    ' Text node contains keyword
                    Debug.Print oNode.NodeValue
                    Do
                        ' Expand the range until thenode of the necessary type is enclosed
                        Set oNode = oNode.ParentNode
                        Debug.Print TypeName(oNode)
                        Select Case TypeName(oNode)
                            '   ' Non-table structures
                            '   Case "HTMLHtmlElement", "HTMLBody", "HTMLDivElement", "HTMLParagraphElement", "HTMLHeadingElement"
                            ' For tables
                            Case "HTMLHtmlElement", "HTMLBody", "HTMLTableRow", "HTMLTableCell"
                                Exit Do
                        End Select
                    Loop
                    ' Add to list
                    sText = oNode.innerText
                    Debug.Print sText
                    oList(oList.Count) = sText
            End Select
            ' Get next node
            oWalker.NextNode
            Set oPrev = oNode
            Set oNode = oWalker.currentNode
        Loop Until oNode Is oPrev
        .Quit

    End With

    ' Results output
    aList = oList.Items()
    Cells(1, 1).Resize(UBound(aList) + 1, 1).Value = aList

End Sub

As an example, for source page as follows

src

the output is

output

For multiple keywords search please elaborate the rule: do all keywords, or at least one of the keywords should be found in one sentence?

Upvotes: 2

Related Questions