alf10087
alf10087

Reputation: 25

Scraping data from website using Excel VBA

I am going to the following website:

https://tools.usps.com/go/ZipLookupResultsAction!input.action?resultMode=1&companyName=&address1=1642+Harmon+Street&address2=&city=Berkeley&state=CA&urbanCode=&postalCode=&zip=94703

And I am trying to extract the first zip+4 that shows up (94703-2636).

Dim doc As HTMLDocument
Set doc = IE.document
On Error Resume Next
output = doc.getElementsByClassName("zip4")(0).innerText
'Sheet1.Range("E2").Value = output
MsgBox output

'IE.Quit
End Sub

This is how I am trying to do it, but either the textbox or adding the data to the range gives a blank answer. That's not the full code, but everything before seems to be working alright.

Any thoughts on how may I solve this? Thank you very much!

EDIT: This is my full code:

The cells it is referencing are the ones with the full address.

Sub USPS()

Dim IE As Object
Set IE = CreateObject("InternetExplorer.Application")
IE.Visible = True

IE.Navigate "https://tools.usps.com/go/ZipLookupAction!input.action?mode=1&refresh=true"
Do
DoEvents
Loop Until IE.READYSTATE = 4

Dim Address As String
Address = Sheet1.Range("A2").Value

Dim City As String
City = Sheet1.Range("B2").Value

Dim State As String
State = Sheet1.Range("C2").Value

Dim Zipcode As String
Zipcode = Sheet1.Range("D2").Value


Call IE.document.getElementbyID("tAddress").SetAttribute("value", Address)
Call IE.document.getElementbyID("tCity").SetAttribute("value", City)
With IE.document.getElementbyID("sState")
    For i = 0 To .Length - 1
        If .Item(i).Value = State Then
            .Item(i).Selected = True
            Exit For
        End If
    Next

End With

Call IE.document.getElementbyID("Zzip").SetAttribute("value", Zipcode)

Set ElementCol = IE.document.getElementbyID("lookupZipFindBtn")
ElementCol.Click


''''' Hard Part

Dim doc As HTMLDocument
Set doc = IE.document
On Error Resume Next
output = Trim(doc.getElementsByClassName("zip4")(0).innerText)
'Sheet1.Range("E2").Value = output
MsgBox output

'IE.Quit
End Sub

EDIT 2: XML with Dynamic URL?

Sub ZipLookUp()
Dim URL As String, xmlHTTP As Object, html As Object, htmlResponse As String
Dim SStr As String, EStr As String, EndS As Integer, StartS As Integer
Dim Zip4Digit As String

Dim number As String
Dim address As String
Dim city As String
Dim state As String
Dim zipcode As String
Dim abc As String

number = Sheet1.Range("A2")
address = Sheet1.Range("B2")
city = Sheet1.Range("C2")
state = Sheet1.Range("D2")
zipcode = Sheet1.Range("E2")

    URL = "https://tools.usps.com/go/ZipLookupResultsAction!input.action?resultMode=1&companyName=&address1="
    URL = URL & number & "+" & address & "&address2=&city=" & city & "&state=" & state & "&urbanCode=&postalCode=&zip=" & zipcode
    Set xmlHTTP = CreateObject("MSXML2.XMLHTTP")
    xmlHTTP.Open "GET", URL, False
    On Error GoTo NoConnect
    xmlHTTP.send
    On Error GoTo 0
    Set html = CreateObject("htmlfile")
    htmlResponse = xmlHTTP.responseText
    If htmlResponse = Null Then
        MsgBox ("Aborted - HTML response was null")
        GoTo End_Prog
    End If

    SStr = "<span class=""zip4"">": EStr = "</span><br />" 'Searches for a string within 2 strings
    StartS = InStr(1, htmlResponse, SStr, vbTextCompare) + Len(SStr)
    EndS = InStr(StartS, htmlResponse, EStr, vbTextCompare)
    Zip4Digit = Left(Mid(htmlResponse, StartS, EndS - StartS), 4)

    Sheet1.Range("F2").Value = Zip4Digit

GoTo End_Prog
NoConnect:
    If Err = -2147467259 Or Err = -2146697211 Then MsgBox "Error - No Connection": GoTo End_Prog 'MsgBox Err & ": " & Error(Err)
End_Prog:
End Sub

Upvotes: 0

Views: 1272

Answers (2)

Edward
Edward

Reputation: 1

Just a thought, did you think about using regular expressions rather than simple string searching? If not, there are some useful modules in VBA. An example is if you want to determine if a filename is an Excel file (stored in TestStr), you could do the following:

Dim oRe As VBScript_RegExp_10.regexp, TestStrIsExcel as Boolean

Dim oMatches As VBScript_RegExp_10.MatchCollection

Dim oMatch As VBScript_RegExp_10.Match

oRe.Pattern = "\.(xlm|xlsm|xls|xlsx)$"

oRe.IgnoreCase = True

' Find all occurrences

oRe.Global = False

Set oMatches = oRe.Execute(TestStr)

If oMatches.Count <> 0 Then TestStrIsExcel = true

Upvotes: 0

TCN
TCN

Reputation: 1651

This works for me, plus it's just faster. Opening an actual instance of IE is much slower than using XMLHTTP.

Public Sub ZipLookUp()
    Dim URL As String, xmlHTTP As Object, html As Object, document As Object, htmlResponse As String
    Dim SStr As String, EStr As String, EndS As Integer, StartS As Integer
    Dim Zip4Digit As String
    Dim number As String
    Dim address As String
    Dim city As String
    Dim state As String
    Dim zipcode As String
    Dim ws As Worksheet

    ' it is good practice to define sheets (and cells) instead of simply referencing them multiple times
    ' that way, you can change them much more easily it if you *ever* need to.
    Set ws = Sheets("Sheet1") ' instead of 'Sheet1', the correct syntax is Sheets("Sheet1").Range("A1")

    number = ws.Range("A2")
    address = ws.Range("B2")
    city = ws.Range("C2")
    state = ws.Range("D2")
    zipcode = ws.Range("E2")


    URL = "https://tools.usps.com/go/ZipLookupResultsAction!input.action?resultMode=1&companyName=&address1="
    URL = URL & number & "+" & address & "&address2=&city=" & city & "&state=" & state & "&urbanCode=&postalCode=&zip=" & zipcode
    Set xmlHTTP = CreateObject("MSXML2.XMLHTTP")
    xmlHTTP.Open "GET", URL, False
    On Error GoTo NoConnect
    xmlHTTP.send
    Do Until xmlHTTP.ReadyState = 4 And xmlHTTP.Status = 200: DoEvents: Loop
    On Error GoTo 0
    Set html = CreateObject("htmlfile")
    htmlResponse = xmlHTTP.ResponseText
    If htmlResponse = Null Then
        MsgBox ("Aborted - HTML response was null")
        GoTo End_Prog
    End If

    SStr = "<span class=""zip4"">": EStr = "</span><br />" 'Searches for a string within 2 strings
    StartS = InStr(1, htmlResponse, SStr, vbTextCompare) + Len(SStr)
    EndS = InStr(StartS, htmlResponse, EStr, vbTextCompare)
    Zip4Digit = Left(Mid(htmlResponse, StartS, EndS - StartS), 4)

    ws.Range("F2").Value = Zip4Digit

GoTo End_Prog
NoConnect:
    If Err = -2147467259 Or Err = -2146697211 Then MsgBox "Error - No Connection": GoTo End_Prog 'MsgBox Err & ": " & Error(Err)
End_Prog:
End Sub

Upvotes: 1

Related Questions