Meesha
Meesha

Reputation: 821

Extracting file URL from a Hyperlinked Image

Sub DownloadFile() 

    Dim myURL As String
    myURL = "http://data.bls.gov/timeseries/LNS14000000"
    Dim WinHttpReq As Object
    Set WinHttpReq = CreateObject("Microsoft.XMLHTTP")
    WinHttpReq.Open "GET", myURL, False, "username", "password"
    WinHttpReq.send

    myURL = WinHttpReq.responseBody
    If WinHttpReq.Status = 200 Then
        Set oStream = CreateObject("ADODB.Stream")
        oStream.Open
        oStream.Type = 1
        oStream.Write WinHttpReq.responseBody
        oStream.SaveToFile "C:\Downloads\abc.xlsx", 2
        oStream.Close
    End If

End Sub

I am trying to download data using VBA and found this code running pretty well. The webpage URL from which I am trying to download data is the one I have used in the code. Please take a moment and open the webpage as the Excel file I am trying to download is linked in an image and so I am not able to find the URL to download the file from that image. Please advice. Thanks.enter image description here

Upvotes: 2

Views: 225

Answers (2)

QHarr
QHarr

Reputation: 84465

Once response is stored in an HTMLDocument object you can use a CSS selector of

#download_xlsx

The "#" means id.

You can then click on this element

htmlDocument.querySelector("#download_xlsx").Click

VBA:

Option Explicit
Public Sub DownloadFile()
    Dim ie As New InternetExplorer
    With ie
        .Visible = True
        .navigate "https://data.bls.gov/timeseries/LNS14000000"
        While .Busy Or .readyState < 4: DoEvents: Wend
        .document.querySelector("#download_xlsx").Click
        .Quit
    End With
End Sub

Other:

You could even target the form and submit:

.document.forms("excel").submit

This triggers the POST request mentioned in the other answer (which is an awesome answer btw).

Upvotes: 0

user4039065
user4039065

Reputation:

You might be able to hit the form target directly with a POST (action="/pdq/SurveyOutputServlet") but it is expecting a post string of the <input> elements together with their values. Most if not all of these input elements have been filled out for you simply by going to that page. All you need to do is collect and concatenate them into a post string to be shoved back at the form.

Option Explicit

'base web page
Public Const csBLSGOVpg = "http://data.bls.gov/timeseries/LNS14000000"
'form's action target
Public Const csXLSDLpg = "http://data.bls.gov/pdq/SurveyOutputServlet"

Sub mcr_Stream_Buyer_Documents()
    Dim xmlDL As New MSXML2.ServerXMLHTTP60, xmlBDY As New HTMLDocument, adoFILE As Object
    Dim xmlSend As String, strFN As String, f As Long, i As Long

    With xmlDL
        .SetTimeouts 5000, 5000, 15000, 25000

        'start by going to the base web page
        .Open "GET", csBLSGOVpg, False
        .setRequestHeader "Content-Type", "text/javascript"
        .send

        If .Status <> "200" Then GoTo bm_Exit

        'get the source HTML for examination; zero the post string var
        xmlBDY.body.innerHTML = .responseText
        xmlSend = vbNullString

        'loop through the forms until you find the right one
        'then loop through the input elements and construct a post string
        For f = 0 To xmlBDY.getElementsByTagName("form").Length - 1
            If xmlBDY.getElementsByTagName("form")(f).Name = "excel" Then
                With xmlBDY.getElementsByTagName("form")(f)
                    For i = 0 To .getElementsByTagName("input").Length - 1
                        xmlSend = xmlSend & Chr(38) & _
                                 .getElementsByTagName("input")(i).Name & Chr(61) & _
                                 .getElementsByTagName("input")(i).Value
                    Next i
                    xmlSend = "?.x=5&.y=5" & xmlSend
                End With
                Exit For
            End If
        Next f
        'Debug.Print xmlSend   'check the POST string

        'send the POST string back to the form's action target
        .Open "POST", csXLSDLpg, False
        xmlDL.setRequestHeader "Content-Type", "application/x-www-form-urlencoded"
        xmlDL.send xmlSend

        If xmlDL.Status <> "200" Then GoTo bm_Exit

        'pick up the response as a stream and save it as a .XLSX
        strFN = Environ("USERPROFILE") & "\Documents\LNS14000000" & Format(Date, "yyyymmdd") & ".xlsx"
        On Error Resume Next
        Kill strFN
        On Error GoTo 0
        Set adoFILE = CreateObject("ADODB.Stream")
        adoFILE.Type = 1
        adoFILE.Open
        adoFILE.Write .responseBody
        adoFILE.SaveToFile strFN, 2
        Set adoFILE = Nothing

    End With
    Set xmlBDY = Nothing
    Set xmlDL = Nothing
    Exit Sub
bm_Exit:
    Debug.Print Err.Number & ":" & Err.Description
End Sub

This is pretty minimalist but it is all that you need. There is at least one non-standard input element that does not have a name but I elected to send its value back anyway. I did not sequentially remove things until it broke; I just built the POST string given what I retrieved and sent it back.

     XML stream download                LNS1400000020150916.xlsx

You will probably be moving this code to some sort of loop. Adjust the receiving file name accordingly. Each new page should adjust its own form input elements accordingly.

Upvotes: 2

Related Questions