YasserKhalil
YasserKhalil

Reputation: 9538

Set authentication XMLHTTP export HTML

I am trying the following code so as to be able to export specific HTML page but the website requires credentials. Here's my try

Sub NewTest()
Const sURL As String = "https://courses.myexcelonline.com/courses/take/microsoft-teams/lessons/11482643-microsoft-teams-course-overview"
Dim http As MSXML2.XMLHTTP60, html As MSHTML.HTMLDocument

Set http = New MSXML2.XMLHTTP60
Set html = New MSHTML.HTMLDocument

Dim sUser As String, sPass As String
sUser = Application.WorksheetFunction.EncodeURL("myemail")
sPass = Application.WorksheetFunction.EncodeURL("mypass")

Dim postData As String
postData = "user%5Bemail%5D=" & sUser & "&user%5Bpassword%5D=" & sPass

With http
    .Open "POST", sURL, False
    .setRequestHeader "User-Agent", "Mozilla/5.0"
    .setRequestHeader "Content-Type", "application/x-www-form-urlencoded"
    .send postData
    html.body.innerHTML = .responseText
    ExportHTML .responseText
End With

Stop
End Sub

Sub ExportHTML(sInput As String)
With CreateObject("ADODB.Stream")
    .Charset = "UTF-8"
    .Open
    .WriteText sInput
    .SaveToFile Environ("USERPROFILE") & "\Desktop\OutputHTML.html", 2
    .Close
End With
End Sub

I don't know what's wrong with my code .. I expect to have specific HTML page stored on my desktop but when I open the HTML page I didn't find the page but find the credentials request for email and password.

I have changed the url and this fixed the problem I think Const sURL As String = "https://courses.myexcelonline.com/users/sign_in". But I need to know how to get specific HTML page from the site?

Upvotes: 0

Views: 376

Answers (1)

mxswift
mxswift

Reputation: 106

It looks like you're sending a "GET" instead of "POST".

Since you are sending POST data with your request, you need to change the "GET" to "POST'

.Open "Get", sURL, False

To

.Open "POST", sURL, False

EDIT : The URL you provided does not correlate to a POST, but rather a GET.

You need to send a GET request to the login page first, followed by a POST to the same login URL page it is - https://courses.myexcelonline.com/users/sign_in, followed by a GET to your original URL provided since by that moment you're already logged in and can navigate to your URL correctly.

Here is an example POST data - The authenticity token can be found when you send you send the first GET to the login page in the HTML. To do this, get the .responseText after the GET request. You can then use the Split function to get the auth token like this.

<meta name="csrf-param" content="authenticity_token" />
<meta name="csrf-token" content="ReGa49vTQ0KqSZtje3VerA8q3O9J1RLO7lEU/+q1hccys6S1uXn6UigKiKTjDp2yEFQRpYpziRQLnXifJLxb+Q==" />

Dim authToken As String
authToken = Application.WorksheetFunction.EncodeURL(Split(Split(http.responseText, "csrf-token"" content=""")(1), """")(0))

Dim postData As String
postdata = "utf8=%E2%9C%93&authenticity_token=" & authToken & "&user%5Bemail%5D=" & sUser & "&user%5Bpassword%5D=" & sPass & "&user%5Bremember_me%5D=0"

Here is the final code

Sub NewTest()
Const sURL As String = "https://courses.myexcelonline.com/courses/take/microsoft-teams/lessons/11482643-microsoft-teams-course-overview"
Const sLoginURL As String = "https://courses.myexcelonline.com/users/sign_in"
Dim http As MSXML2.XMLHTTP60, html As MSHTML.HTMLDocument

Set http = New MSXML2.XMLHTTP60
Set html = New MSHTML.HTMLDocument

Dim sUser As String, sPass As String, sAuthToken As String, postData As String
sUser = Application.WorksheetFunction.EncodeURL("myemail")
sPass = Application.WorksheetFunction.EncodeURL("mypass")

'Load Login Page
With http
    .Open "GET", sLoginURL, False
    .setRequestHeader "User-Agent", "Mozilla/5.0"
    .setRequestHeader "Content-Type", "application/x-www-form-urlencoded"
    .send
    .WaitForResponse
    html.body.innerHTML = .responseText
End With

'Get Auth Token from HTML
sAuthToken = Application.WorksheetFunction.EncodeURL((Split( (Split(http.body.innerHTML, "csrf-token"" content=""")(1)), """")(0)))

'Construct the POST data that will be sent
postData = "utf8=%E2%9C%93&authenticity_token=" & sAuthToken & "&user%5Bemail%5D=" & sUser & "&user%5Bpassword%5D=" & sPass & "&user%5Bremember_me%5D=0"

'Attempt to login 
With http
    .Open "POST", sLoginURL, False
    .setRequestHeader "User-Agent", "Mozilla/5.0"
    .setRequestHeader "Content-Type", "application/x-www-form-urlencoded"
    .send (postData)
    .WaitForResponse
End With

'Add code here to verify if login was successful...

'If successful... navigate to sURL 
With http
    .Open "GET", sURL, False
    .setRequestHeader "User-Agent", "Mozilla/5.0"
    .setRequestHeader "Content-Type", "application/x-www-form-urlencoded"
    .send 
    .WaitForResponse
    html.body.innerHTML = .responseText
    ExportHTML .responseText
End With

Stop
End Sub

Sub ExportHTML(sInput As String)
With CreateObject("ADODB.Stream")
    .Charset = "UTF-8"
    .Open
    .WriteText sInput
    .SaveToFile Environ("USERPROFILE") & "\Desktop\OutputHTML.html", 2
    .Close
End With
End Sub
enter code here

Upvotes: 2

Related Questions