Reputation: 9538
I am trying the following code so as to be able to export specific HTML page but the website requires credentials. Here's my try
Sub NewTest()
Const sURL As String = "https://courses.myexcelonline.com/courses/take/microsoft-teams/lessons/11482643-microsoft-teams-course-overview"
Dim http As MSXML2.XMLHTTP60, html As MSHTML.HTMLDocument
Set http = New MSXML2.XMLHTTP60
Set html = New MSHTML.HTMLDocument
Dim sUser As String, sPass As String
sUser = Application.WorksheetFunction.EncodeURL("myemail")
sPass = Application.WorksheetFunction.EncodeURL("mypass")
Dim postData As String
postData = "user%5Bemail%5D=" & sUser & "&user%5Bpassword%5D=" & sPass
With http
.Open "POST", sURL, False
.setRequestHeader "User-Agent", "Mozilla/5.0"
.setRequestHeader "Content-Type", "application/x-www-form-urlencoded"
.send postData
html.body.innerHTML = .responseText
ExportHTML .responseText
End With
Stop
End Sub
Sub ExportHTML(sInput As String)
With CreateObject("ADODB.Stream")
.Charset = "UTF-8"
.Open
.WriteText sInput
.SaveToFile Environ("USERPROFILE") & "\Desktop\OutputHTML.html", 2
.Close
End With
End Sub
I don't know what's wrong with my code .. I expect to have specific HTML page stored on my desktop but when I open the HTML page I didn't find the page but find the credentials request for email and password.
I have changed the url and this fixed the problem I think Const sURL As String = "https://courses.myexcelonline.com/users/sign_in"
. But I need to know how to get specific HTML page from the site?
Upvotes: 0
Views: 376
Reputation: 106
It looks like you're sending a "GET" instead of "POST".
Since you are sending POST data with your request, you need to change the "GET" to "POST'
.Open "Get", sURL, False
To
.Open "POST", sURL, False
EDIT : The URL you provided does not correlate to a POST, but rather a GET.
You need to send a GET request to the login page first, followed by a POST to the same login URL page it is - https://courses.myexcelonline.com/users/sign_in, followed by a GET to your original URL provided since by that moment you're already logged in and can navigate to your URL correctly.
Here is an example POST data - The authenticity token can be found when you send you send the first GET to the login page in the HTML. To do this, get the .responseText after the GET request. You can then use the Split function to get the auth token like this.
<meta name="csrf-param" content="authenticity_token" />
<meta name="csrf-token" content="ReGa49vTQ0KqSZtje3VerA8q3O9J1RLO7lEU/+q1hccys6S1uXn6UigKiKTjDp2yEFQRpYpziRQLnXifJLxb+Q==" />
Dim authToken As String
authToken = Application.WorksheetFunction.EncodeURL(Split(Split(http.responseText, "csrf-token"" content=""")(1), """")(0))
Dim postData As String
postdata = "utf8=%E2%9C%93&authenticity_token=" & authToken & "&user%5Bemail%5D=" & sUser & "&user%5Bpassword%5D=" & sPass & "&user%5Bremember_me%5D=0"
Here is the final code
Sub NewTest()
Const sURL As String = "https://courses.myexcelonline.com/courses/take/microsoft-teams/lessons/11482643-microsoft-teams-course-overview"
Const sLoginURL As String = "https://courses.myexcelonline.com/users/sign_in"
Dim http As MSXML2.XMLHTTP60, html As MSHTML.HTMLDocument
Set http = New MSXML2.XMLHTTP60
Set html = New MSHTML.HTMLDocument
Dim sUser As String, sPass As String, sAuthToken As String, postData As String
sUser = Application.WorksheetFunction.EncodeURL("myemail")
sPass = Application.WorksheetFunction.EncodeURL("mypass")
'Load Login Page
With http
.Open "GET", sLoginURL, False
.setRequestHeader "User-Agent", "Mozilla/5.0"
.setRequestHeader "Content-Type", "application/x-www-form-urlencoded"
.send
.WaitForResponse
html.body.innerHTML = .responseText
End With
'Get Auth Token from HTML
sAuthToken = Application.WorksheetFunction.EncodeURL((Split( (Split(http.body.innerHTML, "csrf-token"" content=""")(1)), """")(0)))
'Construct the POST data that will be sent
postData = "utf8=%E2%9C%93&authenticity_token=" & sAuthToken & "&user%5Bemail%5D=" & sUser & "&user%5Bpassword%5D=" & sPass & "&user%5Bremember_me%5D=0"
'Attempt to login
With http
.Open "POST", sLoginURL, False
.setRequestHeader "User-Agent", "Mozilla/5.0"
.setRequestHeader "Content-Type", "application/x-www-form-urlencoded"
.send (postData)
.WaitForResponse
End With
'Add code here to verify if login was successful...
'If successful... navigate to sURL
With http
.Open "GET", sURL, False
.setRequestHeader "User-Agent", "Mozilla/5.0"
.setRequestHeader "Content-Type", "application/x-www-form-urlencoded"
.send
.WaitForResponse
html.body.innerHTML = .responseText
ExportHTML .responseText
End With
Stop
End Sub
Sub ExportHTML(sInput As String)
With CreateObject("ADODB.Stream")
.Charset = "UTF-8"
.Open
.WriteText sInput
.SaveToFile Environ("USERPROFILE") & "\Desktop\OutputHTML.html", 2
.Close
End With
End Sub
enter code here
Upvotes: 2