Reputation: 5729
I would like to read the source code of an HTML page with boost::asio.
Here is my sample code:
#include "stdafx.h"
#include <algorithm>
#include <exception>
#include <iostream>
#include <ostream>
#include <sstream>
#include <string>
#include <vector>
#include <boost/algorithm/string/replace.hpp>
#include <boost/asio.hpp>
int _tmain(int argc, _TCHAR* argv[])
{
std::string strHost = "www.fdj.fr";
std::string strPort = "80";
std::string strUrlPath = "https://www.fdj.fr/jeux/jeux-de-tirage/keno/resultats";
std::string strUserAgent = "Fiddler";
//std::vector<std::string> header;
unsigned int TimeOut = 5000;
//do_get(strHost, strPort, strUrlPath, header, TimeOut);
try
{
using boost::asio::ip::tcp;
boost::asio::io_service io_service;
// Get a list of endpoints corresponding to the server name.
tcp::resolver resolver(io_service);
tcp::resolver::query query(strHost, "http");
tcp::resolver::iterator endpoint_iterator = resolver.resolve(query);
tcp::resolver::iterator end;
// Try each endpoint until we successfully establish a connection.
tcp::socket socket(io_service);
boost::system::error_code error = boost::asio::error::host_not_found;
while (error && endpoint_iterator != end)
{
socket.close();
socket.connect(*endpoint_iterator++, error);
}
if (error)
throw boost::system::system_error(error);
// Form the request. We specify the "Connection: close" header so that the
// server will close the socket after transmitting the response. This will
// allow us to treat all data up until the EOF as the content.
boost::asio::streambuf request;
std::ostream request_stream(&request);
request_stream << "GET " << strUrlPath << " HTTP/1.0\r\n";
request_stream << "User-Agent: " << strUserAgent << "\r\n";
request_stream << "Host: " << strHost << "\r\n";
request_stream << "Accept: */*\r\n";
request_stream << "Connection: close\r\n\r\n";
// Send the request.
boost::asio::write(socket, request);
// Read the response status line.
boost::asio::streambuf response;
boost::asio::read_until(socket, response, "\r\n");
// Check that response is OK.
std::istream response_stream(&response);
std::string http_version;
response_stream >> http_version;
unsigned int status_code;
response_stream >> status_code;
std::string status_message;
std::getline(response_stream, status_message);
if (!response_stream || http_version.substr(0, 5) != "HTTP/")
{
std::cout << "Invalid response\n";
//return 1;
}
/*if (status_code != 200)
{
std::cout << "Response returned with status code " << status_code << "\n";
//return 1;
}*/
// Read the response headers, which are terminated by a blank line.
boost::asio::read_until(socket, response, "\r\n\r\n");
// Process the response headers.
std::string header;
while (std::getline(response_stream, header) && header != "\r")
std::cout << header << "\n";
std::cout << "\n";
// Write whatever content we already have to output.
if (response.size() > 0)
std::cout << &response;
// Read until EOF, writing data to output as we go.
while (boost::asio::read(socket, response,
boost::asio::transfer_at_least(1), error))
std::cout << &response;
if (error != boost::asio::error::eof)
throw boost::system::system_error(error);
char a;
std::cin>>a ;
}
catch (std::exception& e)
{
std::cout << "Exception: " << e.what() << "\n";
}
return 0;
}
The request sent is:
GET https://www.fdj.fr/jeux/jeux-de-tirage/keno/resultats HTTP/1.0
User-Agent: Fiddler
Host: www.fdj.fr
Accept: */*
Connection: close
If I run my program, I get:
If I run this request in Fiddler (by copy/paste), I get:
return code=200
(Accepted) and the page is loaded!
Does anyone have an idea why I get this 301 response? I think it's due to a boost::asio parameter, but which one?
Thanks a lot,
Best regards,
Nixeus
Upvotes: 1
Views: 1134
Reputation: 2750
There are two things happening here:
You are attempting to load an HTTPS URI over plain HTTP. This results in a 301 redirect that points you back to the original page over HTTPS. This indicates that the hosting server does not provide this resource over insecure HTTP.
Your HTTP implementation does not support redirects. You are making a request and delivering the first response. A full HTTP client would check for a 301 response and retry using the URI in the location header of the 301.
To fix this you will need to implement support for TLS/HTTPS and ideally HTTP redirects.
Upvotes: 4