Reputation: 531
I am using node js request to retrieve the HTML from the following URL but the body is returning empty.
var request = require("request");
var url = 'http://www.topshop.com/en/tsuk/product/bags-accessories-1702216/scarves-465/feather-wings-5884878?bi=0&ps=20';
// Fetch the page and log the body, then report either the failure or the
// status code followed by the full response object.
request({
  uri: url
}, function (error, response, body) {
  console.log(body);
  // On a transport-level failure (DNS, refused connection, ...) no response
  // object is available, so `error` must be checked before `statusCode` is read.
  if (error || !response) {
    console.log('fail');
    console.log(error);
    return;
  }
  // statusCode is a number; compare strictly instead of coercing against '200'.
  if (response.statusCode !== 200) {
    console.log('fail');
    console.log(response.statusCode + ' # ' + error);
  } else {
    console.log(response.statusCode);
    console.log('############');
    console.log(response);
  }
});
On closer inspection I can see this in the response:
_header: 'GET /webapp/wcs/stores/servlet/CatalogNavigationSearchResultCmd?langId=-1&storeId=12556&catalogId=33057&beginIndex=1&viewAllFlag=false&pageSize=20&searchTermScope=3&searchTermOperator=LIKE&searchType=ALL&sort_field=Relevance&searchTerm=TS19M11KRED&x=25&y=11&geoip=search HTTP/1.1\r\nreferer: http://www.topshop.com/en/tsuk/product/bags-accessories-1702216/scarves-465/feather-wings-5884878?bi=0&ps=20&geoip=prod\r\nhost: www.topshop.com\r\nConnection: close\r\n\r\n',
_headers:
{ referer: 'http://www.topshop.com/en/tsuk/product/bags-accessories-1702216/scarves-465/feather-wings-5884878?bi=0&ps=20&geoip=prod',
host: 'www.topshop.com' },
Which I assume means that there has been a redirect? Even though it has returned a 200 OK instead of a 302 redirect.
I'm not sure of the best way to retrieve the body from the redirect. Do I need to make another request to the URL in the header? But shouldn't the response code be a 302 in this case instead of a 200?
Any help appreciated.
Upvotes: 7
Views: 6490
Reputation: 111316
What you show seems like something that happened after a redirect - see that the referer
is set to your original URL.
Maybe you should set more headers, like User-Agent
because some servers don't respond without it.
For example, see the code that I wrote for this answer:
'use strict';
// Example: GET a JSON resource while sending an explicit User-Agent header,
// then print the `html_url` field of the parsed body.
var request = require('request');
var url = 'https://api.github.com/users/rsp';
request.get({
url: url,
json: true,
// Explicit User-Agent header sent with the request.
headers: {'User-Agent': 'request'}
}, (err, res, data) => {
if (err) {
// The request itself failed; log the error object.
console.log('Error:', err);
} else if (res.statusCode !== 200) {
// A response arrived but with a non-200 status; log the status code only.
console.log('Status:', res.statusCode);
} else {
// data is already parsed as JSON:
console.log(data.html_url);
}
});
It returns:
Note that it doesn't work without the User-Agent
header:
'use strict';
// Example: GET a JSON resource without passing any `headers` option
// (so no User-Agent is set by this code), then print the `html_url`
// field of the parsed body when the status is 200.
var request = require('request');
var url = 'https://api.github.com/users/rsp';
request.get({
url: url,
json: true,
}, (err, res, data) => {
if (err) {
// The request itself failed; log the error object.
console.log('Error:', err);
} else if (res.statusCode !== 200) {
// A response arrived but with a non-200 status; log the status code only.
console.log('Status:', res.statusCode);
} else {
// data is already parsed as JSON:
console.log(data.html_url);
}
});
It returns:
The same URL, the same code - the only difference is the User-Agent
header.
Upvotes: 5