Giles Hunt
Giles Hunt

Reputation: 531

Node js Request - Empty body in response

I am using the Node.js request module to retrieve the HTML from the following URL, but the body comes back empty.

var request = require("request");

var url = 'http://www.topshop.com/en/tsuk/product/bags-accessories-1702216/scarves-465/feather-wings-5884878?bi=0&ps=20';

// Fetch the page and log the body, the status code and the raw response
// object so the result of the request can be inspected.
request({
    uri: url
    }, function (error, response, body) {

        console.log(body);

        // Check the error before reading `response`: when the request fails,
        // reading response.statusCode would throw instead of reporting the
        // actual problem.
        if (error) {
            console.log('fail');
            console.log('no response # ' + error);
            return;
        }

        // Strict numeric comparison rather than a loose match against '200'.
        if (response.statusCode !== 200) {
            console.log('fail');
            console.log(response.statusCode + ' # ' + error);
        } else {
            console.log(response.statusCode);
            console.log('############');
            console.log(response);
        }
    });

On closer inspection I can see this in the response:

_header: 'GET /webapp/wcs/stores/servlet/CatalogNavigationSearchResultCmd?langId=-1&storeId=12556&catalogId=33057&beginIndex=1&viewAllFlag=false&pageSize=20&searchTermScope=3&searchTermOperator=LIKE&searchType=ALL&sort_field=Relevance&searchTerm=TS19M11KRED&x=25&y=11&geoip=search HTTP/1.1\r\nreferer: http://www.topshop.com/en/tsuk/product/bags-accessories-1702216/scarves-465/feather-wings-5884878?bi=0&ps=20&geoip=prod\r\nhost: www.topshop.com\r\nConnection: close\r\n\r\n',
_headers:
  { referer: 'http://www.topshop.com/en/tsuk/product/bags-accessories-1702216/scarves-465/feather-wings-5884878?bi=0&ps=20&geoip=prod',
    host: 'www.topshop.com' },

I assume this means that there has been a redirect, even though it has returned a 200 OK instead of a 302 redirect?

I'm not sure of the best way to retrieve the body from the redirect? Do I need to make another request to the URL in the header? But shouldn't the response code be a 302 in this case instead of a 200?

Any help appreciated.

Upvotes: 7

Views: 6490

Answers (1)

rsp
rsp

Reputation: 111316

What you show seems like something that happened after a redirect - see that the referer is set to your original URL.

Maybe you should set more headers, like User-Agent, because some servers don't respond without it.

For example, see the code that I wrote for this answer:

'use strict';
var request = require('request');
var url = 'https://api.github.com/users/rsp';
// GET the URL with an explicit User-Agent header and log one of three
// outcomes: the error, a non-200 status code, or the html_url field of
// the parsed response.
request.get({
    url: url,
    json: true,
    // The User-Agent header is the part this example is demonstrating.
    headers: {'User-Agent': 'request'}
  }, (err, res, data) => {
    // err is checked first, before res is read.
    if (err) {
      console.log('Error:', err);
    } else if (res.statusCode !== 200) {
      // Any status other than 200 is reported by its code only.
      console.log('Status:', res.statusCode);
    } else {
      // data is already parsed as JSON:
      console.log(data.html_url);
    }
});

It returns:

  • https://github.com/rsp

Note that it doesn't work without the User-Agent header:

'use strict';
var request = require('request');
var url = 'https://api.github.com/users/rsp';
// GET the URL and log one of three outcomes: the error, a non-200 status
// code, or the html_url field of the parsed response.
// No headers option is passed, so no User-Agent is set by this code.
request.get({
    url: url,
    json: true,
  }, (err, res, data) => {
    // err is checked first, before res is read.
    if (err) {
      console.log('Error:', err);
    } else if (res.statusCode !== 200) {
      // Any status other than 200 is reported by its code only.
      console.log('Status:', res.statusCode);
    } else {
      // data is already parsed as JSON:
      console.log(data.html_url);
    }
});

It returns:

  • Status: 403

The same URL, the same code - the only difference is the User-Agent header.

Upvotes: 5

Related Questions