Reputation: 31
I'm brand new to Node.js (v10.9.0) and wanted to make a simple web scraping tool that gets statistics and ranks for players on this page. No matter what I do, I can't make it work with this website. I tried multiple request methods, including http.request and https.request, and have gotten every method working with 'http://www.google.com'. However, every attempt for this specific website gives me either a 301 error or a socket hang up error. The location the 301 error gives me is the same link but with a '/' on the end, and requesting it results in a socket hang up. I know the site runs on port 443. Do some sites just block Node.js? Why are browsers able to connect but not scripts like this?
Please don't link me to any other threads; I've seen every single one, and none of them have helped.
// Minimal reproduction: issue a GET with the `request` library and log the outcome.
var request = require('request');
var options = {
  method: "GET",
  uri: 'https://www.smashboards.com',
  // NOTE(review): `rejectUnauthorized: false` turns off TLS certificate
  // verification; drop it unless it is actually needed for debugging.
  rejectUnauthorized: false,
  port: '443'
};
request(options, function (error, response, body) {
  console.log('error:', error); // Print the error if one occurred
  // `response` is undefined when the request fails, so guard the property access.
  console.log('statusCode:', response && response.statusCode); // Print the response status code if a response was received
  console.log('body:', body); // Print the response body (the page's HTML)
});
Error:
error: { Error: socket hang up
at createHangUpError (_http_client.js:322:15)
at TLSSocket.socketOnEnd (_http_client.js:425:23)
at TLSSocket.emit (events.js:187:15)
at endReadableNT (_stream_readable.js:1085:12)
at process._tickCallback (internal/process/next_tick.js:63:19) code: 'ECONNRESET' }
EDIT:
Adding this to my options object fixed my problem
headers: {
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.106 Safari/537.36'
}
Upvotes: 0
Views: 1654
Reputation: 31
OP Here
All I did was add:
headers: {
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.106 Safari/537.36'
}
to my options object, and it's working perfectly.
New code:
// Fetch the page with a browser-like User-Agent header and log the outcome.
var request = require('request');
var options = {
  method: 'GET',
  uri: 'https://www.smashboards.com',
  // NOTE(review): `rejectUnauthorized: false` turns off TLS certificate
  // verification; drop it unless it is actually needed for debugging.
  rejectUnauthorized: false,
  headers: {
    // Sending a browser-like User-Agent is the change that made the request succeed.
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.106 Safari/537.36'
  }
};
request(options, function (error, response, body) {
  console.log('error:', error); // Print the error if one occurred
  // `response` is undefined when the request fails, so guard the property access.
  console.log('statusCode:', response && response.statusCode); // Print the response status code if a response was received
  console.log('body:', body); // Print the response body (the page's HTML)
});
That's 12+ hours I'm never getting back.
Upvotes: 0