Reputation: 4168
Is it possible to essentially run a wget
from within a node.js app? I'd like to have a script that crawls a site, and downloads a specific file, but the href
of the link that goes to the file changes fairly often. So, I figured the easiest way to go about doing it would be to find the href
of the link, then just perform a wget on it.
Thanks!
Upvotes: 16
Views: 21613
Reputation: 2820
You can use the HTTPS client and the File System module from Node.js.
Here is an example with an asynchronous function. This function also handles redirects, which wget does for you.
const http = require("https");
const fs = require("fs");
/**
 * Download `url` and save the response body to `dest`, following HTTP
 * redirects the way wget does.
 *
 * The returned promise settles exactly once: it resolves after the file has
 * been fully written and closed, and rejects on a network error, a file
 * system error, a non-2xx final status, or too many redirects.
 *
 * @param {string} url - Absolute URL to fetch. Its protocol must match the
 *   client module bound to `http` in this file.
 * @param {string} dest - Path of the file to write.
 * @param {number} [maxRedirects=20] - How many redirects may still be
 *   followed before giving up.
 * @returns {Promise<void>}
 */
function wget(url, dest, maxRedirects = 20) {
  const redirectCodes = [301, 302, 303, 307, 308];

  return new Promise((res, rej) => {
    const request = http.get(url, (response) => {
      const { statusCode, headers } = response;

      if (redirectCodes.includes(statusCode) && headers.location) {
        // Discard the redirect body so the socket is released.
        response.resume();
        if (maxRedirects <= 0) {
          rej(new Error(`Too many redirects while fetching ${url}`));
          return;
        }
        let nextUrl;
        try {
          // Location may be relative; resolve it against the current URL.
          nextUrl = new URL(headers.location, url).href;
        } catch (err) {
          rej(err);
          return;
        }
        // Chain the outer promise to the redirected download; without this
        // the caller's promise would never settle.
        res(wget(nextUrl, dest, maxRedirects - 1));
        return;
      }

      if (statusCode < 200 || statusCode >= 300) {
        // Do not save an error page as if it were the requested file.
        response.resume();
        rej(new Error(`Request for ${url} failed with status code ${statusCode}`));
        return;
      }

      // Only open the destination once there is a real body to write.
      const file = fs.createWriteStream(dest);
      response.on("error", rej);
      file.on("error", rej);
      file.on("finish", () => {
        file.close((err) => (err ? rej(err) : res()));
      });
      response.pipe(file);
    });

    // Connection-level failures (DNS, refused, TLS) surface here.
    request.on("error", rej);
  });
}
Please note that you need to use the http
or https
module, depending on the protocol of your URL.
Upvotes: 0
Reputation: 21
You can use node-wget. Works in cases where 'wget' is not possible
Upvotes: 2
Reputation: 3101
While it might be a little more verbose than some third-party stuff, Node's core HTTP
module provides an HTTP client you could use for this:
var http = require('http');
// Target of the GET request: host, port and path of the page to fetch.
const options = {
  host: 'www.site2scrape.com',
  port: 80,
  path: '/page/scrape_me.html',
};

// Fetch the page, accumulate the whole body in memory, then print it.
const req = http.get(options, (response) => {
  // Decode at the stream level: without this each chunk is a Buffer that is
  // coerced to a string on its own, which can corrupt a multi-byte UTF-8
  // character that happens to be split across two chunks.
  response.setEncoding('utf8');

  let resData = '';
  response.on('data', (chunk) => {
    resData += chunk;
  });
  response.on('end', () => {
    console.log(resData);
  });
});

// Connection-level failures (DNS, refused connection, ...) are reported here.
req.on('error', (err) => {
  console.log(`Request error: ${err.message}`);
});
Upvotes: 16
Reputation: 39223
For future reference though, I would recommend request, which makes it this easy to fetch that file:
var request = require("request");
// Fetch `url`; the callback receives any error, the response object and the body.
request(url, (err, res, body) => {
  // Do funky stuff with body
});
Upvotes: 21
Reputation: 6958
You can just use wget.
var exec = require('child_process').exec;
// Run the wget binary through the shell and report whether it succeeded.
// `child` is declared explicitly: assigning to an undeclared name creates an
// implicit global and throws a ReferenceError in strict mode / ES modules.
const child = exec("/path/to/wget http://some.domain/some.file", (error, stdout, stderr) => {
  // `error` is non-null when the command could not be run or exited non-zero.
  if (error !== null) {
    console.log(`ERROR: ${error}`);
    return;
  }
  console.log("YEAH IT WORKED");
});
Upvotes: 1
Reputation: 24207
You can run an external command using child_process:
const util = require('util');
const { exec, execFile } = require('child_process');

// URL of the file to download (placeholder value).
const url = 'url to file';

// Run wget without going through a shell. The URL is passed as a single
// argument, so shell metacharacters in a scraped href (`;`, `&&`, `$(...)`,
// backticks) cannot inject commands, as they could with exec('wget ' + url).
// The `--` stops wget from treating a URL that starts with `-` as an option.
const child = execFile('wget', ['--', url], (error, stdout, stderr) => {
  console.log(`stdout: ${stdout}`);
  console.log(`stderr: ${stderr}`);
  // `error` is non-null when wget could not be started or exited non-zero.
  if (error !== null) {
    console.log(`exec error: ${error}`);
  }
});
Upvotes: 10