Reputation: 7323
How do I download a file with Node.js without using third-party libraries?
I don't need anything special. I only want to download a file from a given URL, and then save it to a given directory.
Upvotes: 678
Views: 914118
Reputation: 53665
Late 2022 edit:
Node v18 and later come with native Fetch API support built right into Node itself. There is no need for third-party libraries, imports, requires, or small hand-crafted shims: just use the global fetch the way you're used to from the browser.
(I.e. the second code block below no longer needs the const fetch = require(`./that-code-shown-above.js`); line, because fetch already exists globally.)
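For example (a minimal sketch, assuming Node 18+; the URL and output filename here are placeholders), the response body can even be streamed straight to disk instead of being buffered in memory first:
const fs = require("node:fs");
const { Readable } = require("node:stream");
const { pipeline } = require("node:stream/promises");

async function downloadToFile(url, dest) {
  const res = await fetch(url); // global fetch, available in Node 18+
  if (!res.ok) throw new Error(`Unexpected response: ${res.status}`);
  // res.body is a web ReadableStream; convert it to a Node stream and pipe it to disk
  await pipeline(Readable.fromWeb(res.body), fs.createWriteStream(dest));
}

downloadToFile("https://example.com/some/file.jpg", "file.jpg")
  .catch((e) => console.error(e));
Streaming avoids holding the whole file in memory, which matters for large downloads.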
Original answer:
For Node with Promise support, a simple Node shim for (part of) the Fetch API requires only a smattering of extra code, rather than needing to install any special modules:
const http = require(`http`);
const https = require(`https`);
module.exports = function fetch(url) {
// we're returning a promise, so this function can also be `await`ed
return new Promise((resolve, reject) => {
const data = [];
// make sure we use the correct protocol handler
const client = url.startsWith("https") ? https : http;
client
.request(url, (conn) => {
// aggregate the response stream into a single string.
conn.on(`data`, (chunk) => data.push(chunk));
conn.on(`end`, () => {
// decode the accumulated bytes as a utf8 string
const asBytes = Buffer.concat(data);
const asString = asBytes.toString(`utf8`);
// and then trigger the resolution, with the
// most frequently used fetch API "follow-up"
// functions:
resolve({
arrayBuffer: async () => asBytes,
json: async () => JSON.parse(asString),
text: async () => asString,
});
});
conn.on(`error`, (e) => reject(e));
})
.end();
});
};
Which you can then use for whatever you need, using the normal fetch syntax you're used to from the browser:
const fs = require(`fs`);
// As per the note above: remove the following line for Node 18 and above
const fetch = require(`./that-code-shown-above.js`);
fetch(`https://placecats.com/00/300`)
.then(res => res.arrayBuffer())
.then(bytes => fs.writeFileSync(`kitten.jpg`, new Uint8Array(bytes)))
.catch(e => console.error(e));
try {
const response = await fetch(`https://jsonplaceholder.typicode.com/todos/1`);
const data = await response.json();
console.log(data);
} catch (e) {
console.error(e);
}
// etc.
Upvotes: 10
Reputation: 296
Slightly modifying deadcoder0904's response for real beginners (i.e. me) who aren't even using [options] in Node's new fetch implementation:
const fs = require('node:fs');
let url = 'https://example.com/';
let fn = 'example.html';
let promise = fetch(url);
promise.then(response => response.blob())
.then(blob => blob.arrayBuffer())
.then(Buffer.from)
.then((buf) => {
fs.writeFileSync(fn, buf);
console.log('file written');
return;
});
The above code works for images, text, and entire webpages, just by changing fn and url, from what I have tested. More explanation of each step would be appreciated.
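As a rough annotation of those steps (same placeholder url and fn as above, just with comments added):
const fs = require('node:fs');

let url = 'https://example.com/';
let fn = 'example.html';

fetch(url)                             // 1. start the request; resolves to a Response object
  .then(response => response.blob())   // 2. read the entire response body as a Blob
  .then(blob => blob.arrayBuffer())    // 3. extract the Blob's raw bytes as an ArrayBuffer
  .then(ab => Buffer.from(ab))         // 4. wrap those bytes in a Node Buffer so fs can write them
  .then((buf) => {
    fs.writeFileSync(fn, buf);         // 5. write the Buffer to disk synchronously
    console.log('file written');
  });
Going through blob() first is not strictly required; response.arrayBuffer() would yield the same bytes and skip step 3.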
Upvotes: 0
Reputation: 8693
Just use native fetch, like:
const response = await fetch(url, {
method: 'POST',
body,
})
const buffer = await response.arrayBuffer()
fs.writeFileSync(`file${i}.txt`, Buffer.from(buffer))
My route sent a Response like:
return new Response(data, {
headers: { 'content-type': 'application/text' },
})
so I used Buffer.from, which you might or might not need depending on whether you want to download an image or a text file.
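For example, for a plain-text response the Buffer.from step can be dropped entirely (a sketch, assuming an ES module on Node 18+; the URL and filename are placeholders):
// Text variant: text() already yields a string, so no Buffer.from is needed
import fs from "fs";

const response = await fetch("https://example.com/notes.txt");
fs.writeFileSync("notes.txt", await response.text());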
Upvotes: 6
Reputation: 159125
As of Node 18, you can use the built-in fetch global, which implements the Fetch API and provides several methods for working with the result directly as plain text, JSON-parsed JavaScript, or binary data (an ArrayBuffer).
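For example, a minimal sketch of that approach (the whole response is buffered in memory before writing; the URL and filename mirror the example below):
const fs = require("fs");

fetch("http://i3.ytimg.com/vi/J---aiyznGQ/mqdefault.jpg")
  .then((response) => {
    if (!response.ok) throw new Error(`Response status was ${response.status}`);
    return response.arrayBuffer();
  })
  .then((bytes) => fs.writeFileSync("file.jpg", Buffer.from(bytes)))
  .then(() => console.log("Download Completed"))
  .catch((err) => console.error(err));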
For older versions of Node, you can create an HTTP GET request and pipe its response into a writable file stream:
const http = require('http'); // or 'https' for https:// URLs
const fs = require('fs');
const file = fs.createWriteStream("file.jpg");
const request = http.get("http://i3.ytimg.com/vi/J---aiyznGQ/mqdefault.jpg", function(response) {
response.pipe(file);
// after download completed close filestream
file.on("finish", () => {
file.close();
console.log("Download Completed");
});
});
If you want to support gathering information on the command line, like specifying a target file, directory, or URL, check out something like Commander.
There is a more detailed explanation at https://sebhastian.com/nodejs-download-file/.
Upvotes: 913
Reputation: 169
Here is the easiest way to do this:
import axios from 'axios';
import * as fs from 'fs';

async function archive_fromURL(url: string, destinationPath: string) {
  const response = await axios.get(url, { responseType: 'arraybuffer' });
  const fileData = Buffer.from(response.data, 'binary');
  return fs.writeFile(destinationPath, fileData, (err) => {
    if (err) { console.log("ErrorMessage:", err.message); }
  });
}
Upvotes: 0
Reputation: 5173
Speaking of handling errors, it's even better to listen for request errors too. I'd also validate by checking the response code: here only a 200 response code is considered a success, but other codes might be fine as well.
const fs = require('fs');
const http = require('http');
const download = (url, dest, cb) => {
const file = fs.createWriteStream(dest);
const request = http.get(url, (response) => {
// check if response is success
if (response.statusCode !== 200) {
return cb('Response status was ' + response.statusCode);
}
response.pipe(file);
});
// close() is async, call cb after close completes
file.on('finish', () => file.close(cb));
// check for request error too
request.on('error', (err) => {
fs.unlink(dest, () => cb(err.message)); // delete the (partial) file and then return the error
});
file.on('error', (err) => { // Handle errors
fs.unlink(dest, () => cb(err.message)); // delete the (partial) file and then return the error
});
};
Despite the relative simplicity of this code, I would advise using the request module, as it handles many more protocols (hello HTTPS!) which aren't natively supported by http.
That would be done like so:
const fs = require('fs');
const request = require('request');
const download = (url, dest, cb) => {
const file = fs.createWriteStream(dest);
const sendReq = request.get(url);
// verify response code
sendReq.on('response', (response) => {
if (response.statusCode !== 200) {
return cb('Response status was ' + response.statusCode);
}
sendReq.pipe(file);
});
// close() is async, call cb after close completes
file.on('finish', () => file.close(cb));
// check for request errors
sendReq.on('error', (err) => {
fs.unlink(dest, () => cb(err.message)); // delete the (partial) file and then return the error
});
file.on('error', (err) => { // Handle errors
fs.unlink(dest, () => cb(err.message)); // delete the (partial) file and then return the error
});
};
EDIT:
To make it work with https, change
const http = require('http');
to
const http = require('https');
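Or, as a small sketch, the protocol handler can be picked at runtime from the URL itself (similar to what some other answers here do):
const http = require('http');
const https = require('https');

// pick the handler that matches the URL's protocol
const get = (url, cb) => (url.startsWith('https') ? https : http).get(url, cb);
The rest of the code can then call get(url, ...) instead of http.get(url, ...).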
Upvotes: 93
Reputation: 6247
Based on the other answers above and some subtle issues, here is my attempt.
- Check that the file does not exist yet before hitting the network, using fs.access.
- Only create the fs.createWriteStream if you get a 200 OK status code. This reduces the amount of fs.unlink commands required to tidy up temporary file handles.
- Even on a 200 OK we can still possibly reject due to an EEXIST error if the file already exists (imagine another process created the file whilst we were doing network calls).
- Recursively call download if you get a 301 Moved Permanently or 302 Found (Moved Temporarily) redirect, following the link location provided in the header.
- The issue other answers had with recursively calling download was that they called resolve(download) instead of download(...).then(() => resolve()), so the Promise would return before the download actually finished. This way the nested chain of promises resolves in the correct order.

const https = require('https');
const fs = require('fs');
/**
* Download a resource from `url` to `dest`.
* @param {string} url - Valid URL to attempt download of resource
* @param {string} dest - Valid path to save the file.
* @returns {Promise<void>} - Returns asynchronously when successfully completed download
*/
function download(url, dest) {
return new Promise((resolve, reject) => {
// Check file does not exist yet before hitting network
fs.access(dest, fs.constants.F_OK, (err) => {
if (err === null) reject('File already exists');
const request = https.get(url, response => {
if (response.statusCode === 200) {
const file = fs.createWriteStream(dest, { flags: 'wx' });
file.on('finish', () => resolve());
file.on('error', err => {
file.close();
if (err.code === 'EEXIST') reject('File already exists');
else fs.unlink(dest, () => reject(err.message)); // Delete temp file
});
response.pipe(file);
} else if (response.statusCode === 302 || response.statusCode === 301) {
//Recursively follow redirects, only a 200 will resolve.
download(response.headers.location, dest).then(() => resolve());
} else {
reject(`Server responded with ${response.statusCode}: ${response.statusMessage}`);
}
});
request.on('error', err => {
reject(err.message);
});
});
});
}
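Usage would then look something like this (the URL and destination path are placeholders):
download('https://example.com/some/file.zip', './file.zip')
  .then(() => console.log('Saved'))
  .catch((err) => console.error(err));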
Upvotes: 22
Reputation: 24954
Modern version (ES6, Promise, Node 12.x+) that works for https/http. It also supports 302 and 301 redirects. I decided not to use third-party libraries, since this can easily be done with the standard Node.js libs.
// download.js
import fs from 'fs'
import https from 'https'
import http from 'http'
import { basename } from 'path'
import { URL } from 'url'
const TIMEOUT = 10000
function download (url, dest) {
const uri = new URL(url)
if (!dest) {
dest = basename(uri.pathname)
}
const pkg = url.toLowerCase().startsWith('https:') ? https : http
return new Promise((resolve, reject) => {
const request = pkg.get(uri.href).on('response', (res) => {
if (res.statusCode === 200) {
const file = fs.createWriteStream(dest, { flags: 'wx' })
res
.on('end', () => {
file.end()
// console.log(`${uri.pathname} downloaded to: ${path}`)
resolve()
})
.on('error', (err) => {
file.destroy()
fs.unlink(dest, () => reject(err))
}).pipe(file)
} else if (res.statusCode === 302 || res.statusCode === 301) {
// Recursively follow redirects, only a 200 will resolve.
download(res.headers.location, dest).then(() => resolve())
} else {
reject(new Error(`Download request failed, response status: ${res.statusCode} ${res.statusMessage}`))
}
})
request.setTimeout(TIMEOUT, function () {
request.abort()
reject(new Error(`Request timeout after ${TIMEOUT / 1000.0}s`))
})
})
}
export default download
Kudos to Andrey Tkachenko for his gist, which I modified.
Include it in another file and use it like this:
import download from './download.js' // download.js uses ES module syntax, so import it rather than require() it
const url = 'https://raw.githubusercontent.com/replace-this-with-your-remote-file'
console.log('Downloading ' + url)
async function run() {
console.log('Downloading file')
try {
await download(url, 'server')
console.log('Download done')
} catch (e) {
console.log('Download failed')
console.log(e.message)
}
}
run()
Upvotes: 10
Reputation: 1123
I've found this approach to be the most helpful, especially when it comes to PDFs and other random files.
import fs from "fs";
fs.appendFile("output_file_name.ext", fileDataInBytes, (err) => {
if (err) throw err;
console.log("File saved!");
});
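For context, here is a sketch of where fileDataInBytes might come from (assuming Node 18+ and an ES module; the URL and filename are placeholders). Note that fs.writeFile replaces the file's contents, which is usually what you want for a fresh download, whereas fs.appendFile appends to an existing file:
import fs from "fs";

// fetch the bytes first (Node 18+), then hand them to fs
const res = await fetch("https://example.com/document.pdf");
const fileDataInBytes = Buffer.from(await res.arrayBuffer());

fs.writeFile("output_file_name.pdf", fileDataInBytes, (err) => {
  if (err) throw err;
  console.log("File saved!");
});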
Upvotes: -3
Reputation: 736
I saw answers using the http, https, and request modules. I'd like to add one using yet another native NodeJS module, http2, which supports either the http or https protocol:
I've referenced the official NodeJS API, as well as some of the other answers on this question for something I'm doing. The following was the test I wrote to try it out, which worked as intended:
import * as fs from 'fs';
import * as _path from 'path';
import * as http2 from 'http2';
/* ... */
async function download( host, query, destination )
{
return new Promise
(
( resolve, reject ) =>
{
// Connect to client:
const client = http2.connect( host );
client.on( 'error', error => reject( error ) );
// Prepare a write stream:
const fullPath = _path.join( fs.realPathSync( '.' ), destination );
const file = fs.createWriteStream( fullPath, { flags: "wx" } );
file.on( 'error', error => reject( error ) );
// Create a request:
const request = client.request( { [':path']: query } );
// On initial response handle non-success (!== 200) status error:
request.on
(
'response',
( headers/*, flags*/ ) =>
{
if( headers[':status'] !== 200 )
{
file.close();
fs.unlink( fullPath, () => {} );
reject( new Error( `Server responded with ${headers[':status']}` ) );
}
}
);
// Set encoding for the payload:
request.setEncoding( 'utf8' );
// Write the payload to file:
request.on( 'data', chunk => file.write( chunk ) );
// Handle ending the request
request.on
(
'end',
() =>
{
file.close();
client.close();
resolve( { result: true } );
}
);
/*
You can use request.setTimeout( 12000, () => {} ) for aborting
after period of inactivity
*/
// Fire off [flush] the request:
request.end();
}
);
}
Then, for example:
/* ... */
let downloaded = await download( 'https://gitlab.com', '/api/v4/...', 'tmp/tmpFile' );
if( downloaded.result )
{
// Success!
}
// ...
Upvotes: 7
Reputation: 1
I suggest you use res.download, as follows:
app.get('/download', function(req, res){
const file = `${__dirname}/folder/abc.csv`;
res.download(file); // Set disposition and send it.
});
Upvotes: -4
Reputation: 32797
Writing my own solution since the existing ones didn't fit my requirements.
What this covers:
- downloads over https (swap in http for HTTP downloads)
It's typed, it's safer. Feel free to drop the types if you're working with plain JS (no Flow, no TS), or convert them to a .d.ts file.
index.js
import httpsDownload from './httpsDownload';
httpsDownload('https://example.com/file.zip', './');
httpsDownload.[js|ts]
import https from "https";
import fs from "fs";
import path from "path";
function download(
url: string,
folder?: string,
filename?: string
): Promise<void> {
return new Promise((resolve, reject) => {
const req = https
.request(url, { headers: { "User-Agent": "javascript" } }, (response) => {
if (response.statusCode === 302 && response.headers.location != null) {
download(
buildNextUrl(url, response.headers.location),
folder,
filename
)
.then(resolve)
.catch(reject);
return;
}
const file = fs.createWriteStream(
buildDestinationPath(url, folder, filename)
);
response.pipe(file);
file.on("finish", () => {
file.close();
resolve();
});
})
.on("error", reject);
req.end();
});
}
function buildNextUrl(current: string, next: string) {
const isNextUrlAbsolute = RegExp("^(?:[a-z]+:)?//").test(next);
if (isNextUrlAbsolute) {
return next;
} else {
const currentURL = new URL(current);
const fullHost = `${currentURL.protocol}//${currentURL.hostname}${
currentURL.port ? ":" + currentURL.port : ""
}`;
return `${fullHost}${next}`;
}
}
function buildDestinationPath(url: string, folder?: string, filename?: string) {
return path.join(folder ?? "./", filename ?? generateFilenameFromPath(url));
}
function generateFilenameFromPath(url: string): string {
const urlParts = url.split("/");
return urlParts[urlParts.length - 1] ?? "";
}
export default download;
Upvotes: 1
Reputation: 8746
As Michelle Tilley said, but with the appropriate control flow:
var http = require('http');
var fs = require('fs');
var download = function(url, dest, cb) {
var file = fs.createWriteStream(dest);
http.get(url, function(response) {
response.pipe(file);
file.on('finish', function() {
file.close(cb);
});
});
}
Without waiting for the finish event, naive scripts may end up with an incomplete file.
Edit: Thanks to @Augusto Roman for pointing out that cb should be passed to file.close, not called explicitly.
Upvotes: 180
Reputation: 1606
download.js (i.e. /project/utils/download.js)
const fs = require('fs');
const request = require('request');
const download = (uri, filename, callback) => {
request.head(uri, (err, res, body) => {
console.log('content-type:', res.headers['content-type']);
console.log('content-length:', res.headers['content-length']);
request(uri).pipe(fs.createWriteStream(filename)).on('close', callback);
});
};
module.exports = { download };
app.js
...
// part of imports
const { download } = require('./utils/download');
...
// add this function wherever
download('https://imageurl.com', 'imagename.jpg', () => {
console.log('done')
});
Upvotes: 6
Reputation: 11625
✅ If you use pipeline, it closes all the other streams and makes sure that there are no memory leaks.
Working example:
const http = require('http');
const { pipeline } = require('stream');
const fs = require('fs');

const file = fs.createWriteStream('./file.jpg');

http.get('http://via.placeholder.com/150/92c952', response => {
  pipeline(
    response,
    file,
    err => {
      if (err) console.error('Pipeline failed.', err);
      else console.log('Pipeline succeeded.');
    }
  );
});
From my answer to "What's the difference between .pipe and .pipeline on streams".
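Since Node 15 there is also a promise-based pipeline in stream/promises, which can read a little cleaner (a sketch using the same placeholder image URL):
const http = require('http');
const fs = require('fs');
const { pipeline } = require('stream/promises');

function download(url, dest) {
  return new Promise((resolve, reject) => {
    http.get(url, (response) => {
      // pipeline() resolves once the write stream has finished, and closes both streams
      pipeline(response, fs.createWriteStream(dest)).then(resolve, reject);
    }).on('error', reject);
  });
}

download('http://via.placeholder.com/150/92c952', './file.jpg')
  .then(() => console.log('Pipeline succeeded.'))
  .catch((err) => console.error('Pipeline failed.', err));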
Upvotes: 7
Reputation: 1633
Here's yet another way to handle it without a 3rd-party dependency, which also checks for redirects:
var https = require('https');
var fs = require('fs');

var download = function(url, dest, cb) {
  https.get(url, function(response) {
    if ([301, 302].indexOf(response.statusCode) !== -1) {
      // follow the redirect instead of piping this redirect response
      return download(response.headers.location, dest, cb);
    }
    var file = fs.createWriteStream(dest);
    response.pipe(file);
    file.on('finish', function() {
      file.close(cb); // close() is async, call cb after close completes.
    });
  });
}
Upvotes: 0
Reputation: 2844
I prefer request() because you can use both http and https with it.
request('http://i3.ytimg.com/vi/J---aiyznGQ/mqdefault.jpg')
.pipe(fs.createWriteStream('cat.jpg'))
Upvotes: 8
Reputation: 659
gfxmonk's answer has a very tight data race between the callback and the file.close() completing. file.close() actually takes a callback that is called when the close has completed. Otherwise, immediate uses of the file may fail (very rarely!).
A complete solution is:
var http = require('http');
var fs = require('fs');
var download = function(url, dest, cb) {
var file = fs.createWriteStream(dest);
var request = http.get(url, function(response) {
response.pipe(file);
file.on('finish', function() {
file.close(cb); // close() is async, call cb after close completes.
});
});
}
Without waiting for the finish event, naive scripts may end up with an incomplete file. Without scheduling the cb callback via close, you may get a race between accessing the file and the file actually being ready.
Upvotes: 53
Reputation: 331
Hi, I think you can use the child_process module and the curl command.
const cp = require('child_process');
let download = async function(uri, filename){
let command = `curl -o ${filename} '${uri}'`;
let result = cp.execSync(command);
};
async function test() {
await download('http://zhangwenning.top/20181221001417.png', './20181221001417.png')
}
test()
In addition, when you want to download large or multiple files, you can use the cluster module to use more CPU cores.
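A rough sketch of that idea (assuming Node 16+ for cluster.isPrimary; the URL list and output filenames are placeholders):
const cluster = require('node:cluster');
const os = require('node:os');
const cp = require('node:child_process');

// placeholder list of files to fetch
const urls = [
  'http://zhangwenning.top/20181221001417.png',
  // ...
];

if (cluster.isPrimary) {
  // fork at most one worker per CPU core
  const workers = Math.min(os.cpus().length, urls.length);
  for (let i = 0; i < workers; i++) {
    cluster.fork({ WORKER_INDEX: i, WORKER_COUNT: workers });
  }
} else {
  const index = Number(process.env.WORKER_INDEX);
  const count = Number(process.env.WORKER_COUNT);
  // each worker takes every count-th URL, starting at its own index
  for (let i = index; i < urls.length; i += count) {
    cp.execSync(`curl -o ./file-${i}.png '${urls[i]}'`);
  }
  process.exit(0);
}
Whether this helps depends on the workload: downloads are mostly I/O-bound, so the gain comes mainly from running many transfers in parallel rather than from the extra cores themselves.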
Upvotes: 20
Reputation: 237
var fs = require('fs'),
request = require('request');
var download = function(uri, filename, callback){
request.head(uri, function(err, res, body){
console.log('content-type:', res.headers['content-type']);
console.log('content-length:', res.headers['content-length']);
request(uri).pipe(fs.createWriteStream(filename)).on('close', callback);
});
};
download('https://www.cryptocompare.com/media/19684/doge.png', 'icons/taskks12.png', function(){
console.log('done');
});
Upvotes: 0
Reputation: 20118
If you are using Express, use the res.download() method; otherwise, use the fs module.
app.get('/read-android', function(req, res) {
var file = "/home/sony/Documents/docs/Android.apk";
res.download(file)
});
(or)
function readApp(req,res) {
var file = req.fileName,
filePath = "/home/sony/Documents/docs/";
fs.exists(filePath, function(exists){
if (exists) {
res.writeHead(200, {
"Content-Type": "application/octet-stream",
"Content-Disposition" : "attachment; filename=" + file});
fs.createReadStream(filePath + file).pipe(res);
} else {
res.writeHead(400, {"Content-Type": "text/plain"});
res.end("ERROR File does NOT Exists.ipa");
}
});
}
Upvotes: 4
Reputation: 9
You can try using res.redirect to the HTTPS file download URL, and then the file will be downloaded.
Like: res.redirect('https://static.file.com/file.txt');
Upvotes: -1
Reputation: 13223
The following code is based on Brandon Tilley's answer:
var http = require('http'),
fs = require('fs');
var request = http.get("http://example12345.com/yourfile.html", function(response) {
if (response.statusCode === 200) {
var file = fs.createWriteStream("copy.html");
response.pipe(file);
}
// Add timeout.
request.setTimeout(12000, function () {
request.abort();
});
});
Don't create the file when you get an error, and prefer using a timeout to close your request after X seconds.
Upvotes: 17
Reputation: 703
function download(url, dest, cb) {
var request = http.get(url, function (response) {
const settings = {
flags: 'w',
encoding: 'utf8',
fd: null,
mode: 0o666,
autoClose: true
};
// response.pipe(fs.createWriteStream(dest, settings));
var file = fs.createWriteStream(dest, settings);
response.pipe(file);
file.on('finish', function () {
let okMsg = {
text: `File downloaded successfully`
}
cb(okMsg);
file.end();
});
}).on('error', function (err) { // Handle errors
fs.unlink(dest, () => {}); // Delete the file async. (But we don't check the result)
let errorMsg = {
text: `Error in file download: ${err.message}`
}
if (cb) cb(errorMsg);
});
};
Upvotes: 0
Reputation: 9154
Just to point out that doing this without a library can be buggy. Here is one example:
Protocol "https:" not supported.
Here is my suggestion: use a tool such as wget or curl, for example via node-wget-promise:
var wget = require('node-wget-promise');
wget('http://nodejs.org/images/logo.svg');
Upvotes: 1
Reputation: 714
Maybe node.js has changed, but it seems there are some problems with the other solutions (using node v8.1.2):
- You don't need to call file.close() in the finish event. Per default the fs.createWriteStream is set to autoClose: https://nodejs.org/api/fs.html#fs_fs_createwritestream_path_options
- file.close() should be called on error. Maybe this is not needed when the file is deleted (unlink()), but normally it is: https://nodejs.org/api/stream.html#stream_readable_pipe_destination_options
- The temp file is not deleted on statusCode !== 200
- fs.unlink() without a callback is deprecated (outputs a warning)
- If the dest file exists, it is overridden
Below is a modified solution (using ES6 and promises) which handles these problems.
const http = require("http");
const fs = require("fs");
function download(url, dest) {
return new Promise((resolve, reject) => {
const file = fs.createWriteStream(dest, { flags: "wx" });
const request = http.get(url, response => {
if (response.statusCode === 200) {
response.pipe(file);
} else {
file.close();
fs.unlink(dest, () => {}); // Delete temp file
reject(`Server responded with ${response.statusCode}: ${response.statusMessage}`);
}
});
request.on("error", err => {
file.close();
fs.unlink(dest, () => {}); // Delete temp file
reject(err.message);
});
file.on("finish", () => {
resolve();
});
file.on("error", err => {
file.close();
if (err.code === "EEXIST") {
reject("File already exists");
} else {
fs.unlink(dest, () => {}); // Delete temp file
reject(err.message);
}
});
});
}
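Usage might look like this (placeholder URL and destination):
download("http://example.com/file.zip", "./file.zip")
  .then(() => console.log("Downloaded"))
  .catch((reason) => console.error(reason));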
Upvotes: 30
Reputation: 978
var requestModule = require("request");
var fs = require("fs");

requestModule(filePath).pipe(fs.createWriteStream('abc.zip'));
Upvotes: -5
Reputation: 7434
const http = require('http');
const fs = require('fs');

const download = (url, path) => new Promise((resolve, reject) => {
  http.get(url, response => {
    const statusCode = response.statusCode;

    if (statusCode !== 200) {
      return reject('Download error!');
    }

    const writeStream = fs.createWriteStream(path);
    response.pipe(writeStream);

    writeStream.on('error', () => reject('Error writing to file!'));
    writeStream.on('finish', () => writeStream.close(resolve));
  });
}).catch(err => console.error(err));
Upvotes: 7
Reputation: 10949
Download using a promise which resolves to a readable stream, with extra logic to handle redirects.
var http = require('http');
var promise = require('bluebird');
var url = require('url');
var fs = require('fs');
var assert = require('assert');
function download(option) {
assert(option);
if (typeof option == 'string') {
option = url.parse(option);
}
return new promise(function(resolve, reject) {
var req = http.request(option, function(res) {
if (res.statusCode == 200) {
resolve(res);
} else {
if (res.statusCode === 301 && res.headers.location) {
resolve(download(res.headers.location));
} else {
reject(res.statusCode);
}
}
})
.on('error', function(e) {
reject(e);
})
.end();
});
}
download('http://localhost:8080/redirect')
.then(function(stream) {
try {
var writeStream = fs.createWriteStream('holyhigh.jpg');
stream.pipe(writeStream);
} catch(e) {
console.error(e);
}
});
Upvotes: 5
Reputation: 25054
For those who came in search of an ES6-style, promise-based way, I guess it would be something like:
var http = require('http');
var fs = require('fs');
function pDownload(url, dest){
var file = fs.createWriteStream(dest);
return new Promise((resolve, reject) => {
var responseSent = false; // flag to make sure that response is sent only once.
http.get(url, response => {
response.pipe(file);
file.on('finish', () =>{
file.close(() => {
if(responseSent) return;
responseSent = true;
resolve();
});
});
}).on('error', err => {
if(responseSent) return;
responseSent = true;
reject(err);
});
});
}
//example
pDownload(url, fileLocation)
.then( ()=> console.log('downloaded file no issues...'))
.catch( e => console.error('error while downloading', e));
Upvotes: 21