Reputation: 1386
We long-term Python and PHP coders have a tidy bit of synchronous code (sample below). Most of the functions have asynchronous counterparts. We really want to 'get' the power of Javascript and Node, and believe this is an ideal case of where asynchronous node.js would speed things up and blow our socks off.
What is the textbook way to refactor the following to utilize asynchronous Node? Async
/ await
and promise.all
? How? (Using Node 8.4.0. Backwards compatibility is not a concern.)
var fs = require('fs');
// This could list over 10,000 files of various size
const fileList = ['file1', 'file2', 'file3'];
const getCdate = file => fs.statSync(file).ctime; // Has async method
const getFSize = file => fs.statSync(file).size; // Has async method
// Can be async through file streams (see resources below)
const getMd5 = (file) => {
let fileData = new Buffer(0);
fileData = fs.readFileSync(file);
const hash = crypto.createHash('md5');
hash.update(fileData);
return hash.digest('hex');
};
let filesObj = fileList.map(file => [file, {
datetime: getCdate(file),
filesize: getFSize(file),
md5hash: getMd5(file),
}]);
console.log(filesObj);
Notes:
filesObj
than listed herefileList
.Assorted file stream methods for getting md5 asynchronously:
Upvotes: 0
Views: 282
Reputation: 5973
I would make getCDate
, getFSize
, and getMd5
all asynchronous and promisified then wrap them in another asynchronous promise-returning function, here called statFile
.
function statFile(file) {
return Promise.all([
getCDate(file),
getFSize(file),
getMd5(file)
]).then((datetime, filesize, md5hash) => ({datetime, filesize, md5hash}))
.catch(/*handle error*/);
}
Then you could change your mapping function to
const promises = fileList.map(statFile);
Then it's simple to use Promise.all:
Promise.all(promises)
.then(filesObj => /*do something*/)
.catch(err => /*handle error*/)
This leaves things modular, doesn't require async/await, allows you to plug extra functions into statFile
, and preserves your file order.
Upvotes: 0
Reputation: 59213
There are a variety of different ways you could handle this code asynchronously. You could use the node async library to handle all of the callbacks more elegantly. If you don't want to dive into promises then that's the "easy" option. I put easy in quotes because promises are actually easier if you understand them well enough. The async lib is helpful but it still leaves much to be desired in the way of error propagation, and there is a lot of boilerplate code you'll have to wrap all your calls in.
The better way is to use promises. Async/Await is still pretty new. Not even supported in node 7 (not sure about node 8) without a preprocessor like Bable or Typescript. Also, async/await uses promises under the hood anyway.
Here is how I would do it using promises, even included a file stats cache for maximum performance:
const fs = require('fs');
const crypto = require('crypto');
const Promise = require('bluebird');
const fileList = ['file1', 'file2', 'file3'];
// Use Bluebird's Promise.promisifyAll utility to turn all of fs'
// async functions into promise returning versions of them.
// The new promise-enabled methods will have the same name but with
// a suffix of "Async". Ex: fs.stat will be fs.statAsync.
Promise.promisifyAll(fs);
// Create a cache to store the file if we're planning to get multiple
// stats from it.
let cache = {
fileName: null,
fileStats: null
};
const getFileStats = (fileName, prop) => {
if (cache.fileName === fileName) {
return cache.fileStats[prop];
}
// Return a promise that eventually resolves to the data we're after
// but also stores fileStats in our cache for future calls.
return fs.statAsync(fileName).then(fileStats => {
cache.fileName = fileName;
cache.fileStats = fileStats;
return fileStats[prop];
})
};
const getMd5Hash = file => {
// Return a promise that eventually resolves to the hash we're after.
return fs.readFileAsync(file).then(fileData => {
const hash = crypto.createHash('md5');
hash.update(fileData);
return hash.digest('hex');
});
};
// Create a promise that immediately resolves with our fileList array.
// Use Bluebird's Promise.map utility. Works very similar to Array.map
// except it expects all array entries to be promises that will
// eventually be resolved to the data we want.
let results = Promise.resolve(fileList).map(fileName => {
return Promise.all([
// This first gets a promise that starts resolving file stats
// asynchronously. When the promise resolves it will store file
// stats in a cache and then return the stats value we're after.
// Note that the final return is not a promise, but returning raw
// values from promise handlers implicitly does
// Promise.resolve(rawValue)
getFileStats(fileName, 'ctime'),
// This one will not return a promise. It will see cached file
// stats for our file and return the stats value from the cache
// instead. Since it's being returned into a Promise.all, it will
// be implicitly wrapped in Promise.resolve(rawValue) to fit the
// promise paradigm.
getFileStats(fileName, 'size'),
// First returns a promise that begins resolving the file data for
// our file. A promise handler in the function will then perform
// the operations we need to do on the file data in order to get
// the hash. The raw hash value is returned in the end and
// implicitly wrapped in Promise.resolve as well.
getMd5(file)
])
// .spread is a bluebird shortcut that replaces .then. If the value
// being resolved is an array (which it is because Promise.all will
// resolve an array containing the results in the same order as we
// listed the calls in the input array) then .spread will spread the
// values in that array out and pass them in as individual function
// parameters.
.spread((dateTime, fileSize, md5Hash) => [file, { dateTime, fileSize, md5Hash }]);
}).catch(error => {
// Any errors returned by any of the Async functions in this promise
// chain will be propagated here.
console.log(error);
});
Here's the code again but without comments to make it easier to look at:
const fs = require('fs');
const crypto = require('crypto');
const Promise = require('bluebird');
const fileList = ['file1', 'file2', 'file3'];
Promise.promisifyAll(fs);
let cache = {
fileName: null,
fileStats: null
};
const getFileStats = (fileName, prop) => {
if (cache.fileName === fileName) {
return cache.fileStats[prop];
}
return fs.statAsync(fileName).then(fileStats => {
cache.fileName = fileName;
cache.fileStats = fileStats;
return fileStats[prop];
})
};
const getMd5Hash = file => {
return fs.readFileAsync(file).then(fileData => {
const hash = crypto.createHash('md5');
hash.update(fileData);
return hash.digest('hex');
});
};
let results = Promise.resolve(fileList).map(fileName => {
return Promise.all([
getFileStats(fileName, 'ctime'),
getFileStats(fileName, 'size'),
getMd5(file)
]).spread((dateTime, fileSize, md5Hash) => [file, { dateTime, fileSize, md5Hash }]);
}).catch(console.log);
In the end results will be an array like which should hopefully match the results of your original code but should perform much better in a benchmark:
[
['file1', { dateTime: 'data here', fileSize: 'data here', md5Hash: 'data here' }],
['file2', { dateTime: 'data here', fileSize: 'data here', md5Hash: 'data here' }],
['file3', { dateTime: 'data here', fileSize: 'data here', md5Hash: 'data here' }]
]
Apologies in advance for any typos. Didn't have the time or ability to actually run any of this. I looked over it quite extensively though.
After discovering that async/await is in node as of 7.6 I decided to play with it a bit last night. It seems most useful for recursive async tasks that don't need to be done in parallel, or for nested async tasks that you might wish you could write synchronously. For what you needed here there isn't any mind-blowing way to use async/await that I can see but there are a few places where the code would read more cleanly. Here's the code again but with a few little async/await conveniences.
const fs = require('fs');
const crypto = require('crypto');
const Promise = require('bluebird');
const fileList = ['file1', 'file2', 'file3'];
Promise.promisifyAll(fs);
let cache = {
fileName: null,
fileStats: null
};
async function getFileStats (fileName, prop) {
if (cache.fileName === fileName) {
return cache.fileStats[prop];
}
let fileStats = await fs.stat(fileName);
cache.fileName = fileName;
cache.fileStats = fileStats;
return fileStats[prop];
};
async function getMd5Hash (file) {
let fileData = await fs.readFileAsync(file);
const hash = crypto.createHash('md5');
hash.update(fileData);
return hash.digest('hex');
};
let results = Promise.resolve(fileList).map(fileName => {
return Promise.all([
getFileStats(fileName, 'ctime'),
getFileStats(fileName, 'size'),
getMd5(file)
]).spread((dateTime, fileSize, md5Hash) => [file, { dateTime, fileSize, md5Hash }]);
}).catch(console.log);
Upvotes: 1