Reputation: 1300
I am using node to recursively traverse a file system and make a system call for each file, by using child.exec. It works well when tested on a small structure, with a couple of folders and files, but when run on the whole home directory, it crashes after a while
child_process.js:945
throw errnoException(process._errno, 'spawn');
^
Error: spawn Unknown system errno 23
at errnoException (child_process.js:998:11)
at ChildProcess.spawn (child_process.js:945:11)
at exports.spawn (child_process.js:733:9)
at Object.exports.execFile (child_process.js:617:15)
at exports.exec (child_process.js:588:18)
Does this happen because it uses up all resources? How can I avoid this?
EDIT: Code improvement and best practices suggestions always welcome :)
// Recursively walks `dir`: reads its entries, then hands every entry to
// async.each. Regular files go to processFile(dir, file, cb); directories
// recurse into processDir. `callback` is invoked with no arguments from the
// async.each completion handler.
// NOTE(review): async.each is used without a limit and every subdirectory
// recurses the same way, so nothing in this function bounds how many
// processFile calls are in flight at once (cf. async.eachLimit).
function processDir(dir, callback) {
fs.readdir(dir, function (err, files) {
if (err) {...}
// NOTE(review): when `files` is falsy, `callback` is not invoked here; it would have to be called by the elided error handler above.
if (files) {
async.each(files, function (file, cb) {
var filePath = dir + "/" + file;
// NOTE(review): fs.statSync is synchronous and presumably throws on failure rather than returning a falsy value, which would make the `if (stats)` guard ineffective — confirm against the Node.js fs docs.
var stats = fs.statSync(filePath);
if (stats) {
if (stats.isFile()) {
processFile(dir, file, function (err) {
if (err) {...}
cb();
});
} else if (stats.isDirectory()) {
processDir(filePath, function (err) {
if (err) {...}
cb();
});
}
// NOTE(review): there is no else branch, so `cb` is never called for an entry that is neither a file nor a directory (or when `stats` is falsy).
}
}, function (err) {
if (err) {...}
callback();
}
);
}
});
}
Upvotes: 2
Views: 3766
Reputation: 707258
As has been established in comments, you are likely running out of file handles because you are running too many concurrent operations on your files. So, a solution is to limit the number of concurrent operations that run at once so too many files aren't in use at the same time.
Here's a somewhat different implementation that uses Bluebird promises to control both the async aspects of the operation and the concurrency aspects of the operation.
To make the management of the concurrency aspect easier, this collects the entire list of files into an array first and then processes the array of filenames rather than processing as you go. This makes it easier to use a built-in concurrency capability in Bluebird's .map()
(which works on a single array) so we don't have to write that code ourselves:
var Promise = require("bluebird");
var fs = Promise.promisifyAll(require("fs"));
var path = require("path");
// Recurse a directory and call a callback on each regular file found.
//
//   dir           - directory to start from
//   numConcurrent - max number of fileCallback calls allowed to run at once
//   fileCallback  - called with each file's path; must return a promise
//
// Returns a promise for when all work is done. It is the promise produced by
// Promise.map, so it rejects if any fileCallback promise rejects.
function processDir(dir, numConcurrent, fileCallback) {
    // Filled in by listDir; the whole tree is listed before any file is
    // processed so that a single Promise.map call can enforce the limit.
    var allFiles = [];

    // Collect the path of every regular file below currentDir into allFiles.
    function listDir(currentDir) {
        return fs.readdirAsync(currentDir).map(function(file) {
            var filePath = path.join(currentDir, file);
            return fs.statAsync(filePath).then(function(stats) {
                if (stats.isFile()) {
                    allFiles.push(filePath);
                } else if (stats.isDirectory()) {
                    return listDir(filePath);
                }
            }).catch(function() {
                // Best effort: skip entries that fail to stat (the file could
                // just be gone now). Because this handler is chained after
                // .then(), it also skips subdirectories whose listing fails.
                return;
            });
        });
    }

    return listDir(dir).then(function() {
        return Promise.map(allFiles, function(filename) {
            return fileCallback(filename);
        }, {concurrency: numConcurrent});
    });
}
// example usage:
// pass the initial directory,
// the number of concurrent operations allowed at once
// and a callback function (that returns a promise) to process each file
processDir(process.cwd(), 5, function(file) {
    // put your own code here to process each file
    // this is code to cause each callback to take a random amount of time
    // for testing purposes
    var rand = Math.floor(Math.random() * 500) + 500;
    return Promise.delay(rand).then(function() {
        console.log(file);
    });
}).catch(function(e) {
    // error here: report it rather than discarding it silently, so a failed
    // run is distinguishable from a successful one
    console.error("processDir failed:", e);
}).finally(function() {
    console.log("done");
});
FYI, I think you'll find that proper error propagation and proper error handling from many async operations is much, much easier with promises than the plain callback method.
Upvotes: 0
Reputation: 4005
Well, I don't know the reason for the failure, but if it is what you suspect (using up all of the resources) or what others say (too many open files), you could try using multitasking for it. JXcore (a fork of Node.js) offers such a thing: it allows you to run a task in a separate instance, while everything still happens inside one single process.
While a Node.js app, as a process, has its limitations, JXcore with its sub-instances multiplies those limits: a single process with even one extra instance (or task, or, well, we can call it a sub-thread) doubles the limits!
So, let's say that you run each of your spawn()
calls in a separate task. Or, since tasks no longer run in the main thread, you can even use the synchronous method that JXcore offers: cmdSync().
Probably the best illustration is given by these few lines of code:
// NOTE(review): presumably caps the number of JXcore task instances at 4 — confirm against the JXcore Multitasking docs.
jxcore.tasks.setThreadCount(4);
// Task body: builds a command string from `file`, runs it with cmdSync and returns whatever cmdSync returns.
var task = function(file) {
// NOTE(review): `file` is concatenated into the command unescaped; replace this placeholder with a real, safely built command.
var your_cmd = "do something with " + file;
// Synchronous call; per the accompanying text the task runs in a separate instance, so the main thread is not blocked.
return jxcore.utils.cmdSync(your_cmd);
};
// Queue `task` with "file1.txt" as its argument. The callback's `ret` is presumably the task's return value; its exitCode and out fields are printed.
jxcore.tasks.addTask(task, "file1.txt", function(ret) {
console.log("the exit code:", ret.exitCode);
console.log("output:", ret.out);
});
Let me repeat: the task will not block the main thread, since it is running in a separate instance!
Multitasking API is documented here: Multitasking.
Upvotes: 0
Reputation: 576
The issue may be caused by having too many files open simultaneously.
Consider using the async module to solve it: https://github.com/caolan/async#eachLimit
// Iterator handed to async.eachLimit for each entry of `files`.
function handleFile(file, callback) {
    //process file here and call callback
}

// Completion handler handed to async.eachLimit; receives `err`.
function allDone(err) {
    //done
}

// 20 is the limit argument: the cap on how many handleFile calls run at once.
async.eachLimit(files, 20, handleFile, allDone);
In this example, at most 20 files are processed at a time.
Upvotes: 4