Leahcim

Reputation: 42069

How to write to new files sequentially

I have a directory of files (the files dir below), each containing data that is processed (and mixed with data from the initial file data/phrase.js) before the transformed data is written (actually appended) to new files in the output dir. My problem is that all of the data for every file is written to disk at the very end, after all the processing is complete. I would rather have the first file processed and written to disk, then the second file processed and written to disk, and so on, so that less data is held in memory. (Although very few files are involved in this example, my actual application has many more.)

Question: Why is the data being written to file at the very end (once all of the files have been processed)? Is there a way to write the data to file as soon as it is ready, rather than holding it all in memory until the data for every file is ready?

    var fs = require('fs');
    // file with some data
    fs.readFile('./data/phrase.js', function(err, data) {
        var somephrase = data.toString();
        // directory of many files
        fs.readdir('./files/', (err, files) => {
            files.forEach(file => {
                let f = './files/' + file;
                fs.readFile(f, (err, data2) => {
                    let somenumber = data2.toString();
                    // intermingle the data from the initial file (phrase.js) with each file in the files dir
                    let output = somenumber + somephrase;
                    // write output to new files
                    let output_file = './output/' + somenumber + '.js';
                    fs.appendFile(output_file, output, function(err) {
                        if (err) {
                            console.log(err);
                        }
                    });
                });
            });
        });
    });

data/phrase.js

cow jumped over the moon

files/one.js

1

files/two.js

2

expected output

output/1.js   (1 cow jumped over the moon)
output/2.js   (2 cow jumped over the moon)

Upvotes: 0

Views: 1119

Answers (1)

jfriend00

Reputation: 708146

Why is the data being written to file at the very end (once all of the files have been processed)?

Your loop is synchronous, but your file operations are asynchronous. The forEach loop runs to completion immediately, starting all the file operations; those operations then run more or less in parallel and all finish some time later, in no guaranteed order.
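You can see the same effect in a stripped-down sketch (the file names here are just illustrative): the loop finishes before any callback fires.

const fs = require('fs');

// the forEach loop completes immediately; every readFile callback runs later
['one.js', 'two.js'].forEach(file => {
    fs.readFile('./files/' + file, (err, data) => {
        console.log('finished reading', file); // logged after the loop
    });
});
console.log('loop done'); // logged first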

is there a way to write the data to file as soon as it is ready, rather than holding it all in memory until all the data for each file is ready?

Using Promises and await (ES2017)

To sequence your file writes, you have to write the asynchronous code differently. With promises and await (standardized in ES2017), it's a bit easier. Here's an example:

const fs = require('fs');
const util = require('util');

// create promisified versions of fs methods we will use
const readFile = util.promisify(fs.readFile);
const readdir = util.promisify(fs.readdir);
const appendFile = util.promisify(fs.appendFile);

async function run() {
    let somephrase = (await readFile('./data/phrase.js')).toString();
    let files = await readdir('./files');
    for (let file of files) {
        try {
            let f = './files/' + file;
            let somenumber = (await readFile(f)).toString();
            //intermingle the data from initial file (phrase.js) with each of the files in files dir
            let output = somenumber + somephrase;
            //write output to new files
            let output_file = './output/' + somenumber + '.js';
            await appendFile(output_file, output);
        } catch(e) {
            console.log("error in loop", e);
        }
    }
}

run().then(() => {
   // all done here
}).catch(err => {
   // error occurred here
});

Using Promises and .reduce() to serialize

If you wanted to do this without using await, you would have to manually sequence the operations. A common design pattern for doing that with promises is to use .reduce() with chained promises. The general pattern looks like this:

array.reduce((p, item) => {
    return p.then(() => {
        return fn(item);
    });
}, Promise.resolve()).then(() => {
    // iteration all done here
}).catch(err => {
    // process error here
});

Where:

fn(item) is your function that returns a promise that is called for each item in the array. You can add more arguments to the function call if desired.
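For example, a minimal fn for a problem like this one might just append one item to a file (the log path is hypothetical, and appendFile is promisified the same way as above):

const fs = require('fs');
const util = require('util');
const appendFile = util.promisify(fs.appendFile);

// hypothetical fn: appends one item to a log file and returns a promise
function fn(item) {
    return appendFile('./output/log.txt', item + '\n');
}

// chaining through .reduce() guarantees the appends happen one at a time, in order
['a', 'b', 'c'].reduce((p, item) => {
    return p.then(() => fn(item));
}, Promise.resolve()).then(() => {
    console.log('all items appended in order');
});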


And, this pattern could be applied to your specific code like this:

const fs = require('fs');
const util = require('util');
const readFile = util.promisify(fs.readFile);
const readdir = util.promisify(fs.readdir);
const appendFile = util.promisify(fs.appendFile);

readFile('./data/phrase.js').then(data => {
    return data.toString();
}).then(somephrase => {
    return readdir('./files').then(files => {
        return files.reduce((p, file) => {
            return p.then(() => {
                let f = './files/' + file;
                return readFile(f).then(data => {
                    let somenumber = data.toString();
                    let output_file = './output/' + somenumber + '.js';
                    let output = somenumber + somephrase;
                    return appendFile(output_file, output);
                });
            });
        }, Promise.resolve());
    });
}).then(() => {
   // all done here
}).catch(err => {
   // error occurred here
});

Using Bluebird promise library

There is also a promise library called Bluebird that can make this a bit easier because it contains serialization features and promisification features:

const Promise = require('bluebird');    
const fs = Promise.promisifyAll(require('fs'));

fs.readFileAsync('./data/phrase.js').then(data => {
    let somephrase = data.toString();
    return fs.readdirAsync('./files').then(files => {
        // iterate array serially
        return Promise.each(files, file => {
            let f = './files/' + file;
        return fs.readFileAsync(f).then(data => {
            let somenumber = data.toString();
            let output_file = './output/' + somenumber + '.js';
            let output = somenumber + somephrase;
            return fs.appendFileAsync(output_file, output);
            });
        });
    });
}).catch(err => {
    // error occurred here
});

Upvotes: 2
