kharandziuk
kharandziuk

Reputation: 12890

Splitting a really long file with JSON array

I have a really big file containing a JSON array (~8 GB). I need to split it into a group of smaller files, each containing a part of the array.

The array contains only objects.

I decided to implement this algorithm:

I tried to implement it myself but finished with something like this:

var fs = require('fs');

// Stream a huge JSON-array dump and emit each top-level object.
//
// Instead of pulling the stream one character at a time (very slow on an
// ~8 GB file) and re-running JSON.parse on every '}' (O(n^2) per object,
// with a silent catch hiding real errors), this scans each incoming chunk
// with a brace-depth counter that is aware of string literals and escape
// sequences. That makes object boundaries exact: nested objects, '}'
// inside strings, and arbitrary whitespace/commas between array elements
// are all handled, and no fixed number of separator characters is assumed.
var readable = fs.createReadStream('walmart.dump', { encoding: 'utf8' });

var buffer = '';       // text of the current, still-incomplete object
var depth = 0;         // {}-nesting depth; 0 means "between objects"
var inString = false;  // true while inside a JSON string literal
var escaped = false;   // true when the previous char was a backslash
var counter = 0;       // number of objects emitted so far

readable.on('data', function (chunk) {
  for (var i = 0; i < chunk.length; i++) {
    var ch = chunk[i];

    if (depth === 0) {
      // Between objects: skip the array frame ('[', ']'), commas and
      // whitespace until the next object starts.
      if (ch !== '{') continue;
    }
    buffer += ch;

    if (inString) {
      // Inside a string: only an unescaped '"' ends it; braces are data.
      if (escaped) escaped = false;
      else if (ch === '\\') escaped = true;
      else if (ch === '"') inString = false;
      continue;
    }

    if (ch === '"') {
      inString = true;
    } else if (ch === '{') {
      depth += 1;
    } else if (ch === '}') {
      depth -= 1;
      if (depth === 0) {
        // Exactly one complete top-level object: parse it once.
        var res = JSON.parse(buffer);
        counter += 1;
        console.log(res);
        // This is the point to route `res` into the current output file
        // (e.g. rotate to a new file every N objects).
        buffer = '';
      }
    }
  }
});

readable.on('end', function () {
  console.log('done, objects emitted: ' + counter);
});

// Without this handler a missing/unreadable file crashes the process
// with an unhandled 'error' event.
readable.on('error', function (err) {
  console.error('read failed:', err);
});

Has anybody solved a similar problem?

Upvotes: 0

Views: 491

Answers (1)

saintedlama
saintedlama

Reputation: 6898

The Clarinet module (https://github.com/dscape/clarinet) looks quite promising to me. It's based on sax-js, so it should be quite robust and well tested.

Upvotes: 1

Related Questions