Reputation: 12890
I have a really big file containing a JSON array (~8 GB). I need to split it into a group of small files, each containing a part of the array.
The array contains only objects.
I tried to implement this myself, but ended up with something like this:
var fs = require('fs');
var readable = fs.createReadStream('walmart.dump', { encoding: 'utf8' });

var chunk, buffer = '', counter = 0, inObject = false;

readable.on('readable', function () {
  // read(1) returns a single character because the stream has a utf8 encoding
  while (null !== (chunk = readable.read(1))) {
    if (!inObject) {
      // skip the opening '[', commas and whitespace between objects
      if (chunk === '{') inObject = true;
      else continue;
    }
    buffer += chunk;
    if (chunk !== '}') continue;
    try {
      // parse succeeds only once a whole object has been accumulated;
      // a '}' closing a nested object (or inside a string) makes it throw,
      // so we just keep reading
      var res = JSON.parse(buffer);
      counter++;
      console.log(counter, res);
      buffer = '';
      inObject = false;
    } catch (e) { /* incomplete object, keep accumulating */ }
  }
});
Has anybody solved a similar problem?
Upvotes: 0
Views: 491
Reputation: 6898
The Clarinet module (https://github.com/dscape/clarinet) looks quite promising to me. It's based on sax-js, so it should be quite robust and well tested.
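For illustration, here is a minimal, untested sketch of how clarinet's streaming event API (openobject, key, value, closeobject, openarray, closearray, as documented in its README) could be used to rebuild each top-level object of the array and write them out in batches. The output file names and the perFile batch size are arbitrary choices, not anything clarinet prescribes:

var fs = require('fs');
var clarinet = require('clarinet');

var parser = clarinet.createStream();
var stack = [];       // containers (objects/arrays) currently being built
var keys = [];        // pending property names, one per open object
var batch = [];       // finished top-level objects waiting to be written
var perFile = 10000;  // objects per output file -- arbitrary assumption
var fileIndex = 0;

function addValue(v) {
  var top = stack[stack.length - 1];
  if (Array.isArray(top)) top.push(v);
  else if (top) top[keys.pop()] = v;
}

function flush() {
  if (!batch.length) return;
  fs.writeFileSync('part-' + (fileIndex++) + '.json', JSON.stringify(batch));
  batch = [];
}

parser.on('openobject', function (firstKey) {
  stack.push({});
  if (firstKey !== undefined) keys.push(firstKey); // {} has no first key
});
parser.on('key', function (k) { keys.push(k); });
parser.on('value', addValue);
parser.on('closeobject', function () {
  var obj = stack.pop();
  if (stack.length === 1) {          // back at the top-level array
    batch.push(obj);
    if (batch.length >= perFile) flush();
  } else {
    addValue(obj);                   // nested object: attach to its parent
  }
});
parser.on('openarray', function () { stack.push([]); });
parser.on('closearray', function () {
  var arr = stack.pop();
  if (stack.length) addValue(arr);   // nested array; the root array is dropped
});
parser.on('end', flush);

fs.createReadStream('walmart.dump').pipe(parser);

Since the parser never holds more than one batch of objects in memory at a time, this approach should stay well under the ~8 GB file size regardless of how large the array is.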
Upvotes: 1