Reputation: 13
I am using Node.js to read JSON objects from a very large JSON file (1 GB+). The file has the format [{field1: x, field2: x, field3: x},{...},...,{...}], with no line breaks between objects. To avoid memory problems, I am using fs.createReadStream and processing each chunk of data in sequence. This works and I get valid JSON objects, but the reader stops after reading only one data chunk. Why is it not reading the rest of the file?
My solution was inspired by the accepted answer in this question: Parse large JSON file in Nodejs
Here is the code:
// Get the JSON file
var fs = require('fs');
var stream = fs.createReadStream('Results.json', {flags: 'r', encoding: 'utf-8'});
var buf = '';
var count = 0;

stream.on('data', function(chunk) {
    console.log("Stream on data!"); // ONLY EXECUTED ONCE
    buf += chunk.toString();        // when data is read, stash it in a string buffer
    process();                      // process the buffer
});

stream.on('error', function(err) {
    // NEVER EXECUTED
    console.log(err);
});

stream.on('end', function() {
    // NEVER EXECUTED
    console.log("Count: " + count);
});

function process() {
    var posStart = buf.indexOf('{');
    var posEnd = buf.indexOf('}');
    while (posStart >= 0 || posEnd >= 0) { // keep going while there is a start or end brace in the buffer
        // If the start brace comes before the end brace, skip ahead to the start
        if ((posStart < posEnd || posEnd < 0) && posStart >= 0) {
            buf = buf.slice(posStart);
        }
        if (posStart == 0 && posEnd >= 0) { // If the end brace is next
            processObjectString(buf.slice(0, posEnd + 1)); // process the complete object string
            buf = buf.slice(posEnd + 1);                   // remove the processed string from the buffer
        }
        // Update the positions
        posStart = buf.indexOf('{');
        posEnd = buf.indexOf('}');
    }
}

function processObjectString(objectString) {
    count++;
    var obj = JSON.parse(objectString); // parse the JSON
    console.log(obj.id);                // print the object ID (works)
}
EDIT: After fixing the errors that caused the infinite while loop, here is a working solution that iterates through all the objects in the JSON file. It might not be very elegant, but at least it works (for anyone who might have a similar problem).
// Get the JSON file
var fs = require('fs');
var stream = fs.createReadStream('Results.json', {flags: 'r', encoding: 'utf-8'});
var buf = '';
var count = 0;

stream.on('data', function(chunk) {
    buf += chunk.toString(); // when data is read, stash it in the string buffer
    process();               // process the buffer (operates on the module-level buf)
});

stream.on('error', function(err) {
    console.log(err);
});

stream.on('end', function() {
    console.log("Count: " + count);
});

function process() {
    var posStart = buf.indexOf('{');
    var posEnd = buf.indexOf('}');
    while (posStart >= 0 || posEnd >= 0) { // keep going while there is a start or end brace in the buffer
        // If the start brace comes before the end brace, skip ahead to the start
        if ((posStart < posEnd || posEnd < 0) && posStart >= 0) {
            buf = buf.slice(posStart);
        }
        if (posStart == 0 && posEnd >= 0) { // If the end brace is next
            processObjectString(buf.slice(0, posEnd + 1)); // process the complete object string
            buf = buf.slice(posEnd + 1);                   // remove the processed string from the buffer
        } else if (posStart < 0 || posEnd < 0) { // incomplete object: return and wait for the next chunk
            return;
        }
        // Update the positions
        posStart = buf.indexOf('{');
        posEnd = buf.indexOf('}');
    }
}

function processObjectString(objectString) {
    count++;
    var obj = JSON.parse(objectString); // parse the JSON
    console.log(obj.id);                // print the object ID (works)
}
Upvotes: 1
Views: 4523
Reputation: 8635
Node.js is asynchronous, but your JavaScript runs on a single thread. If the handler for one chunk gets stuck processing the data and never returns, the event loop stays blocked, so the stream can never emit the next 'data' event (or 'error' or 'end').
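A contrived illustration of that blocking, with a deliberate infinite loop standing in for any handler that never returns:

var fs = require('fs');
var stream = fs.createReadStream('Results.json', {flags: 'r', encoding: 'utf-8'});

stream.on('data', function(chunk) {
    console.log("got a chunk"); // prints exactly once
    while (true) {}             // event loop is blocked: no further 'data', 'error' or 'end' events can ever fire
});

stream.on('end', function() {
    console.log("done"); // never reached
});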
That is exactly what happens here: the call to process() inside the 'data' handler enters an infinite loop, so a second chunk is never delivered, and it looks as if the reader simply stopped.
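Concretely, tracing the process() from the question: whenever the buffer ends mid-object (a '{' with no matching '}' yet, which is almost guaranteed at a chunk boundary), neither branch changes anything and the loop spins forever:

// buffer state after the last complete object was consumed from a chunk
var buf = '{"field1": 1, "fi';   // the chunk ended in the middle of an object
var posStart = buf.indexOf('{'); // 0
var posEnd = buf.indexOf('}');   // -1
// while (posStart >= 0 || posEnd >= 0)  -> true, loop entered
// first if:  (posStart < posEnd || posEnd < 0) && posStart >= 0 -> true,
//            but buf.slice(0) leaves buf unchanged
// second if: requires posEnd >= 0 -> skipped
// positions are recomputed to the same values -> infinite loop
// (the "else if (posStart < 0 || posEnd < 0) return;" added in the EDIT is what breaks out)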
For the future: always try to isolate the problem first, to make sure you are looking in the right place.
P.S. It really is easy to get yourself into an infinite loop while processing text; I feel your pain here.
Upvotes: 2