Reputation: 425
I'm trying to read a big JSON file that contains news in Node.js, but I get this error:
RangeError [ERR_FS_FILE_TOO_LARGE]: File size (3472064213) is greater than 2 GB
    at new NodeError (node:internal/errors:371:5)
    at FSReqCallback.readFileAfterStat [as oncomplete] (node:fs:335:11) {
  code: 'ERR_FS_FILE_TOO_LARGE'
}
The code:
var fs = require("fs");
fs.readFile("GOV.json", { encoding: "utf-8" }, function (err, data) {
  if (err) {
    throw err;
  }
  // Build up a giant bulk request for elasticsearch.
  var bulk_request = data.split("\n").reduce(function (bulk_request, line) {
    var obj, tweets;
    try {
      obj = JSON.parse(line);
    } catch (e) {
      console.log("Done reading 1");
      return bulk_request;
    }
    // Rework the data slightly
    tweets = {
      id: obj.id,
      username: obj.username,
      tweet: obj.tweet,
      date: new Date(obj.date),
      url: obj.url,
    };
    bulk_request.push({
      index: { _index: "tweets_index", _type: "tweets", _id: tweets.id },
    });
    bulk_request.push(tweets);
    return bulk_request;
  }, []);
  // A little voodoo to simulate synchronous insert
  var busy = false;
  var callback = function (err, resp) {
    if (err) {
      console.log(err);
    }
    busy = false;
  };
  // Recursively whittle away at bulk_request, 1000 at a time.
  var perhaps_insert = function () {
    if (!busy) {
      busy = true;
      // "client" is an Elasticsearch client created earlier (omitted here).
      client.bulk(
        {
          body: bulk_request.slice(0, 1000),
        },
        callback
      );
      bulk_request = bulk_request.slice(1000);
      console.log(bulk_request.length);
    }
    if (bulk_request.length > 0) {
      setTimeout(perhaps_insert, 100);
    } else {
      console.log("Inserted all records.");
    }
  };
  perhaps_insert();
});
Upvotes: 2
Views: 13323
Reputation: 1052
According to this answer on GitHub, 2 GB is the limit:
That is the max buffer size in node. To import larger files, the code will need to change the imports to streams instead of putting the whole file in a buffer (...).
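To avoid the 2 GB buffer entirely, you can stream the file and parse it one line at a time with the built-in readline module. Here is a minimal sketch, assuming GOV.json is newline-delimited JSON (one object per line) as in your code; it builds the same bulk_request array and leaves the actual client.bulk batching to your perhaps_insert logic.

    var fs = require("fs");
    var readline = require("readline");

    var rl = readline.createInterface({
      input: fs.createReadStream("GOV.json", { encoding: "utf-8" }),
      crlfDelay: Infinity,
    });

    var bulk_request = [];

    rl.on("line", function (line) {
      var obj;
      try {
        obj = JSON.parse(line);
      } catch (e) {
        return; // skip blank or malformed lines
      }
      // Same bulk format as in the question: an action line followed by the document.
      bulk_request.push({
        index: { _index: "tweets_index", _type: "tweets", _id: obj.id },
      });
      bulk_request.push({
        id: obj.id,
        username: obj.username,
        tweet: obj.tweet,
        date: new Date(obj.date),
        url: obj.url,
      });
    });

    rl.on("close", function () {
      // The whole file has been streamed through without ever holding it in one buffer.
      console.log("Parsed " + bulk_request.length / 2 + " records.");
      // Hand bulk_request to the same perhaps_insert() batching logic as before.
    });

For a 3+ GB file you would normally go one step further and flush each 1000-entry batch to client.bulk from inside the "line" handler (pausing the stream while the request is in flight), so the parsed records are not all kept in memory either.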
Upvotes: 2