Reputation: 55
I'm trying to parse huge json file to 2d array.
I can parse. But required memory is almost 10times.
My sample.json file has 100,000 rows, each with a different item.
If sample.json is 500MB this code need 5GB.
How can i reduce memory usage?
I use Newtonsoft.Json, .Net6.0
Read from json
static void Read()
{
List<Dictionary<string, string>> rows = new List<Dictionary<string, string>>();
string path = @"D:\small.json";
using (FileStream fsRead = File.Open(path, FileMode.Open, FileAccess.Read, FileShare.ReadWrite))
using (BufferedStream bsRead = new BufferedStream(fsRead))
using (StreamReader srRead = new StreamReader(bsRead))
{
string? line;
while ((line = srRead.ReadLine()) != null)
{
JObject jsonObject = JObject.Parse(line);
MakeRowData(jsonObject, out var row);
rows.Add(row);
}
}
}
Make row
private static void MakeRowData(JObject jsonData, out Dictionary<string, string> row)
{
Dictionary<string, string> output = new Dictionary<string, string>();
foreach (var item in jsonData)
{
int childSize = 0;
if (item.Value != null)
{
childSize = item.Value.Children().Count();
///if Item has child, explore deep
if (childSize > 0)
{
ExploreChild(item.Value, ref output);
}
///or not just add new item
else
{
string str = item.Value.ToString();
output[item.Key] = str ?? "";
}
}
}
row = output;
}
private static void ExploreChild(JToken jToken, ref Dictionary<string, string> row)
{
foreach (var item in jToken)
{
int childSize = item.Children().Count();
///if Item has child, explore deep
if (childSize > 0)
{
ExploreChild(item, ref row);
}
///or not just add new item
else
{
string path = jToken.Path.Replace('[', '(').Replace(']', ')');
string str = jToken.First.ToString();
row[path] = str?? "";
}
}
}
EDIT Add Sample.json
It is set of json strings.
And Fields are not fixed.
Sample.json
{Field1:0,Field2:1,Field2:3}
{Field1:0,Field5:1,Field6:3}
{Field1:0,Field7:1,Field9:3}
{Field1:0,Field13:1,Field50:3,Field57:3}
...
Upvotes: 0
Views: 773
Reputation: 690
You can try replacing the recursive exploring children with the iterative one. Something like this:
private static void MakeRowData(JObject jsonData, out Dictionary<string, string> row)
{
Dictionary<string, string> output = new Dictionary<string, string>();
foreach (var item in jsonData)
{
if (item.Value != null)
{
///if Item has child, explore deep
if (item.Value.HasValues)
{
var queue = new Queue<JToken>();
queue.Enqueue(item.Value);
while (queue.Any())
{
var currItem = queue.Dequeue();
if (currItem.HasValues)
{
foreach(var child in item)
queue.Enqueue(child);
}
else
{
// add item without children to row here
}
}
}
///or not just add new item
else
{
string str = item.Value.ToString();
output[item.Key] = str ?? "";
}
}
}
row = output;
}
Recursive calls, unless it is a tail recursion, keep the stack of a method they were called from. This can lead to extensive memory usage.
Upvotes: 1