Reputation: 11
I have a large dataset in JSON format. For ease of use, I want to split it into multiple JSON files while still maintaining the structure.
For example:
{"users": [
{
"userId": 1,
"firstName": "Krish",
"lastName": "Lee",
"phoneNumber": "123456",
"emailAddress": "[email protected]"
},
{
"userId": 2,
"firstName": "racks",
"lastName": "jacson",
"phoneNumber": "123456",
"emailAddress": "[email protected]"
},
{
"userId": 3,
"firstName": "denial",
"lastName": "roast",
"phoneNumber": "33333333",
"emailAddress": "[email protected]"
},
{
"userId": 4,
"firstName": "devid",
"lastName": "neo",
"phoneNumber": "222222222",
"emailAddress": "[email protected]"
},
{
"userId": 5,
"firstName": "jone",
"lastName": "mac",
"phoneNumber": "111111111",
"emailAddress": "[email protected]"
}
]
}
I should be able to split it in such a way that each userid goes to a different file.
So far, I have tried putting the entries into a map and splitting the map, and converting the data into an array and splitting the array, without much luck. The resulting files contain the user IDs, but they are no longer in JSON format.
Any suggestions on how this can be achieved in Java?
Expected result: {"users": [
{
"userId": 1,
"firstName": "Krish",
"lastName": "Lee",
"phoneNumber": "123456",
"emailAddress": "[email protected]"
}
]
}
Upvotes: 0
Views: 4002
Reputation: 1587
To process large files, prefer stream/event-oriented parsing. Both Gson and Jackson support this approach. Here is an illustration using a tiny JSON parser, green-jelly (https://github.com/anatolygudkov/green-jelly):
import java.io.File;
import java.io.FileWriter;
import java.io.IOException;
import java.io.UncheckedIOException;
import java.io.Writer;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
/**
 * Splits a JSON document shaped like {@code {"users": [ {...}, {...} ]}} into one
 * file per user, using event-driven (streaming) parsing so the whole document never
 * has to be held in memory.
 *
 * <p>The parser and generator types used here ({@code JsonParser},
 * {@code JsonParserListenerAdaptor}, {@code JsonGenerator}, {@code AppendableWriter},
 * {@code JsonNumber}) are not part of the JDK; they are expected to come from the
 * green-jelly library this example was written for.
 */
public class SplitMyJson {

    /** Folder used when no output folder is given on the command line. */
    private static final String DEFAULT_OUTPUT_FOLDER = "/home/gudkov/mytest";

    /** Sample input: a root object whose "users" array holds one object per user. */
    private static final String JSON_TO_SPLIT = "{\"users\": [\n" +
            " {\n" +
            " \"userId\": 1,\n" +
            " \"firstName\": \"Krish\",\n" +
            " \"lastName\": \"Lee\",\n" +
            " \"phoneNumber\": \"123456\",\n" +
            " \"emailAddress\": \"[email protected]\"\n" +
            " },\n" +
            " {\n" +
            " \"userId\": 2,\n" +
            " \"firstName\": \"racks\",\n" +
            " \"lastName\": \"jacson\",\n" +
            " \"phoneNumber\": \"123456\",\n" +
            " \"emailAddress\": \"[email protected]\"\n" +
            " },\n" +
            " {\n" +
            " \"userId\": 3,\n" +
            " \"firstName\": \"denial\",\n" +
            " \"lastName\": \"roast\",\n" +
            " \"phoneNumber\": \"33333333\",\n" +
            " \"emailAddress\": \"[email protected]\"\n" +
            " },\n" +
            " {\n" +
            " \"userId\": 4,\n" +
            " \"firstName\": \"devid\",\n" +
            " \"lastName\": \"neo\",\n" +
            " \"phoneNumber\": \"222222222\",\n" +
            " \"emailAddress\": \"[email protected]\"\n" +
            " },\n" +
            " {\n" +
            " \"userId\": 5,\n" +
            " \"firstName\": \"jone\",\n" +
            " \"lastName\": \"mac\",\n" +
            " \"phoneNumber\": \"111111111\",\n" +
            " \"emailAddress\": \"[email protected]\"\n" +
            " }\n" +
            " ]\n" +
            "}";

    /**
     * Parses the sample document and writes each user to its own file.
     *
     * @param args optional; {@code args[0]} is the output folder. When absent, the
     *             built-in default folder is used.
     */
    public static void main(String[] args) {
        final File outputFolder = new File(args.length > 0 ? args[0] : DEFAULT_OUTPUT_FOLDER);

        final JsonParser parser = new JsonParser();
        parser.setListener(new Splitter(outputFolder));
        // When reading from a file, call parse() repeatedly, chunk by chunk, until EOF...
        parser.parse(JSON_TO_SPLIT);
        // ...and only then signal the end of the JSON input.
        parser.eoj();
    }

    /**
     * Parser listener that copies every object found at nesting depth 2 (a "user")
     * into its own {@code user-<index>.json} file inside the output folder.
     *
     * <p>Only the user object itself is written; the enclosing
     * {@code {"users": [ ... ]}} wrapper is not reproduced in the output files.
     * Events that occur outside a user object are ignored.
     */
    static class Splitter extends JsonParserListenerAdaptor {

        /** Object nesting depth at which a user object lives (root object is depth 1). */
        private static final int USER_DEPTH = 2;

        private final JsonGenerator jsonGenerator = new JsonGenerator();
        private final AppendableWriter<Writer> appendableWriter = new AppendableWriter<>();
        private final File outputFolder;

        /** Number of currently open JSON objects; arrays are not counted. */
        private int objectDepth;

        /** Zero-based index of the next user file to create. */
        private int userIndex;

        /**
         * @param outputFolder folder receiving the per-user files; created if missing
         * @throws UncheckedIOException if the folder does not exist and cannot be created
         */
        Splitter(final File outputFolder) {
            this.outputFolder = outputFolder;
            // Fail here with a clear message instead of later, when the first file is opened.
            if (!outputFolder.isDirectory() && !outputFolder.mkdirs()) {
                throw new UncheckedIOException(
                        new IOException("Cannot create output folder: " + outputFolder));
            }
            jsonGenerator.setOutput(appendableWriter);
        }

        /** True right after the opening brace of a user object has been counted. */
        private boolean userJustStarted() {
            return objectDepth == USER_DEPTH;
        }

        /** True right after the closing brace of a user object has been counted. */
        private boolean userJustEnded() {
            return objectDepth == USER_DEPTH - 1;
        }

        /** True while the parser is outside of any user object. */
        private boolean notInUser() {
            return objectDepth < USER_DEPTH;
        }

        /**
         * Opens a new output file when a user object starts and forwards the
         * object start to the generator.
         *
         * @throws UncheckedIOException if the user file cannot be opened
         */
        @Override
        public boolean onObjectStarted() {
            objectDepth++;
            if (notInUser()) {
                return true;
            }
            if (userJustStarted()) {
                final File userFile = new File(outputFolder, "user-" + userIndex + ".json");
                try {
                    // Explicit UTF-8: FileWriter would use the platform default charset.
                    appendableWriter.set(
                            Files.newBufferedWriter(userFile.toPath(), StandardCharsets.UTF_8));
                } catch (IOException e) {
                    throw new UncheckedIOException(e);
                }
                userIndex++;
            }
            jsonGenerator.startObject();
            return true;
        }

        /**
         * Forwards the object end to the generator and, when it closes a user
         * object, finishes the generated JSON and closes the user's file.
         *
         * @throws UncheckedIOException if finishing or closing the file fails
         */
        @Override
        public boolean onObjectEnded() {
            // Decide before decrementing: the depth must still include the closing object.
            final boolean outsideUser = notInUser();
            objectDepth--;
            if (outsideUser) {
                return true;
            }
            jsonGenerator.endObject();
            if (userJustEnded()) {
                // try-with-resources closes the file even if eoj() throws.
                try (Writer userOutput = appendableWriter.output()) {
                    jsonGenerator.eoj();
                } catch (IOException e) {
                    throw new UncheckedIOException(e);
                }
            }
            return true;
        }

        /** Forwards an array start that occurs inside a user object. */
        @Override
        public boolean onArrayStarted() {
            if (notInUser()) {
                return true;
            }
            jsonGenerator.startArray();
            return true;
        }

        /** Forwards an array end that occurs inside a user object. */
        @Override
        public boolean onArrayEnded() {
            if (notInUser()) {
                return true;
            }
            jsonGenerator.endArray();
            return true;
        }

        /** Forwards a member name that occurs inside a user object. */
        @Override
        public boolean onObjectMember(final CharSequence name) {
            if (notInUser()) {
                return true;
            }
            jsonGenerator.objectMember(name);
            return true;
        }

        /** Forwards a string value that occurs inside a user object. */
        @Override
        public boolean onStringValue(final CharSequence data) {
            if (notInUser()) {
                return true;
            }
            jsonGenerator.stringValue(data, true);
            return true;
        }

        /** Forwards a number value that occurs inside a user object. */
        @Override
        public boolean onNumberValue(final JsonNumber number) {
            if (notInUser()) {
                return true;
            }
            jsonGenerator.numberValue(number);
            return true;
        }

        /** Forwards a {@code true} literal that occurs inside a user object. */
        @Override
        public boolean onTrueValue() {
            if (notInUser()) {
                return true;
            }
            jsonGenerator.trueValue();
            return true;
        }

        /** Forwards a {@code false} literal that occurs inside a user object. */
        @Override
        public boolean onFalseValue() {
            if (notInUser()) {
                return true;
            }
            jsonGenerator.falseValue();
            return true;
        }

        /** Forwards a {@code null} literal that occurs inside a user object. */
        @Override
        public boolean onNullValue() {
            if (notInUser()) {
                return true;
            }
            jsonGenerator.nullValue();
            return true;
        }
    }
}
In this way you can easily implement filtering, aggregating etc. for really large files with the highest performance possible in regular Java.
Upvotes: 1