user1851202
user1851202

Reputation: 11

Split a large JSON file into smaller JSON files using Java

I have a large dataset in JSON format. For ease of use, I want to split it into multiple JSON files while still maintaining the structure. For example: {"users": [ { "userId": 1, "firstName": "Krish", "lastName": "Lee", "phoneNumber": "123456", "emailAddress": "[email protected]" }, { "userId": 2, "firstName": "racks", "lastName": "jacson", "phoneNumber": "123456", "emailAddress": "[email protected]" }, { "userId": 3, "firstName": "denial", "lastName": "roast", "phoneNumber": "33333333", "emailAddress": "[email protected]" }, { "userId": 4, "firstName": "devid", "lastName": "neo", "phoneNumber": "222222222", "emailAddress": "[email protected]" }, { "userId": 5, "firstName": "jone", "lastName": "mac", "phoneNumber": "111111111", "emailAddress": "[email protected]" } ] } I should be able to split it in such a way that each userId goes to a different file. So far, I have tried putting the users into a map and splitting the map, and converting the data into an array and splitting the array, without much luck. The resulting files contain the userId, but they are not in JSON format anymore. Any suggestions on how this can be achieved in Java?

Expected result: {"users": [ { "userId": 1, "firstName": "Krish", "lastName": "Lee", "phoneNumber": "123456", "emailAddress": "[email protected]" } ] }

Upvotes: 0

Views: 4002

Answers (1)

AnatolyG
AnatolyG

Reputation: 1587

To process large files, prefer stream/event-oriented parsing. Both Gson and Jackson support this approach. Here is an illustration using a tiny JSON parser, https://github.com/anatolygudkov/green-jelly:

import java.io.File;
import java.io.FileWriter;
import java.io.IOException;
import java.io.UncheckedIOException;
import java.io.Writer;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;

/**
 * Demonstrates splitting a JSON document of the form {@code {"users": [ {...}, {...} ]}}
 * into one file per user with an event-driven (streaming) parser.
 *
 * <p>Every JSON object found at object-nesting depth 2 (i.e. each element of the
 * {@code users} array in the sample) is re-emitted into its own file named
 * {@code user-<index>.json}, where the index starts at 0 and follows document order.
 * Only the user object itself is written; the enclosing {@code {"users": [...]}} wrapper
 * is not reproduced in the output files.
 */
public class SplitMyJson {
    private static final String jsonToSplit = "{\"users\": [\n" +
            "    {\n" +
            "      \"userId\": 1,\n" +
            "      \"firstName\": \"Krish\",\n" +
            "      \"lastName\": \"Lee\",\n" +
            "      \"phoneNumber\": \"123456\",\n" +
            "      \"emailAddress\": \"[email protected]\"\n" +
            "    },\n" +
            "    {\n" +
            "      \"userId\": 2,\n" +
            "      \"firstName\": \"racks\",\n" +
            "      \"lastName\": \"jacson\",\n" +
            "      \"phoneNumber\": \"123456\",\n" +
            "      \"emailAddress\": \"[email protected]\"\n" +
            "    },\n" +
            "    {\n" +
            "      \"userId\": 3,\n" +
            "      \"firstName\": \"denial\",\n" +
            "      \"lastName\": \"roast\",\n" +
            "      \"phoneNumber\": \"33333333\",\n" +
            "      \"emailAddress\": \"[email protected]\"\n" +
            "    },\n" +
            "    {\n" +
            "      \"userId\": 4,\n" +
            "      \"firstName\": \"devid\",\n" +
            "      \"lastName\": \"neo\",\n" +
            "      \"phoneNumber\": \"222222222\",\n" +
            "      \"emailAddress\": \"[email protected]\"\n" +
            "    },\n" +
            "    {\n" +
            "      \"userId\": 5,\n" +
            "      \"firstName\": \"jone\",\n" +
            "      \"lastName\": \"mac\",\n" +
            "      \"phoneNumber\": \"111111111\",\n" +
            "      \"emailAddress\": \"[email protected]\"\n" +
            "    }\n" +
            "  ]\n" +
            "}";

    /**
     * Parses the embedded sample document and writes one file per user.
     *
     * @param args optional; {@code args[0]} is the output directory. When absent, the
     *             original default {@code /home/gudkov/mytest} is used.
     */
    public static void main(String[] args) {
        final File outputFolder = new File(args.length > 0 ? args[0] : "/home/gudkov/mytest");

        final JsonParser parser = new JsonParser();
        parser.setListener(new Splitter(outputFolder));
        parser.parse(jsonToSplit); // if you read a file, call parse() several times part by part in a loop until EOF
        parser.eoj(); // and then call .eoj()
    }

    /**
     * Parser listener that forwards every event occurring inside a depth-2 object to a
     * {@code JsonGenerator} whose output is redirected to a fresh file for each such object.
     *
     * <p>Events outside a user object (the root object, the {@code users} member name and
     * the array brackets) are ignored. Every callback returns {@code true}; presumably
     * this tells the parser to continue - verify against the green-jelly documentation.
     */
    static class Splitter extends JsonParserListenerAdaptor {
        private final JsonGenerator jsonGenerator = new JsonGenerator();
        private final AppendableWriter<Writer> appendableWriter = new AppendableWriter<>();

        private final File outputFolder;
        /** Number of JSON objects currently open; 1 = root object, 2 = a user object. */
        private int objectDepth;
        /** Zero-based index of the next user file to create. */
        private int userIndex;

        /**
         * @param outputFolder directory that receives the {@code user-<index>.json} files;
         *                     created (including parents) when it does not exist
         * @throws UncheckedIOException if the directory cannot be created
         */
        Splitter(final File outputFolder) {
            this.outputFolder = outputFolder;
            try {
                // Fails fast with a descriptive error instead of silently ignoring a
                // failed mkdirs() and tripping over the missing directory later.
                Files.createDirectories(outputFolder.toPath());
            } catch (IOException e) {
                throw new UncheckedIOException(e);
            }

            jsonGenerator.setOutput(appendableWriter);
        }

        /** True right after the depth counter was raised for a user (depth-2) object. */
        private boolean userJustStarted() {
            return objectDepth == 2;
        }

        /** True right after the depth counter dropped back to the root object. */
        private boolean userJustEnded() {
            return objectDepth == 1;
        }

        /** True while the parser is positioned outside any user object. */
        private boolean notInUser() {
            return objectDepth < 2;
        }

        @Override
        public boolean onObjectStarted() {
            objectDepth++;

            if (notInUser()) {
                return true; // the root object is not copied to any output
            }

            if (userJustStarted()) {
                final File userFile = new File(outputFolder, "user-" + userIndex + ".json");
                try {
                    // Explicit UTF-8: FileWriter would use the platform default charset
                    // on older JDKs, which can yield non-UTF-8 JSON.
                    appendableWriter.set(Files.newBufferedWriter(userFile.toPath(), StandardCharsets.UTF_8));
                } catch (IOException e) {
                    throw new UncheckedIOException(e);
                }
                userIndex++;
            }
            jsonGenerator.startObject();
            return true;
        }

        @Override
        public boolean onObjectEnded() {
            // Evaluate before decrementing: tells whether the closing object is the root.
            final boolean closingRoot = notInUser();
            objectDepth--;
            if (closingRoot) {
                return true;
            }

            jsonGenerator.endObject();

            if (userJustEnded()) { // user object ended
                try {
                    jsonGenerator.eoj();
                    appendableWriter.output().close(); // close() also flushes the buffered writer
                } catch (IOException e) {
                    throw new UncheckedIOException(e);
                }
            }
            return true;
        }

        @Override
        public boolean onArrayStarted() {
            if (notInUser()) {
                return true;
            }
            jsonGenerator.startArray();
            return true;
        }

        @Override
        public boolean onArrayEnded() {
            if (notInUser()) {
                return true;
            }
            jsonGenerator.endArray();
            return true;
        }

        @Override
        public boolean onObjectMember(final CharSequence name) {
            if (notInUser()) {
                return true;
            }
            jsonGenerator.objectMember(name);
            return true;
        }

        @Override
        public boolean onStringValue(final CharSequence data) {
            if (notInUser()) {
                return true;
            }
            // NOTE(review): the 'true' flag presumably asks the generator to escape the
            // value - confirm against the JsonGenerator API.
            jsonGenerator.stringValue(data, true);
            return true;
        }

        @Override
        public boolean onNumberValue(final JsonNumber number) {
            if (notInUser()) {
                return true;
            }
            jsonGenerator.numberValue(number);
            return true;
        }

        @Override
        public boolean onTrueValue() {
            if (notInUser()) {
                return true;
            }
            jsonGenerator.trueValue();
            return true;
        }

        @Override
        public boolean onFalseValue() {
            if (notInUser()) {
                return true;
            }
            jsonGenerator.falseValue();
            return true;
        }

        @Override
        public boolean onNullValue() {
            if (notInUser()) {
                return true;
            }
            jsonGenerator.nullValue();
            return true;
        }
    }
}

In this way you can easily implement filtering, aggregating etc. for really large files with the highest performance possible in regular Java.

Upvotes: 1

Related Questions