JSON to Python dataframe: mapping values from another file

Question

I have some JSON files like the below, for every student id there will be a corresponding JSON file with mark details.

students.json

{
    "Students": [
        {
            "StudentName": "AAA",
            "Sid": 1020,
            "Saddress": "st.aaa",
            "Sdob": "10-11-1999"
        },
        {
            "StudentName": "BBB",
            "Sid": 1021,
            "Saddress": "st.bbb",
            "Sdob": "11-11-1999"
        },
        {
            "StudentName": "CCC",
            "Sid": 1022,
            "Saddress": "st.fff",
            "Sdob": "05-12-1999"
        },
        {
            "StudentName": "DDD",
            "Sid": 1023,
            "Saddress": "st.ddd",
            "Sdob": "15-09-1999"
        },
        {
            "StudentName": "EEE",
            "Sid": 1024,
            "Saddress": "st.eee",
            "Sdob": "10-11-1999"
        },
        {
            "StudentName": "FFF",
            "Sid": 1025,
            "Saddress": "st.ddd",
            "Sdob": "20-11-1999"
        },
        {
            "StudentName": "GGG",
            "Sid": 1026,
            "Saddress": "st.ggg",
            "Sdob": "25-11-1999"
        },
        {
            "StudentName": "JJJ",
            "Sid": 1019,
            "Saddress": "st.aaa",
            "Sdob": "18-11-1999"
        }
    ]
}

1020.json

{
    "marks": [
        {
            "English": 50,
            "Math": 75,
            "Art": 75,
            "Science": 80,
            "History": 30,
            "Geography": 35,
            "Physical Education": 90,
            "Chemistry": 80,
            "Physics": 85,
            "Biology": 75
        }
    ]
}

1021.json

{
    "marks": [
        {
            "English": 50,
            "Math": 75,
            "Art": 75,
            "Science": 80,
            "History": 30,
            "Geography": 35,
            "Physical Education": 90,
            "Chemistry": 80,
            "Physics": 85,
            "Biology": 75
        }
    ]
}

1022.json

{
    "marks": [
        {
            "English": 50,
            "Math": 75,
            "Art": 75,
            "Science": 80,
            "History": 30,
            "Geography": 35,
            "Physical Education": 90,
            "Chemistry": 80,
            "Physics": 85,
            "Biology": 75
        }
    ]
}

1023.json

{
    "marks": [
        {
            "English": 50,
            "Math": 75,
            "Art": 75,
            "Science": 80,
            "History": 30,
            "Geography": 35,
            "Physical Education": 90,
            "Chemistry": 80,
            "Physics": 85,
            "Biology": 75
        }
    ]
}

1024.json

{
    "marks": [
        {
            "English": 50,
            "Math": 75,
            "Art": 75,
            "Science": 80,
            "History": 30,
            "Geography": 35,
            "Physical Education": 90,
            "Chemistry": 80,
            "Physics": 85,
            "Biology": 75
        }
    ]
}

1025.json

{
    "marks": [
        {
            "English": 50,
            "Math": 75,
            "Art": 75,
            "Science": 80,
            "History": 30,
            "Geography": 35,
            "Physical Education": 90,
            "Chemistry": 80,
            "Physics": 85,
            "Biology": 75
        }
    ]
}

1026.json

{
    "marks": [
        {
            "English": 50,
            "Math": 75,
            "Art": 75,
            "Science": 80,
            "History": 30,
            "Geography": 35,
            "Physical Education": 90,
            "Chemistry": 80,
            "Physics": 85,
            "Biology": 75
        }
    ]
}

1019.json

{
    "marks": [
        {
            "English": 50,
            "Math": 75,
            "Art": 75,
            "Science": 80,
            "History": 30,
            "Geography": 35,
            "Physical Education": 90,
            "Chemistry": 80,
            "Physics": 85,
            "Biology": 75
        }
    ]
}

I need to get an output like this

My code:

import json
import pandas as pd

data_JSON = open("students.json")

json_str = data_JSON.read()
data= json.loads(json_str)

print("JSON Data")
print(data)

def Normalize(data_JSON,record_path):
    temp = json.dumps(data_JSON)
    ar = json.loads(temp)
    df = pd.json_normalize(ar[record_path])
    return df

data2 = Normalize(data, "Students")
print("
After normalizing JSON Data")
print(data2)

df0 = pd.DataFrame(data2)
df1 = df0[['StudentName', 'Sid']]
print("
Taking specific columns from the df")
print(df1)

files = []

for i, row in df1.groupby('Sid').size().iteritems():
    file = str(i)+".json"
    files.append(file)

print("
File Names")
print(files)

df2 = pd.DataFrame(columns=['StudentName','Sid'])
merged = pd.DataFrame()

for i in files:
    data_JSON = open(i)
    json_str = data_JSON.read()
    data= json.loads(json_str)
    marks = Normalize(data, "marks")
    df = pd.DataFrame(marks)
    merged = pd.concat([df2, df], ignore_index=True, sort=False)

print("
Marks")
print(merged)

# saving the dataframe
merged.to_csv('StudentsMark.csv',index=False)

Output:

is it possible to insert the StudentName and Sid along with the mark? I dont want to give the StudentName and Sid directly to the dataframe df2, when the program fetches the mark from each file it should be able to map and add the StudentName and Sid to df2 for each file.

Jason Baker · Accepted Answer

Using json_normalize:

import json

import pandas as pd


path = "path/to/files"


def get_data(file: str) -> json:
    with open(f"{path}/{file}.json", "r") as f:
        return json.loads(f.read())


students_df = pd.json_normalize(data=get_data("students"), record_path="Students")

student_ids = students_df["Sid"].tolist()
grades_df = pd.concat([pd.json_normalize(data=get_data(x), record_path="marks").assign(Id=x) for x in student_ids])

(pd
 .merge(left=students_df, right=grades_df, left_on="Sid", right_on="Id")
 .drop(columns="Id")
 .to_csv(f"{path}/students_marks.csv", index=False)
 )

JSON to Python dataframe: mapping values from another file

Answers (1)

Related Questions