Aks3
Aks3

Reputation: 71

JSON to Python dataframe: mapping values from another file

I have some JSON files like the below, for every student id there will be a corresponding JSON file with mark details.

students.json

{
    "Students": [
        {
            "StudentName": "AAA",
            "Sid": 1020,
            "Saddress": "st.aaa",
            "Sdob": "10-11-1999"
        },
        {
            "StudentName": "BBB",
            "Sid": 1021,
            "Saddress": "st.bbb",
            "Sdob": "11-11-1999"
        },
        {
            "StudentName": "CCC",
            "Sid": 1022,
            "Saddress": "st.fff",
            "Sdob": "05-12-1999"
        },
        {
            "StudentName": "DDD",
            "Sid": 1023,
            "Saddress": "st.ddd",
            "Sdob": "15-09-1999"
        },
        {
            "StudentName": "EEE",
            "Sid": 1024,
            "Saddress": "st.eee",
            "Sdob": "10-11-1999"
        },
        {
            "StudentName": "FFF",
            "Sid": 1025,
            "Saddress": "st.ddd",
            "Sdob": "20-11-1999"
        },
        {
            "StudentName": "GGG",
            "Sid": 1026,
            "Saddress": "st.ggg",
            "Sdob": "25-11-1999"
        },
        {
            "StudentName": "JJJ",
            "Sid": 1019,
            "Saddress": "st.aaa",
            "Sdob": "18-11-1999"
        }
    ]
}

1020.json

{
    "marks": [
        {
            "English": 50,
            "Math": 75,
            "Art": 75,
            "Science": 80,
            "History": 30,
            "Geography": 35,
            "Physical Education": 90,
            "Chemistry": 80,
            "Physics": 85,
            "Biology": 75
        }
    ]
}

1021.json

{
    "marks": [
        {
            "English": 50,
            "Math": 75,
            "Art": 75,
            "Science": 80,
            "History": 30,
            "Geography": 35,
            "Physical Education": 90,
            "Chemistry": 80,
            "Physics": 85,
            "Biology": 75
        }
    ]
}

1022.json

{
    "marks": [
        {
            "English": 50,
            "Math": 75,
            "Art": 75,
            "Science": 80,
            "History": 30,
            "Geography": 35,
            "Physical Education": 90,
            "Chemistry": 80,
            "Physics": 85,
            "Biology": 75
        }
    ]
}

1023.json

{
    "marks": [
        {
            "English": 50,
            "Math": 75,
            "Art": 75,
            "Science": 80,
            "History": 30,
            "Geography": 35,
            "Physical Education": 90,
            "Chemistry": 80,
            "Physics": 85,
            "Biology": 75
        }
    ]
}

1024.json

{
    "marks": [
        {
            "English": 50,
            "Math": 75,
            "Art": 75,
            "Science": 80,
            "History": 30,
            "Geography": 35,
            "Physical Education": 90,
            "Chemistry": 80,
            "Physics": 85,
            "Biology": 75
        }
    ]
}

1025.json

{
    "marks": [
        {
            "English": 50,
            "Math": 75,
            "Art": 75,
            "Science": 80,
            "History": 30,
            "Geography": 35,
            "Physical Education": 90,
            "Chemistry": 80,
            "Physics": 85,
            "Biology": 75
        }
    ]
}

1026.json

{
    "marks": [
        {
            "English": 50,
            "Math": 75,
            "Art": 75,
            "Science": 80,
            "History": 30,
            "Geography": 35,
            "Physical Education": 90,
            "Chemistry": 80,
            "Physics": 85,
            "Biology": 75
        }
    ]
}

1019.json

{
    "marks": [
        {
            "English": 50,
            "Math": 75,
            "Art": 75,
            "Science": 80,
            "History": 30,
            "Geography": 35,
            "Physical Education": 90,
            "Chemistry": 80,
            "Physics": 85,
            "Biology": 75
        }
    ]
}

I need to get an output like this what i need

My code:

import json
import pandas as pd

data_JSON = open("students.json")

json_str = data_JSON.read()
data= json.loads(json_str)

print("JSON Data")
print(data)

def Normalize(data_JSON,record_path):
    temp = json.dumps(data_JSON)
    ar = json.loads(temp)
    df = pd.json_normalize(ar[record_path])
    return df

data2 = Normalize(data, "Students")
print("\nAfter normalizing JSON Data")
print(data2)

df0 = pd.DataFrame(data2)
df1 = df0[['StudentName', 'Sid']]
print("\nTaking specific columns from the df")
print(df1)

files = []

for i, row in df1.groupby('Sid').size().iteritems():
    file = str(i)+".json"
    files.append(file)

print("\nFile Names")
print(files)

df2 = pd.DataFrame(columns=['StudentName','Sid'])
merged = pd.DataFrame()

for i in files:
    data_JSON = open(i)
    json_str = data_JSON.read()
    data= json.loads(json_str)
    marks = Normalize(data, "marks")
    df = pd.DataFrame(marks)
    merged = pd.concat([df2, df], ignore_index=True, sort=False)

print("\nMarks")
print(merged)

# saving the dataframe
merged.to_csv('StudentsMark.csv',index=False)

Output: output

is it possible to insert the StudentName and Sid along with the mark? I dont want to give the StudentName and Sid directly to the dataframe df2, when the program fetches the mark from each file it should be able to map and add the StudentName and Sid to df2 for each file.

Upvotes: 1

Views: 95

Answers (1)

Jason Baker
Jason Baker

Reputation: 3706

Using json_normalize:

import json

import pandas as pd


path = "path/to/files"


def get_data(file: str) -> json:
    with open(f"{path}/{file}.json", "r") as f:
        return json.loads(f.read())


students_df = pd.json_normalize(data=get_data("students"), record_path="Students")

student_ids = students_df["Sid"].tolist()
grades_df = pd.concat([pd.json_normalize(data=get_data(x), record_path="marks").assign(Id=x) for x in student_ids])

(pd
 .merge(left=students_df, right=grades_df, left_on="Sid", right_on="Id")
 .drop(columns="Id")
 .to_csv(f"{path}/students_marks.csv", index=False)
 )

Upvotes: 2

Related Questions