Reputation: 69
When I use .tolist(), it split only not null value. The index is the error by up. import pandas as pd from ast import literal_eval
d = [{'CusID': 1, 'Name': 'Paul', 'Shop': 'Pascal',
'Item': '[{"Food":"001","Water":"Melon","Dessert":"Mango"}]',
'Card': '[{"Main":"Yes", "Second":""}]', 'Type': '{"VIP":"YES"}', 'Price': 24000},
{'CusID': 2, 'Name': 'Mark', 'Shop': 'Casio', 'Item': '[{"Food":"001","Water":"Apple","Dessert":"Mango"}]',
'Card': '[{"Main":"", "Second":"Yes"}]', 'Type': '{"VIP":"YES"}', 'Price': 30800},
{'CusID': 3, 'Name': 'Jame', 'Shop': 'Casio', 'Item': '[]',
'Card': '[]', 'Type': '{}', 'Price': 17000},
{'CusID': 4, 'Name': 'Bill', 'Shop': 'Nike', 'Item': '[{"Food":"004","Water":"Banana","Dessert":""}]',
'Card': '[{"Main":"Yes", "Second":""}]', 'Type': '{"VIP":"YES"}', 'Price': 900}]
df = pd.DataFrame(d)
Dataframe:
CusID Name Shop Item Card Type Price
1 Paul Pascal [{"Food":"001","Water":"Melon","Dessert":"Mang... [{"Main":"Yes", "Second":""}] {"VIP":"YES"} 24000
2 Mark Casio [{"Food":"001","Water":"Apple","Dessert":"Mang... [{"Main":"", "Second":"Yes"}] {"VIP":"YES"} 30800
3 Jame Casio [] [] {} 17000
4 Bill Nike [{"Food":"004","Water":"Banana","Dessert":""}] [{"Main":"Yes", "Second":""}] {"VIP":"YES"} 900
Then
def f(x):
try:
return literal_eval(str(x))
except Exception as e:
return []
df["Item"] = df["Item"].apply(lambda x: f(x))
df["Card"] = df["Card"].apply(lambda x: f(x))
df["Type"] = df["Type"].apply(lambda x: f(x))
a = pd.DataFrame(df.pop('Item').str[0].dropna().tolist()).add_prefix('Item_')
b = pd.DataFrame(df.pop('Card').str[0].dropna().tolist()).add_prefix('Card_')
c = pd.DataFrame(df.pop('Type').tolist())
out2 = df.join(i for i in [a,b,c]).fillna('')
out2
The result:
CusID Name Shop Price Item_Food Item_Water Item_Dessert Card_Main Card_Second VIP
1 Paul Pascal 24000 001 Melon Mango Yes YES
2 Mark Casio 30800 001 Apple Mango Yes YES
3 Jame Casio 17000 004 Banana Yes
4 Bill Nike 900 YES
I found it's wrong result. I want to get here:
CusID Name Shop Price Item_Food Item_Water Item_Dessert Card_Main Card_Second VIP
1 Paul Pascal 24000 001 Melon Mango Yes YES
2 Mark Casio 30800 001 Apple Mango Yes YES
3 Jame Casio 17000
4 Bill Nike 900 004 Banana Yes YES
Upvotes: 2
Views: 107
Reputation: 28709
Gave it a shot within the dictionary d
- hopefully it is extensible for your use case :
from ast import literal_eval
#container for various iteration outputs
box = []
for entry in d:
dict1 = {}
#collect data for keys that dont require ast literal eval
[dict1.update({other_key : val})
for other_key, val in entry.items()
if other_key in ("CusID", "Name", "Shop","Price")]
#Item and Card are where literal evaluation is required
#plus the data is embedded in a list
for key in entry:
if key in ("Item", "Card") :
for ent in literal_eval(entry.get(key)):
res = {F"{key}_{k}" : v for k,v in ent.items()}
dict1.update(res)
#Type requires literal eval
#but is in a dict, so we get our data a bit easier than the previous ones
if key == "Type":
dict1.update({key:value
for key, value in
literal_eval(entry.get(key)).items()
})
box.append(dict1)
print(box)
[{'CusID': 1,
'Name': 'Paul',
'Shop': 'Pascal',
'Price': 24000,
'Item_Food': '001',
'Item_Water': 'Melon',
'Item_Dessert': 'Mango',
'Card_Main': 'Yes',
'Card_Second': '',
'VIP': 'YES'},
{'CusID': 2,
'Name': 'Mark',
'Shop': 'Casio',
'Price': 30800,
'Item_Food': '001',
'Item_Water': 'Apple',
'Item_Dessert': 'Mango',
'Card_Main': '',
'Card_Second': 'Yes',
'VIP': 'YES'},
{'CusID': 3, 'Name': 'Jame', 'Shop': 'Casio', 'Price': 17000},
{'CusID': 4,
'Name': 'Bill',
'Shop': 'Nike',
'Price': 900,
'Item_Food': '004',
'Item_Water': 'Banana',
'Item_Dessert': '',
'Card_Main': 'Yes',
'Card_Second': '',
'VIP': 'YES'}]
#get dataframe
pd.DataFrame(box)
#could not quite get the formatting right for wide dataframes
Upvotes: 0
Reputation: 863226
Idea is create new DataFrames with index of Series after removed missing values by Series.dropna
:
def f(x):
try:
return ast.literal_eval(str(x))
except:
return []
cols = ['Item','Card','Type']
df[cols] = df[cols].applymap(lambda x: f(x))
s1 = df.pop('Item').str[0].dropna()
s2 = df.pop('Card').str[0].dropna()
a = pd.DataFrame(s1.tolist(), index=s1.index).add_prefix('Item_')
b = pd.DataFrame(s2.tolist(), index=s2.index).add_prefix('Card_')
c = pd.DataFrame(df.pop('Type').tolist(), index=df.index)
out2 = df.join(i for i in [a,b,c]).fillna('')
print (out2)
CusID Name Shop Price Item_Food Item_Water Item_Dessert Card_Main \
0 1 Paul Pascal 24000 001 Melon Mango Yes
1 2 Mark Casio 30800 001 Apple Mango
2 3 Jame Casio 17000
3 4 Bill Nike 900 004 Banana Yes
Card_Second VIP
0 YES
1 Yes YES
2
3 YES
Upvotes: 1