Reputation: 69

How to deal with tolist() and nullvalue by index

When I use .tolist(), it split only not null value. The index is the error by up. import pandas as pd from ast import literal_eval

d = [{'CusID': 1, 'Name': 'Paul', 'Shop': 'Pascal', 
  'Item': '[{"Food":"001","Water":"Melon","Dessert":"Mango"}]',
  'Card': '[{"Main":"Yes", "Second":""}]', 'Type': '{"VIP":"YES"}', 'Price': 24000}, 
  {'CusID': 2, 'Name': 'Mark', 'Shop': 'Casio', 'Item': '[{"Food":"001","Water":"Apple","Dessert":"Mango"}]', 
   'Card': '[{"Main":"", "Second":"Yes"}]', 'Type': '{"VIP":"YES"}', 'Price': 30800}, 
  {'CusID': 3, 'Name': 'Jame', 'Shop': 'Casio', 'Item': '[]', 
   'Card': '[]', 'Type': '{}', 'Price': 17000},
  {'CusID': 4, 'Name': 'Bill', 'Shop': 'Nike', 'Item': '[{"Food":"004","Water":"Banana","Dessert":""}]',
  'Card': '[{"Main":"Yes", "Second":""}]', 'Type': '{"VIP":"YES"}', 'Price': 900}]
df = pd.DataFrame(d)

Dataframe:

CusID   Name    Shop    Item                                                Card              Type  Price
1       Paul    Pascal  [{"Food":"001","Water":"Melon","Dessert":"Mang...   [{"Main":"Yes", "Second":""}]   {"VIP":"YES"}   24000
2       Mark    Casio   [{"Food":"001","Water":"Apple","Dessert":"Mang...   [{"Main":"", "Second":"Yes"}]   {"VIP":"YES"}   30800
3       Jame    Casio   []                                                  []                             {}   17000
4       Bill    Nike    [{"Food":"004","Water":"Banana","Dessert":""}]      [{"Main":"Yes", "Second":""}]   {"VIP":"YES"}   900

Then

def f(x):
    try:
        return literal_eval(str(x))   
    except Exception as e:
        return []

df["Item"] = df["Item"].apply(lambda x: f(x))
df["Card"] = df["Card"].apply(lambda x: f(x))
df["Type"] = df["Type"].apply(lambda x: f(x))

a = pd.DataFrame(df.pop('Item').str[0].dropna().tolist()).add_prefix('Item_')
b = pd.DataFrame(df.pop('Card').str[0].dropna().tolist()).add_prefix('Card_')
c = pd.DataFrame(df.pop('Type').tolist())

out2 = df.join(i for i in [a,b,c]).fillna('')
out2

The result:

CusID   Name    Shop    Price   Item_Food   Item_Water  Item_Dessert    Card_Main   Card_Second VIP
1       Paul    Pascal  24000   001         Melon       Mango           Yes                     YES
2       Mark    Casio   30800   001         Apple       Mango                       Yes         YES
3       Jame    Casio   17000   004         Banana      Yes     
4       Bill    Nike    900                                                                     YES

I found it's wrong result. I want to get here:

CusID   Name    Shop    Price   Item_Food   Item_Water  Item_Dessert    Card_Main   Card_Second VIP
1       Paul    Pascal  24000   001         Melon       Mango           Yes                     YES
2       Mark    Casio   30800   001         Apple       Mango                       Yes         YES
3       Jame    Casio   17000           
4       Bill    Nike    900     004         Banana                      Yes                     YES

Upvotes: 2

Answers (2)

sammywemmy

Reputation: 28709

Gave it a shot within the dictionary d - hopefully it is extensible for your use case :

from ast import literal_eval

#container for various iteration outputs
box = []
for entry in d:
    dict1 = {}
    #collect data for keys that dont require ast literal eval
    [dict1.update({other_key : val}) 
     for other_key, val in entry.items() 
     if other_key in ("CusID", "Name", "Shop","Price")]

    #Item and Card are where literal evaluation is required
    #plus the data is embedded in a list
    for key in entry:
        if key in ("Item", "Card") : 
            for ent in literal_eval(entry.get(key)):
                res = {F"{key}_{k}" : v for k,v in ent.items()}
                dict1.update(res)
        #Type requires literal eval
        #but is in a dict, so we get our data a bit easier than the previous ones
        if key == "Type":
            dict1.update({key:value
                          for key, value in 
                          literal_eval(entry.get(key)).items()
                         })

    box.append(dict1)


print(box)

[{'CusID': 1,
  'Name': 'Paul',
  'Shop': 'Pascal',
  'Price': 24000,
  'Item_Food': '001',
  'Item_Water': 'Melon',
  'Item_Dessert': 'Mango',
  'Card_Main': 'Yes',
  'Card_Second': '',
  'VIP': 'YES'},
 {'CusID': 2,
  'Name': 'Mark',
  'Shop': 'Casio',
  'Price': 30800,
  'Item_Food': '001',
  'Item_Water': 'Apple',
  'Item_Dessert': 'Mango',
  'Card_Main': '',
  'Card_Second': 'Yes',
  'VIP': 'YES'},
 {'CusID': 3, 'Name': 'Jame', 'Shop': 'Casio', 'Price': 17000},
 {'CusID': 4,
  'Name': 'Bill',
  'Shop': 'Nike',
  'Price': 900,
  'Item_Food': '004',
  'Item_Water': 'Banana',
  'Item_Dessert': '',
  'Card_Main': 'Yes',
  'Card_Second': '',
  'VIP': 'YES'}]


#get dataframe

pd.DataFrame(box)


#could not quite get the formatting right for wide dataframes

Upvotes: 0

jezrael

Reputation: 863226

Idea is create new DataFrames with index of Series after removed missing values by Series.dropna:

def f(x):
    try:
        return ast.literal_eval(str(x))   
    except:
        return []

cols = ['Item','Card','Type']
df[cols] = df[cols].applymap(lambda x: f(x))


s1 = df.pop('Item').str[0].dropna()
s2 = df.pop('Card').str[0].dropna()

a = pd.DataFrame(s1.tolist(), index=s1.index).add_prefix('Item_')
b = pd.DataFrame(s2.tolist(), index=s2.index).add_prefix('Card_')
c = pd.DataFrame(df.pop('Type').tolist(), index=df.index)

out2 = df.join(i for i in [a,b,c]).fillna('')

print (out2)
   CusID  Name    Shop  Price Item_Food Item_Water Item_Dessert Card_Main  \
0      1  Paul  Pascal  24000       001      Melon        Mango       Yes   
1      2  Mark   Casio  30800       001      Apple        Mango             
2      3  Jame   Casio  17000                                               
3      4  Bill    Nike    900       004     Banana                    Yes   

  Card_Second  VIP  
0              YES  
1         Yes  YES  
2                   
3              YES

Upvotes: 1

How to deal with tolist() and nullvalue by index

Answers (2)

Related Questions