Reputation: 21

Creating different columns in dataframe from a key pair in a dictionary

This is part of the dictionary I created:

defaultdict (int,
         {"['por', 'rus']": 80,
         "['nld', 'slv']": 4,
         "['jpn', 'pol']": 48,
         "['ces', 'epo']": 4,
         "['oci', 'ron']": 4,
         "['lit', 'mkd']": 2,
         "['deu', 'ewe']": 2,
         "['cat', 'ron']": 4,
         "['ces', 'ita']": 18,
         "['est', 'fra']": 14,
         "['hin', 'mal']": 4,

I want to have 3 columns, column1: the first key, column2: second key and column3: the value.

When I create a DataFrame:

pairs_df = pd.DataFrame(list(pairs.iteritems()), columns = ['column1','column2'])
pairs_df.head()

output:

           column1  column2
0   ['por', 'rus']       80
1   ['est', 'fra']       14
2   ['nld', 'slv']        4
3   ['jpn', 'pol']       48
4   ['ces', 'epo']        4
5   ['hin', 'mal']        4
6   ['oci', 'ron']        4
7   ['lit', 'mkd']        2
8   ['deu', 'ewe']        2
9   ['cat', 'ron']        4
10  ['ces', 'ita']       18

The keys get into one column, but I can't manage to get them separated into three columns.

Upvotes: 2

Answers (3)

jezrael

Reputation: 863531

This solution uses a list comprehension with eval and the function from_records:

import pandas as pd
from collections import defaultdict

pairs = defaultdict (int,
            {"['por', 'rus']": 80,
             "['nld', 'slv']": 4,
             "['jpn', 'pol']": 48,
             "['ces', 'epo']": 4,
             "['oci', 'ron']": 4,
             "['lit', 'mkd']": 2,
             "['deu', 'ewe']": 2,
             "['cat', 'ron']": 4,
             "['ces', 'ita']": 18,
             "['est', 'fra']": 14,
             "['hin', 'mal']": 4})

# Build one [first_code, second_code, count] row per dictionary entry.
# NOTE: eval() executes whatever expression the key string contains, so it is
# only acceptable here because the keys are trusted literals defined above;
# for untrusted data ast.literal_eval is the safe replacement.
rec = [ eval(x[0]) + [x[1]] for x in pairs.iteritems()]
print rec
[['por', 'rus', 80], ['est', 'fra', 14], ['nld', 'slv', 4], ['jpn', 'pol', 48], 
 ['ces', 'epo', 4],  ['hin', 'mal', 4],  ['oci', 'ron', 4], ['lit', 'mkd', 2], 
 ['deu', 'ewe', 2],  ['cat', 'ron', 4],  ['ces', 'ita', 18]]

print pd.DataFrame.from_records(rec, columns=['a','b','c'])
      a    b   c
0   por  rus  80
1   est  fra  14
2   nld  slv   4
3   jpn  pol  48
4   ces  epo   4
5   hin  mal   4
6   oci  ron   4
7   lit  mkd   2
8   deu  ewe   2
9   cat  ron   4
10  ces  ita  18

Upvotes: 0

jezrael

Reputation: 863531

import pandas as pd
from collections import defaultdict
from ast import literal_eval

pairs = defaultdict (int,
            {"['por', 'rus']": 80,
             "['nld', 'slv']": 4,
             "['jpn', 'pol']": 48,
             "['ces', 'epo']": 4,
             "['oci', 'ron']": 4,
             "['lit', 'mkd']": 2,
             "['deu', 'ewe']": 2,
             "['cat', 'ron']": 4,
             "['ces', 'ita']": 18,
             "['est', 'fra']": 14,
             "['hin', 'mal']": 4})


df = pd.DataFrame(list(pairs.iteritems()), columns = ['column1','column2'])
print df
           column1  column2
0   ['por', 'rus']       80
1   ['est', 'fra']       14
2   ['nld', 'slv']        4
3   ['jpn', 'pol']       48
4   ['ces', 'epo']        4
5   ['hin', 'mal']        4
6   ['oci', 'ron']        4
7   ['lit', 'mkd']        2
8   ['deu', 'ewe']        2
9   ['cat', 'ron']        4
10  ['ces', 'ita']       18

print type(df.at[0,'column1'])
<type 'str'>

You can first convert each string representation of a list to a real list with literal_eval, then create a DataFrame with from_records, and finally concat column2:

#change type string to list
df['column1'] = df['column1'].apply(literal_eval)
print df
       column1  column2
0   [por, rus]       80
1   [est, fra]       14
2   [nld, slv]        4
3   [jpn, pol]       48
4   [ces, epo]        4
5   [hin, mal]        4
6   [oci, ron]        4
7   [lit, mkd]        2
8   [deu, ewe]        2
9   [cat, ron]        4
10  [ces, ita]       18

print type(df.at[0,'column1'])
<type 'list'>

df1 = pd.DataFrame.from_records([x for x in df['column1']], columns=['a','b'])
print df1
      a    b
0   por  rus
1   est  fra
2   nld  slv
3   jpn  pol
4   ces  epo
5   hin  mal
6   oci  ron
7   lit  mkd
8   deu  ewe
9   cat  ron
10  ces  ita

print pd.concat([df1, df['column2']], axis=1)
      a    b  column2
0   por  rus       80
1   est  fra       14
2   nld  slv        4
3   jpn  pol       48
4   ces  epo        4
5   hin  mal        4
6   oci  ron        4
7   lit  mkd        2
8   deu  ewe        2
9   cat  ron        4
10  ces  ita       18

Or use str.strip and str.replace to clean column1, then create a new DataFrame with str.split and concat column2:

df['column1'] = df['column1'].str.strip('[]').str.replace("'","")
print df
     column1  column2
0   por, rus       80
1   est, fra       14
2   nld, slv        4
3   jpn, pol       48
4   ces, epo        4
5   hin, mal        4
6   oci, ron        4
7   lit, mkd        2
8   deu, ewe        2
9   cat, ron        4
10  ces, ita       18

df1 = df['column1'].str.split(",", expand=True)
df1.columns = ['a','b']
print df1
      a     b
0   por   rus
1   est   fra
2   nld   slv
3   jpn   pol
4   ces   epo
5   hin   mal
6   oci   ron
7   lit   mkd
8   deu   ewe
9   cat   ron
10  ces   ita

print pd.concat([df1, df['column2']], axis=1)
      a     b  column2
0   por   rus       80
1   est   fra       14
2   nld   slv        4
3   jpn   pol       48
4   ces   epo        4
5   hin   mal        4
6   oci   ron        4
7   lit   mkd        2
8   deu   ewe        2
9   cat   ron        4
10  ces   ita       18

Upvotes: 2

roadrunner66

Reputation: 7941

Is this what you want?

import re

mydict=  {"['por', 'rus']": 80,
         "['nld', 'slv']": 4,
         "['jpn', 'pol']": 48,
         "['ces', 'epo']": 4,
         "['oci', 'ron']": 4,
         "['lit', 'mkd']": 2,
         "['deu', 'ewe']": 2,
         "['cat', 'ron']": 4,
         "['ces', 'ita']": 18,
         "['est', 'fra']": 14,
         "['hin', 'mal']": 4}


# this is where you seem to be stuck
for k,v in mydict.iteritems():
    print k,v    # keys are still strings, not lists

# this is the resolution, separation of the keys into two strings    
for k,v in mydict.iteritems():
    a=re.findall('\w{3}',k) 
    print a[0],a[1],v

output:

['por', 'rus'] 80
['nld', 'slv'] 4
['jpn', 'pol'] 48
['ces', 'epo'] 4
['oci', 'ron'] 4
['lit', 'mkd'] 2
['deu', 'ewe'] 2
['cat', 'ron'] 4
['ces', 'ita'] 18
['est', 'fra'] 14
['hin', 'mal'] 4
por rus 80
nld slv 4
jpn pol 48
ces epo 4
oci ron 4
lit mkd 2
deu ewe 2
cat ron 4
ces ita 18
est fra 14
hin mal 4

Now you can append them to lists if you like:

 x,y,z=[],[],[]
    for k,v in mydict.iteritems():
        a=re.findall('\w{3}',k) 
        x.append(a[0])
        y.append(a[1])
        z.append(v)
print x,y,z

Or, if you prefer a pandas DataFrame:

import pandas as pd
df = pd.DataFrame({'a': x, 'b': y,'c':z})
print df

output:

['por', 'nld', 'jpn', 'ces', 'oci', 'lit', 'deu', 'cat', 'ces', 'est', 'hin'] ['rus', 'slv', 'pol', 'epo', 'ron', 'mkd', 'ewe', 'ron', 'ita', 'fra', 'mal'] [80, 4, 48, 4, 4, 2, 2, 4, 18, 14, 4]
      a    b   c
0   por  rus  80
1   nld  slv   4
2   jpn  pol  48
3   ces  epo   4
4   oci  ron   4
5   lit  mkd   2
6   deu  ewe   2
7   cat  ron   4
8   ces  ita  18
9   est  fra  14
10  hin  mal   4

Upvotes: 3

Creating different columns in dataframe from a key pair in a dictionary

Answers (3)

Related Questions