Shanoo
Shanoo

Reputation: 1255

Creating columns in one dataframe based on the values of a column in another dataset

I have two pandas dataframe

import pandas as pd 
import numpy as np
import datetime

# intialise data of lists. 
data = {'group'      :["A","A","A","B","B","B","B"],
        'val': ["AA","AB","AC","B1","B2","AA","AB"],
        'cal1'     :[4,5,7,6,5,8,9],
        'cal2'     :[10,100,100,10,1,10,100]
       } 

# Create DataFrame 
df1 = pd.DataFrame(data) 
df1

    group   val cal1    cal2
0   A       AA  4       10
1   A       AB  5       100
2   A       AC  7       100
3   B       B1  6       10
4   B       B2  5       1
5   B       AA  8       10
6   B       AB  9       100

import pandas as pd 
import numpy as np
import datetime

# intialise data of lists. 
data = {'group'      :["A","A","A","B","B","B","B"],
        'flag' : [1,0,0,1,0,0,0],
        'var1': [1,2,3,7,8,9,10]
       } 

# Create DataFrame 
df2 = pd.DataFrame(data) 
df2

   group   flag var1
0   A       1   1
1   A       0   2
2   A       0   3
3   B       1   7
4   B       0   8
5   B       0   9
6   B       0   10

Step 1: CReate columns in df2 based on unique "val" in df1 like below:

unique_val = df1['val'].unique().tolist()
new_cols = [t + '_new' for t in unique_val]
for i in new_cols:
    df2[i] = 0
df2
    group   flag    var1    AA_new  AB_new  AC_new  B1_new  B2_new
0   A       1       1       0       0       0       0        0
1   A       0       2       0       0       0       0        0
2   A       0       3       0       0       0       0        0
3   B       1       7       0       0       0       0        0
4   B       0       8       0       0       0       0        0
5   B       0       9       0       0       0       0        0
6   B       0       10      0       0       0       0        0

Step 2: for row where flag = 1, AA_new will be calculated as var1(from df2)* value of 'cal1' from df1 for group "A" and val "AA" * value of 'cal2' from df1 for group "A" and val "AA", similarly AB_new will be calculated as var1(from df2) * value of 'cal1' from df1 for group "A" and val "AB" * value of 'cal2' from df1 for group "A" and val "AB"

My expected output should look like below:

    group   flag    var1    AA_new  AB_new  AC_new  B1_new  B2_new
0   A       1       1       40      500     700     0        0
1   A       0       2       0       0       0       0        0
2   A       0       3       0       0       0       0        0
3   B       1       7       570     6300    0       420      35
4   B       0       8       0       0       0       0        0
5   B       0       9       0       0       0       0        0
6   B       0       10      0       0       0       0        0

Upvotes: 1

Views: 69

Answers (1)

ansev
ansev

Reputation: 30920

Use DataFrame.pivot_table with GroupBy.bfill an then we can use DataFrame.mul.

df2.assign(**df1.pivot_table(columns='val',
                             values='cal',
                             index = ['group', df2.index])
                .add_suffix('_new')
                .groupby(level=0)
               #.apply(lambda x: x.bfill().ffill()) #maybe neccesary instead bfill
                .bfill()
                .reset_index(level='group',drop='group')
                .fillna(0)
                .mul(df2['var1'], axis=0)
                .where(df2['flag'].eq(1), 0)
               #.astype(int) # if you want int
)

Output

  group  flag  var1  AA_new  AB_new  AC_new  B1_new  B2_new
0     A     1     1     4.0     5.0     7.0     0.0     0.0
1     A     0     2     0.0     0.0     0.0     0.0     0.0
2     A     0     3     0.0     0.0     0.0     0.0     0.0
3     B     1     7    56.0    63.0     0.0    42.0    35.0
4     B     0     8     0.0     0.0     0.0     0.0     0.0
5     B     0     9     0.0     0.0     0.0     0.0     0.0
6     B     0    10     0.0     0.0     0.0     0.0     0.0

EDIT

df2.assign(**df1.assign(mul_cal = df1['cal1'].mul(df1['cal2']))
                .pivot_table(columns='val',
                             values='mul_cal',
                             index = ['group', df2.index])
                .add_suffix('_new')
                .groupby(level=0)
               #.apply(lambda x: x.bfill().ffill()) #maybe neccesary instead bfill
                .bfill()
                .reset_index(level='group',drop='group')
                .fillna(0)
                .mul(df2['var1'], axis=0)
                .where(df2['flag'].eq(1), 0)
               #.astype(int) # if you want int
)


  group  flag  var1  AA_new  AB_new  AC_new  B1_new  B2_new
0     A     1     1    40.0   500.0   700.0     0.0     0.0
1     A     0     2     0.0     0.0     0.0     0.0     0.0
2     A     0     3     0.0     0.0     0.0     0.0     0.0
3     B     1     7   560.0  6300.0     0.0   420.0    35.0
4     B     0     8     0.0     0.0     0.0     0.0     0.0
5     B     0     9     0.0     0.0     0.0     0.0     0.0
6     B     0    10     0.0     0.0     0.0     0.0     0.0

Upvotes: 1

Related Questions