Reputation: 87
I have two dfs and I would like to create a modified 3rd df that adds a character string to the value of df1 based on the same position (row x column) in df2. My for loop does not seem to be working correctly, and is not adding the character string as anticipated. Where am I going wrong with my forloops? I've tried several versions of .iterrows() and .iterritems() to no avail.
data1 = {'col1' :['2.28', '-0.38', '0.16', '-0.00', '-0.11', '0.00', '-0.00','0.92', '0.58', '0.90', '0.80'],
'col2': ['2.23', '-0.38', '0.17', '-0.00', '-0.10', '0.00', '-0.00','0.89', '0.57', '0.89', '0.77'],
'col3': ['2.25', '-0.31', '0.17', '0.00', '-0.10', '0.00', '-0.00','0.88', '0.55', '0.89', '0.78'],
'col4': ['2.22', '-0.16', '0.17', '0.00', '-0.08', '0.00', '-0.00','0.85', '0.52', '0.85', '0.76']}
df1 = pd.DataFrame(data1)
data2 = {'col1' :[0.0043463 , 0.02835221, 0.01998397, 0.95802428, 0.30099018,
0.88572679, 0.49670165, 0.24806438, 0.46116773, 0.26479583,
0.30652824],
'col2': [0.00557248, 0.14322473, 0.01656108, 0.99548637, 0.32824552,
0.78092598, 0.45962261, 0.26562723, 0.47006285, 0.27588199,
0.32995383],
'col3': [0.00516395, 0.1432596 , 0.01855883, 0.95806069, 0.33487223,
0.90526158, 0.373449 , 0.27230896, 0.48722896, 0.27608198,
0.323059 ],
'col4': [0.00476079, 0.08205838, 0.03080842, 0.8642895 , 0.46951708,
0.75946821, 0.25630978, 0.30713967, 0.52637885, 0.29876842,
0.36094742]}
df2 = pd.DataFrame(data2)
final_df = pd.DataFrame()
for idx1,c in enumerate(df1):
for idx2,p in enumerate(df2):
final_df[c] = np.where(df2[p] < .05, df1[c]+'*',
np.where(df2[p] <.01,df1[c]+'**',
np.where(df2[p] <.001 , df1[c]+'***',df1[c])))
final_df = pd.DataFrame(final_df)
final_df
desired output:
col1 col2 col3 col4
0 2.28** 2.23** 2.25** 2.22**
1 -0.38* -0.38 -0.31 -0.16
2 0.16* 0.17* 0.17* 0.17*
3 -0.00 -0.00 0.00 0.00
4 -0.11 -0.10 -0.10 -0.08
Upvotes: 0
Views: 1592
Reputation: 1128
I don't think this is the best answer but I believe it achieves the result you're looking for.
Firstly, I don't see any reason to iterate through df2 since you are using np.where to compare the same column in both dataframes.
import pandas as pd
import numpy as np
data1 = {'col1' :['2.28', '-0.38', '0.16', '-0.00', '-0.11', '0.00', '-0.00','0.92', '0.58', '0.90', '0.80'],
'col2': ['2.23', '-0.38', '0.17', '-0.00', '-0.10', '0.00', '-0.00','0.89', '0.57', '0.89', '0.77'],
'col3': ['2.25', '-0.31', '0.17', '0.00', '-0.10', '0.00', '-0.00','0.88', '0.55', '0.89', '0.78'],
'col4': ['2.22', '-0.16', '0.17', '0.00', '-0.08', '0.00', '-0.00','0.85', '0.52', '0.85', '0.76']}
df1 = pd.DataFrame(data1)
data2 = {'col1' :[0.0043463 , 0.02835221, 0.01998397, 0.95802428, 0.30099018,
0.88572679, 0.49670165, 0.24806438, 0.46116773, 0.26479583,
0.30652824],
'col2': [0.00557248, 0.14322473, 0.01656108, 0.99548637, 0.32824552,
0.78092598, 0.45962261, 0.26562723, 0.47006285, 0.27588199,
0.32995383],
'col3': [0.00516395, 0.1432596 , 0.01855883, 0.95806069, 0.33487223,
0.90526158, 0.373449 , 0.27230896, 0.48722896, 0.27608198,
0.323059 ],
'col4': [0.00476079, 0.08205838, 0.03080842, 0.8642895 , 0.46951708,
0.75946821, 0.25630978, 0.30713967, 0.52637885, 0.29876842,
0.36094742]}
df2 = pd.DataFrame(data2)
final_df = pd.DataFrame()
for idx1, c in enumerate(df1):
# just add a * for each level, and after the first modifcation reference itself(final_df) so we don't overwrite any *'s
final_df[c] = np.where(df2[c] < .05, df1[c]+'*',df1[c])
final_df[c] = np.where(df2[c] <.01,final_df[c]+'*', final_df[c])
final_df[c] = np.where(df2[c] <.001 , final_df[c]+'*', final_df[c])
final_df = pd.DataFrame(final_df)
final_df
Output:
col1 col2 col3 col4
[0: 2.28** 2.23** 2.25** 2.22**],
[1: -0.38* -0.38 -0.31 -0.16],
[2: 0.16* 0.17* 0.17* 0.17*],
[3: -0.00 -0.00 0.00 0.00],
[4: -0.11 -0.10 -0.10 -0.08],
[5: 0.00 0.00 0.00 0.00],
[6: -0.00 -0.00 -0.00 -0.00],
[7: 0.92 0.89 0.88 0.85]...
Upvotes: 1