Reputation: 1323
I have a pandas dataframe
with columns val_min
and val_max
below:
df
val_min val_max
0 1001.0 1500.0
3 401.0 500.0
4 401.0 500.0
5 401.0 500.0
6 501.0 600.0
Now I am trying to create a new column using these two columns with pandas apply
function and create a new column based on some certain functions defined in my function, see my attempt below:
def get_range(df, val_min, val_max, new_col):
if df[val_min] <500 & df[val_max]<500:
df[new_col] = "<500"
elif df[val_min] >500 & df[val_max]<=1000:
df[new_col] = "500 - 1000"
elif df[val_min] >1000 & df[val_max]<=2000:
df[new_col] = "1000 - 2000"
elif df[val_min] >2000 & df[val_max]<=5000:
df[new_col] = "2000 - 5000"
elif df[val_min] >5000 & df[val_max]<=7500:
df[new_col] = "5000 - 7500"
elif df[val_min] >7500 & df[val_max]<=10000:
df[new_col] = "7500 - 10000"
elif df[val_min] >10000 & df[val_max]<=12000:
df[new_col] = "10000 - 12000"
elif df[val_min] >12000 & df[val_max]<=15000:
df[new_col] = "12000 - 15000"
elif df[val_min] >15000 & df[val_max]>15000:
df[new_col] = ">15000"
df["new_col"] = df[["val_min", "val_max"]].apply(get_range, axis=1)
Upvotes: 1
Views: 121
Reputation: 5037
Consider using np.select
import numpy as np
cases = [(df.val_min < 500) & (df.val_max < 500),
(df.val_min > 500) & (df.val_max <= 1000),
(df.val_min > 1000) & (df.val_max <= 2000),
(df.val_min > 2000) & (df.val_max <= 5000),
(df.val_min > 5000) & (df.val_max <= 7500),
(df.val_min > 7500) & (df.val_max <= 10000),
(df.val_min > 10000) &( df.val_max <= 12000),
(df.val_min > 12000) &( df.val_max <= 15000),
(df.val_min > 15000) &( df.val_max > 15000)]
values = ['<500',
'500 - 1000',
'1000 - 2000',
'2000 - 5000',
'5000 - 7500',
'7500 - 10000',
'10000 - 12000',
'12000 - 15000',
'>15000']
df['new_col'] = np.select(cases, values, default='')
print(df)
Output
val_min val_max new_col
0 1001.0 1500.0 1000 - 2000
1 401.0 500.0
2 401.0 500.0
3 401.0 500.0
4 501.0 600.0 500 - 1000
Upvotes: 1
Reputation: 1307
you can use apply
with lambda
df = pd.DataFrame({"val_min":list(range(10,1000,100)),"val_max":list(range(20,2000,200))})
def get_range(val_min, val_max):
if val_min <500 & val_max<500:
new_col = "<500"
elif val_min >500 & val_max<=1000:
new_col = "500 - 1000"
elif val_min >1000 & val_max<=2000:
new_col = "1000 - 2000"
elif val_min >2000 & val_max<=5000:
new_col = "2000 - 5000"
elif val_min >5000 & val_max<=7500:
new_col = "5000 - 7500"
elif val_min >7500 & val_max<=10000:
new_col = "7500 - 10000"
elif val_min >10000 & val_max<=12000:
new_col = "10000 - 12000"
elif val_min >12000 & val_max<=15000:
new_col = "12000 - 15000"
elif val_min >15000 & val_max>15000:
new_col = ">15000"
return new_col
df["new_col"] = df[["val_min", "val_max"]].apply(lambda row :get_range(*row), axis=1)
print(df)
val_min val_max new_col
0 10 20 <500
1 110 220 <500
2 210 420 <500
3 310 620 500 - 1000
4 410 820 500 - 1000
5 510 1020 500 - 1000
6 610 1220 500 - 1000
7 710 1420 500 - 1000
8 810 1620 500 - 1000
9 910 1820 500 - 1000
[Finished in 1.8s]
Upvotes: 0
Reputation: 51
could you try this ? it might work
def get_range(df, val_min, val_max, new_col):
if val_min <500 & val_max <500:
return "<500"
elif val_min >500 & val_max <=1000:
return "500 - 1000"
[...]
df["new_col"] = df[["val_min", "val_max"]].apply(get_range, axis=1)
Upvotes: 0
Reputation: 6483
You should change the function's structure to this:
def get_range(x):
val_min=x.val_min
val_max=x.val_max
if (val_min <500)&(val_max<500):
return "<500"
elif (val_min >500)&(val_max<=1000):
return "500 - 1000"
...
And then apply this to the dataframe:
df["new_col"] = df.apply(get_range, axis=1)
So, for example:
import pandas as pd
def get_range(x):
val_min=x.val_min
val_max=x.val_max
if (val_min <500 )&(val_max<500):
return "<500"
elif (val_min>500)&(val_max<=1000):
return "500 - 1000"
df = pd.DataFrame({'val_min':[600, 34], 'val_max':[800, 340]})
df["new_col"] = df.apply(get_range, axis=1)
Output:
val_min val_max new_col
0 600 800 500 - 1000
1 34 340 <500
Upvotes: 1