Pandas function with multiple conditions - Value error

Question

I am trying to create a function with multiple conditions. This would add new column with a calculated amount. Part of it is below.

I have looked at other answers but can't work out what is wrong.

# import modules
import pandas as pd
import numpy as np

#data
data2 = pd.DataFrame({'Unit of Measure' : ['EA', 'EA', 'AA'],
                     'round' : ['no', 'yes', 'no'],
                     'add_gst' : ['no', 'yes', 'no'],
                     'Unit Charge' : [1.8, 2.5, 3.0],
                     'Time Units' : [1.0, 1.5, 2.0]})

# function
def include_gst(df):
    rounded = 0.0
    if(data2['Unit of Measure'] == 'EA' and data2['round'] == 'no' and data2['add_gst'] == 'yes'):
        rounded = data['Unit Charge'] * data['Time Units']
        rounded = rounded.round(2) # to two decimal places
        rounded = rounded * 1.10 # add gst
        rounded = rounded / 0.05 # round to nearest five cents - step 1 - divide by 0.05
        rounded = rounded.round() # round to nearest five cents - step 2 - to nearest integer
        rounded = rounded * 0.05 # round to nearest five cents - step 3 - multiply by 0.05
        return  rounded
    else:
        return 1.0

data2['incl_gst'] = data2.apply(include_gst, axis = 1)

Error:

ValueError: ('The truth value of a Series is ambiguous. Use a.empty, a.bool(), a.item(), a.any() or a.all().', 'occurred at index 0')

It would be good to get it to work. Thanks.

jezrael · Accepted Answer

Here under the hood loops by apply are not necessary, you canvectorized solution - added parantheses because priority operator with & for bitwise AND to mask and new column is created by numpy.where:

#changed data for match first row
data2 = pd.DataFrame({'Unit of Measure' : ['EA', 'EA', 'AA'],
                     'round' : ['no', 'yes', 'no'],
                     'add_gst' : ['yes', 'yes', 'no'],
                     'Unit Charge' : [1.8, 2.5, 3.0],
                     'Time Units' : [1.0, 1.5, 2.0]})
print (data2)
  Unit of Measure round add_gst  Unit Charge  Time Units
0              EA    no     yes          1.8         1.0
1              EA   yes     yes          2.5         1.5
2              AA    no      no          3.0         2.0
  Unit of Measure round add_gst  Unit Charge  Time Units  

m = (data2['Unit of Measure'] == 'EA') & (data2['round'] == 'no') & (data2['add_gst'] == 'yes')
rounded = data2['Unit Charge'] * data2['Time Units']
rounded = rounded.round(2) # to two decimal places
rounded = rounded * 1.10 # add gst
rounded = rounded / 0.05 # round to nearest five cents - step 1 - divide by 0.05
rounded = rounded.round() # round to nearest five cents - step 2 - to nearest integer
rounded = rounded * 0.05 # round to nearest five cents - step 3 - multiply by 0.05


data2['incl_gst'] = np.where(m, rounded, 1.0)
print (data2)
  Unit of Measure round add_gst  Unit Charge  Time Units  incl_gst
0              EA    no     yes          1.8         1.0       2.0
1              EA   yes     yes          2.5         1.5       1.0
2              AA    no      no          3.0         2.0       1.0

EDIT:

Because 12 times elif statement solution should be cahnged:

def include_gst(df):
    rounded = 0.0
    if (df['Unit of Measure'] == 'EA') and (df['round'] == 'no') and (df['add_gst'] == 'yes'):
        rounded = df['Unit Charge'] * df['Time Units']
        rounded = round(rounded, 2) # to two decimal places
        rounded = rounded * 1.10 # add gst
        rounded = rounded / 0.05 # round to nearest five cents - step 1 - divide by 0.05
        rounded = round(rounded) # round to nearest five cents - step 2 - to nearest integer
        rounded = rounded * 0.05 # round to nearest five cents - step 3 - multiply by 0.05
        return  rounded
    else:
        return 1.0

data2['incl_gst'] = data2.apply(include_gst, axis = 1)
print (data2)
  Unit of Measure round add_gst  Unit Charge  Time Units  incl_gst
0              EA    no     yes          1.8         1.0       2.0
1              EA   yes     yes          2.5         1.5       1.0
2              AA    no      no          3.0         2.0       1.0

Or:

data2 = pd.DataFrame({'Unit of Measure' : ['EA', 'EA', 'AA'],
                     'round' : ['no', 'yes', 'no'],
                     'add_gst' : ['yes', 'yes', 'no'],
                     'Unit Charge' : [1.8, 2.5, 3.0],
                     'Time Units' : [1.0, 1.5, 2.0]})
print (data2)
  Unit of Measure round add_gst  Unit Charge  Time Units
0              EA    no     yes          1.8         1.0
1              EA   yes     yes          2.5         1.5
2              AA    no      no          3.0         2.0

m1 = (data2['Unit of Measure'] == 'EA') & (data2['round'] == 'no') & (data2['add_gst'] == 'yes')
m2 = (data2['Unit of Measure'] == 'EA') & (data2['round'] == 'yes') & (data2['add_gst'] == 'yes')

rounded = data2['Unit Charge'] * data2['Time Units']
rounded = rounded.round(2) # to two decimal places
rounded = rounded * 1.10 # add gst
rounded = rounded / 0.05 # round to nearest five cents - step 1 - divide by 0.05
rounded = rounded.round() # round to nearest five cents - step 2 - to nearest integer
rounded1 = rounded * 0.05 # round to nearest five cents - step 3 - multiply by 0.05
rounded2 = rounded * 0.08

data2['incl_gst'] = np.select([m1, m2], [rounded1, rounded2], default=1.0)
print (data2)
  Unit of Measure round add_gst  Unit Charge  Time Units  incl_gst
0              EA    no     yes          1.8         1.0      2.00
1              EA   yes     yes          2.5         1.5      6.56
2              AA    no      no          3.0         2.0      1.00

Pandas function with multiple conditions - Value error

Answers (1)

Related Questions