REnuka Perera
REnuka Perera

Reputation: 53

Unsupported operand types with df.copy() method

I'm loading my dataset and then copying it inside a function, but an error is raised.

import numpy as np
import pandas as pd
# The plotting package is spelled "matplotlib"; "mathplotlib" is not an
# installable module name and fails at import time.
import matplotlib.pyplot as plt

# Load the house-price dataset from CSV into a DataFrame.
house_data = pd.read_csv("/home/houseprice.csv")

# Compare the median sale price of houses where a variable is missing vs. present.
def analyse_na_value(df, var):
    """Plot the median ``SalePrice`` for rows where ``var`` is missing vs. present.

    Args:
        df: pandas DataFrame; the code reads its ``var`` and ``SalePrice`` columns.
        var: Label of the column whose missing values are examined.
    """
    # NOTE(review): this is a subtraction expression, not an assignment. It
    # evaluates ``df`` minus a copy of itself and discards the result, so ``df``
    # is never rebound to a copy. Presumably ``df = df.copy()`` was intended;
    # as written, the column assignment below writes into the frame passed in
    # by the caller.
    df - df.copy()
    
    # Overwrite the column with a flag:
    # 1 where the observation is missing, 0 where it has a real value
    df[var] = np.where(df[var].isnull(), 1 , 0)
    #print(df[var].isnull())
    
    # Bar chart of the median SalePrice, grouped by the 0/1 missing flag
    df.groupby(var)['SalePrice'].median().plot.bar()
    plt.title(var)
    plt.show()
    
    
# Run the analysis once per entry of vars_with_na.
# NOTE(review): vars_with_na is not defined in this snippet — presumably the list
# of house_data columns that contain missing values; confirm against the notebook.
for var in vars_with_na:
    analyse_na_value(house_data, var)

This is the error. When I comment out the following line of code, I don't get an error:

df - df.copy()
TypeError                                 Traceback (most recent call last)
~/anaconda3/lib/python3.8/site-packages/pandas/core/ops/array_ops.py in na_arithmetic_op(left, right, op, is_cmp)
    142     try:
--> 143         result = expressions.evaluate(op, left, right)
    144     except TypeError:

~/anaconda3/lib/python3.8/site-packages/pandas/core/computation/expressions.py in evaluate(op, a, b, use_numexpr)
    232         if use_numexpr:
--> 233             return _evaluate(op, op_str, a, b)  # type: ignore
    234     return _evaluate_standard(op, op_str, a, b)

~/anaconda3/lib/python3.8/site-packages/pandas/core/computation/expressions.py in _evaluate_numexpr(op, op_str, a, b)
    118     if result is None:
--> 119         result = _evaluate_standard(op, op_str, a, b)
    120 

~/anaconda3/lib/python3.8/site-packages/pandas/core/computation/expressions.py in _evaluate_standard(op, op_str, a, b)
     67     with np.errstate(all="ignore"):
---> 68         return op(a, b)
     69 

TypeError: unsupported operand type(s) for -: 'str' and 'str'

During handling of the above exception, another exception occurred:

TypeError                                 Traceback (most recent call last)
<ipython-input-31-25d58bc46c86> in <module>
     15 
     16 for var in vars_with_na:
---> 17     analyse_na_value(house_data, var)

<ipython-input-31-25d58bc46c86> in analyse_na_value(df, var)
      1 #we evaluate the price of a house for those cases where the information is missing, for each variable
      2 def analyse_na_value(df, var):
----> 3     df - df.copy()
      4 
      5     # we indicate as a variable as 1 where the observation is missing

~/anaconda3/lib/python3.8/site-packages/pandas/core/ops/__init__.py in f(self, other, axis, level, fill_value)
    649         if isinstance(other, ABCDataFrame):
    650             # Another DataFrame
--> 651             new_data = self._combine_frame(other, na_op, fill_value)
    652 
    653         elif isinstance(other, ABCSeries):

~/anaconda3/lib/python3.8/site-packages/pandas/core/frame.py in _combine_frame(self, other, func, fill_value)
   5864                 return func(left, right)
   5865 
-> 5866         new_data = ops.dispatch_to_series(self, other, _arith_op)
   5867         return new_data
   5868 

~/anaconda3/lib/python3.8/site-packages/pandas/core/ops/__init__.py in dispatch_to_series(left, right, func, axis)
    273         #  _frame_arith_method_with_reindex
    274 
--> 275         bm = left._mgr.operate_blockwise(right._mgr, array_op)
    276         return type(left)(bm)
    277 

~/anaconda3/lib/python3.8/site-packages/pandas/core/internals/managers.py in operate_blockwise(self, other, array_op)
    362         Apply array_op blockwise with another (aligned) BlockManager.
    363         """
--> 364         return operate_blockwise(self, other, array_op)
    365 
    366     def apply(self: T, f, align_keys=None, **kwargs) -> T:

~/anaconda3/lib/python3.8/site-packages/pandas/core/internals/ops.py in operate_blockwise(left, right, array_op)
     36             lvals, rvals = _get_same_shape_values(blk, rblk, left_ea, right_ea)
     37 
---> 38             res_values = array_op(lvals, rvals)
     39             if left_ea and not right_ea and hasattr(res_values, "reshape"):
     40                 res_values = res_values.reshape(1, -1)

~/anaconda3/lib/python3.8/site-packages/pandas/core/ops/array_ops.py in arithmetic_op(left, right, op)
    188     else:
    189         with np.errstate(all="ignore"):
--> 190             res_values = na_arithmetic_op(lvalues, rvalues, op)
    191 
    192     return res_values

~/anaconda3/lib/python3.8/site-packages/pandas/core/ops/array_ops.py in na_arithmetic_op(left, right, op, is_cmp)
    148             #  will handle complex numbers incorrectly, see GH#32047
    149             raise
--> 150         result = masked_arith_op(left, right, op)
    151 
    152     if is_cmp and (is_scalar(result) or result is NotImplemented):

~/anaconda3/lib/python3.8/site-packages/pandas/core/ops/array_ops.py in masked_arith_op(x, y, op)
     90         if mask.any():
     91             with np.errstate(all="ignore"):
---> 92                 result[mask] = op(xrav[mask], yrav[mask])
     93 
     94     else:

TypeError: unsupported operand type(s) for -: 'str' and 'str'

1

As far as I know, the copy() function works in Python 3, but I don't know whether it works with pandas under Python 3. How can I get rid of this error without commenting out that line of code?

Upvotes: 2

Views: 82

Answers (1)

OldWizard007
OldWizard007

Reputation: 370

I think you are supposed to write df = df.copy(). I would recommend using a different variable name for the copy, though. Here is the official pandas documentation for this function. What you are currently doing is subtracting a copy of the DataFrame from the DataFrame itself...

Upvotes: 2

Related Questions