melolilili
melolilili

Reputation: 249

TypeError: '<' not supported between instances of 'int' and 'str' when performing Wilcoxon Rank-Sum test on two pandas dataframes

I am trying to perform a Wilcoxon rank-sum test between two data frames, kirc and normal. I would like to perform the test between their corresponding columns. My code raises TypeError: '<' not supported between instances of 'int' and 'str'.

from scipy.stats import ranksums
import pandas as pd

kirc = mrna.loc[mrna['subtype'] == "KIRC"].iloc[:,:-2]
normal = mrna.loc[mrna['subtype'] == "normal"].iloc[:,:-2]

For each column of the two dataframes (i.e. each row after transposing them), I want to perform the Wilcoxon rank-sum test to find differential values between the corresponding columns.

for i in normal.T.iterrows():
    for j in kirc.T.iterrows():
        ranksums(i, j)

Traceback:

---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
/opt/conda/lib/python3.7/site-packages/numpy/core/fromnumeric.py in _wrapfunc(obj, method, *args, **kwds)
     56     try:
---> 57         return bound(*args, **kwds)
     58     except TypeError:

/opt/conda/lib/python3.7/site-packages/pandas/core/ops/common.py in new_method(self, other)
     68 
---> 69         return method(self, other)
     70 

/opt/conda/lib/python3.7/site-packages/pandas/core/arraylike.py in __lt__(self, other)
     39     def __lt__(self, other):
---> 40         return self._cmp_method(other, operator.lt)
     41 

/opt/conda/lib/python3.7/site-packages/pandas/core/series.py in _cmp_method(self, other, op)
   5501         with np.errstate(all="ignore"):
-> 5502             res_values = ops.comparison_op(lvalues, rvalues, op)
   5503 

/opt/conda/lib/python3.7/site-packages/pandas/core/ops/array_ops.py in comparison_op(left, right, op)
    283     elif is_object_dtype(lvalues.dtype) or isinstance(rvalues, str):
--> 284         res_values = comp_method_OBJECT_ARRAY(op, lvalues, rvalues)
    285 

/opt/conda/lib/python3.7/site-packages/pandas/core/ops/array_ops.py in comp_method_OBJECT_ARRAY(op, x, y)
     72     else:
---> 73         result = libops.scalar_compare(x.ravel(), y, op)
     74     return result.reshape(x.shape)

/opt/conda/lib/python3.7/site-packages/pandas/_libs/ops.pyx in pandas._libs.ops.scalar_compare()

TypeError: '<' not supported between instances of 'int' and 'str'

During handling of the above exception, another exception occurred:

TypeError                                 Traceback (most recent call last)
<ipython-input-44-ea48324537c3> in <module>
      3 for i in normal.T.iterrows():
      4     for j in kirc.T.iterrows():
----> 5         ranksums(i, j)
      6 

/opt/conda/lib/python3.7/site-packages/scipy/stats/stats.py in ranksums(x, y)
   6469     n2 = len(y)
   6470     alldata = np.concatenate((x, y))
-> 6471     ranked = rankdata(alldata)
   6472     x = ranked[:n1]
   6473     s = np.sum(x, axis=0)

/opt/conda/lib/python3.7/site-packages/scipy/stats/stats.py in rankdata(a, method)
   7380     arr = np.ravel(np.asarray(a))
   7381     algo = 'mergesort' if method == 'ordinal' else 'quicksort'
-> 7382     sorter = np.argsort(arr, kind=algo)
   7383 
   7384     inv = np.empty(sorter.size, dtype=np.intp)

<__array_function__ internals> in argsort(*args, **kwargs)

/opt/conda/lib/python3.7/site-packages/numpy/core/fromnumeric.py in argsort(a, axis, kind, order)
   1112 
   1113     """
-> 1114     return _wrapfunc(a, 'argsort', axis=axis, kind=kind, order=order)
   1115 
   1116 

/opt/conda/lib/python3.7/site-packages/numpy/core/fromnumeric.py in _wrapfunc(obj, method, *args, **kwds)
     64         # Call _wrapit from within the except clause to ensure a potential
     65         # exception has a traceback chain.
---> 66         return _wrapit(obj, method, *args, **kwds)
     67 
     68 

/opt/conda/lib/python3.7/site-packages/numpy/core/fromnumeric.py in _wrapit(obj, method, *args, **kwds)
     41     except AttributeError:
     42         wrap = None
---> 43     result = getattr(asarray(obj), method)(*args, **kwds)
     44     if wrap:
     45         if not isinstance(result, mu.ndarray):

/opt/conda/lib/python3.7/site-packages/pandas/core/ops/common.py in new_method(self, other)
     67         other = item_from_zerodim(other)
     68 
---> 69         return method(self, other)
     70 
     71     return new_method

/opt/conda/lib/python3.7/site-packages/pandas/core/arraylike.py in __lt__(self, other)
     38     @unpack_zerodim_and_defer("__lt__")
     39     def __lt__(self, other):
---> 40         return self._cmp_method(other, operator.lt)
     41 
     42     @unpack_zerodim_and_defer("__le__")

/opt/conda/lib/python3.7/site-packages/pandas/core/series.py in _cmp_method(self, other, op)
   5500 
   5501         with np.errstate(all="ignore"):
-> 5502             res_values = ops.comparison_op(lvalues, rvalues, op)
   5503 
   5504         return self._construct_result(res_values, name=res_name)

/opt/conda/lib/python3.7/site-packages/pandas/core/ops/array_ops.py in comparison_op(left, right, op)
    282 
    283     elif is_object_dtype(lvalues.dtype) or isinstance(rvalues, str):
--> 284         res_values = comp_method_OBJECT_ARRAY(op, lvalues, rvalues)
    285 
    286     else:

/opt/conda/lib/python3.7/site-packages/pandas/core/ops/array_ops.py in comp_method_OBJECT_ARRAY(op, x, y)
     71         result = libops.vec_compare(x.ravel(), y.ravel(), op)
     72     else:
---> 73         result = libops.scalar_compare(x.ravel(), y, op)
     74     return result.reshape(x.shape)
     75 

/opt/conda/lib/python3.7/site-packages/pandas/_libs/ops.pyx in pandas._libs.ops.scalar_compare()

TypeError: '<' not supported between instances of 'int' and 'str'

kirc data

pd.DataFrame({'A1CF': {'TCGA-A3-3307-01': 750,
  'TCGA-A3-3308-01': 579,
  'TCGA-A3-3311-01': 2186,
  'TCGA-A3-3313-01': 220},
 'A2BP1': {'TCGA-A3-3307-01': 0,
  'TCGA-A3-3308-01': 7,
  'TCGA-A3-3311-01': 6,
  'TCGA-A3-3313-01': 43},
 'A2LD1': {'TCGA-A3-3307-01': 460,
  'TCGA-A3-3308-01': 433,
  'TCGA-A3-3311-01': 692,
  'TCGA-A3-3313-01': 1534},
 'A2ML1': {'TCGA-A3-3307-01': 64,
  'TCGA-A3-3308-01': 177,
  'TCGA-A3-3311-01': 134,
  'TCGA-A3-3313-01': 693}})

normal data

pd.DataFrame({'A1CF': {'TCGA-A3-3387-11': 2728,
  'TCGA-B0-4700-11': 434,
  'TCGA-B0-4712-11': 11,
  'TCGA-B0-5402-11': 640},
 'A2BP1': {'TCGA-A3-3387-11': 45,
  'TCGA-B0-4700-11': 14,
  'TCGA-B0-4712-11': 74,
  'TCGA-B0-5402-11': 60},
 'A2LD1': {'TCGA-A3-3387-11': 6614,
  'TCGA-B0-4700-11': 1178,
  'TCGA-B0-4712-11': 1201,
  'TCGA-B0-5402-11': 1058},
 'A2ML1': {'TCGA-A3-3387-11': 46,
  'TCGA-B0-4700-11': 26,
  'TCGA-B0-4712-11': 78,
  'TCGA-B0-5402-11': 47}})

Upvotes: 2

Views: 94

Answers (1)

Bushmaster
Bushmaster

Reputation: 4608

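The error occurs because you are using the iterrows() function incorrectly. iterrows() yields (index, Series) tuples, so each of i and j is a tuple containing the row label (a string) together with the row's values, rather than the numeric values alone. Here is an example of what i looks like: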

for i in normal.T[0:1].iterrows():
    print(i)

#output
'''
('A1CF', TCGA-A3-3387-11    2728
TCGA-B0-4700-11     434
TCGA-B0-4712-11      11
TCGA-B0-5402-11     640
Name: A1CF, dtype: int64)
'''

Unpack the tuple into the label and the row instead, like this:

for i,j in normal.T.iterrows():
    print(j['TCGA-A3-3387-11']) #you have to enter the name of the column you want to use here:

#output
'''
2728
45
6614
46
'''

Also, using iterrows() is not recommended because it is very slow. You can use df.to_dict() instead. You should take a look at this article.

Here is an example usage:

for j in normal.T.to_dict('records'):
    print(j['TCGA-A3-3387-11'])

#output
'''
2728
45
6614
46
'''

Upvotes: 1

Related Questions