Reputation: 249
I am trying to perform a Wilcoxon rank-sum test between two data frames, `kirc` and `normal`. I would like to perform the test between their columns.
My code raised `TypeError: '<' not supported between instances of 'int' and 'str'`.
from scipy.stats import ranksums
import pandas as pd
kirc = mrna.loc[mrna['subtype'] == "KIRC"].iloc[:,:-2]
normal = mrna.loc[mrna['subtype'] == "normal"].iloc[:,:-2]
For each pair of corresponding columns in the two dataframes (i.e. each row of their transposes), I want to perform the Wilcoxon rank-sum test to find differential values.
for i in normal.T.iterrows():
for j in kirc.T.iterrows():
ranksums(i, j)
Traceback:
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
/opt/conda/lib/python3.7/site-packages/numpy/core/fromnumeric.py in _wrapfunc(obj, method, *args, **kwds)
56 try:
---> 57 return bound(*args, **kwds)
58 except TypeError:
/opt/conda/lib/python3.7/site-packages/pandas/core/ops/common.py in new_method(self, other)
68
---> 69 return method(self, other)
70
/opt/conda/lib/python3.7/site-packages/pandas/core/arraylike.py in __lt__(self, other)
39 def __lt__(self, other):
---> 40 return self._cmp_method(other, operator.lt)
41
/opt/conda/lib/python3.7/site-packages/pandas/core/series.py in _cmp_method(self, other, op)
5501 with np.errstate(all="ignore"):
-> 5502 res_values = ops.comparison_op(lvalues, rvalues, op)
5503
/opt/conda/lib/python3.7/site-packages/pandas/core/ops/array_ops.py in comparison_op(left, right, op)
283 elif is_object_dtype(lvalues.dtype) or isinstance(rvalues, str):
--> 284 res_values = comp_method_OBJECT_ARRAY(op, lvalues, rvalues)
285
/opt/conda/lib/python3.7/site-packages/pandas/core/ops/array_ops.py in comp_method_OBJECT_ARRAY(op, x, y)
72 else:
---> 73 result = libops.scalar_compare(x.ravel(), y, op)
74 return result.reshape(x.shape)
/opt/conda/lib/python3.7/site-packages/pandas/_libs/ops.pyx in pandas._libs.ops.scalar_compare()
TypeError: '<' not supported between instances of 'int' and 'str'
During handling of the above exception, another exception occurred:
TypeError Traceback (most recent call last)
<ipython-input-44-ea48324537c3> in <module>
3 for i in normal.T.iterrows():
4 for j in kirc.T.iterrows():
----> 5 ranksums(i, j)
6
/opt/conda/lib/python3.7/site-packages/scipy/stats/stats.py in ranksums(x, y)
6469 n2 = len(y)
6470 alldata = np.concatenate((x, y))
-> 6471 ranked = rankdata(alldata)
6472 x = ranked[:n1]
6473 s = np.sum(x, axis=0)
/opt/conda/lib/python3.7/site-packages/scipy/stats/stats.py in rankdata(a, method)
7380 arr = np.ravel(np.asarray(a))
7381 algo = 'mergesort' if method == 'ordinal' else 'quicksort'
-> 7382 sorter = np.argsort(arr, kind=algo)
7383
7384 inv = np.empty(sorter.size, dtype=np.intp)
<__array_function__ internals> in argsort(*args, **kwargs)
/opt/conda/lib/python3.7/site-packages/numpy/core/fromnumeric.py in argsort(a, axis, kind, order)
1112
1113 """
-> 1114 return _wrapfunc(a, 'argsort', axis=axis, kind=kind, order=order)
1115
1116
/opt/conda/lib/python3.7/site-packages/numpy/core/fromnumeric.py in _wrapfunc(obj, method, *args, **kwds)
64 # Call _wrapit from within the except clause to ensure a potential
65 # exception has a traceback chain.
---> 66 return _wrapit(obj, method, *args, **kwds)
67
68
/opt/conda/lib/python3.7/site-packages/numpy/core/fromnumeric.py in _wrapit(obj, method, *args, **kwds)
41 except AttributeError:
42 wrap = None
---> 43 result = getattr(asarray(obj), method)(*args, **kwds)
44 if wrap:
45 if not isinstance(result, mu.ndarray):
/opt/conda/lib/python3.7/site-packages/pandas/core/ops/common.py in new_method(self, other)
67 other = item_from_zerodim(other)
68
---> 69 return method(self, other)
70
71 return new_method
/opt/conda/lib/python3.7/site-packages/pandas/core/arraylike.py in __lt__(self, other)
38 @unpack_zerodim_and_defer("__lt__")
39 def __lt__(self, other):
---> 40 return self._cmp_method(other, operator.lt)
41
42 @unpack_zerodim_and_defer("__le__")
/opt/conda/lib/python3.7/site-packages/pandas/core/series.py in _cmp_method(self, other, op)
5500
5501 with np.errstate(all="ignore"):
-> 5502 res_values = ops.comparison_op(lvalues, rvalues, op)
5503
5504 return self._construct_result(res_values, name=res_name)
/opt/conda/lib/python3.7/site-packages/pandas/core/ops/array_ops.py in comparison_op(left, right, op)
282
283 elif is_object_dtype(lvalues.dtype) or isinstance(rvalues, str):
--> 284 res_values = comp_method_OBJECT_ARRAY(op, lvalues, rvalues)
285
286 else:
/opt/conda/lib/python3.7/site-packages/pandas/core/ops/array_ops.py in comp_method_OBJECT_ARRAY(op, x, y)
71 result = libops.vec_compare(x.ravel(), y.ravel(), op)
72 else:
---> 73 result = libops.scalar_compare(x.ravel(), y, op)
74 return result.reshape(x.shape)
75
/opt/conda/lib/python3.7/site-packages/pandas/_libs/ops.pyx in pandas._libs.ops.scalar_compare()
TypeError: '<' not supported between instances of 'int' and 'str'
`kirc` data:
pd.DataFrame({'A1CF': {'TCGA-A3-3307-01': 750,
'TCGA-A3-3308-01': 579,
'TCGA-A3-3311-01': 2186,
'TCGA-A3-3313-01': 220},
'A2BP1': {'TCGA-A3-3307-01': 0,
'TCGA-A3-3308-01': 7,
'TCGA-A3-3311-01': 6,
'TCGA-A3-3313-01': 43},
'A2LD1': {'TCGA-A3-3307-01': 460,
'TCGA-A3-3308-01': 433,
'TCGA-A3-3311-01': 692,
'TCGA-A3-3313-01': 1534},
'A2ML1': {'TCGA-A3-3307-01': 64,
'TCGA-A3-3308-01': 177,
'TCGA-A3-3311-01': 134,
'TCGA-A3-3313-01': 693}})
`normal` data:
pd.DataFrame({'A1CF': {'TCGA-A3-3387-11': 2728,
'TCGA-B0-4700-11': 434,
'TCGA-B0-4712-11': 11,
'TCGA-B0-5402-11': 640},
'A2BP1': {'TCGA-A3-3387-11': 45,
'TCGA-B0-4700-11': 14,
'TCGA-B0-4712-11': 74,
'TCGA-B0-5402-11': 60},
'A2LD1': {'TCGA-A3-3387-11': 6614,
'TCGA-B0-4700-11': 1178,
'TCGA-B0-4712-11': 1201,
'TCGA-B0-5402-11': 1058},
'A2ML1': {'TCGA-A3-3387-11': 46,
'TCGA-B0-4700-11': 26,
'TCGA-B0-4712-11': 78,
'TCGA-B0-5402-11': 47}})
Upvotes: 2
Views: 94
Reputation: 4608
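The error occurs because you are using `iterrows()` incorrectly. It yields `(label, Series)` tuples rather than bare rows, so each `i` and `j` you pass to `ranksums` contains a string label alongside the numeric values, and ranking them triggers the `int` vs. `str` comparison. Here is an example of what `i` looks like: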
for i in normal.T[0:1].iterrows():
print(i)
#output
'''
('A1CF', TCGA-A3-3387-11 2728
TCGA-B0-4700-11 434
TCGA-B0-4712-11 11
TCGA-B0-5402-11 640
Name: A1CF, dtype: int64)
'''
Instead, unpack each tuple into its label and its Series, and work with the Series only:
for i,j in normal.T.iterrows():
print(j['TCGA-A3-3387-11']) #you have to enter the name of the column you want to use here:
#output
'''
2728
45
6614
46
'''
Also, using `iterrows()` is not recommended because it is very slow. You can use `df.to_dict()` instead. You should take a look at this article.
Here is an example usage:
for j in normal.T.to_dict('records'):
print(j['TCGA-A3-3387-11'])
#output
'''
2728
45
6614
46
'''
Upvotes: 1