Reputation: 33
I have a problem where I've to sort a very big array(shape - 7900000X4X4) with a custom function. I used sorted
but it took more than 1 hour to sort. My code was something like this.
def compare(x,y):
print('DD '+str(x[0]))
if(np.array_equal(x[1],y[1])==True):
return -1
a = x[1].flatten()
b = y[1].flatten()
idx = np.where( (a>b) != (a<b) )[0][0]
if a[idx]<0 and b[idx]>=0:
return 0
elif b[idx]<0 and a[idx]>=0:
return 1
elif a[idx]<0 and b[idx]<0:
if a[idx]>b[idx]:
return 0
elif a[idx]<b[idx]:
return 1
elif a[idx]<b[idx]:
return 1
else:
return 0
def cmp_to_key(mycmp):
class K:
def __init__(self, obj, *args):
self.obj = obj
def __lt__(self, other):
return mycmp(self.obj, other.obj)
return K
tblocks = sorted(tblocks.items(),key=cmp_to_key(compare))
This worked but I want it to complete in seconds. I don't think any direct implementation in python can give me the performance I need, so I tried cython. My Cython code is this, which is pretty simple.
cdef int[:,:] arrr
cdef int size
cdef bool compare(int a,int b):
global arrr,size
cdef int[:] x = arrr[a]
cdef int[:] y = arrr[b]
cdef int i,j
i = 0
j = 0
while(i<size):
if((j==size-1)or(y[j]<x[i])):
return 0
elif(x[i]<y[j]):
return 1
i+=1
j+=1
return (j!=size-1)
def sorted(np.ndarray boxes,int total_blocks,int s):
global arrr,size
cdef int i
cdef vector[int] index = xrange(total_blocks)
arrr = boxes
size = s
sort(index.begin(),index.end(),compare)
return index
This code in cython took 33 seconds! Cython is the solution, but I am looking for some alternate solutions which can run directly on python. For example numba. I tried Numba, but I didn't get satisfying results. Kindly help!
Upvotes: 3
Views: 467
Reputation: 6482
It is hard to give an answer without a working example. I assume, that arrr in your Cython code was a 2D-array and I assume that size was size=arrr.shape[0]
Numba Implementation
import numpy as np
import numba as nb
from numba.targets import quicksort
def custom_sorting(compare_fkt):
index_arange=np.arange(size)
quicksort_func=quicksort.make_jit_quicksort(lt=compare_fkt,is_argsort=False)
jit_sort_func=nb.njit(quicksort_func.run_quicksort)
index=jit_sort_func(index_arange)
return index
def compare(a,b):
x = arrr[a]
y = arrr[b]
i = 0
j = 0
while(i<size):
if((j==size-1)or(y[j]<x[i])):
return False
elif(x[i]<y[j]):
return True
i+=1
j+=1
return (j!=size-1)
arrr=np.random.randint(-9,10,(7900000,8))
size=arrr.shape[0]
index=custom_sorting(compare)
This gives 3.85s for the generated testdata. But the speed of a sorting algorithm heavily depends on the data....
Simple Example
import numpy as np
import numba as nb
from numba.targets import quicksort
#simple reverse sort
def compare(a,b):
return a > b
#create some test data
arrr=np.array(np.random.rand(7900000)*10000,dtype=np.int32)
#we can pass the comparison function
quicksort_func=quicksort.make_jit_quicksort(lt=compare,is_argsort=True)
#compile the sorting function
jit_sort_func=nb.njit(quicksort_func.run_quicksort)
#get the result
ind_sorted=jit_sort_func(arrr)
This implementation is about 35% slower than np.argsort, but this is also common in using np.argsort in compiled code.
Upvotes: 1
Reputation: 53029
If I understand your code correctly then the order you have in mind is the standard order, only that it starts at 0
wraps around at +/-infinity
and maxes out at -0
. On top of that we have simple left-to-right lexicographic order.
Now, if your array dtype is integer, observe the following: Because of complement representation of negatives view-casting to unsigned int makes your order the standard order. On top of that, if we use big endian encoding, efficient lexicographic ordering can be achieved by view-casting to void
dtype.
The code below shows that using a 10000x4x4
example that this method gives the same result as your Python code.
It also benchmarks it on a 7,900,000x4x4
example (using array, not dict). On my modest laptop this method takes 8
seconds.
import numpy as np
def compare(x, y):
# print('DD '+str(x[0]))
if(np.array_equal(x[1],y[1])==True):
return -1
a = x[1].flatten()
b = y[1].flatten()
idx = np.where( (a>b) != (a<b) )[0][0]
if a[idx]<0 and b[idx]>=0:
return 0
elif b[idx]<0 and a[idx]>=0:
return 1
elif a[idx]<0 and b[idx]<0:
if a[idx]>b[idx]:
return 0
elif a[idx]<b[idx]:
return 1
elif a[idx]<b[idx]:
return 1
else:
return 0
def cmp_to_key(mycmp):
class K:
def __init__(self, obj, *args):
self.obj = obj
def __lt__(self, other):
return mycmp(self.obj, other.obj)
return K
def custom_sort(a):
assert a.dtype==np.int64
b = a.astype('>i8', copy=False)
return b.view(f'V{a.dtype.itemsize * a.shape[1]}').ravel().argsort()
tblocks = np.random.randint(-9,10, (10000, 4, 4))
tblocks = dict(enumerate(tblocks))
tblocks_s = sorted(tblocks.items(),key=cmp_to_key(compare))
tblocksa = np.array(list(tblocks.values()))
tblocksa = tblocksa.reshape(tblocksa.shape[0], -1)
order = custom_sort(tblocksa)
tblocks_s2 = list(tblocks.items())
tblocks_s2 = [tblocks_s2[o] for o in order]
print(tblocks_s == tblocks_s2)
from timeit import timeit
data = np.random.randint(-9_999, 10_000, (7_900_000, 4, 4))
print(timeit(lambda: data[custom_sort(data.reshape(data.shape[0], -1))],
number=5) / 5)
Sample output:
True
7.8328493310138585
Upvotes: 0