Reputation: 23
I use a numpy array as a coordinate system. The coordinates are all of type int. My function checks if both the x and y coordinate is the same as an input coordinate and returns the index of that element in the numpy array.
This is a standalone function in my script, when I run a cProfile script over it, this is the slowest function i got.
My question is, does anyone know a faster way to check this Nx2 numpy array?
This is the function:
def findPerson(coordinate, input_array):
return np.where( (input_array[:,0] == coordinate[0]) & (input_array[:,1] == coordinate[1]) )[0]
EDIT: By request, here is a test sample.
predator = np.array([1, 1])
people_array = np.array([[-1, 1], [2, 2], [1, 1]])
print(findPerson(predator, people_array))
This test script prints out [2], this is the index in people_array that is in the same coordinate as the "predator"
Upvotes: 1
Views: 232
Reputation: 4085
My benchmarking:
|---------------------|---------|
|Algorithm |Time |
|---------------------|---------|
|Original (findPerson)| 8.4 ms|
|Firman (find_idx) | 41.8 ms|
|Numba | 4.89 ms|
|Numba parallel | 3.21 ms|
|Cython | 2.70 ms|
|Cython parallel | 2.90 ms|
|---------------------|---------|
My setup:
Processor: Intel i5-9600K 3.70GHz, 6 core
Versions:
Python: 3.8.0
Numba: 0.46.0
Cython: 0.29.14
Numpy: 1.17.4
My data setup (the original array is too small, so I decided to copy it 1 mln times):
predator = np.array([1, 1])
people_array = np.array([[-1, 1], [2, 2], [1, 1]]*1000000)
So the fastest option is to use Cython (code as it works in Jupyter):
%%cython -a
import cython
import numpy as np
cimport numpy as np
from numpy cimport ndarray
from cython.parallel import prange
from libc.stdint cimport uint32_t, int64_t
@cython.boundscheck(False)
@cython.wraparound(False)
@cython.infer_types(True)
@cython.initializedcheck(False)
def cy_loop(int64_t[:] coordinate,int64_t[:,:] input_array):
alen = input_array.shape[0]
cdef uint32_t[:] res1 = np.empty(alen,np.uint32)
cdef uint32_t ii = 0
for i in range(alen):
if (input_array[i,0] == coordinate[0]) and (input_array[i,1] == coordinate[1]):
res1[ii] = i
ii = ii + 1
return np.asarray(res1[:ii])
The easiest to setup is Numba non-parallel:
@numba.njit(nogil=True)
def findPerson_nb2(coordinate, input_array):
return np.where( (input_array[:,0] == coordinate[0]) & (input_array[:,1] == coordinate[1]) )[0]
Numba parallel (maybe it is also easy to setup, but parallel programs could be tricky):
import math
@numba.njit(parallel=True)
def findPerson_nb4(coordinate, input_array, alen):
# alen = input_array.shape[0]
n_batches = 768 #6*16*8
batch_size = math.ceil(alen/n_batches)
res = np.empty((n_batches, batch_size),dtype=np.int64)
res_len = np.empty(n_batches,dtype=np.int64)
for i in numba.prange(n_batches):
start = i*batch_size
if i == (n_batches - 1):
end = alen
else:
end = (i+1)*batch_size
res_i = start + np.where( (input_array[start:end,0] == coordinate[0]) & (input_array[start:end,1] == coordinate[1]))[0]
ailen = res_i.shape[0]
res[i,:ailen] = res_i
res_len[i] = ailen
return res, res_len
@numba.njit()
def myconcat(a_in, a_in_len, alen):
res = np.empty(alen,dtype=np.int64)
ii = 0
for i in range(res_len.shape[0]):
for j in range(res_len[i]):
res[ii] = a_in[i,j]
ii = ii + 1
return res, ii
Cython parallel:
%%cython -a --compile-args=/openmp --link-args=/openmp --force
import cython
import numpy as np
cimport numpy as np
from numpy cimport ndarray
from cython.parallel import prange
from libc.stdint cimport uint32_t, int64_t
from libc.math cimport ceil
@cython.boundscheck(False)
@cython.wraparound(False)
@cython.infer_types(True)
@cython.initializedcheck(False)
def cy_loop3(int64_t[:] coordinate,int64_t[:,:] input_array):
alen = input_array.shape[0]
cdef uint32_t n_batches = 6*16 #6*16*8
cdef uint32_t batch_size = <uint32_t>ceil(alen/n_batches)
cdef uint32_t[:,:] res = np.empty((n_batches, batch_size),dtype=np.uint32)
cdef uint32_t[:] res_len = np.empty(n_batches,dtype=np.uint32)
cdef uint32_t start, end, ii, i, j
for i in prange(n_batches, nogil=True):
start = i*batch_size
if i == (n_batches - 1):
end = alen
else:
end = (i+1)*batch_size
ii = 0
for j in range(start,end):
if (input_array[j,0] == coordinate[0]) and (input_array[j,1] == coordinate[1]):
res[i, ii] = j
ii = ii + 1
res_len[i] = ii
return np.asarray(res), np.asarray(res_len)
My tests:
%%timeit
findPerson(predator, people_array)
%%timeit
find_idx(predator, people_array)
%%timeit
findPerson_nb2(predator, people_array)
%%timeit
alen = people_array.shape[0]
res, res_len = findPerson_nb4(predator, people_array,alen)
ar, alen1 = myconcat(res,res_len, alen)
res = ar[:alen1]
%%timeit
cy_loop(predator, people_array)
%%timeit
cy_loop3(predator, people_array)
Upvotes: 1
Reputation: 948
Can you try this?
import numpy as np
def find_idx(pt, ptslist):
return np.where(np.all(pt == ptslist, axis=1))[0]
It also works for more than 2 dimensions.
Upvotes: 0