Reputation: 441
Hi I need to speed up this code
import numpy as np
matrix3d=np.empty([10,10,1000])
matrix3d[:]=np.random.randint(10)
matrix3d_1=np.empty([10,10,1000])
x=10
y=1
for z in range(0,1000):
matrix3d_1[:,:,z]=func(matrix3d[:,:,z],x,y)
def func(matrix,x,y):
return matrix*x+y
I have tried using multiprocessig
using Pool.map()
but it did not work.
from functools import partial
import multiprocessing as mp
pool=mp.Pool(processes=2)
args=partial(func,x,y)
matrix3d_2=np.empty([10,10,1000])
matrix3d_2=pool.map(args,matrix3d)
pool.close()
If I compare the two matrix matrix3d_1==matrix3d_2
the results is false.
How can this be fixed?
Upvotes: 1
Views: 2203
Reputation: 6492
Parallel processing of a 3d matrix
The python map method as well as the pool.map methode can only take one input object. See for example https://stackoverflow.com/a/10973817/4045774
To reduce the inputs to one input we can use for example functools. The input which remains have to be on the last place.
from functools import partial
import numpy as np
import multiprocessing as mp
def main():
matrix3d=np.empty([10,10,1000])
matrix3d[:]=np.random.randint(10)
matrix3d_1=np.empty([10,10,1000])
x=10
y=1
pool=mp.Pool(processes=4)
func_p=partial(func,x,y)
#parallel map returns a list
res=pool.map(func_p,(matrix3d[:,:,z] for z in xrange(0,matrix3d.shape[2])))
#copy the data to array
for i in xrange(0,matrix3d.shape[2]):
matrix3d_1[:,:,i]=res[i]
def func(x,y,matrix):
return matrix*x+y
Parallel version using numba
This version will scale well over all cores and is at least 200 times faster than simple multiprocessing shown above. I have modified the code you linked to a bit, to get rid of any other dependencies than numpy.
import numpy
from numba import njit, prange
nb_meanInterp = njit("float32[:,:](float32[:,:],int64,int64)")(meanInterp)
resample_3d_nb = njit("float32[:,:,:](float32[:,:,:],int64,int64)",parallel=True)(resample_3d)
def resample_3d(matrix_3d,x,y):
matrix3d_res=numpy.empty((x,y,matrix_3d.shape[2]),dtype=numpy.float32)
for z in prange(0,matrix_3d.shape[2]):
matrix3d_res[:,:,z]=nb_meanInterp(matrix_3d[:,:,z],x,y)
return matrix3d_res
def meanInterp(data, m, n):
newData = numpy.zeros((m,n),dtype=numpy.float32)
mOrig, nOrig = data.shape
hBoundariesOrig, vBoundariesOrig = numpy.linspace(0,1,mOrig+1),
numpy.linspace(0,1,nOrig+1)
hBoundaries, vBoundaries = numpy.linspace(0,1,m+1), numpy.linspace(0,1,n+1)
for iOrig in range(mOrig):
for jOrig in range(nOrig):
for i in range(m):
if hBoundaries[i+1] <= hBoundariesOrig[iOrig]: continue
if hBoundaries[i] >= hBoundariesOrig[iOrig+1]: break
for j in range(n):
if vBoundaries[j+1] <= vBoundariesOrig[jOrig]: continue
if vBoundaries[j] >= vBoundariesOrig[jOrig+1]: break
#boxCoords = ((hBoundaries[i], vBoundaries[j]),(hBoundaries[i+1], vBoundaries[j+1]))
#origBoxCoords = ((hBoundariesOrig[iOrig], vBoundariesOrig[jOrig]),(hBoundariesOrig[iOrig+1], vBoundariesOrig[jOrig+1]))
#area=overlap(boxCoords, origBoxCoords)
#hopefully this is equivivalent (not tested)-----
T_x=(hBoundaries[i],hBoundaries[i+1],hBoundariesOrig[iOrig],hBoundariesOrig[iOrig+1])
T_y=(vBoundaries[j],vBoundaries[j+1],vBoundariesOrig[jOrig],vBoundariesOrig[jOrig+1])
tx=(T_x[1]-T_x[0]+T_x[3]-T_x[2])-(max(T_x)-min(T_x))
ty=(T_y[1]-T_y[0]+T_y[3]-T_y[2])-(max(T_y)-min(T_y))
area=tx*ty
#------------------------
newData[i][j] += area * data[iOrig][jOrig] / (hBoundaries[1] * vBoundaries[1])
return newData
Upvotes: 2