Reputation: 31
guys:
I am new to the use of python multiprocessing. Recently my research needs calculation with many iterations. So I tried to use multiprocessing to speed it up. But when I wrote a small sample code, I found that the curve I got with multiprocessing is different from that without multiprocessing.
The code with multiprocessing:
import random
import matplotlib.pyplot as plt
import math
import numpy as np
import multiprocessing as mp
class Classic:
def __init__(self,position,type):
assert type == 'A' or type == 'B'
self.position = position
self.type = type
def getposition(self):
return self.position
def gettype (self):
return self.type
def setposition(self,pos):
self.position = pos
def settype (self,t):
self.type = t
def number_count(system):
counter = 0
for i in range(0,len(system)):
if system[i] !=0:
counter=counter+1
return counter
def time_evolution(system_temp,steps):
numberlist=np.zeros(steps)
number = number_count(system_temp)
for t in range(0,steps):
for i in range(0,len(system_temp)):
x = random.randint(0, len(system_temp)-2)
if system_temp[x]!=0 and system_temp[x+1]!=0:
p1 = system_temp[x]
p2 = system_temp[x+1]
p1_type = p1.gettype()
p2_type = p2.gettype()
exchange_check = random.randint(0,1)
if p1_type == p2_type:
system_temp[x]=0
system_temp[x+1]=0
number = number-2
elif exchange_check == 1:
type_temp = p1_type
p1.settype(p2_type)
p2.settype(type_temp)
elif system_temp[x]!=0 and system_temp[x+1]==0:
system_temp[x+1] = system_temp[x]
system_temp[x] =0
elif system_temp[x]==0 and system_temp[x+1]!=0:
system_temp[x]=system_temp[x+1]
system_temp[x+1]=0
numberlist[t]=numberlist[t]+number
return numberlist
if __name__ =='__main__':
pool = mp.Pool(8)
size = 10000
system_init = [0]*size
particle_num = 3000
repeat = 20
steps = 2000
res=[]
totalnum= np.zeros(steps)
randomlist = random.sample(range(1,100*repeat),repeat)
for i in range(0,particle_num):
pos = random.randint(0,size-1)
ran_num = random.randint (0,1)
if ran_num == 0:
temp_type = 'A'
else:
temp_type = 'B'
if system_init[pos] ==0:
system_init[pos] = Classic(pos,temp_type)
for k in range(0, repeat):
system_temp = system_init[:]
random.seed(randomlist[k])
res.append(pool.apply_async(time_evolution, args=(system_temp,steps,)))
pool.close()
pool.join()
for count in range(0,len(res)):
totalnum =totalnum+ np.array(res[count].get())
time=np.linspace(1,steps+1,steps)
time_sqrt=np.sqrt(8.0*math.pi*time)
density =totalnum/(repeat*size)
density_mod = np.multiply(time_sqrt,density)
#plt.loglog(time,density_mod)
#plt.savefig("modified_density_loglog.pdf")
#plt.close()
myfile=open('density_mod2.txt','w')
for element in density_mod:
myfile.write(str(element))
myfile.write('\n')
myfile.close()
And the code without multiprocessing is
import random
import matplotlib.pyplot as plt
import math
import numpy as np
class Classic:
def __init__(self,position,type):
assert type == 'A' or type == 'B'
self.position = position
self.type = type
def getposition(self):
return self.position
def gettype (self):
return self.type
def setposition(self,pos):
self.position = pos
def settype (self,t):
self.type = t
def number_count(system):
counter = 0
for i in range(0,len(system)):
if system[i] !=0:
counter=counter+1
return counter
def time_evolution(system_temp,steps):
numberlist=np.zeros(steps)
number = number_count(system_temp)
for t in range(0,steps):
for i in range(0,len(system_temp)):
x = random.randint(0, len(system_temp)-2)
if system_temp[x]!=0 and system_temp[x+1]!=0:
p1 = system_temp[x]
p2 = system_temp[x+1]
p1_type = p1.gettype()
p2_type = p2.gettype()
exchange_check = random.randint(0,1)
if p1_type == p2_type:
system_temp[x]=0
system_temp[x+1]=0
number = number-2
elif exchange_check == 1:
type_temp = p1_type
p1.settype(p2_type)
p2.settype(type_temp)
elif system_temp[x]!=0 and system_temp[x+1]==0:
system_temp[x+1] = system_temp[x]
system_temp[x] =0
elif system_temp[x]==0 and system_temp[x+1]!=0:
system_temp[x]=system_temp[x+1]
system_temp[x+1]=0
numberlist[t]=numberlist[t]+number
return numberlist
size = 10000
system_init = [0]*size
particle_num = 3000
repeat = 20
steps = 2000
res=[]
totalnum= np.zeros(steps)
randomlist = random.sample(range(1,100*repeat),repeat)
for i in range(0,particle_num):
pos = random.randint(0,size-1)
ran_num = random.randint (0,1)
if ran_num == 0:
temp_type = 'A'
else:
temp_type = 'B'
if system_init[pos] ==0:
system_init[pos] = Classic(pos,temp_type)
for k in range(0, repeat):
system_temp = system_init[:]
random.seed(randomlist[k])
res.append(time_evolution(system_temp,steps))
for count in range(0,len(res)):
totalnum +=res[count]
time=np.linspace(1,steps+1,steps)
time_sqrt=np.sqrt(8.0*math.pi*time)
density =totalnum/(repeat*size)
density_mod = np.multiply(time_sqrt,density)
myfile=open('density_mod3.txt','w')
for element in density_mod:
myfile.write(str(element))
myfile.write('\n')
myfile.close()
And the result is shown as
The blue curve is result with multiprocessing and the orange one is that without multiprocessing. I am not sure why this would happen. How to fix it?
Upvotes: 2
Views: 152
Reputation: 11321
My guess is that you don't initialize the random number generator correctly. You have to do that "inside" the spawned processes.
Check the following simple example:
import random
import multiprocessing as mp
def rand_test_1():
print(random.randint(0, 100))
return None
def rand_test_2(seed):
random.seed(seed)
print(random.randint(0, 100))
return None
if __name__ == '__main__':
repeat = 3
randomlist = random.sample(range(1, 100 * repeat), repeat)
print('Classic:')
for k in range(repeat):
random.seed(randomlist[k])
rand_test_1()
print('\nMultiprocessing version 1:')
with mp.Pool() as pool:
for k in range(repeat):
random.seed(randomlist[k])
pool.apply_async(rand_test_1, args=tuple())
pool.close()
pool.join()
print('\nMultiprocessing version 2:')
with mp.Pool() as pool:
for k in range(repeat):
pool.apply_async(rand_test_2, args=(randomlist[k],))
pool.close()
pool.join()
The results look like:
Classic:
32
78
6
Multiprocessing version 1:
84
43
90
Multiprocessing version 2:
32
78
6
You are using the multiprocessing version 1, I think you should use version 2.
One other point which has nothing to do with your problem: My impression is that it might be a good idea to use .map
/.starmap
(see here) instead of .apply_async
:
...
with mp.Pool() as pool:
res = list(pool.map(rand_test_2, randomlist))
Upvotes: 4