Reputation: 51

Python multiprocessing and shared variable

I'm not an expert in Python, but I have managed to write multiprocessing code that uses all the CPUs and cores in my PC. My code loads a very large array, about 1.6 GB, and I need to update the array in every process. Fortunately, the update consists of adding some artificial stars to the image, and every process has a different set of image positions at which to add the artificial stars.

The image is too large, so I can't create a new one every time I call a process. My solution was to create a variable in shared memory, which saves plenty of memory. For some reason, it works for 90% of the image, but there are regions where my code adds random numbers at some of the positions I previously sent to the processes. Is it related to the way I create the shared variable? Are the processes interfering with each other during the execution of my code?

Something weird is that when using a single CPU and a single core, the image is 100% perfect and there are no random numbers added to it. Can you suggest a way to share a large array between multiple processes? Here is the relevant part of my code. Please read the line where I define the variable im_data.

import warnings
warnings.filterwarnings("ignore")

from mpl_toolkits.mplot3d import Axes3D
from matplotlib import cm
import matplotlib.pyplot as plt
import sys,os
import subprocess
import numpy as np
import time
import cv2 as cv
import pyfits
from pyfits import getheader
import multiprocessing, Queue
import ctypes

class Worker(multiprocessing.Process):


def __init__(self, work_queue, result_queue):

    # base class initialization
    multiprocessing.Process.__init__(self)

    # job management stuff
    self.work_queue = work_queue
    self.result_queue = result_queue
    self.kill_received = False

def run(self):
    while not self.kill_received:

        # get a task
        try:
            i_range, psf_file = self.work_queue.get_nowait()
        except Queue.Empty:
            break

        # the actual processing
        print "Adding artificial stars - index range=", i_range

        radius=16
        x_c,y_c=( (psf_size[1]-1)/2, (psf_size[2]-1)/2 )
        x,y=np.meshgrid(np.arange(psf_size[1])-x_c,np.arange(psf_size[2])-y_c)
        distance = np.sqrt(x**2 + y**2)

        for i in range(i_range[0],i_range[1]):
            psf_xy=np.zeros(psf_size[1:3], dtype=float)
            j=0
            for i_order in range(psf_order+1):
                j_order=0
                while (i_order+j_order < psf_order+1):
                    psf_xy += psf_data[j,:,:] * ((mock_y[i]-psf_offset[1])/psf_scale[1])**i_order * ((mock_x[i]-psf_offset[0])/psf_scale[0])**j_order
                    j_order+=1
                    j+=1


            psf_factor=10.**( (30.-mock_mag[i])/2.5)/np.sum(psf_xy)
            psf_xy *= psf_factor

            npsf_xy=cv.resize(psf_xy,(npsf_size[0],npsf_size[1]),interpolation=cv.INTER_LANCZOS4)
            npsf_factor=10.**( (30.-mock_mag[i])/2.5)/np.sum(npsf_xy)
            npsf_xy *= npsf_factor

            im_rangex=[max(mock_x[i]-npsf_size[1]/2,0), min(mock_x[i]-npsf_size[1]/2+npsf_size[1], im_size[1])]
            im_rangey=[max(mock_y[i]-npsf_size[0]/2,0), min(mock_y[i]-npsf_size[0]/2+npsf_size[0], im_size[0])]
            npsf_rangex=[max(-1*(mock_x[i]-npsf_size[1]/2),0), min(-1*(mock_x[i]-npsf_size[1]/2-im_size[1]),npsf_size[1])]
            npsf_rangey=[max(-1*(mock_y[i]-npsf_size[0]/2),0), min(-1*(mock_y[i]-npsf_size[0]/2-im_size[0]),npsf_size[0])]

            im_data[im_rangey[0]:im_rangey[1], im_rangex[0]:im_rangex[1]] = 10.


        self.result_queue.put(id)

if __name__ == "__main__":

  n_cpu=2
  n_core=6
  n_processes=n_cpu*n_core*1
  input_mock_file=sys.argv[1]

  print "Reading file ", im_file[i]
  hdu=pyfits.open(im_file[i])
  data=hdu[0].data
  im_size=data.shape

  im_data_base = multiprocessing.Array(ctypes.c_float, im_size[0]*im_size[1])
  im_data = np.ctypeslib.as_array(im_data_base.get_obj())
  im_data = im_data.reshape(im_size[0], im_size[1])
  im_data[:] = data
  data=0
  assert im_data.base.base is im_data_base.get_obj()

  # run
  # load up work queue
  tic=time.time()
  j_step=np.int(np.ceil( mock_n*1./n_processes ))
  j_range=range(0,mock_n,j_step)
  j_range.append(mock_n)


  work_queue = multiprocessing.Queue()
  for j in range(np.size(j_range)-1):
    if work_queue.full():
      print "Oh no! Queue is full after only %d iterations" % j
    work_queue.put( (j_range[j:j+2], psf_file[i]) )

  # create a queue to pass to workers to store the results
  result_queue = multiprocessing.Queue()

  # spawn workers
  for j in range(n_processes):
    worker = Worker(work_queue, result_queue)
    worker.start()

  # collect the results off the queue
  while not work_queue.empty():
    result_queue.get()

  print "Writing file ", mock_im_file[i]
  hdu[0].data=im_data
  hdu.writeto(mock_im_file[i])
  print "%f s for parallel computation." % (time.time() - tic)

Upvotes: 5

Answers (2)

sega_sai

Reputation: 8528

The problem occurs in your example when multiple processes write into overlapping regions of the image/array. So you either have to use one lock per image, or create a set of locks, one per image section (to reduce lock contention).

Or you can produce image modifications in one set of processes and do the actual modification of the image in a separate single thread.

Upvotes: 1

Xion345

Reputation: 1616

I think the problem (as you suggested in your question) comes from the fact that you are writing to the same array from multiple processes.

# Shared ctypes buffer holding one float per image pixel
im_data_base = multiprocessing.Array(ctypes.c_float, im_size[0]*im_size[1])
# numpy array built on top of the shared buffer
im_data = np.ctypeslib.as_array(im_data_base.get_obj())
# give it the 2-D shape of the image
im_data = im_data.reshape(im_size[0], im_size[1])
# copy the image pixels into the shared array
im_data[:] = data

Although I am pretty sure that you could write into im_data_base in a "process-safe" manner (an implicit lock is used by Python to synchronize access to the array), I am not sure you can write into im_data in a process-safe manner.

I would therefore (even though I am not sure it will solve your issue) advise you to create an explicit lock around im_data

# Disable python implicit lock, we are going to use our own.
# With lock=False the call returns the raw ctypes array itself (there is no
# synchronized wrapper), so it is passed to numpy directly: it has no
# get_obj() method.
im_data_base = multiprocessing.Array(ctypes.c_float, im_size[0]*im_size[1],
    lock=False)
im_data = np.ctypeslib.as_array(im_data_base)
im_data = im_data.reshape(im_size[0], im_size[1])
im_data[:] = data
# Create our own lock (the question's code only does "import multiprocessing",
# so the name has to be qualified)
im_data_lock = multiprocessing.Lock()

Then in the processes, acquire the lock each time you need to modify im_data

# The with-statement acquires the lock and always releases it again, even if
# the assignment raises, so a failing worker cannot leave the lock held.
with self.im_data_lock:
    im_data[im_rangey[0]:im_rangey[1], im_rangex[0]:im_rangex[1]] = 10

I omitted the code to pass the lock to the constructor of your process and store it as a member field (self.im_data_lock) for the sake of brevity. You should also pass the im_data array to the constructor of your process and store it as a member field.

Upvotes: 3

Python multiprocessing and shared variable

Answers (2)

Related Questions