maxgemilian
maxgemilian

Reputation: 143

Fast Multiprocessing for small processes in Python

I am facing the following problem: within a larger Python class, I would like to execute part of the work in multiple processes to speed up my program. As you can see in my minimal example, I have a function f(x) that I call 10 times. Doing this in a single process takes around 1 second. With multiprocessing I would like to get as close to 100 ms as possible. Therefore, I have already tried the Pool.map and Pool.imap_unordered methods. As you can see, they are much faster, but they don't give me the desired results: the result list stays empty.

Can you help me?

import time
from multiprocessing import Pool


class TestClass(object):
    """Benchmark harness: run f() ten times serially and through a process Pool.

    Instantiating the class runs all three variants from __init__ and, for
    each one, prints the elapsed time followed by self.result_list.
    """

    def __init__(self):
        # Single Process
        self.result_list = []
        self.single_process()
        print(self.result_list)

        # Multiprocess ordered
        self.result_list = []
        self.multiprocessing_ordered()
        print(self.result_list)

        # Multiprocess unordered
        self.result_list = []
        self.multiprocessing_unordered()
        print(self.result_list)

    def f(self, x):
        """Sleep 100 ms, then append x squared to self.result_list; returns None."""
        time.sleep(0.1)
        # The append mutates the result_list of whichever process executes this
        # call. In a Pool worker that is the worker's own copy of the instance,
        # so the parent's list is left untouched.
        self.result_list.append(x**2)

    def single_process(self):
        """Call f() for 0..9 in the current process and print the elapsed time."""
        # Single process
        start_time = time.time()
        for x in range(10):
            self.f(x)
        print("Time with a single process: {0:.1f}".format((time.time() - start_time)*1e3))

    def multiprocessing_ordered(self):
        """Run f() for 0..9 through Pool.map and print the elapsed time."""
        # The timer starts before the pool exists, so worker start-up is
        # included in the measured time.
        start_time = time.time()
        pool = Pool(10)
        # map() blocks until every call has finished; its return value (the
        # list of f()'s return values) is discarded here.
        pool.map(self.f, list(range(10)))
        # close() only stops new submissions; there is no join() afterwards.
        pool.close()
        print("Time with multiprocessing (ordered): {0:.1f}".format((time.time() - start_time)*1e3))

    def multiprocessing_unordered(self):
        """Submit f() for 0..9 through Pool.imap_unordered and print the elapsed time."""
        start_time = time.time()
        pool = Pool(10)
        # imap_unordered() returns an iterator; it is never consumed here, so
        # nothing in this method waits for the results.
        # NOTE(review): the printed time therefore likely does not cover the
        # ten f() calls themselves - confirm before comparing with map().
        pool.imap_unordered(self.f, list(range(10)))
        # close() only stops new submissions; there is no join() afterwards.
        pool.close()
        print("Time with multiprocessing (unordered): {0:.1f}".format((time.time() - start_time)*1e3))


# Script entry point: constructing TestClass runs every benchmark from its
# __init__. The guard keeps that from happening when the module is imported.
# NOTE(review): presumably also needed so that worker processes which
# re-import this module do not start the benchmark again - confirm for the
# start method in use.
if __name__ == '__main__':

    test_object = TestClass()

Result:

Time with a single process: 1013.7 ms
[0, 1, 4, 9, 16, 25, 36, 49, 64, 81]
Time with multiprocessing (ordered): 280.3 ms
[]
Time with multiprocessing (unordered): 100.7 ms
[]

Upvotes: 0

Views: 1682

Answers (1)

Markus Weber
Markus Weber

Reputation: 1107

Each process lives in its own memory space, and thus self.result_list is not shared between your parent process and your child processes.

You now have multiple options:

  1. Use pipes, queues, or any other technique to communicate between your subprocesses and your main process.

  2. Return the result from your function and retrieve it in the main process like this:

import time
from multiprocessing import Pool


class TestClass(object):
    """Benchmark f() run ten times serially versus in a process pool.

    Worker processes do not share memory with the parent, so f() returns its
    result and the parent collects the return values into self.result_list.
    Instantiating the class runs both variants from __init__ and, for each
    one, prints the elapsed time followed by self.result_list.
    """

    def __init__(self):
        # Single Process
        self.result_list = []
        self.single_process()
        print(self.result_list)

        # Multiprocess
        self.result_list = []
        self.multiprocessing()
        print(self.result_list)

    def f(self, x):
        """Sleep 100 ms to simulate work, then return x squared."""
        time.sleep(0.1)
        return x**2

    def single_process(self):
        """Call f() for 0..9 in this process, appending each result to self.result_list."""
        start_time = time.time()
        for x in range(10):
            self.result_list.append(self.f(x))
        print("Time with a single process: {0:.1f}".format((time.time() - start_time)*1e3))

    def multiprocessing(self):
        """Call f() for 0..9 in a pool of 10 processes and store the results in order.

        The timer starts after the pool has been created, so worker start-up
        is not part of the printed time.

        Raises:
            multiprocessing.TimeoutError: if a result is not available within
                one second of being requested.
        """
        pool = Pool(10)
        try:
            # Start calculation
            start_time = time.time()
            pending = [pool.apply_async(self.f, (i,)) for i in range(10)]

            # Receive answer; results are read in submission order, so the
            # list is ordered even though the tasks run concurrently.
            self.result_list = [res.get(timeout=1) for res in pending]

            # Evaluate result
            print("Time with multiprocessing: {0:.1f}".format((time.time() - start_time)*1e3))

            pool.close()
        except Exception:
            # Do not leave worker processes behind when a task fails or times out.
            pool.terminate()
            raise
        finally:
            # join() is only valid after close() or terminate(); both paths
            # above guarantee one of them has been called.
            pool.join()


# Script entry point: constructing TestClass runs both benchmarks from its
# __init__. The guard keeps that from happening when the module is imported.
# NOTE(review): presumably also needed so that worker processes which
# re-import this module do not start the benchmark again - confirm for the
# start method in use.
if __name__ == '__main__':

    test_object = TestClass()

Result:

Time with a single process: 1002.0
[0, 1, 4, 9, 16, 25, 36, 49, 64, 81]
Time with multiprocessing: 102.8
[0, 1, 4, 9, 16, 25, 36, 49, 64, 81]
  3. Try the threading module or ThreadPool. Threads share the same memory space, and thus the results can be shared by simply appending them to the list. With ThreadPool:
import time
from multiprocessing.pool import ThreadPool

class TestClass(object):
    """Benchmark f() run ten times serially versus in a thread pool.

    Threads share the memory of the process that started them, so f() can
    append straight to self.result_list. The order of the entries produced by
    the pooled variants depends on thread scheduling. Instantiating the class
    runs all three variants from __init__ and, for each one, prints the
    elapsed time followed by self.result_list.
    """

    def __init__(self):
        # Single Process
        self.result_list = []
        self.single_process()
        print(self.result_list)

        # Multiprocess ordered
        self.result_list = []
        self.multiprocessing_ordered()
        print(self.result_list)

        # Multiprocess unordered
        self.result_list = []
        self.multiprocessing_unordered()
        print(self.result_list)

    def f(self, x):
        """Sleep 100 ms to simulate work, then append x squared to self.result_list."""
        time.sleep(0.1)
        self.result_list.append(x**2)

    def single_process(self):
        """Call f() for 0..9 in the calling thread and print the elapsed time."""
        start_time = time.time()
        for x in range(10):
            self.f(x)
        print("Time with a single process: {0:.1f}".format((time.time() - start_time)*1e3))

    def multiprocessing_ordered(self):
        """Run f() for 0..9 through ThreadPool.map and print the elapsed time."""
        start_time = time.time()
        pool = ThreadPool(10)
        try:
            # map() blocks until every call has finished.
            pool.map(self.f, range(10))
        finally:
            # Always shut the worker threads down, even if f() raised.
            pool.close()
            pool.join()
        print("Time with multiprocessing (ordered): {0:.1f}".format((time.time() - start_time)*1e3))

    def multiprocessing_unordered(self):
        """Run f() for 0..9 through ThreadPool.imap_unordered and print the elapsed time."""
        start_time = time.time()
        pool = ThreadPool(10)
        try:
            # imap_unordered() is lazy about delivering results: draining the
            # iterator is what waits for all ten calls to complete.
            for _ in pool.imap_unordered(self.f, range(10)):
                pass
        finally:
            # Always shut the worker threads down, even if f() raised.
            pool.close()
            pool.join()
        print("Time with multiprocessing (unordered): {0:.1f}".format((time.time() - start_time)*1e3))


# Script entry point: constructing TestClass runs every benchmark from its
# __init__. The guard keeps that from happening when the module is imported.
if __name__ == '__main__':

    test_object = TestClass()

Result with ThreadPool:

Time with a single process: 1002.0
[0, 1, 4, 9, 16, 25, 36, 49, 64, 81]
Time with multiprocessing (ordered): 116.1
[0, 4, 1, 25, 36, 9, 16, 49, 81, 64]
Time with multiprocessing (unordered): 109.4
[0, 1, 4, 16, 25, 36, 9, 49, 81, 64]

Upvotes: 1

Related Questions