Reputation: 161
I want to compare the effect of multiprocessing for bubble sort.
Let us consider first the original one without multiprocessing:
import multiprocessing
import random
import time
import numpy as np
def bubble_sort(array):
    """Sort `array` in place with bubble sort, then print the sorted array.

    Keeps sweeping the sequence, swapping out-of-order neighbours, until a
    full sweep makes no swap.
    """
    swapped = True
    while swapped:
        swapped = False
        for pos in range(len(array) - 1):
            if array[pos] > array[pos + 1]:
                swapped = True
                # Tuple assignment swaps the pair without a temp variable.
                array[pos], array[pos + 1] = array[pos + 1], array[pos]
    print("Array sorted: ", array)
if __name__ == "__main__":
    # 10,000 random ints in [0, 1000); timed single-process baseline.
    array = np.random.randint(0, 1000, 10000)
    start = time.time()
    bubble_sort(array)
    elapsed = time.time() - start
    print("Time taken: ", elapsed)
The result is:
Array sorted: [ 0 0 0 ... 999 999 999]
Time taken: 25.157966375350952
Now with multiprocessing:
if __name__ == "__main__":
    array = np.random.randint(0, 1000, 10000)
    # Run the same sort, but inside a single child process.
    worker = multiprocessing.Process(target=bubble_sort, args=(array,))
    start = time.time()
    worker.start()
    worker.join()
    print("Time taken: ", time.time() - start)
The result is:
Array sorted: [ 0 0 0 ... 999 999 999]
Time taken: 24.962100744247437
There is only about a one-second difference — am I missing something?
Upvotes: -5
Views: 80
Reputation: 834
You are not doing multiprocessing in your code. Try this code to see the difference:
import multiprocessing
import numpy as np
import time
import copy
def bubble_sort(array):
    """Classic O(n^2) bubble sort: sorts `array` in place and returns it."""
    length = len(array)
    for done in range(length):
        # After each pass the largest remaining value has bubbled to the
        # end, so the tail of `done` elements is already in place.
        for j in range(length - done - 1):
            if array[j] > array[j + 1]:
                array[j], array[j + 1] = array[j + 1], array[j]
    return array
def merge_chunks(left, right):
    """Merge two sorted sequences into one sorted list (stable merge)."""
    merged = []
    i = j = 0
    left_len, right_len = len(left), len(right)
    while i < left_len and j < right_len:
        # `<=` keeps equal elements from `left` first (stability).
        if left[i] <= right[j]:
            merged.append(left[i])
            i += 1
        else:
            merged.append(right[j])
            j += 1
    # At most one of these slices is non-empty; it is the sorted remainder.
    merged.extend(left[i:])
    merged.extend(right[j:])
    return merged
def parallel_bubble_sort(array):
    """Sort `array` by bubble-sorting contiguous chunks in parallel worker
    processes, then merging the sorted chunks pairwise into one list."""
    num_processes = multiprocessing.cpu_count()  # Use all available cores
    chunk_size = (len(array) + num_processes - 1) // num_processes
    # One contiguous slice per worker; slicing past the end yields [].
    chunks = [
        array[i * chunk_size : min((i + 1) * chunk_size, len(array))]
        for i in range(num_processes)
    ]
    pool = multiprocessing.Pool(processes=num_processes)
    results = pool.map(bubble_sort, chunks)
    pool.close()
    pool.join()
    # Merge the sorted chunks pairwise until a single list remains.
    while len(results) > 1:
        next_round = []
        for i in range(0, len(results), 2):
            if i + 1 < len(results):
                next_round.append(merge_chunks(results[i], results[i + 1]))
            else:
                # Odd chunk out: carry it forward unmerged.
                next_round.append(results[i])
        results = next_round
    return results[0]
if __name__ == "__main__":
    array_size = 10000
    array = np.random.randint(0, 1000, array_size).tolist()
    # Identical copy so both sorts see the same (unsorted) input.
    array_copy = copy.deepcopy(array)

    # Single-process baseline.
    start_time = time.perf_counter()
    sorted_array = bubble_sort(array)
    print("Bubble sort without multiprocessing: ", time.perf_counter() - start_time)

    # Chunked multi-process variant on the untouched copy.
    start_time = time.perf_counter()
    sorted_array = parallel_bubble_sort(array_copy)
    print("Bubble sort with multiprocessing: ", time.perf_counter() - start_time)
I split the array into chunks and gave each chunk to a processor core to sort. Then I merged the sorted chunks, ensuring the merged result stays ordered, and got these results:
Bubble sort without multiprocessing: 17.12360268399061
Bubble sort with multiprocessing: 1.380762806002167
Upvotes: 1
Reputation: 27211
You haven't accounted for the fact that the values in the ndarray are almost certainly different for each of your tests. The time taken by your Bubble Sort implementation will depend on the order of values in the ndarray.
Ignoring the woeful inefficiency of your Bubble Sort implementation, consider this test:
from multiprocessing import Process
import time
import numpy as np
def bubble_sort(array: np.ndarray) -> None:
    """Sort `array` in place: sweep repeatedly, swapping out-of-order
    neighbours, until one full sweep performs no swap."""
    swapped = True
    while swapped:
        swapped = False
        for pos in range(len(array) - 1):
            if array[pos] > array[pos + 1]:
                swapped = True
                # Tuple assignment swaps the two elements in place.
                array[pos], array[pos + 1] = array[pos + 1], array[pos]
if __name__ == "__main__":
    data = np.random.randint(0, 1_000, 10_000)

    # Time the sort running in a child process (spawn + argument
    # serialisation overhead is included in the measurement).
    child = Process(target=bubble_sort, args=(data,))
    start = time.time()
    child.start()
    child.join()
    print(time.time() - start)

    # Time the same sort on the same data in this process.
    start = time.time()
    bubble_sort(data)
    print(time.time() - start)
Output:
15.966789960861206
15.38444972038269
We see here that the subprocess appears to take longer. This is due to at least two factors - 1) Overhead in starting the subprocess, 2) Overhead in serialisation of the ndarray
These results are as expected.
Upvotes: 0