np20
np20

Reputation: 2095

Producer Consumer using semaphores and mutexes in Python

I'm trying to understand how to implement a Queue with a bounded buffer size that can be used by multiple producers and consumers using Python Semaphores. Here's my implementation:

class Q:

    def __init__(self, size):
        self.buff = [None]*size
        self.end = 0
        self.start = 0
        self.size = size
        self.end_lock = Lock()  # protect end from race across multiple producers
        self.start_lock = Lock()  # protect start from race across multiple consumers
        self.open = Semaphore(size)  # block till there's space to produce
        self.closed = Semaphore(size) # block till there's item to consume
        for _ in range(size):  # initialize with all closed acquired so that consumer is blocked
            self.closed.acquire()

    def put(self, val):
        self.open.acquire()
        with self.end_lock:
            self.buff[self.end] = val
            self.end = (self.end+1)%self.size
        self.closed.release()

    def get(self):
        self.closed.acquire()
        with self.start_lock:
            val = self.buff[(self.start)%self.size]
            self.start = (self.start+1)%self.size
        self.open.release()
        return val

Is this implementation bug-free? Could this be simplified further to use fewer mutexes/semaphores?

Upvotes: 3

Views: 2162

Answers (2)

binarytrails
binarytrails

Reputation: 645

from time import sleep
from random import randint
from threading import Thread, Semaphore

s = Semaphore(1)

producer_idx = 0
consumer_idx = 0
counter = 0

buf_size = 10
buf = [" "] * buf_size
print(buf)

def produce():
    global producer_idx, counter, buf, buf_size
    while True:
        #s.acquire()
        with s:
            if (counter == buf_size): # full
                #s.release()
                continue
            buf[producer_idx] = "x"
            producer_idx = (producer_idx + 1) % buf_size
            print("{} <= produced 'x' at index='{}'".format(buf, producer_idx))
            counter = counter + 1
        #s.release()
        sleep(1)

def consume():
    global consumer_idx, counter, buf, buf_size
    while True:
        #s.acquire()
        with s:
            if (counter == 0): # empty (next element is)
                #s.release()
                continue
            buf[consumer_idx] = " "
            consumer_idx = (consumer_idx + 1) % buf_size
            print("{} => consumed '{}' at index='{}'".format(buf, buf[consumer_idx], consumer_idx))
            counter = counter - 1
        #s.release()
        sleep(1)

producers = list()

for i in range(randint(10,20)):
    producer = Thread(target=produce)
    producer.start()
    producers.append(producer)

consumers = list()

for i in range(randint(10,20)):
    consumer = Thread(target=consume)
    consumer.start()
    consumers.append(consumer)
moi python $ python boundedbuffer_semaphore.py 
[' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ']
['x', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' '] <= produced 'x' at index='1'
['x', 'x', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' '] <= produced 'x' at index='2'
['x', 'x', 'x', ' ', ' ', ' ', ' ', ' ', ' ', ' '] <= produced 'x' at index='3'
['x', 'x', 'x', 'x', ' ', ' ', ' ', ' ', ' ', ' '] <= produced 'x' at index='4'
['x', 'x', 'x', 'x', 'x', ' ', ' ', ' ', ' ', ' '] <= produced 'x' at index='5'
['x', 'x', 'x', 'x', 'x', 'x', ' ', ' ', ' ', ' '] <= produced 'x' at index='6'
['x', 'x', 'x', 'x', 'x', 'x', 'x', ' ', ' ', ' '] <= produced 'x' at index='7'
['x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', ' ', ' '] <= produced 'x' at index='8'
['x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', ' '] <= produced 'x' at index='9'
['x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x'] <= produced 'x' at index='0'
[' ', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x'] => consumed 'x' at index='1'
[' ', ' ', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x'] => consumed 'x' at index='2'
['x', ' ', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x'] <= produced 'x' at index='1'
['x', ' ', ' ', 'x', 'x', 'x', 'x', 'x', 'x', 'x'] => consumed 'x' at index='3'
['x', ' ', ' ', ' ', 'x', 'x', 'x', 'x', 'x', 'x'] => consumed 'x' at index='4'
['x', 'x', ' ', ' ', 'x', 'x', 'x', 'x', 'x', 'x'] <= produced 'x' at index='2'
['x', 'x', 'x', ' ', 'x', 'x', 'x', 'x', 'x', 'x'] <= produced 'x' at index='3'
['x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x'] <= produced 'x' at index='4'
['x', 'x', 'x', 'x', ' ', 'x', 'x', 'x', 'x', 'x'] => consumed 'x' at index='5'
['x', 'x', 'x', 'x', ' ', ' ', 'x', 'x', 'x', 'x'] => consumed 'x' at index='6'
['x', 'x', 'x', 'x', 'x', ' ', 'x', 'x', 'x', 'x'] <= produced 'x' at index='5'
['x', 'x', 'x', 'x', 'x', ' ', ' ', 'x', 'x', 'x'] => consumed 'x' at index='7'
['x', 'x', 'x', 'x', 'x', 'x', ' ', 'x', 'x', 'x'] <= produced 'x' at index='6'
['x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x'] <= produced 'x' at index='7'
['x', 'x', 'x', 'x', 'x', 'x', 'x', ' ', 'x', 'x'] => consumed 'x' at index='8'
['x', 'x', 'x', 'x', 'x', 'x', 'x', ' ', ' ', 'x'] => consumed 'x' at index='9'
['x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', ' ', 'x'] <= produced 'x' at index='8'
['x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x'] <= produced 'x' at index='9'
['x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', ' '] => consumed 'x' at index='0'
[' ', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x', ' '] => consumed 'x' at index='1'
[' ', ' ', 'x', 'x', 'x', 'x', 'x', 'x', 'x', ' '] => consumed 'x' at index='2'
[' ', ' ', ' ', 'x', 'x', 'x', 'x', 'x', 'x', ' '] => consumed 'x' at index='3'
[' ', ' ', ' ', ' ', 'x', 'x', 'x', 'x', 'x', ' '] => consumed 'x' at index='4'
[' ', ' ', ' ', ' ', ' ', 'x', 'x', 'x', 'x', ' '] => consumed 'x' at index='5'
[' ', ' ', ' ', ' ', ' ', ' ', 'x', 'x', 'x', ' '] => consumed 'x' at index='6'
[' ', ' ', ' ', ' ', ' ', ' ', ' ', 'x', 'x', ' '] => consumed 'x' at index='7'
[' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', 'x', ' '] => consumed 'x' at index='8'

https://github.com/binarytrails/various/blob/master/python/boundedbuffer_semaphore.py

Upvotes: 0

PoByBolek
PoByBolek

Reputation: 3915

Looks good to me. The semaphores prevent concurrent producers and consumers from writing and reading too much and the locks prevent concurrent producers or consumers from modifying the end or start indices simultaneously.

The two semaphores are definitely necessary. You could remove one of the locks and use it in both get and put to protect both the start and the end index which wouldn't allow consumers and producers to access the queue simultaneously. (CPython's queue implementation does this.)


I would remove the size attribute in favor of len(self.buff) though and rename the start and end indices to read_index and write_index respectively (and the locks as well). Also, I think you could access the buffer without holding the locks (because lists themselves are thread-safe):

    def put(self, val):
        self.open.acquire()
        with self.write_lock:
            index = self.write_index
            self.write_index = (self.write_index + 1) % len(self.buff)
        self.buff[index] = val
        self.closed.release()

    def get(self):
        self.closed.acquire()
        with self.read_lock:
            index = self.read_index
            self.read_index = (self.read_index + 1) % len(self.buff)
        val = self.buff[index]
        self.open.release()
        return val

Here's a small test program I used to play around:

def producer(queue, start, end, step):
    for value in range(start, end, step):
        queue.put(value)
    print('Producer finished')


def consumer(queue, count, result, lock):
    local_result = []
    for _ in range(count):
        local_result.append(queue.get())
    with lock:
        result.update(local_result)
    print('Consumer finished')


def main():
    value_count = 500000
    producer_count = 50
    consumer_count = 50
    assert value_count % producer_count == 0
    assert value_count % consumer_count == 0

    queue = Queue(123)
    result = set()
    lock = Lock()
    producers = [Thread(target=producer, args=(queue, i, value_count, producer_count)) for i in range(producer_count)]
    consumers = [Thread(target=consumer, args=(queue, value_count // consumer_count, result, lock)) for _ in range(consumer_count)]

    for p in producers:
        p.start()
    for c in consumers:
        c.start()

    for p in producers:
        p.join()
    for c in consumers:
        c.join()

    if len(result) != value_count:
        raise ValueError('Result size is %d instead of %d' % (len(result), value_count))


if __name__ == '__main__':
    main()

Upvotes: 2

Related Questions