Gaurav Sisodia
Gaurav Sisodia

Reputation: 29

How do I structure object-detection code using Python and the OpenCV DNN module professionally, with minimum CPU and memory usage?

I am trying to run multiple inferences of Tensorflow's SSD model from the model zoo on the CPU to detect and track 'cars' in the CCTV feed using the OpenCV DNN module with Python on Windows server 2019.

These are the hardware configurations: Processor- Intel(R) Xeon(R) CPU E5-2680 v4 @ 2.40GHz; RAM- 32 GB; Cores- 14;

The problem is that a single inference alone uses nearly 35% of the CPU. My goal is to run the model on at least 15 video feeds.

What could be the best code structure for this? Currently, I am using 'flask' for initializing the thread of analytics and also for the output feed.

How many feeds can I analyze, if not 15, with the best approach using Python?

My current code is:

import cv2
import numpy as np
import time

from threading import Thread
from queue import Queue
from flask import Flask, Response


app = Flask(__name__)

# Bounded buffer that hands annotated frames from the analytics thread to the
# /feed generator.
q = Queue(maxsize=5)


# cap = cv2.VideoCapture(fr'rtsp://{uname}:{password}@{ip_add}:554/Streaming/channels/101')
cap = cv2.VideoCapture(r'videoTest.mp4')

# One frame is read up front only to learn the channel layout for the ROI mask.
ret_, frame_ = cap.read()
if not ret_:
    # Without this check the failure surfaces later as an opaque cv2.resize
    # error on a None frame.
    raise RuntimeError("Could not read a frame from 'videoTest.mp4'")

# Every analysed frame is resized to this working resolution.
width = 1200
height = 650

frame_ = cv2.resize(frame_, (width, height))

# Black canvas with the same shape as a resized frame; the ROI polygon is
# painted onto it to build the detection mask.
mask_uni = np.zeros(frame_.shape, np.uint8)

# Region-of-interest polygon, in resized-frame pixel coordinates.
pts = [(254, 9), (950, 6), (1196, 517), (1197, 640), (10, 642)]
points = np.array([pts])
# roi = cv2.boundingRect(points)

# Same polygon reshaped to (-1, 1, 2) int32 for cv2.fillPoly.
points_show = np.array(pts, np.int32)
points_show = points_show.reshape((-1, 1, 2))

model = cv2.dnn.readNet(
        model=r'ssd_mobilenet_v2_coco_2018_03_29\frozen_inference_graph.pb',
        config=r'ssd_mobilenet_v2_coco_2018_03_29\graph.pbtxt',
        framework='TensorFlow'
        )

font = cv2.FONT_HERSHEY_SIMPLEX

# load the COCO class names
with open('object_detection_classes_coco.txt', 'r') as f:
    class_names = f.read().split('\n')

# get a different color array for each of the classes
# COLORS = np.random.uniform(0, 255, size=(len(class_names), 3))
COLORS = [[255, 0, 0],
          [255, 0, 0],
          [0, 255, 0],
          [0, 255, 255],
          [0, 0, 255],
          [255, 0, 255]]


def analytics(que, frame_stride=6, min_confidence=.3):
    """Detect cars in the video and push annotated frames onto ``que``.

    Loops until an unexpected exception occurs. Only every
    ``frame_stride``-th frame is retrieved and analysed: it is resized to
    ``width`` x ``height``, masked to the ROI polygon, run through the SSD
    model, annotated with a box and label per detected car plus an FPS
    read-out, and then queued.

    Args:
        que: Queue that receives the annotated frames. When it becomes full
            its whole backlog is discarded.
        frame_stride: Analyse one frame out of this many (values below 1 are
            treated as 1). The default keeps the original one-in-six rate.
        min_confidence: Detections at or below this score are ignored.
    """
    global cap
    frame_stride = max(1, int(frame_stride))
    frames_seen = 0

    # The ROI polygon never changes, so rasterise it once instead of on every
    # frame. fillPoly paints into mask_uni in place and returns it.
    roi_mask = cv2.fillPoly(mask_uni, [points_show], (255, 255, 255))

    try:
        while True:
            # grab() advances the stream without handing the image over, so
            # skipped frames avoid the retrieve step that read() would run.
            if not cap.grab():
                # End of file or broken stream: free the old handle before
                # reopening so capture objects are not leaked.
                cap.release()
                cap = cv2.VideoCapture(r'videoTest.mp4')
                if not cap.isOpened():
                    # Avoid a busy loop while the source is unavailable.
                    time.sleep(1)
                continue

            frames_seen = (frames_seen + 1) % frame_stride
            if frames_seen:
                continue

            ret, frame = cap.retrieve()
            if not ret:
                continue

            start = time.perf_counter()

            frame = cv2.resize(frame, (width, height))
            only_roi = cv2.bitwise_and(roi_mask, frame)

            # create blob from image
            blob = cv2.dnn.blobFromImage(image=only_roi, size=(300, 300), mean=(104, 117, 123), swapRB=True)

            model.setInput(blob)
            output = model.forward()

            for detection in output[0, 0, :, :]:
                # skip detections that are not confident enough
                if not detection[2] > min_confidence:
                    continue

                class_id = int(detection[1])
                name_index = class_id - 1
                # An id outside the label file cannot be a car; skipping it
                # keeps an IndexError from terminating the whole thread.
                if not 0 <= name_index < len(class_names):
                    continue
                class_name = class_names[name_index]
                if class_name != 'car':
                    continue

                # Wrap around so an id beyond the palette cannot raise.
                color = COLORS[class_id % len(COLORS)]

                # detection[3:7] are relative corner coordinates; scale them
                # to the resized frame.
                left = int(detection[3] * width)
                top = int(detection[4] * height)
                right = int(detection[5] * width)
                bottom = int(detection[6] * height)

                cv2.rectangle(frame, (left, top), (right, bottom), color, thickness=2)
                cv2.putText(frame, class_name, (left, top - 5), font, 1, color, 2)

            elapsed = time.perf_counter() - start
            fps = 1 / elapsed if elapsed > 0 else 0

            cv2.putText(frame, f"{fps:.2f} FPS", (20, 30), font, 1, (0, 255, 0), 2)

            que.put(frame)
            if que.full():
                # The consumer is lagging: drop the backlog rather than let
                # stale frames pile up.
                with que.mutex:
                    que.queue.clear()

    except Exception as e:
        # Top-level boundary of the worker thread: report and fall through to
        # the cleanup below.
        print(e)
    finally:
        cap.release()
        cv2.destroyAllWindows()


def show_frames():
    """Yield queued frames as the parts of a multipart JPEG stream.

    Each item is one ``--frame`` part carrying a JPEG-encoded frame taken
    from the module-level queue ``q``. The generator never finishes on its
    own.

    Yields:
        bytes: boundary line, ``Content-Type`` header and JPEG payload.
    """
    while True:
        # Queue.get() already blocks until a frame arrives, so no emptiness
        # check is needed. The former recursive show_frames() call only built
        # and discarded a new generator object.
        frame1 = q.get()

        ok, buffer = cv2.imencode('.jpeg', frame1)
        if not ok:
            # Skip a frame that could not be encoded instead of sending a
            # broken part.
            continue

        yield (b'--frame\r\n'
               b'Content-Type: image/jpeg\r\n\r\n' + buffer.tobytes() + b'\r\n')


@app.route('/thread')
def thread_value():
    """Start a background analytics worker feeding the shared queue ``q``.

    Returns:
        str: A fixed confirmation message.
    """
    # NOTE(review): every request to this route starts another worker on the
    # same queue - confirm that is intended.
    worker = Thread(target=analytics, args=[q], daemon=True)
    worker.start()
    return 'thread started'


@app.route('/feed')
def feed():
    """Serve the annotated frames as a ``multipart/x-mixed-replace`` response."""
    frame_stream = show_frames()
    stream_mimetype = 'multipart/x-mixed-replace; boundary=frame'
    return Response(frame_stream, mimetype=stream_mimetype)


# Script entry point: serve the app on 127.0.0.1:7009 with Flask's debug mode
# switched on.
# NOTE(review): debug mode normally enables Flask's reloader, which would run
# this module (and load the model) in a second process - confirm, and consider
# disabling it when measuring CPU and memory usage.
if __name__ == '__main__':
    app.run(host='127.0.0.1', port=7009, debug=True)

All possible approaches are fine with me.

Thanks in advance.

Upvotes: 0

Views: 283

Answers (1)

Jirayu Kaewprateep
Jirayu Kaewprateep

Reputation: 760

We can do this on a Windows desktop.

It does not need that much computing power, but you can still scale the problem down; the CPU is only needed once, when loading the data, feeding in the images, and starting the training.

[ Sample ]:

import cv2
import matplotlib.pyplot as plt
import matplotlib.animation as animation

import tensorflow as tf

import os
from os.path import exists

import numpy as np

"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
[PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]
None
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
# At least one GPU must be present; memory growth is switched on for the first.
physical_devices = tf.config.experimental.list_physical_devices('GPU')
assert len(physical_devices) > 0, "Not enough GPU hardware devices available"
config = tf.config.experimental.set_memory_growth(physical_devices[0], True)
print(physical_devices)
print(config)

# ---------------------------------------------------------
# Variables
# ---------------------------------------------------------
# Rolling window of the four most recent frames, pre-filled with black frames.
stacks_frames = [ tf.zeros( [ 29, 39, 3 ] ).numpy() for _ in range(4) ]

# Figure and image artist that the animation callback updates.
fig = plt.figure()
image = plt.imread( "F:\\datasets\\downloads\\cats_name\\train\\Symbols\\01.jpg" )
im = plt.imshow( image )

# Label text indexed by the predicted class id.
list_actual_label = [ 'Shoes', 'Duck' ]

video_capture_0 = cv2.VideoCapture(0)

# Checkpoint and log locations are named after this script file.
_script_stem = os.path.basename(__file__).split('.')[0]
checkpoint_path = "F:\\models\\checkpoint\\" + _script_stem + "\\TF_DataSets_01.h5"
checkpoint_dir = os.path.dirname(checkpoint_path)
loggings = "F:\\models\\checkpoint\\" + _script_stem + "\\loggings.log"

if not exists(checkpoint_dir):
    os.mkdir(checkpoint_dir)
    print("Create directory: " + checkpoint_dir)

"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
Functions
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
def f1( picture ):
    """Draw a box on ``picture`` derived from a four-frame difference mask.

    The frame is pushed onto the rolling ``stacks_frames`` window, the oldest
    and newest frames of the window are combined into a binary mask, the
    16 overlapping 15x16 cells of that mask are scored by their non-zero
    count, and the offsets of the best cell define the box that is drawn.

    Args:
        picture: Frame convertible to a (29, 39, 3) array.

    Returns:
        The annotated frame converted with ``array_to_img``.
    """
    global stacks_frames

    # Keep only the four most recent frames.
    stacks_frames.append( tf.constant( picture ).numpy() )
    stacks_frames = stacks_frames[-4:]
    oldest, newest = stacks_frames[0], stacks_frames[3]

    # Per channel: the oldest frame plus the inverted newest frame.
    channel_diffs = [
        oldest[:, :, c:c + 1] + ( 255 - newest[:, :, c:c + 1] )
        for c in range(3)
    ]
    merged = tf.keras.layers.Concatenate(axis=2)(channel_diffs[:2])
    merged = tf.keras.layers.Concatenate(axis=2)([merged, channel_diffs[2]])

    # Threshold the first channel only: 255 where the value is >= 100, else 0.
    threshold = tf.ones( [ 29, 39, 1 ] ) * 100
    mask = tf.where(
        tf.greater_equal( merged[:, :, 0:1], threshold ), [255], [0], name=None
    ).numpy()

    # Score a 4x4 grid of 15x16 crops whose offsets advance in steps of two.
    cell_counts = [ ]
    cell_boxes = [ ]
    for cell_index in range(16):
        grid_i, grid_j = divmod(cell_index, 4)
        first_offset, second_offset = 2 * grid_i, 2 * grid_j
        cell = tf.image.crop_to_bounding_box(mask, first_offset, second_offset, 15, 16)
        cell_counts.append([ tf.math.count_nonzero(cell) ])
        cell_boxes.append([ first_offset, second_offset, 15, 16 ])

    best_cell = cell_boxes[ int(tf.math.argmax( cell_counts ).numpy()) ]
    # NOTE(review): these are the first and second crop offsets; whether the
    # names 'width'/'height' match the axis order of crop_to_bounding_box
    # should be confirmed.
    width = best_cell[0]
    height = best_cell[1]

    print( 'width: ' + str( width ) )
    print( 'height: ' + str( height ) )

    # Draw one red box, inset from each edge by 1/16 per offset unit.
    batch = tf.constant( picture, shape=( 1, 29, 39, 3 ) ).numpy()
    inset_a = 0.0625 * width
    inset_b = 0.0625 * height
    boxes = np.array([inset_a, inset_b, 1 - inset_a, 1 - inset_b]).reshape([1, 1, 4])
    colors = np.array([[1.0, 0.0, 0.0]])
    annotated = tf.image.draw_bounding_boxes(batch, boxes, colors).numpy()
    annotated = tf.constant( annotated, shape=( 29, 39, 3 ) ).numpy()

    return tf.keras.preprocessing.image.array_to_img(
        annotated,
        data_format=None,
        scale=True
    )

def animate( i ):
    """Animation callback: grab a webcam frame, annotate, display and classify it.

    Args:
        i: Frame counter supplied by the animation; not used.

    Returns:
        A one-element tuple holding the image artist ``im``.
    """
    ret0, frame0 = video_capture_0.read()
    if not ret0:
        # No frame available: leave the displayed image untouched.
        return im,

    frame0 = tf.image.resize(frame0, [29, 39]).numpy()

    # Rebuild the frame with its channels in reversed order (2, 1, 0).
    planes = [
        tf.keras.preprocessing.image.img_to_array(frame0[:, :, c:c + 1])
        for c in (2, 1, 0)
    ]
    stacked = tf.keras.layers.Concatenate(axis=2)(planes[:2])
    stacked = tf.keras.layers.Concatenate(axis=2)([stacked, planes[2]])
    annotated = f1( stacked )

    im.set_array( annotated )
    result = predict_action( annotated )
    print( list_actual_label[result] )

    return im,

def predict_action ( image ) :
    """Return the index of the highest-scoring class for a single frame.

    Args:
        image: Frame convertible to a (1, 29, 39, 3) float32 tensor.

    Returns:
        The argmax over the model's output for that frame.
    """
    batch = tf.constant(image, shape=(1, 29, 39, 3) , dtype=tf.float32)
    scores = model.predict(batch)
    return tf.math.argmax(scores[0])

"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
: Model Initialize
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
# Convolution and pooling front end, flattened into a 234-step sequence of
# 32 features, followed by two stacked bidirectional LSTMs.
_sequence_layers = [
    tf.keras.layers.InputLayer(input_shape=( 29, 39, 3 )),
    tf.keras.layers.Conv2D(32, (3, 3), activation='relu'),
    tf.keras.layers.MaxPooling2D((2, 2)),
    tf.keras.layers.Reshape((234, 32)),
    tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(32, return_sequences=True, return_state=False)),
    tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(32)),
]
model = tf.keras.models.Sequential(_sequence_layers)

# Classification head ending in one output per label.
model.add(tf.keras.layers.Flatten())
for _units in (64, 2):
    model.add(tf.keras.layers.Dense(_units))
model.summary()

"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
DataSet
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
list_picture = []
list_label = []
path_1 = "F:\\datasets\\downloads\\Duck_Shoe_2\\Duck\\"
path_2 = "F:\\datasets\\downloads\\Duck_Shoe_2\\Shoes\\"


def _augment_step(image, step):
    """Return ``image`` after the augmentation selected by ``step``.

    The first matching rule wins: multiples of 6 get a random zoom, of 5 a
    hue shift plus vertical flip, of 4 a saturation change plus horizontal
    flip, of 3 a vertical flip plus saturation change, of 2 a horizontal
    flip plus hue shift; any other step leaves the image unchanged.
    """
    if step % 6 == 0:
        return tf.keras.layers.RandomZoom(.5, .2)(image).numpy()
    if step % 5 == 0:
        image = tf.image.random_hue(image, 0.2).numpy()
        return tf.image.random_flip_up_down(image, 1).numpy()
    if step % 4 == 0:
        image = tf.image.random_saturation(image, 5, 10, 1).numpy()
        return tf.image.random_flip_left_right(image, 1).numpy()
    if step % 3 == 0:
        image = tf.image.random_flip_up_down(image, 1).numpy()
        return tf.image.random_saturation(image, 5, 10, 1).numpy()
    if step % 2 == 0:
        image = tf.image.random_flip_left_right(image, 1).numpy()
        return tf.image.random_hue(image, 0.2).numpy()
    return image


def _load_augmented(directory, label, copies=40):
    """Append ``copies`` augmented variants of every image in ``directory``.

    Each file is resized to 29x39 and run through ``_augment_step`` once per
    copy; every result goes to ``list_picture`` and ``label`` to
    ``list_label``.

    Args:
        directory: Folder path ending in a path separator.
        label: Class id stored for every image from this folder.
        copies: Number of variants generated per source image.
    """
    for file_name in os.listdir( directory ):
        image = plt.imread( directory + file_name )
        image = tf.image.resize(image, [29, 39]).numpy()

        for step in range( copies ):
            # NOTE(review): as in the original loops, each step augments the
            # result of the previous step rather than the pristine image -
            # confirm this compounding is intended.
            image = _augment_step(image, step)
            list_picture.append( image )
            list_label.append( label )


# Labels index into list_actual_label: 1 for the Duck folder, 0 for Shoes.
_load_augmented(path_1, 1)
_load_augmented(path_2, 0)

dataset = tf.data.Dataset.from_tensor_slices((tf.constant([list_picture], shape=(len(list_picture), 1, 29, 39, 3), dtype=tf.float32),tf.constant([list_label], shape=(len(list_picture), 1, 1, 1), dtype=tf.int64)))

"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
: Optimizer
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
optimizer = tf.keras.optimizers.Nadam(
    learning_rate=0.0001,
    beta_1=0.9,
    beta_2=0.999,
    epsilon=1e-07,
    name='Nadam',
)

# ---------------------------------------------------------
# : Loss Fn
# ---------------------------------------------------------
# NOTE(review): a squared-log-error loss is paired with an accuracy metric on
# a two-class output - confirm this is the intended objective.
lossfn = tf.keras.losses.MeanSquaredLogarithmicError(
    reduction=tf.keras.losses.Reduction.AUTO,
    name='mean_squared_logarithmic_error',
)

# ---------------------------------------------------------
# : Model Summary
# ---------------------------------------------------------
model.compile(optimizer=optimizer, loss=lossfn, metrics=['accuracy'])

# ---------------------------------------------------------
# : FileWriter
# ---------------------------------------------------------
# Resume from saved weights when a checkpoint file already exists, then wait
# for the user to confirm.
if exists(checkpoint_path):
    model.load_weights(checkpoint_path)
    print("model load: " + checkpoint_path)
    input("Press Any Key!")

# ---------------------------------------------------------
# : Training
# ---------------------------------------------------------
# The same dataset is used for both training and validation.
history = model.fit(dataset, epochs=2, validation_data=dataset)

# Run the live preview once. The former `while True` wrapper had no exit, so
# the cleanup below could never run and the animation was rebuilt endlessly
# after the window was closed.
# Keep a reference to the animation so it is not garbage-collected while shown.
ani = animation.FuncAnimation(fig, animate, interval=50, blit=True)
try:
    plt.show()
finally:
    # When everything is done, release the capture
    video_capture_0.release()
    cv2.destroyAllWindows()

input('...')

[ Output ]:

Sample

Upvotes: 1

Related Questions