60. Invoking Actions After Detection

Detection alone isn't very useful, so I created a function that invokes some kind of action when a certain condition is met.
Thanks to Edje Electronics's video, I was able to understand how to structure actions after detection. The steps I took were the following:

1. Retrain SSD Inception (TensorFlow 1.15) to detect my smartphone

2. Export the model as a frozen_inference_graph.pb file (a sketch of the export command follows this list)

3. Load the model file in the Python script that performs the detection

4. For each frame, run inference, extract the box coordinates from the results, and decide whether the smartphone is inside the counting box
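For step 2, the TF1 Object Detection API ships an export script. The command looks roughly like this; the pipeline config path and the checkpoint step (XXXX) are placeholders for whatever your training run produced:

python models/research/object_detection/export_inference_graph.py \
    --input_type image_tensor \
    --pipeline_config_path training/ssd_inception_v2_coco.config \
    --trained_checkpoint_prefix training/model.ckpt-XXXX \
    --output_directory inference_graph

This writes frozen_inference_graph.pb into the output directory; the script below expects it in the working directory under the name ssd_inception_retrained_frozen_inference_graph.pb.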

OK, so what should I create next that would be ACTUALLY useful? For now, here's the full detection script:

# Import packages
import os
import sys

import cv2
import numpy as np
import tensorflow as tf

# Append the parent directory to the import path so the TensorFlow models
# repo can be found before importing from it
sys.path.append('..')

from models.research.object_detection.utils import label_map_util
from models.research.object_detection.utils import visualization_utils as vis_util

# Set up camera constants
IM_WIDTH = 1280
IM_HEIGHT = 720

# Select camera type
camera_type = 'usb'

#### Initialize TensorFlow model ####

# Grab path to current working directory
CWD_PATH = os.getcwd()

# Path to frozen detection graph .pb file, which contains the model that is used
# for object detection.
PATH_TO_CKPT = os.path.join(
    CWD_PATH, 'ssd_inception_retrained_frozen_inference_graph.pb')

# Path to label map file
PATH_TO_LABELS = os.path.join(CWD_PATH, 'label_map.pbtxt')
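
# For reference, a single-class label_map.pbtxt looks like the snippet
# below (the class name 'smartphone' is an assumption; use whatever name
# the model was retrained with):
#
# item {
#   id: 1
#   name: 'smartphone'
# }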

# Number of classes the object detector can identify
NUM_CLASSES = 1

# Load the label map.
label_map = label_map_util.load_labelmap(PATH_TO_LABELS)
categories = label_map_util.convert_label_map_to_categories(
    label_map, max_num_classes=NUM_CLASSES, use_display_name=True)
category_index = label_map_util.create_category_index(categories)

# Load the TensorFlow model into memory.
detection_graph = tf.Graph()
with detection_graph.as_default():
    od_graph_def = tf.GraphDef()
    with tf.gfile.GFile(PATH_TO_CKPT, 'rb') as fid:
        serialized_graph = fid.read()
        od_graph_def.ParseFromString(serialized_graph)
        tf.import_graph_def(od_graph_def, name='')
    sess = tf.Session(graph=detection_graph)


# Define input and output tensors (i.e. data) for the object detection classifier

# Input tensor is the image
image_tensor = detection_graph.get_tensor_by_name('image_tensor:0')

# Output tensors are the detection boxes, scores, and classes
# Each box represents a part of the image where a particular object was detected
detection_boxes = detection_graph.get_tensor_by_name('detection_boxes:0')

# Each score represents level of confidence for each of the objects.
# The score is shown on the result image, together with the class label.
detection_scores = detection_graph.get_tensor_by_name('detection_scores:0')
detection_classes = detection_graph.get_tensor_by_name('detection_classes:0')

# Number of objects detected
num_detections = detection_graph.get_tensor_by_name('num_detections:0')

#### Initialize other parameters ####

# Initialize frame rate calculation
frame_rate_calc = 1
freq = cv2.getTickFrequency()
font = cv2.FONT_HERSHEY_SIMPLEX

# Define counting box coordinates (top left and bottom right)
TL_outside = (int(IM_WIDTH*0.46), int(IM_HEIGHT*0.25))
BR_outside = (int(IM_WIDTH*0.8), int(IM_HEIGHT*0.85))

# Initialize control variables used for Smartphone detector
detected_smartphone = False
smartphone_counter = 0

#### Smartphone detection function ####


def smartphone_detector(frame):

    # Use globals for the control variables so they retain their value after function exits
    global detected_smartphone
    global smartphone_counter

    # Add a batch dimension so the frame has shape [1, height, width, 3],
    # which is what the model's input tensor expects
    frame_expanded = np.expand_dims(frame, axis=0)

    # Perform the actual detection by running the model with the image as input
    (boxes, scores, classes, num) = sess.run(
        [detection_boxes, detection_scores, detection_classes, num_detections],
        feed_dict={image_tensor: frame_expanded})

    # Draw the results of the detection (aka 'visualize the results')
    vis_util.visualize_boxes_and_labels_on_image_array(
        frame,
        np.squeeze(boxes),
        np.squeeze(classes).astype(np.int32),
        np.squeeze(scores),
        category_index,
        use_normalized_coordinates=True,
        line_thickness=2,
        min_score_thresh=0.80)

    # Draw the counting box that marks where the smartphone should be placed.
    cv2.rectangle(frame, TL_outside, BR_outside, (182, 71, 43), 2)
    cv2.putText(frame, "Put Smartphone Here",
                (TL_outside[0]+10, TL_outside[1]-10), font, 1, (182, 71, 43), 1, cv2.LINE_AA)

    # Check the class of the top detection (classes[0][0]). If it is the
    # smartphone class (1) and its score is high enough, compute the center
    # of its bounding box. boxes[0][0] holds normalized coordinates as
    # (ymin, xmin, ymax, xmax).
    score_result = np.squeeze(scores)[0]
    score_thresh = 0.7  # counting threshold (the visualization above uses 0.80)
    if (int(classes[0][0]) == 1) and (score_result > score_thresh):
        x = int(((boxes[0][0][1]+boxes[0][0][3])/2)*IM_WIDTH)
        y = int(((boxes[0][0][0]+boxes[0][0][2])/2)*IM_HEIGHT)

        # If object is in counting box, increment smartphone_counter variable
        if ((x > TL_outside[0]) and (x < BR_outside[0]) and (y > TL_outside[1]) and (y < BR_outside[1])):
            smartphone_counter += 1

        else:
            smartphone_counter = 0

    # The counter increments once per frame, so it maps frames to time at
    # roughly 6 FPS on this setup (30 frames is about 5 seconds).

    # Display a comment while the smartphone is inside the box
    if 0 < smartphone_counter <= 30:
        detected_smartphone = True
        cv2.putText(frame, 'Found Your Smartphone!', (int(IM_WIDTH*.45),
                    int(IM_HEIGHT*.1)), font, 1, (63, 172, 41), 2, cv2.LINE_AA)

    # Display a comment when the smartphone has been inside the box for more than 5 seconds
    if 30 < smartphone_counter <= 60:
        detected_smartphone = True
        cv2.putText(frame, 'Smartphone Detected For More than 5 Seconds', (int(IM_WIDTH*.3),
                    int(IM_HEIGHT*.1)), font, 1, (63, 172, 41), 2, cv2.LINE_AA)

    # Display a comment when the smartphone has been inside the box for more than 10 seconds
    if smartphone_counter > 60:
        detected_smartphone = True
        cv2.putText(frame, 'Smartphone Detected For More than 10 Seconds', (int(IM_WIDTH*.3),
                    int(IM_HEIGHT*.1)), font, 1, (63, 172, 41), 2, cv2.LINE_AA)

    # Draw counter info
    cv2.putText(frame, 'Detection counter: ' + str(smartphone_counter),
                (10, 100), font, 0.5, (182, 71, 43), 1, cv2.LINE_AA)

    return frame

#### Initialize camera and perform object detection ####


if camera_type == 'usb':
    # Initialize USB webcam feed
    camera = cv2.VideoCapture(0)
    ret = camera.set(cv2.CAP_PROP_FRAME_WIDTH, IM_WIDTH)
    ret = camera.set(cv2.CAP_PROP_FRAME_HEIGHT, IM_HEIGHT)

    # Continuously capture frames and perform object detection on them
    while True:

        t1 = cv2.getTickCount()

        # Acquire a frame from the camera; the batch dimension is added
        # inside smartphone_detector
        ret, frame = camera.read()
        if not ret:
            break

        # Pass frame into Smartphone detection function
        frame = smartphone_detector(frame)

        # Draw FPS
        cv2.putText(frame, "FPS: {0:.2f}".format(
            frame_rate_calc), (30, 50), font, 1, (182, 71, 43), 2, cv2.LINE_AA)

        # All the results have been drawn on the frame, so it's time to display it.
        cv2.imshow('Object detector', frame)

        # FPS calculation
        t2 = cv2.getTickCount()
        time1 = (t2-t1)/freq
        frame_rate_calc = 1/time1

        # Press 'q' to quit
        if cv2.waitKey(1) == ord('q'):
            break

    camera.release()

cv2.destroyAllWindows()
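
The script above only draws text on the frame, but the point of this step is to eventually invoke a real action. Here's a minimal sketch of how an action hook could slot in, using the smartphone_counter the script already maintains; trigger_action() is a hypothetical placeholder, not something from the tutorial:

# Sketch of an action hook. trigger_action() is a hypothetical placeholder;
# swap in whatever side effect you actually want (play a sound, send a
# notification, log to a file, ...).

action_fired = False

def trigger_action():
    # Placeholder action
    print('Smartphone has been in the box for more than 10 seconds!')

def maybe_invoke_action():
    global action_fired
    # Fire the action exactly once when the counter crosses the
    # 10-second mark, and re-arm once the smartphone leaves the box
    if smartphone_counter > 60 and not action_fired:
        trigger_action()
        action_fired = True
    elif smartphone_counter == 0:
        action_fired = False

Calling maybe_invoke_action() right after smartphone_detector(frame) in the main loop keeps the action logic separate from the drawing code.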