LoginSignup
0
2

More than 3 years have passed since last update.

OpenCV DNN backend Inference Engine (OpenVINO: Python, C++)

Last updated at Posted at 2021-02-09

Introduction

OpenCV DNN の OpenVINO backend について、Python と C++ を並べたサンプルが見当たらないので置いてみた。同一と言っても過言ではない。。。よくできている。

OpenVINO Pre-Trained Model

入力: [1x3x384x672] - [BxCxHxW](B=バッチ数, C=チャネル数, H=高さ, W=幅)
出力: [1, 1, N, 7] - [image_id, label, conf, x_min, y_min, x_max, y_max](N は検出候補数、座標は 0〜1 に正規化された値)

Python

facedetect.py
import argparse
import cv2 as cv

def facedetect(device, threshold):
    """Run the OpenVINO face-detection-adas-0001 model on webcam frames.

    Draws a green rectangle around each detection whose confidence
    exceeds ``threshold`` and shows the result until ESC is pressed.

    Args:
        device: Inference target, either ``'CPU'`` or ``'MYRIAD'``.
        threshold: Minimum confidence (0.0-1.0) for a box to be drawn.
    """

    # load model (OpenVINO IR: topology .xml + weights .bin)
    net = cv.dnn.readNet('face-detection-adas-0001.xml',
                         'face-detection-adas-0001.bin')

    # input size of Neural Network (width, height expected by the model)
    inputsize = (672, 384)

    # output names: ask the net for its unconnected output layers
    # (same as the C++ getUnconnectedOutLayersNames()).  The previous
    # cv.dnn_Net.getLayerNames(net) call listed EVERY layer, so
    # outNames[0] was the first layer, not the detection output.
    outNames = net.getUnconnectedOutLayersNames()

    # select backend
    net.setPreferableBackend(cv.dnn.DNN_BACKEND_INFERENCE_ENGINE)

    # select inference device
    if device == 'MYRIAD':
        net.setPreferableTarget(cv.dnn.DNN_TARGET_MYRIAD)
    elif device == 'CPU':
        net.setPreferableTarget(cv.dnn.DNN_TARGET_CPU)
    else:
        # unknown device is an error: report it and exit non-zero
        print("Unknown device: " + device)
        exit(1)

    # capture device
    cap = cv.VideoCapture(0)
    if not cap.isOpened():
        print("Failed to open capture device")
        exit(1)

    # set capture image size
    cap.set(cv.CAP_PROP_FRAME_WIDTH, 800)
    cap.set(cv.CAP_PROP_FRAME_HEIGHT, 600)

    try:
        # loop until ESC (27) is pressed
        while cv.waitKey(16) != 27:

            ret, frame = cap.read()
            if not ret:
                # camera unplugged / stream ended
                break

            # create a 4D blob from a frame (NCHW, 8-bit as the model expects)
            blob = cv.dnn.blobFromImage(frame, size=inputsize, ddepth=cv.CV_8U)

            # set the input to the network
            net.setInput(blob)

            # inference
            out = net.forward(outNames[0])

            # [1, 1, N, 7] -> [N, 7]; each row is
            # [image_id, label, conf, x_min, y_min, x_max, y_max]
            for detection in out.reshape(-1, 7):
                confidence = float(detection[2])

                if confidence > threshold:
                    # coordinates are normalized to 0-1: scale to pixels
                    xmin = int(detection[3] * frame.shape[1])
                    ymin = int(detection[4] * frame.shape[0])
                    xmax = int(detection[5] * frame.shape[1])
                    ymax = int(detection[6] * frame.shape[0])
                    cv.rectangle(frame, (xmin, ymin), (xmax, ymax),
                                 (0, 255, 0), 2)

            cv.imshow('facedetect', frame)
    finally:
        # release the camera and close the preview window
        cap.release()
        cv.destroyAllWindows()


def main():
    """Parse command-line options and start the face-detection loop."""
    arg_parser = argparse.ArgumentParser(
        description='Facedetect with OpenCV DNN API (OpenVINO)')
    arg_parser.add_argument('--device', '-d', default="CPU")
    arg_parser.add_argument('--threshold', '-t', type=float, default=0.9)
    opts = arg_parser.parse_args()

    facedetect(opts.device, opts.threshold)


if __name__ == '__main__':
    main()

C++

facedetect.cpp
#include <iostream>
#include <stdexcept>

#include <opencv2/opencv.hpp>
#include <gflags/gflags.h>

void facedetect(std::string device, double threshold)
{
  // load model
  cv::dnn::Net net = cv::dnn::readNet("face-detection-adas-0001.xml",
                                      "face-detection-adas-0001.bin");

  // input size of Neural Network
  cv::Size inputsize(672, 384);

  // output name
  std::vector<std::string> outNames = net.getUnconnectedOutLayersNames();

  // select backend
  net.setPreferableBackend(cv::dnn::DNN_BACKEND_INFERENCE_ENGINE);

  // select inference device
  if (device == "MYRIAD") {
    net.setPreferableTarget(cv::dnn::DNN_TARGET_MYRIAD);
  } else if (device == "CPU") {
    net.setPreferableTarget(cv::dnn::DNN_TARGET_CPU);
  } else {
    throw std::runtime_error("Unknown device: " + device);
  }

  // capture device
  cv::VideoCapture cap;
  cap.open(0);

  // set capture image size
  cap.set(cv::CAP_PROP_FRAME_WIDTH, 800);
  cap.set(cv::CAP_PROP_FRAME_HEIGHT, 600);

  cv::Mat frame;

  while (cv::waitKey(16) != 27) {

    // capture 1 frame
    cap >> frame;

    // create a 4D blob from a frame.
    cv::Mat blob;
    cv::dnn::blobFromImage(frame, blob, 1.0, inputsize, cv::Scalar(), false, false);

    // set the input to the network
    net.setInput(blob);

    // inference
    cv::Mat out = net.forward(outNames[0]);

    // [1, 1, N, 7] -> [N * 7]
    float* data = (float *)out.data; // .reshape(-1)

    for (int i = 0; i < out.total(); i += 7) {
      float confidence = data[i + 2];

      if (confidence > threshold) {
        int x_min = data[i + 3] * frame.cols;
        int y_min = data[i + 4] * frame.rows;
        int x_max = data[i + 5] * frame.cols;
        int y_max = data[i + 6] * frame.rows;
        cv::rectangle(frame, cv::Point(x_min, y_min), cv::Point(x_max, y_max), cv::Scalar(0, 255, 0), 2);
      }
    }
    cv::imshow("facedetect", frame);
  }
}

// Command-line flags (gflags): accessed as FLAGS_d / FLAGS_t in main().
DEFINE_string(d, "CPU", "device: CPU or MYRIAD");
DEFINE_double(t, 0.9, "threshold");

/// @brief Entry point: parse gflags options and run the detection loop.
///
/// Exit status is EXIT_SUCCESS on a clean run and EXIT_FAILURE on any
/// exception (allocation failure, unknown device, OpenCV error, ...).
int main(int argc, char *argv[])
{
  try {

    gflags::ParseCommandLineFlags(&argc, &argv, true);

    facedetect(FLAGS_d, FLAGS_t);

    return EXIT_SUCCESS;
  }

  // -*- error -*- //

  // catch by const reference (best practice; was a non-const reference)
  catch (const std::bad_alloc& e) {
    std::cerr << "BAD ALLOC Exception : " << e.what() << std::endl;
    return EXIT_FAILURE;
  }

  catch (const std::exception& e) {
    std::cerr << "Error: "  << e.what() << std::endl;
    return EXIT_FAILURE;
  }

  // last-resort handler so no exception escapes main
  catch (...) {
    std::cerr << "unknown exception" << std::endl;
    return EXIT_FAILURE;
  }
}

おんなじじゃん!

Reference

Note

非同期の forwardAsync() で複数出力対応してくんないかなあ。

0
2
0

Register as a new user and use Qiita more conveniently

  1. You get articles that match your needs
  2. You can efficiently read back useful information
  3. You can use dark theme
What you can do with signing up
0
2