More than 3 years have passed since last update.

XavierNX上でyolov3, yolov3-spp, yolov4を比較

Last updated at 2020-05-30 / Posted at 2020-05-29

はじめに

darknetでの実装とOpenCV DNNでの実装の比較は、
以前の記事「Yolov3をDarknetとOpenCVのDNNで実行し速度比較」で行いました。
ここでは、YOLOv3、YOLOv4、YOLOv3-SPPの速度と認識率（正確には鳥の検出数）を比較してみました。

条件

測定環境

XavierNXでnvpmodel -m 2で実施
DarknetはDocker上で実施（記事「Jetson Xavier NX/JetsonNANOでDNN_BACKEND_CUDAが使えるOpenCV4.3をDockerでビルド+Darknetをビルド済みなXavierNX/JetsonNANOのDockerイメージを作成」で作ったイメージのコンテナ上で実行）
Youtubeの野鳥のビデオを1/30にフレームを間引いたものを利用（詳細は「Yolov3をDarknetとOpenCVのDNNで実行し速度比較」を参照）
見つけた鳥の数、平均フレームレートを比較
yolov4 とyolov3-spp　はデフォルトの設定だとネットワークサイズが608ｘ608になっているので、条件をそろえるためにyolov3と同じ416 x 416に変更して測定

手順

docker 上でdv.py(後述)を利用してフレームレートと鳥の認識数を比較
dv.py内のMODELとCFGをそれぞれに合わせて変更して測定
*.cfgファイルのネットワークサイズを416ｘ416にして測定

結果

MODEL	FPS	Total no.of birds
YOLOv3	10.55	1260
YOLOv3-SPP	10.11	1414
YOLOv4	8.45	1634
フレームレートが「Yolov3をDarknetとOpenCVのDNNで実行し速度比較」の結果に比べ少し上がったのは、標準出力をteeにパイプしたことによる表示負荷の減少のためだと思います。

全フレームは見られないので、カウンタに差が出たフレームのうち先頭の何フレームかをチェックしたところ、
YOLOv3-SPPとYOLOv4は最初のルリビタキが首をひねっても鳥として認識しているのに対し、YOLOv3は鳥と識別できていませんでした。

5/30アップデート

以下のビデオでやってみました。（クリエイティブコモンズライセンスのビデオです）
峠の鳥たち信越の峠 9月中旬野鳥4K 空屋根FILMS#1098
以下は違いが出たフレームの抜粋です。（パフォーマンス試験に使ったビデオとは別のものです。元のビデオはライセンスの関係で中身をここに載せられませんでした。）
圧縮がかかって字が見にくいですが、左からyolov3、yolov3-spp、yolov4です。
アノテーションは鳥以外もでてるので、それで違いがでてることもあります。

dv.py

from ctypes import *
import math
import random
import os
import cv2
import numpy as np
import time
import darknet

def convertBack(x, y, w, h):
    """Convert a centre-based box into integer corner coordinates.

    Args:
        x, y: Centre of the box.
        w, h: Full width and height of the box.

    Returns:
        ``(xmin, ymin, xmax, ymax)``, each passed through ``round`` and
        ``int``.
    """
    half_w = w / 2
    half_h = h / 2
    left, right = int(round(x - half_w)), int(round(x + half_w))
    top, bottom = int(round(y - half_h)), int(round(y + half_h))
    return left, top, right, bottom


def cvDrawBoxes(detections, img):
    """Draw every detection on ``img`` and count those labelled ``b'bird'``.

    Each detection is indexed as ``[0]`` label (bytes, it is ``.decode()``d
    for the caption), ``[1]`` confidence and ``[2]`` a centre-based
    ``(x, y, w, h)`` box.

    Args:
        detections: Iterable of detections as described above.
        img: Image handed to ``cv2.rectangle`` / ``cv2.putText``.

    Returns:
        ``(img, found)``: the same ``img`` object that was passed in and the
        number of detections whose label equals ``b'bird'``.
    """
    bird_count = 0
    for det in detections:
        box = det[2]
        cx, cy, box_w, box_h = box[0], box[1], box[2], box[3]
        left, top, right, bottom = convertBack(
            float(cx), float(cy), float(box_w), float(box_h))
        top_left = (left, top)
        bottom_right = (right, bottom)
        cv2.rectangle(img, top_left, bottom_right, (0, 255, 0), 1)

        # Caption is "<label> [<confidence in percent, 2 decimals>]",
        # placed 5 px above the top-left corner of the box.
        caption = det[0].decode() + " [" + str(round(det[1] * 100, 2)) + "]"
        cv2.putText(img, caption, (top_left[0], top_left[1] - 5),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.5, [0, 255, 0], 2)

        if det[0] == b'bird':
            bird_count += 1

    return img, bird_count


# Module-level handles that YOLO() fills in on first use; each one is only
# assigned there while it is still None.
netMain = None  # network returned by darknet.load_net_custom
metaMain = None  # metadata returned by darknet.load_meta
altNames = None  # class names read from the file named in the .data file



def _load_alt_names(metaPath):
    """Return the class names referenced by the ``names = ...`` entry of
    the darknet data file ``metaPath``, or ``None`` when unavailable.

    Loading is best-effort: a missing ``names`` entry, a missing names file
    or an I/O / decoding error yields ``None`` (errors are reported on
    stdout instead of being silently discarded).
    """
    import re  # local import: the script header does not import re

    try:
        with open(metaPath) as metaFH:
            metaContents = metaFH.read()
        match = re.search("names *= *(.*)$", metaContents,
                          re.IGNORECASE | re.MULTILINE)
        if match is None:
            return None
        namesPath = match.group(1)
        if not os.path.exists(namesPath):
            return None
        with open(namesPath) as namesFH:
            return [name.strip()
                    for name in namesFH.read().strip().split("\n")]
    except (OSError, UnicodeDecodeError) as err:
        print("Warning: could not load class names:", err)
        return None


def YOLO():
    """Run the detector over ``output.avi`` and print running statistics.

    The network, metadata and class names are loaded into the module-level
    ``netMain`` / ``metaMain`` / ``altNames`` only while those are ``None``.
    For every processed frame one line is printed with the average FPS so
    far, the frame counter, the cumulative number of ``bird`` detections and
    the progress percentage.

    To benchmark another model, change ``configPath`` / ``weightPath``.

    Raises:
        ValueError: If the config, weight or data file is missing, or if
            the input video cannot be opened.
    """
    global metaMain, netMain, altNames
    configPath = "./cfg/yolov3.cfg"
    weightPath = "./yolov3.weights"
    metaPath = "./cfg/coco.data"
    videoPath = "output.avi"

    for label, path in (("config", configPath),
                        ("weight", weightPath),
                        ("data file", metaPath)):
        if not os.path.exists(path):
            raise ValueError("Invalid " + label + " path `" +
                             os.path.abspath(path) + "`")

    if netMain is None:
        netMain = darknet.load_net_custom(configPath.encode(
            "ascii"), weightPath.encode("ascii"), 0, 1)  # batch size = 1
    if metaMain is None:
        metaMain = darknet.load_meta(metaPath.encode("ascii"))
    if altNames is None:
        altNames = _load_alt_names(metaPath)

    cap = cv2.VideoCapture(videoPath)
    try:
        # Fail loudly instead of "benchmarking" zero frames.
        if not cap.isOpened():
            raise ValueError("Invalid video path `" +
                             os.path.abspath(videoPath) + "`")

        maxFrame = cap.get(cv2.CAP_PROP_FRAME_COUNT)
        print("maxframe=", maxFrame)
        print("Starting the YOLO loop...")

        # The network input size does not change between frames, so query
        # it once and reuse one darknet image buffer for every detection.
        netWidth = darknet.network_width(netMain)
        netHeight = darknet.network_height(netMain)
        darknet_image = darknet.make_image(netWidth, netHeight, 3)

        fpsgt = 0
        frameTotal = 0
        foundTotal = 0
        # NOTE(review): the last 10 frames are deliberately not processed;
        # presumably a guard against the reported frame count exceeding the
        # number of decodable frames -- confirm.
        while frameTotal < maxFrame - 10:
            prev_time = time.time()
            ret, frame_read = cap.read()
            if not ret:
                # No frame was returned; stop instead of crashing in
                # cvtColor on a None frame.
                print("Frame read failed after", frameTotal,
                      "frames; stopping early")
                break

            frame_rgb = cv2.cvtColor(frame_read, cv2.COLOR_BGR2RGB)
            frame_resized = cv2.resize(frame_rgb,
                                       (netWidth, netHeight),
                                       interpolation=cv2.INTER_LINEAR)

            darknet.copy_image_from_bytes(darknet_image,
                                          frame_resized.tobytes())

            detections = darknet.detect_image(netMain, metaMain,
                                              darknet_image, thresh=0.25)
            image, found = cvDrawBoxes(detections, frame_resized)
            # Swap channels back for display; kept inside the timed section
            # so the measured per-frame work matches earlier runs.
            image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

            # "AVG FPS" is the arithmetic mean of the per-frame rates.
            fps = 1/(time.time()-prev_time)
            fpsgt = fpsgt + fps
            frameTotal = frameTotal + 1
            foundTotal = foundTotal + found
            print("AVG FPS="+str(fpsgt / frameTotal) + "  FRAME="+str(frameTotal)+"  BIRD="+str(foundTotal), " ", frameTotal/maxFrame*100, "%complreted")
            # Uncomment to preview the annotated frames:
            # cv2.imshow('Demo', image)
            # cv2.waitKey(3)
    finally:
        # Release the capture even when detection raises.
        cap.release()

# Run the benchmark only when this file is executed as a script.
if __name__ == "__main__":
    YOLO()

フレーム確認方法（手作業。。。）

dv.pyからは以下のような出力が得られます。

AVG FPS=9.889294695463574  FRAME=35  BIRD=31   3.43811394891945 %complreted
AVG FPS=9.904231780448072  FRAME=36  BIRD=32   3.536345776031434 %complreted
AVG FPS=9.912348439462216  FRAME=37  BIRD=33   3.6345776031434185 %complreted
AVG FPS=9.925056880049333  FRAME=38  BIRD=34   3.732809430255403 %complreted
AVG FPS=9.940727948116814  FRAME=39  BIRD=35   3.831041257367387 %complreted
AVG FPS=9.953403005960066  FRAME=40  BIRD=36   3.9292730844793713 %complreted
AVG FPS=9.966885726507298  FRAME=41  BIRD=37   4.027504911591356 %complreted

モデル毎にFRAMEの数字とBIRDの数字を比較することで、食い違いが出たフレームを見つけられます。
一方、フレーム毎の画像は以下のスクリプトで [FrameNo].jpg の形でセーブすることができるので、今の段階ではこの2つのデータから手作業で食い違いを確認しました。

convtojpg.py

import cv2
import numpy as np
import time
import darknet



def conv():
    """Save every frame of ``output.avi`` as ``jpg/<frame number>.jpg``.

    Frame numbers in the file names start at 1. Each frame is also shown in
    a window named ``read`` and the progress percentage is printed before
    the frame is written. Processing stops early when a frame cannot be
    read.
    """
    import os  # local import: this script's header does not import os

    # Create the output directory up front so the frame writes have
    # somewhere to go.
    os.makedirs("jpg", exist_ok=True)

    cap = cv2.VideoCapture("output.avi")
    try:
        totalFrame = cap.get(cv2.CAP_PROP_FRAME_COUNT)
        fnum = 0
        while fnum < totalFrame:
            ret, outimage = cap.read()
            if not ret:
                # No frame was returned; stop rather than pass None on to
                # imshow/imwrite.
                break
            cv2.imshow('read', outimage)
            cv2.waitKey(1)
            print(str(fnum / totalFrame * 100) + "%")

            fnum = fnum + 1
            cv2.imwrite("jpg/" + str(fnum) + ".jpg", outimage)
    finally:
        # The original also called out.release(), but no `out` object is
        # ever created in this script, so that call raised NameError.
        cap.release()

# Run the frame export only when this file is executed as a script.
if __name__ == "__main__":
    conv()

You get articles that match your needs
You can efficiently read back useful information
You can use dark theme

What you can do with signing up