Edited at

RaspberryPi3(USB2.0)とLaptopPC(USB3.1)でGoogle Edge TPU Acceleratorを使用してMobileNet-SSD v2の動作スピードを検証してみました(MS-COCO)

TPU-MobilenetSSD GitHub stars


1.はじめに

Edge TPU Accelerator を手に入れましたので、早速 NCS2 との性能差を比較するため、 MobileNet-SSD v2 (MS-COCO) を同期処理と非同期処理で実装してみました。

bbb.png


2.導入手順

とても簡単で一瞬で終わります。


Install_the_Edge_TPU_Python_library

$ wget http://storage.googleapis.com/cloud-iot-edge-pretrained-models/edgetpu_api.tar.gz

$ tar xzf edgetpu_api.tar.gz
$ cd python-tflite-source
$ bash ./install.sh


3.テスト用プログラムで味見


Run_test

# From the python-tflite-source directory

cd edgetpu/

python3 demo/classify_image.py \
--model test_data/mobilenet_v2_1.0_224_inat_bird_quant_edgetpu.tflite \
--label test_data/inat_bird_labels.txt \
--image test_data/parrot.jpg


parrot.jpg

ふーん、という感じ。


Results

---------------------------

Ara macao (Scarlet Macaw)
Score : 0.61328125
---------------------------
Platycercus elegans (Crimson Rosella)
Score : 0.15234375

参考URL

Built for the Edge TPU - Models - URL

Edge TPU API overview & demos - URL


4.検証結果


4−1.MobileNet-SSD v2 + RaspberryPi3 + USB 2.0 + TPU + Sync mode (同期処理)

データ・セットは 90クラス の MS-COCO です。

ezgif.com-video-to-gif (6).gif


MobileNet-SSD-TPU-sync.py

import argparse

import platform
import numpy as np
import cv2
import time
from PIL import Image
from edgetpu.detection.engine import DetectionEngine

# Function to read labels from text files.
def ReadLabelFile(file_path):
    """Parse a label file of "<id> <name>" lines into a dict.

    Args:
        file_path: Path to a text file where each non-empty line holds a
            numeric class id followed by the class name, e.g. "0 person".

    Returns:
        dict mapping int class id -> str class name.
    """
    ret = {}
    with open(file_path, 'r') as f:
        for line in f:
            pair = line.strip().split(maxsplit=1)
            # Skip blank or malformed lines instead of raising
            # IndexError/ValueError (real label files often end with a newline).
            if len(pair) < 2:
                continue
            ret[int(pair[0])] = pair[1].strip()
    return ret

def main():
    """Run synchronous MobileNet-SSD v2 detection on a USB camera feed.

    Grabs a frame, runs Edge TPU inference on it in the same loop iteration
    (synchronous mode), draws bounding boxes and labels, and overlays the
    playback/detection FPS. Press 'q' in the window to quit.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--model", default="mobilenet_ssd_v2_coco_quant_postprocess_edgetpu.tflite", help="Path of the detection model.")
    parser.add_argument("--label", default="coco_labels.txt", help="Path of the labels file.")
    parser.add_argument("--usbcamno", type=int, default=0, help="USB Camera number.")
    args = parser.parse_args()

    fps = ""
    detectfps = ""
    framecount = 0
    detectframecount = 0
    time1 = 0  # accumulates instantaneous FPS (1/elapsed) over a 15-frame window
    time2 = 0  # accumulates elapsed seconds over the same window
    box_color = (255, 128, 0)
    box_thickness = 1
    label_background_color = (125, 175, 75)
    label_text_color = (255, 255, 255)

    camera_width = 320
    camera_height = 240

    cap = cv2.VideoCapture(args.usbcamno)
    cap.set(cv2.CAP_PROP_FPS, 150)
    cap.set(cv2.CAP_PROP_FRAME_WIDTH, camera_width)
    cap.set(cv2.CAP_PROP_FRAME_HEIGHT, camera_height)

    # Initialize engine.
    engine = DetectionEngine(args.model)
    labels = ReadLabelFile(args.label) if args.label else None

    # Create the window once, not on every frame (the original re-created it
    # inside the loop on each iteration).
    cv2.namedWindow('USB Camera', cv2.WINDOW_AUTOSIZE)

    try:
        while True:
            t1 = time.perf_counter()

            ret, color_image = cap.read()
            if not ret:
                break

            # BGR (OpenCV) -> RGB (PIL) before handing the frame to the engine.
            prepimg = Image.fromarray(color_image[:, :, ::-1].copy())

            tinf = time.perf_counter()
            ans = engine.DetectWithImage(prepimg, threshold=0.5, keep_aspect_ratio=True, relative_coord=False, top_k=10)
            print(time.perf_counter() - tinf, "sec")

            # Display result.
            if ans:
                detectframecount += 1
                for obj in ans:
                    box = obj.bounding_box.flatten().tolist()
                    box_left = int(box[0])
                    box_top = int(box[1])
                    box_right = int(box[2])
                    box_bottom = int(box[3])
                    cv2.rectangle(color_image, (box_left, box_top), (box_right, box_bottom), box_color, box_thickness)

                    percentage = int(obj.score * 100)
                    label_text = labels[obj.label_id] + " (" + str(percentage) + "%)"

                    # Draw a filled background behind the label, clamped to the
                    # top edge of the image.
                    label_size = cv2.getTextSize(label_text, cv2.FONT_HERSHEY_SIMPLEX, 0.5, 1)[0]
                    label_left = box_left
                    label_top = max(box_top - label_size[1], 1)
                    label_right = label_left + label_size[0]
                    label_bottom = label_top + label_size[1]
                    cv2.rectangle(color_image, (label_left - 1, label_top - 1), (label_right + 1, label_bottom + 1), label_background_color, -1)
                    cv2.putText(color_image, label_text, (label_left, label_bottom), cv2.FONT_HERSHEY_SIMPLEX, 0.5, label_text_color, 1)

            cv2.putText(color_image, fps, (camera_width - 170, 15), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (38, 0, 255), 1, cv2.LINE_AA)
            cv2.putText(color_image, detectfps, (camera_width - 170, 30), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (38, 0, 255), 1, cv2.LINE_AA)

            cv2.imshow('USB Camera', color_image)

            if cv2.waitKey(1) & 0xFF == ord('q'):
                break

            # FPS calculation over a 15-frame window.
            framecount += 1
            if framecount >= 15:
                fps = "(Playback) {:.1f} FPS".format(time1 / 15)
                detectfps = "(Detection) {:.1f} FPS".format(detectframecount / time2)
                framecount = 0
                detectframecount = 0
                time1 = 0
                time2 = 0
            t2 = time.perf_counter()
            elapsedTime = t2 - t1
            time1 += 1 / elapsedTime
            time2 += elapsedTime
    finally:
        # Release the camera and destroy windows even on 'q' or read failure;
        # the original leaked both.
        cap.release()
        cv2.destroyAllWindows()

if __name__ == '__main__':
    main()



4−2.MobileNet-SSD v2 + RaspberryPi3 + USB 2.0 + TPU + Async mode (非同期マルチプロセス処理)

こちらも、データ・セットは 90クラス の MS-COCO です。

ezgif.com-video-to-gif (7).gif


MobileNet-SSD-TPU-async.py

import argparse

import platform
import numpy as np
import cv2
import time
from PIL import Image
from time import sleep
import multiprocessing as mp
from edgetpu.detection.engine import DetectionEngine

# Module-level shared state. Note: camThread and inferencer run in separate
# processes, so only the mp.Queue objects actually cross the process boundary;
# the drawing-related globals below are used within the camera/display process.
lastresults = None  # most recent detection results, redrawn while waiting for new ones
processes = []  # child processes started by the __main__ block
frameBuffer = None  # mp.Queue of frames for the inferencer (set in __main__)
results = None  # mp.Queue of detection results (set in __main__)
fps = ""  # playback FPS overlay text
detectfps = ""  # detection FPS overlay text
framecount = 0  # frames shown in the current 15-frame measurement window
detectframecount = 0  # frames with fresh detection results in the window
time1 = 0  # accumulated instantaneous FPS (1/elapsed) over the window
time2 = 0  # accumulated elapsed seconds over the window
box_color = (255, 128, 0)
box_thickness = 1
label_background_color = (125, 175, 75)
label_text_color = (255, 255, 255)
percentage = 0.0

# Function to read labels from text files.
def ReadLabelFile(file_path):
    """Parse a label file of "<id> <name>" lines into a dict.

    Args:
        file_path: Path to a text file where each non-empty line holds a
            numeric class id followed by the class name, e.g. "0 person".

    Returns:
        dict mapping int class id -> str class name.
    """
    ret = {}
    with open(file_path, 'r') as f:
        for line in f:
            pair = line.strip().split(maxsplit=1)
            # Skip blank or malformed lines instead of raising
            # IndexError/ValueError (real label files often end with a newline).
            if len(pair) < 2:
                continue
            ret[int(pair[0])] = pair[1].strip()
    return ret

def camThread(label, results, frameBuffer, camera_width, camera_height, vidfps, usbcamno):
    """Camera/display process: capture frames, feed the inference queue,
    overlay the latest detection results, and show the annotated stream.

    Args:
        label: dict mapping class id -> class name.
        results: mp.Queue carrying detection results from the inferencer.
        frameBuffer: bounded mp.Queue of frames waiting for inference.
        camera_width, camera_height: requested capture resolution.
        vidfps: requested camera FPS.
        usbcamno: cv2.VideoCapture device index.
    """
    global fps
    global detectfps
    global framecount
    global detectframecount
    global time1
    global time2
    global lastresults
    global cam
    global window_name

    import queue  # for queue.Empty raised by a non-blocking mp.Queue.get()

    cam = cv2.VideoCapture(usbcamno)
    cam.set(cv2.CAP_PROP_FPS, vidfps)
    cam.set(cv2.CAP_PROP_FRAME_WIDTH, camera_width)
    cam.set(cv2.CAP_PROP_FRAME_HEIGHT, camera_height)
    window_name = "USB Camera"
    cv2.namedWindow(window_name, cv2.WINDOW_AUTOSIZE)

    try:
        while True:
            t1 = time.perf_counter()

            ret, color_image = cam.read()
            if not ret:
                continue
            # Keep the buffer fresh: drop the oldest frame when it is full so
            # the inferencer always works on recent frames.
            if frameBuffer.full():
                frameBuffer.get()
            frames = color_image
            frameBuffer.put(color_image.copy())

            # mp.Queue.empty() is only advisory across processes, so guard the
            # non-blocking get against the race where the queue drains between
            # the check and the get (the original could crash with queue.Empty).
            res = None
            if not results.empty():
                try:
                    res = results.get(False)
                except queue.Empty:
                    res = None

            if res is not None:
                detectframecount += 1
                imdraw = overlay_on_image(frames, res, label, camera_width, camera_height)
                lastresults = res
            else:
                # No fresh results yet: redraw the previous ones.
                imdraw = overlay_on_image(frames, lastresults, label, camera_width, camera_height)

            # Use window_name consistently (the original hard-coded the string here).
            cv2.imshow(window_name, imdraw)

            if cv2.waitKey(1) & 0xFF == ord('q'):
                break

            # FPS calculation over a 15-frame window.
            framecount += 1
            if framecount >= 15:
                fps = "(Playback) {:.1f} FPS".format(time1 / 15)
                detectfps = "(Detection) {:.1f} FPS".format(detectframecount / time2)
                framecount = 0
                detectframecount = 0
                time1 = 0
                time2 = 0
            t2 = time.perf_counter()
            elapsedTime = t2 - t1
            time1 += 1 / elapsedTime
            time2 += elapsedTime
    finally:
        # Release the camera and windows on exit; the original leaked both.
        cam.release()
        cv2.destroyAllWindows()

def inferencer(results, frameBuffer, model, camera_width, camera_height):
    """Inference process: pull frames from frameBuffer, run the Edge TPU
    detector, and push each result list onto the results queue.

    Args:
        results: mp.Queue receiving detection result lists.
        frameBuffer: mp.Queue supplying BGR frames from the camera process.
        model: path to the Edge TPU-compiled .tflite detection model.
        camera_width, camera_height: kept for interface compatibility
            (not used by the inference itself).
    """
    engine = DetectionEngine(model)

    while True:
        # Block until a frame is available instead of busy-polling
        # frameBuffer.empty(), which spun a full CPU core while idle.
        color_image = frameBuffer.get()

        # BGR (OpenCV) -> RGB (PIL) for the detection engine.
        prepimg = Image.fromarray(color_image[:, :, ::-1].copy())

        tinf = time.perf_counter()
        ans = engine.DetectWithImage(prepimg, threshold=0.5, keep_aspect_ratio=True, relative_coord=False, top_k=10)
        print(time.perf_counter() - tinf, "sec")
        results.put(ans)

def overlay_on_image(frames, object_infos, label, camera_width, camera_height):
    """Draw detection boxes, labels, and FPS text onto a copy of the frame.

    Reads the module globals fps/detectfps/box_color/box_thickness/
    label_background_color/label_text_color for styling.

    Args:
        frames: BGR image to annotate.
        object_infos: iterable of detection objects (bounding_box, score,
            label_id), or None when no results exist yet.
        label: dict mapping class id -> class name.
        camera_width, camera_height: frame dimensions for FPS text placement.

    Returns:
        The annotated copy, or the original frame unchanged when
        object_infos is None.
    """
    color_image = frames

    # Idiomatic None check (original used isinstance(x, type(None))).
    if object_infos is None:
        return color_image
    img_cp = color_image.copy()

    for obj in object_infos:
        box = obj.bounding_box.flatten().tolist()
        box_left = int(box[0])
        box_top = int(box[1])
        box_right = int(box[2])
        box_bottom = int(box[3])
        cv2.rectangle(img_cp, (box_left, box_top), (box_right, box_bottom), box_color, box_thickness)

        percentage = int(obj.score * 100)
        label_text = label[obj.label_id] + " (" + str(percentage) + "%)"

        # Filled background behind the label, clamped to the top of the image.
        label_size = cv2.getTextSize(label_text, cv2.FONT_HERSHEY_SIMPLEX, 0.5, 1)[0]
        label_left = box_left
        label_top = max(box_top - label_size[1], 1)
        label_right = label_left + label_size[0]
        label_bottom = label_top + label_size[1]
        cv2.rectangle(img_cp, (label_left - 1, label_top - 1), (label_right + 1, label_bottom + 1), label_background_color, -1)
        cv2.putText(img_cp, label_text, (label_left, label_bottom), cv2.FONT_HERSHEY_SIMPLEX, 0.5, label_text_color, 1)

    cv2.putText(img_cp, fps, (camera_width - 170, 15), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (38, 0, 255), 1, cv2.LINE_AA)
    cv2.putText(img_cp, detectfps, (camera_width - 170, 30), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (38, 0, 255), 1, cv2.LINE_AA)

    return img_cp

if __name__ == '__main__':

    parser = argparse.ArgumentParser()
    parser.add_argument("--model", default="mobilenet_ssd_v2_coco_quant_postprocess_edgetpu.tflite", help="Path of the detection model.")
    parser.add_argument("--label", default="coco_labels.txt", help="Path of the labels file.")
    parser.add_argument("--usbcamno", type=int, default=0, help="USB Camera number.")
    args = parser.parse_args()

    model = args.model
    label = ReadLabelFile(args.label)
    usbcamno = args.usbcamno

    camera_width = 320
    camera_height = 240
    vidfps = 30

    try:
        # forkserver avoids inheriting camera/TPU state into the children.
        mp.set_start_method('forkserver')
        frameBuffer = mp.Queue(10)
        results = mp.Queue()

        # Start streaming (camera capture + display process).
        p = mp.Process(target=camThread,
                       args=(label, results, frameBuffer, camera_width, camera_height, vidfps, usbcamno),
                       daemon=True)
        p.start()
        processes.append(p)

        # Activation of inferencer (Edge TPU process).
        p = mp.Process(target=inferencer,
                       args=(results, frameBuffer, model, camera_width, camera_height),
                       daemon=True)
        p.start()
        processes.append(p)

        # Parent just keeps the daemons alive until interrupted.
        while True:
            sleep(1)

    except KeyboardInterrupt:
        # Exit cleanly on Ctrl-C instead of printing a traceback.
        pass
    finally:
        # Iterate the process list directly (the original indexed via
        # range(len(...))) and join so termination completes before exit.
        for proc in processes:
            proc.terminate()
            proc.join()



4−3.MobileNet-SSD v2 + Core i7 + USB 3.1 + TPU + Async mode (非同期マルチプロセス処理)

同じモデルとデータセットですが、めちゃくちゃ速いです。。。

60 FPS - 80 FPS の間で揺らいでいますが、転送レートが上がるだけでココまで差がでるとは。。。

速すぎてフレームレートが人力では読めません。。。

12 ms から 15 ms で1フレームを推論できるようです。

ezgif.com-optimize (25).gif


5.おわりに

実装が悪いのか、RaspberryPi3 では、画像の転送部分で USB 2.0 がボトルネックになりすぎてかなり遅いです。

NCS2 の 4リクエスト並列処理のほうが2倍のパフォーマンスが出ていました。

もう少しプログラムのブラッシュアップが必要かもしれませんが、USB 3.0 で本来の性能が出ていそうです。

なお、 USB 3.1 の同期処理では 48 FPS 出ました。

USB 3.1 の非同期処理では上述のとおり 60 FPS - 80 FPS 出ました。

<2019.03.12 追記>

LattePanda Alpha Core m3 + USB 3.0 + Google Edge TPU Accelerator +

MobileNet-SSD v2 + Async mode 640x480

about 60 - 80 FPS


https://youtu.be/OFEQHCQ5MsM

https://twitter.com/PINTO03091/status/1105120574316044290

LattePanda Alpha Core m3 + USB 3.0 + Google Edge TPU Accelerator +

MobileNet-SSD v2 + Async mode 320x240

about 80 - 90 FPS


https://youtu.be/LERXuDXn0kY

https://twitter.com/PINTO03091/status/1105120075156152320