1. Introduction
I got my hands on an Edge TPU Accelerator, so to compare its performance against the NCS2 I implemented MobileNet-SSD v2 (MS-COCO) in both synchronous and asynchronous modes.
2. Installation procedure
Very easy; it takes only a moment.
$ wget http://storage.googleapis.com/cloud-iot-edge-pretrained-models/edgetpu_api.tar.gz
$ tar xzf edgetpu_api.tar.gz
$ cd python-tflite-source
$ bash ./install.sh
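Before moving on, it may be worth confirming that the accelerator is actually visible to the freshly installed runtime. A minimal sketch, assuming your version of the edgetpu package exposes ListEdgeTpuPaths (if it does not, simply running the demo in the next section serves the same purpose):

# Sanity check: list the Edge TPU devices the runtime can see.
# NOTE: ListEdgeTpuPaths may not exist in every release of this early API.
from edgetpu.basic import edgetpu_utils

paths = edgetpu_utils.ListEdgeTpuPaths(edgetpu_utils.EDGE_TPU_STATE_UNASSIGNED)
print("Edge TPUs found:", paths)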
3. A quick taste with the bundled test program
# From the python-tflite-source directory
cd edgetpu/
python3 demo/classify_image.py \
--model test_data/mobilenet_v2_1.0_224_inat_bird_quant_edgetpu.tflite \
--label test_data/inat_bird_labels.txt \
--image test_data/parrot.jpg
So far my reaction is just "huh, okay."
---------------------------
Ara macao (Scarlet Macaw)
Score : 0.61328125
---------------------------
Platycercus elegans (Crimson Rosella)
Score : 0.15234375
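For reference, the same classification can be reproduced from your own script rather than the bundled demo. A minimal sketch using the ClassificationEngine API of this edgetpu release, with the model and image shipped in test_data (it prints raw label IDs instead of looking them up in the label file):

# Classify the bundled parrot image with the bundled bird model.
from edgetpu.classification.engine import ClassificationEngine
from PIL import Image

engine = ClassificationEngine('test_data/mobilenet_v2_1.0_224_inat_bird_quant_edgetpu.tflite')
img = Image.open('test_data/parrot.jpg')

# ClassifyWithImage returns (label_id, score) pairs for the top candidates.
for label_id, score in engine.ClassifyWithImage(img, top_k=2):
    print(label_id, score)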
Reference URLs
Built for the Edge TPU - Models - URL
Edge TPU API overview & demos - URL
4. Benchmark results
4-1. MobileNet-SSD v2 + RaspberryPi3 + USB 2.0 + TPU + Sync mode (synchronous processing)
import argparse
import platform
import numpy as np
import cv2
import time
from PIL import Image
from edgetpu.detection.engine import DetectionEngine

# Function to read labels from text files.
def ReadLabelFile(file_path):
    with open(file_path, 'r') as f:
        lines = f.readlines()
    ret = {}
    for line in lines:
        pair = line.strip().split(maxsplit=1)
        ret[int(pair[0])] = pair[1].strip()
    return ret

def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("--model", default="mobilenet_ssd_v2_coco_quant_postprocess_edgetpu.tflite", help="Path of the detection model.")
    parser.add_argument("--label", default="coco_labels.txt", help="Path of the labels file.")
    parser.add_argument("--usbcamno", type=int, default=0, help="USB Camera number.")
    args = parser.parse_args()

    fps = ""
    detectfps = ""
    framecount = 0
    detectframecount = 0
    time1 = 0
    time2 = 0

    box_color = (255, 128, 0)
    box_thickness = 1
    label_background_color = (125, 175, 75)
    label_text_color = (255, 255, 255)
    percentage = 0.0

    camera_width = 320
    camera_height = 240
    cap = cv2.VideoCapture(args.usbcamno)
    cap.set(cv2.CAP_PROP_FPS, 150)
    cap.set(cv2.CAP_PROP_FRAME_WIDTH, camera_width)
    cap.set(cv2.CAP_PROP_FRAME_HEIGHT, camera_height)

    # Initialize engine.
    engine = DetectionEngine(args.model)
    labels = ReadLabelFile(args.label) if args.label else None

    while True:
        t1 = time.perf_counter()

        ret, color_image = cap.read()
        if not ret:
            break

        # Run inference. (BGR -> RGB, wrapped as a PIL image for the Edge TPU API.)
        prepimg = color_image[:, :, ::-1].copy()
        prepimg = Image.fromarray(prepimg)

        tinf = time.perf_counter()
        ans = engine.DetectWithImage(prepimg, threshold=0.5, keep_aspect_ratio=True, relative_coord=False, top_k=10)
        print(time.perf_counter() - tinf, "sec")

        # Display result.
        if ans:
            detectframecount += 1
            for obj in ans:
                box = obj.bounding_box.flatten().tolist()
                box_left = int(box[0])
                box_top = int(box[1])
                box_right = int(box[2])
                box_bottom = int(box[3])
                cv2.rectangle(color_image, (box_left, box_top), (box_right, box_bottom), box_color, box_thickness)

                percentage = int(obj.score * 100)
                label_text = labels[obj.label_id] + " (" + str(percentage) + "%)"

                label_size = cv2.getTextSize(label_text, cv2.FONT_HERSHEY_SIMPLEX, 0.5, 1)[0]
                label_left = box_left
                label_top = box_top - label_size[1]
                if (label_top < 1):
                    label_top = 1
                label_right = label_left + label_size[0]
                label_bottom = label_top + label_size[1]
                cv2.rectangle(color_image, (label_left - 1, label_top - 1), (label_right + 1, label_bottom + 1), label_background_color, -1)
                cv2.putText(color_image, label_text, (label_left, label_bottom), cv2.FONT_HERSHEY_SIMPLEX, 0.5, label_text_color, 1)

        cv2.putText(color_image, fps, (camera_width - 170, 15), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (38, 0, 255), 1, cv2.LINE_AA)
        cv2.putText(color_image, detectfps, (camera_width - 170, 30), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (38, 0, 255), 1, cv2.LINE_AA)
        cv2.namedWindow('USB Camera', cv2.WINDOW_AUTOSIZE)
        cv2.imshow('USB Camera', color_image)

        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

        # FPS calculation
        framecount += 1
        if framecount >= 15:
            fps = "(Playback) {:.1f} FPS".format(time1 / 15)
            detectfps = "(Detection) {:.1f} FPS".format(detectframecount / time2)
            framecount = 0
            detectframecount = 0
            time1 = 0
            time2 = 0
        t2 = time.perf_counter()
        elapsedTime = t2 - t1
        time1 += 1 / elapsedTime
        time2 += elapsedTime

if __name__ == '__main__':
    main()
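A note on the design: in this synchronous version, cap.read(), DetectWithImage() and imshow() all run back-to-back in a single loop, so the USB frame transfer and the inference can never overlap. That serialization is exactly what the asynchronous version below removes.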
4-2. MobileNet-SSD v2 + RaspberryPi3 + USB 2.0 + TPU + Async mode (asynchronous multiprocess processing)
Here too, the dataset is the 90-class MS-COCO.
import argparse
import platform
import numpy as np
import cv2
import time
from PIL import Image
from time import sleep
import multiprocessing as mp
from edgetpu.detection.engine import DetectionEngine

lastresults = None
processes = []
frameBuffer = None
results = None
fps = ""
detectfps = ""
framecount = 0
detectframecount = 0
time1 = 0
time2 = 0
box_color = (255, 128, 0)
box_thickness = 1
label_background_color = (125, 175, 75)
label_text_color = (255, 255, 255)
percentage = 0.0

# Function to read labels from text files.
def ReadLabelFile(file_path):
    with open(file_path, 'r') as f:
        lines = f.readlines()
    ret = {}
    for line in lines:
        pair = line.strip().split(maxsplit=1)
        ret[int(pair[0])] = pair[1].strip()
    return ret

def camThread(label, results, frameBuffer, camera_width, camera_height, vidfps, usbcamno):
    global fps
    global detectfps
    global framecount
    global detectframecount
    global time1
    global time2
    global lastresults
    global cam
    global window_name

    cam = cv2.VideoCapture(usbcamno)
    cam.set(cv2.CAP_PROP_FPS, vidfps)
    cam.set(cv2.CAP_PROP_FRAME_WIDTH, camera_width)
    cam.set(cv2.CAP_PROP_FRAME_HEIGHT, camera_height)
    window_name = "USB Camera"
    cv2.namedWindow(window_name, cv2.WINDOW_AUTOSIZE)

    while True:
        t1 = time.perf_counter()

        ret, color_image = cam.read()
        if not ret:
            continue

        # Drop the oldest frame when the buffer is full so inference always sees a recent frame.
        if frameBuffer.full():
            frameBuffer.get()
        frames = color_image
        frameBuffer.put(color_image.copy())

        res = None
        if not results.empty():
            res = results.get(False)
            detectframecount += 1
            imdraw = overlay_on_image(frames, res, label, camera_width, camera_height)
            lastresults = res
        else:
            # No fresh detection yet: re-draw the last known results.
            imdraw = overlay_on_image(frames, lastresults, label, camera_width, camera_height)

        cv2.imshow(window_name, imdraw)

        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

        # FPS calculation
        framecount += 1
        if framecount >= 15:
            fps = "(Playback) {:.1f} FPS".format(time1 / 15)
            detectfps = "(Detection) {:.1f} FPS".format(detectframecount / time2)
            framecount = 0
            detectframecount = 0
            time1 = 0
            time2 = 0
        t2 = time.perf_counter()
        elapsedTime = t2 - t1
        time1 += 1 / elapsedTime
        time2 += elapsedTime

def inferencer(results, frameBuffer, model, camera_width, camera_height):
    engine = DetectionEngine(model)

    while True:
        if frameBuffer.empty():
            continue

        # Run inference.
        color_image = frameBuffer.get()
        prepimg = color_image[:, :, ::-1].copy()
        prepimg = Image.fromarray(prepimg)
        tinf = time.perf_counter()
        ans = engine.DetectWithImage(prepimg, threshold=0.5, keep_aspect_ratio=True, relative_coord=False, top_k=10)
        print(time.perf_counter() - tinf, "sec")
        results.put(ans)

def overlay_on_image(frames, object_infos, label, camera_width, camera_height):
    color_image = frames

    if object_infos is None:
        return color_image
    img_cp = color_image.copy()

    for obj in object_infos:
        box = obj.bounding_box.flatten().tolist()
        box_left = int(box[0])
        box_top = int(box[1])
        box_right = int(box[2])
        box_bottom = int(box[3])
        cv2.rectangle(img_cp, (box_left, box_top), (box_right, box_bottom), box_color, box_thickness)

        percentage = int(obj.score * 100)
        label_text = label[obj.label_id] + " (" + str(percentage) + "%)"

        label_size = cv2.getTextSize(label_text, cv2.FONT_HERSHEY_SIMPLEX, 0.5, 1)[0]
        label_left = box_left
        label_top = box_top - label_size[1]
        if (label_top < 1):
            label_top = 1
        label_right = label_left + label_size[0]
        label_bottom = label_top + label_size[1]
        cv2.rectangle(img_cp, (label_left - 1, label_top - 1), (label_right + 1, label_bottom + 1), label_background_color, -1)
        cv2.putText(img_cp, label_text, (label_left, label_bottom), cv2.FONT_HERSHEY_SIMPLEX, 0.5, label_text_color, 1)

    cv2.putText(img_cp, fps, (camera_width - 170, 15), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (38, 0, 255), 1, cv2.LINE_AA)
    cv2.putText(img_cp, detectfps, (camera_width - 170, 30), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (38, 0, 255), 1, cv2.LINE_AA)
    return img_cp

if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument("--model", default="mobilenet_ssd_v2_coco_quant_postprocess_edgetpu.tflite", help="Path of the detection model.")
    parser.add_argument("--label", default="coco_labels.txt", help="Path of the labels file.")
    parser.add_argument("--usbcamno", type=int, default=0, help="USB Camera number.")
    args = parser.parse_args()

    model = args.model
    label = ReadLabelFile(args.label)
    usbcamno = args.usbcamno

    camera_width = 320
    camera_height = 240
    vidfps = 30

    try:
        mp.set_start_method('forkserver')
        frameBuffer = mp.Queue(10)
        results = mp.Queue()

        # Start streaming
        p = mp.Process(target=camThread,
                       args=(label, results, frameBuffer, camera_width, camera_height, vidfps, usbcamno),
                       daemon=True)
        p.start()
        processes.append(p)

        # Activation of inferencer
        p = mp.Process(target=inferencer,
                       args=(results, frameBuffer, model, camera_width, camera_height),
                       daemon=True)
        p.start()
        processes.append(p)

        while True:
            sleep(1)

    finally:
        for p in processes:
            p.terminate()
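Two design points worth noting: frameBuffer is capped at 10 entries and camThread throws away the oldest frame whenever it is full, so the inferencer always works on a recent frame instead of falling steadily behind the camera; and lastresults keeps the previous detection boxes on screen between results, which is why the playback FPS can run far ahead of the detection FPS.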
4-3. MobileNet-SSD v2 + Core i7 + USB 3.1 + TPU + Async mode (asynchronous multiprocess processing)
Same model and dataset, but this is blazingly fast. It fluctuates between 60 FPS and 80 FPS; I did not expect raising the transfer rate alone to make this much difference. It is too fast to read the frame rate off the screen by eye. One frame appears to be inferred in roughly 12 ms to 15 ms.
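That latency is consistent with the observed frame rate: 1000 ms / 15 ms ≈ 67 FPS and 1000 ms / 12 ms ≈ 83 FPS, which neatly brackets the 60 FPS - 80 FPS range above.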
5. Conclusion
Perhaps my implementation is at fault, but on the RaspberryPi3 the USB 2.0 image-transfer path is such a severe bottleneck that the whole pipeline is quite slow. The NCS2 running 4 inference requests in parallel delivered twice the performance. The program could probably use more polish, but over USB 3.0 the device seems to deliver its true performance. For the record, synchronous processing over USB 3.1 reached 48 FPS, and asynchronous processing over USB 3.1 reached 60 FPS - 80 FPS, as noted above.
<Addendum 2019.03.12>
LattePanda Alpha Core m3 + USB 3.0 + Google Edge TPU Accelerator +
MobileNet-SSD v2 + Async mode 640x480
about 60 - 80 FPS
https://youtu.be/OFEQHCQ5MsM
https://twitter.com/PINTO03091/status/1105120574316044290
LattePanda Alpha Core m3 + USB 3.0 + Google Edge TPU Accelerator +
MobileNet-SSD v2 + Async mode 320x240
about 80 - 90 FPS
https://youtu.be/LERXuDXn0kY
https://twitter.com/PINTO03091/status/1105120075156152320