1. PINTO

    No comment

    PINTO
Changes in body
Source | HTML | Preview
@@ -1,381 +1,381 @@
# 1.はじめに
Edge TPU Accelerator を手に入れましたので、早速 NCS2 との性能差を比較するため、 MobileNet-SSD v2 (MS-COCO) を同期処理と非同期処理で実装してみました。
![bbb.png](https://qiita-image-store.s3.amazonaws.com/0/194769/582993ef-f86b-7be7-4afc-9bbbb174e829.png)
# 2.導入手順
とても簡単で一瞬で終わります。
```bash:Install_the_Edge_TPU_Python_library
$ wget http://storage.googleapis.com/cloud-iot-edge-pretrained-models/edgetpu_api.tar.gz
$ tar xzf edgetpu_api.tar.gz
$ cd python-tflite-source
$ bash ./install.sh
```
# 3.テスト用プログラムで味見
```bash:Run_test
# From the python-tflite-source directory
cd edgetpu/
python3 demo/classify_image.py \
--model test_data/mobilenet_v2_1.0_224_inat_bird_quant_edgetpu.tflite \
--label test_data/inat_bird_labels.txt \
--image test_data/parrot.jpg
```
![parrot.jpg](https://qiita-image-store.s3.amazonaws.com/0/194769/398b121f-6e2f-2e01-e3ed-5b3200fa9346.jpeg)
ふーん、という感じ。
```bash:Results
---------------------------
Ara macao (Scarlet Macaw)
Score : 0.61328125
---------------------------
Platycercus elegans (Crimson Rosella)
Score : 0.15234375
```
**参考URL**
**[Built for the Edge TPU - Models - URL](https://coral.withgoogle.com/models/)**
**[Edge TPU API overview & demos - URL](https://coral.withgoogle.com/tutorials/edgetpu-api/#api-demos)**
# 4.検証結果
## 4−1.MobileNet-SSD v2 + RaspberryPi3 + USB 2.0 + TPU + Sync mode (同期処理)
データ・セットは **`90クラス の MS-COCO`** です。
![ezgif.com-video-to-gif (6).gif](https://qiita-image-store.s3.amazonaws.com/0/194769/01581295-4267-23eb-5f81-a4f669a28406.gif)
```python:MobileNet-SSD-TPU-sync.py
import argparse
import platform
import numpy as np
import cv2
import time
from PIL import Image
from edgetpu.detection.engine import DetectionEngine
# Function to read labels from text files.
def ReadLabelFile(file_path):
    """Parse a label-map text file into ``{int class id: label name}``.

    Each meaningful line looks like ``"0  person"`` — a numeric id, some
    whitespace, then the label text.  Blank lines (including a trailing
    newline at end of file) are skipped so they no longer raise
    IndexError/ValueError as the original line-by-line parse did.

    Args:
        file_path: Path to the label file.

    Returns:
        dict mapping int label id -> str label name.
    """
    ret = {}
    with open(file_path, 'r') as f:
        for line in f:
            line = line.strip()
            if not line:
                continue  # tolerate blank/trailing lines
            pair = line.split(maxsplit=1)
            ret[int(pair[0])] = pair[1].strip()
    return ret
def main():
    """Synchronous MobileNet-SSD v2 detection on a USB camera.

    Captures frames with OpenCV, runs Edge TPU inference on each frame in
    the same loop (blocking per frame), draws bounding boxes and labels,
    and overlays playback/detection FPS counters.  Press 'q' to quit.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--model", default="mobilenet_ssd_v2_coco_quant_postprocess_edgetpu.tflite", help="Path of the detection model.")
    parser.add_argument("--label", default="coco_labels.txt", help="Path of the labels file.")
    parser.add_argument("--usbcamno", type=int, default=0, help="USB Camera number.")
    args = parser.parse_args()

    fps = ""
    detectfps = ""
    framecount = 0        # frames displayed since the last FPS refresh
    detectframecount = 0  # frames on which inference returned detections
    time1 = 0             # accumulated instantaneous FPS (1/elapsed) over the window
    time2 = 0             # accumulated elapsed seconds over the window

    box_color = (255, 128, 0)
    box_thickness = 1
    label_background_color = (125, 175, 75)
    label_text_color = (255, 255, 255)
    percentage = 0.0

    camera_width = 320
    camera_height = 240
    cap = cv2.VideoCapture(args.usbcamno)
    cap.set(cv2.CAP_PROP_FPS, 150)  # request a high rate; driver clamps to what the camera supports
    cap.set(cv2.CAP_PROP_FRAME_WIDTH, camera_width)
    cap.set(cv2.CAP_PROP_FRAME_HEIGHT, camera_height)

    # Initialize engine.
    engine = DetectionEngine(args.model)
    labels = ReadLabelFile(args.label) if args.label else None

    while True:
        t1 = time.perf_counter()
        ret, color_image = cap.read()
        if not ret:
            break

        # Run inference.
        prepimg = color_image[:, :, ::-1].copy()  # BGR (OpenCV) -> RGB for PIL
        prepimg = Image.fromarray(prepimg)
        tinf = time.perf_counter()
        ans = engine.DetectWithImage(prepimg, threshold=0.5, keep_aspect_ratio=True, relative_coord=False, top_k=10)
        print(time.perf_counter() - tinf, "sec")  # per-frame inference wall time

        # Display result.
        if ans:
            detectframecount += 1
            for obj in ans:
                # relative_coord=False above, so the box is already in pixels.
                box = obj.bounding_box.flatten().tolist()
                box_left = int(box[0])
                box_top = int(box[1])
                box_right = int(box[2])
                box_bottom = int(box[3])
                cv2.rectangle(color_image, (box_left, box_top), (box_right, box_bottom), box_color, box_thickness)

                percentage = int(obj.score * 100)
                label_text = labels[obj.label_id] + " (" + str(percentage) + "%)"

                # Place the caption just above the box; clamp so it stays on screen.
                label_size = cv2.getTextSize(label_text, cv2.FONT_HERSHEY_SIMPLEX, 0.5, 1)[0]
                label_left = box_left
                label_top = box_top - label_size[1]
                if (label_top < 1):
                    label_top = 1
                label_right = label_left + label_size[0]
                label_bottom = label_top + label_size[1]
                cv2.rectangle(color_image, (label_left - 1, label_top - 1), (label_right + 1, label_bottom + 1), label_background_color, -1)
                cv2.putText(color_image, label_text, (label_left, label_bottom), cv2.FONT_HERSHEY_SIMPLEX, 0.5, label_text_color, 1)

        # FPS readouts in the top-right corner.
        cv2.putText(color_image, fps, (camera_width-170,15), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (38,0,255), 1, cv2.LINE_AA)
        cv2.putText(color_image, detectfps, (camera_width-170,30), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (38,0,255), 1, cv2.LINE_AA)
        cv2.namedWindow('USB Camera', cv2.WINDOW_AUTOSIZE)
        cv2.imshow('USB Camera', color_image)

        if cv2.waitKey(1)&0xFF == ord('q'):
            break

        # FPS calculation: refresh the readouts every 15 frames.
        framecount += 1
        if framecount >= 15:
            fps = "(Playback) {:.1f} FPS".format(time1/15)
            detectfps = "(Detection) {:.1f} FPS".format(detectframecount/time2)
            framecount = 0
            detectframecount = 0
            time1 = 0
            time2 = 0
        t2 = time.perf_counter()
        elapsedTime = t2-t1
        time1 += 1/elapsedTime
        time2 += elapsedTime


if __name__ == '__main__':
    main()
```
## 4−2.MobileNet-SSD v2 + RaspberryPi3 + USB 2.0 + TPU + Async mode (非同期マルチプロセス処理)
こちらも、データ・セットは **`90クラス の MS-COCO`** です。
![ezgif.com-video-to-gif (7).gif](https://qiita-image-store.s3.amazonaws.com/0/194769/47d090f5-d535-cdd6-492d-ef6ae3f0c971.gif)
```python:MobileNet-SSD-TPU-async.py
import argparse
import platform
import numpy as np
import cv2
import time
from PIL import Image
from time import sleep
import multiprocessing as mp
from edgetpu.detection.engine import DetectionEngine
# Module-level state used by the display process (camThread / overlay_on_image).
lastresults = None    # most recent detection result; redrawn until a fresh one arrives
processes = []        # spawned child processes, terminated in the __main__ finally block
frameBuffer = None    # mp.Queue of frames awaiting inference (set in __main__)
results = None        # mp.Queue of detection results (set in __main__)
fps = ""              # playback FPS overlay text
detectfps = ""        # detection FPS overlay text
framecount = 0        # frames displayed since the last FPS refresh
detectframecount = 0  # frames for which a detection result was received
time1 = 0             # accumulated instantaneous FPS (1/elapsed) over the window
time2 = 0             # accumulated elapsed seconds over the window
box_color = (255, 128, 0)
box_thickness = 1
label_background_color = (125, 175, 75)
label_text_color = (255, 255, 255)
percentage = 0.0
# Function to read labels from text files.
def ReadLabelFile(file_path):
    """Read a label-map file and return a dict of {int class id: label text}.

    Each line is expected to be "<id> <name>", e.g. "0  person".
    """
    with open(file_path, 'r') as f:
        pairs = [line.strip().split(maxsplit=1) for line in f]
    return {int(p[0]): p[1].strip() for p in pairs}
def camThread(label, results, frameBuffer, camera_width, camera_height, vidfps, usbcamno):
    """Camera capture + display loop (runs in its own process).

    Reads frames from the USB camera, pushes a copy onto ``frameBuffer`` for
    the inferencer process, and shows the newest frame with the most recent
    detection results overlaid.  FPS readouts refresh every 15 frames.
    Press 'q' in the window to stop the loop.

    Args:
        label: dict of {class id: name} used when drawing captions.
        results: mp.Queue the inferencer puts detection results on.
        frameBuffer: bounded mp.Queue of frames awaiting inference.
        camera_width, camera_height: requested capture resolution.
        vidfps: requested camera frame rate.
        usbcamno: OpenCV device index.
    """
    global fps
    global detectfps
    global framecount
    global detectframecount
    global time1
    global time2
    global lastresults
    global cam
    global window_name

    cam = cv2.VideoCapture(usbcamno)
    cam.set(cv2.CAP_PROP_FPS, vidfps)
    cam.set(cv2.CAP_PROP_FRAME_WIDTH, camera_width)
    cam.set(cv2.CAP_PROP_FRAME_HEIGHT, camera_height)
    window_name = "USB Camera"
    cv2.namedWindow(window_name, cv2.WINDOW_AUTOSIZE)

    while True:
        t1 = time.perf_counter()
        ret, color_image = cam.read()
        if not ret:
            continue

        # Drop the oldest queued frame when the buffer is full so the
        # inferencer always works on recent frames (keeps latency bounded).
        if frameBuffer.full():
            frameBuffer.get()
        frames = color_image
        frameBuffer.put(color_image.copy())
        res = None

        if not results.empty():
            # Fresh detections available: draw them and remember them.
            res = results.get(False)
            detectframecount += 1
            imdraw = overlay_on_image(frames, res, label, camera_width, camera_height)
            lastresults = res
        else:
            # No new result yet: redraw the last known detections.
            imdraw = overlay_on_image(frames, lastresults, label, camera_width, camera_height)

        cv2.imshow('USB Camera', imdraw)

        if cv2.waitKey(1)&0xFF == ord('q'):
            break

        # FPS calculation: refresh the readouts every 15 frames.
        framecount += 1
        if framecount >= 15:
            fps = "(Playback) {:.1f} FPS".format(time1/15)
            detectfps = "(Detection) {:.1f} FPS".format(detectframecount/time2)
            framecount = 0
            detectframecount = 0
            time1 = 0
            time2 = 0
        t2 = time.perf_counter()
        elapsedTime = t2-t1
        time1 += 1/elapsedTime
        time2 += elapsedTime
def inferencer(results, frameBuffer, model, camera_width, camera_height):
    """Edge TPU inference loop (runs in its own process).

    Busy-waits on ``frameBuffer``, converts each BGR frame to an RGB PIL
    image, runs the detector, and puts the detection list on ``results``.
    Prints each frame's inference wall time to stdout.
    """
    detector = DetectionEngine(model)
    while True:
        if frameBuffer.empty():
            continue

        # Run inference: BGR (OpenCV) -> RGB, then wrap as a PIL image.
        frame = frameBuffer.get()
        rgb = Image.fromarray(frame[:, :, ::-1].copy())
        started = time.perf_counter()
        detections = detector.DetectWithImage(
            rgb, threshold=0.5, keep_aspect_ratio=True,
            relative_coord=False, top_k=10)
        print(time.perf_counter() - started, "sec")
        results.put(detections)
def overlay_on_image(frames, object_infos, label, camera_width, camera_height):
    """Draw detection boxes, captions, and FPS readouts on a copy of a frame.

    Returns ``frames`` unchanged when ``object_infos`` is None (nothing to
    draw yet); otherwise returns an annotated copy, leaving the input intact.
    """
    if object_infos is None:
        return frames

    canvas = frames.copy()
    for obj in object_infos:
        # relative_coord=False at inference time, so the box is in pixels.
        left, top, right, bottom = (int(v) for v in obj.bounding_box.flatten().tolist())
        cv2.rectangle(canvas, (left, top), (right, bottom), box_color, box_thickness)

        caption = label[obj.label_id] + " (" + str(int(obj.score * 100)) + "%)"

        # Caption sits just above the box, clamped so it stays on screen.
        text_w, text_h = cv2.getTextSize(caption, cv2.FONT_HERSHEY_SIMPLEX, 0.5, 1)[0]
        text_top = top - text_h
        if text_top < 1:
            text_top = 1
        text_bottom = text_top + text_h
        text_right = left + text_w
        cv2.rectangle(canvas, (left - 1, text_top - 1), (text_right + 1, text_bottom + 1), label_background_color, -1)
        cv2.putText(canvas, caption, (left, text_bottom), cv2.FONT_HERSHEY_SIMPLEX, 0.5, label_text_color, 1)

    # FPS readouts in the top-right corner.
    cv2.putText(canvas, fps, (camera_width-170,15), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (38,0,255), 1, cv2.LINE_AA)
    cv2.putText(canvas, detectfps, (camera_width-170,30), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (38,0,255), 1, cv2.LINE_AA)
    return canvas
if __name__ == '__main__':
    # Entry point: parse args, spawn the camera/display process and the
    # Edge TPU inference process, then idle until interrupted.
    parser = argparse.ArgumentParser()
    parser.add_argument("--model", default="mobilenet_ssd_v2_coco_quant_postprocess_edgetpu.tflite", help="Path of the detection model.")
    parser.add_argument("--label", default="coco_labels.txt", help="Path of the labels file.")
    parser.add_argument("--usbcamno", type=int, default=0, help="USB Camera number.")
    args = parser.parse_args()

    model = args.model
    label = ReadLabelFile(args.label)
    usbcamno = args.usbcamno

    camera_width = 320
    camera_height = 240
    vidfps = 30

    try:
        # 'forkserver' starts children from a clean server process instead of
        # forking this one, so camera/TPU handles are not inherited.
        mp.set_start_method('forkserver')
        frameBuffer = mp.Queue(10)  # bounded: camThread drops stale frames when full
        results = mp.Queue()

        # Start streaming (camera capture + display process).
        p = mp.Process(target=camThread,
                       args=(label, results, frameBuffer, camera_width, camera_height, vidfps, usbcamno),
                       daemon=True)
        p.start()
        processes.append(p)

        # Activation of inferencer (Edge TPU worker process).
        p = mp.Process(target=inferencer,
                       args=(results, frameBuffer, model, camera_width, camera_height),
                       daemon=True)
        p.start()
        processes.append(p)

        # Idle here; Ctrl-C (or any exception) falls through to cleanup.
        while True:
            sleep(1)
    finally:
        # Tear down both children on every exit path.  Iterate the list
        # directly instead of the original index loop over range(len(...)).
        for proc in processes:
            proc.terminate()
```
## 4−3.MobileNet-SSD v2 + Core i7 + USB 3.1 + TPU + Async mode (非同期マルチプロセス処理)
同じモデルとデータセットですが、めちゃくちゃ速いです。。。
**`60 FPS - 80 FPS`** の間で揺らいでいますが、転送レートが上がるだけでココまで差がでるとは。。。
速すぎてフレームレートが人力では読めません。。。
**`12 ms から 15 ms`** で1フレームを推論できるようです。
![ezgif.com-optimize (25).gif](https://qiita-image-store.s3.amazonaws.com/0/194769/5e5a971f-6382-8211-9f94-a170aab03e66.gif)
# 5.おわりに
実装が悪いのか、RaspberryPi3 では、画像の転送部分で USB 2.0 がボトルネックになりすぎてかなり遅いです。
NCS2 の 4リクエスト並列処理のほうが2倍のパフォーマンスが出ていました。
もう少しプログラムのブラッシュアップが必要かもしれませんが、USB 3.0 で本来の性能が出ていそうです。
なお、 USB 3.1 の同期処理では **`48 FPS`** 出ました。
USB 3.1 の非同期処理では上述のとおり **`60 FPS - 80 FPS`** 出ました。