# Benchmarking MobileNet-SSD speed with the Google Edge TPU Accelerator on a RaspberryPi3 (USB 2.0) and a Laptop PC (USB 3.0)
# 1. Introduction
I got hold of an Edge TPU Accelerator, so I immediately implemented MobileNet-SSD with both synchronous and asynchronous processing to compare its performance against the NCS2.
![bbb.png](https://qiita-image-store.s3.amazonaws.com/0/194769/582993ef-f86b-7be7-4afc-9bbbb174e829.png)
# 2. Installation
It is very easy and is over in an instant.

```bash:Install_the_Edge_TPU_Python_library
$ wget http://storage.googleapis.com/cloud-iot-edge-pretrained-models/edgetpu_api.tar.gz
$ tar xzf edgetpu_api.tar.gz
$ cd python-tflite-source
$ bash ./install.sh
```

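Before moving on, a one-line import check confirms the Python API is actually in place (just a sanity check; `DetectionEngine` is the same class the detection scripts later in this article use):

```bash:Check_the_install
$ python3 -c "from edgetpu.detection.engine import DetectionEngine; print('Edge TPU Python API: OK')"
```
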
# 3. A quick taste with the test program

```bash:Run_test
# From the python-tflite-source directory
cd edgetpu/

python3 demo/classify_image.py \
--model test_data/mobilenet_v2_1.0_224_inat_bird_quant_edgetpu.tflite \
--label test_data/inat_bird_labels.txt \
--image test_data/parrot.jpg
```

![parrot.jpg](https://qiita-image-store.s3.amazonaws.com/0/194769/398b121f-6e2f-2e01-e3ed-5b3200fa9346.jpeg)

Hmm. That was about my reaction.

```bash:Results
---------------------------
Ara macao (Scarlet Macaw)
Score : 0.61328125
---------------------------
Platycercus elegans (Crimson Rosella)
Score : 0.15234375
```

**Reference URLs**
**[Built for the Edge TPU - Models - URL](https://coral.withgoogle.com/models/)**
**[Edge TPU API overview & demos - URL](https://coral.withgoogle.com/tutorials/edgetpu-api/#api-demos)**

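For readers who want to see what the demo is doing with the API, here is a rough sketch of its core steps. This is not the demo's actual source: it assumes the `ClassificationEngine` class and its `ClassifyWithImage()` method from the same `edgetpu` package, so treat those names as unverified here.

```python:classify_sketch.py
# Rough sketch only (assumed API: edgetpu.classification.engine.ClassificationEngine
# and ClassifyWithImage(), installed by the steps in section 2).
from edgetpu.classification.engine import ClassificationEngine
from PIL import Image

def read_label_file(file_path):
    # Each line is "<id> <name>".
    ret = {}
    with open(file_path, 'r') as f:
        for line in f:
            pair = line.strip().split(maxsplit=1)
            if len(pair) == 2:
                ret[int(pair[0])] = pair[1].strip()
    return ret

engine = ClassificationEngine("test_data/mobilenet_v2_1.0_224_inat_bird_quant_edgetpu.tflite")
labels = read_label_file("test_data/inat_bird_labels.txt")
img = Image.open("test_data/parrot.jpg")

# top_k=2 corresponds to the two candidates shown in the results above.
for label_id, score in engine.ClassifyWithImage(img, threshold=0.1, top_k=2):
    print(labels[label_id], "Score :", score)
```
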
# 4. Results

## 4-1. MobileNet-SSD v2 + RaspberryPi3 + USB 2.0 + TPU + Sync mode (synchronous processing)
![ezgif.com-video-to-gif (6).gif](https://qiita-image-store.s3.amazonaws.com/0/194769/01581295-4267-23eb-5f81-a4f669a28406.gif)

```python:MobileNet-SSD-TPU-sync.py
import argparse
import platform
import numpy as np
import cv2
import time
from PIL import Image
from edgetpu.detection.engine import DetectionEngine


# Function to read labels from text files.
def ReadLabelFile(file_path):
    with open(file_path, 'r') as f:
        lines = f.readlines()
    ret = {}
    for line in lines:
        pair = line.strip().split(maxsplit=1)
        ret[int(pair[0])] = pair[1].strip()
    return ret


def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("--model", default="mobilenet_ssd_v2_coco_quant_postprocess_edgetpu.tflite", help="Path of the detection model.")
    parser.add_argument("--label", default="coco_labels.txt", help="Path of the labels file.")
    parser.add_argument("--usbcamno", type=int, default=0, help="USB Camera number.")
    args = parser.parse_args()

    fps = ""
    detectfps = ""
    framecount = 0
    detectframecount = 0
    time1 = 0
    time2 = 0
    box_color = (255, 128, 0)
    box_thickness = 1
    label_background_color = (125, 175, 75)
    label_text_color = (255, 255, 255)
    percentage = 0.0

    camera_width = 320
    camera_height = 240

    cap = cv2.VideoCapture(args.usbcamno)
    cap.set(cv2.CAP_PROP_FPS, 150)
    cap.set(cv2.CAP_PROP_FRAME_WIDTH, camera_width)
    cap.set(cv2.CAP_PROP_FRAME_HEIGHT, camera_height)

    # Initialize engine.
    engine = DetectionEngine(args.model)
    labels = ReadLabelFile(args.label) if args.label else None

    while True:
        t1 = time.perf_counter()

        ret, color_image = cap.read()
        if not ret:
            break

        # Run inference.
        prepimg = color_image[:, :, ::-1].copy()
        prepimg = Image.fromarray(prepimg)

        tinf = time.perf_counter()
        ans = engine.DetectWithImage(prepimg, threshold=0.5, keep_aspect_ratio=True, relative_coord=False, top_k=10)
        print(time.perf_counter() - tinf, "sec")

        # Display result.
        if ans:
            detectframecount += 1
            for obj in ans:
                box = obj.bounding_box.flatten().tolist()
                box_left = int(box[0])
                box_top = int(box[1])
                box_right = int(box[2])
                box_bottom = int(box[3])
                cv2.rectangle(color_image, (box_left, box_top), (box_right, box_bottom), box_color, box_thickness)

                percentage = int(obj.score * 100)
                label_text = labels[obj.label_id] + " (" + str(percentage) + "%)"

                label_size = cv2.getTextSize(label_text, cv2.FONT_HERSHEY_SIMPLEX, 0.5, 1)[0]
                label_left = box_left
                label_top = box_top - label_size[1]
                if (label_top < 1):
                    label_top = 1
                label_right = label_left + label_size[0]
                label_bottom = label_top + label_size[1]
                cv2.rectangle(color_image, (label_left - 1, label_top - 1), (label_right + 1, label_bottom + 1), label_background_color, -1)
                cv2.putText(color_image, label_text, (label_left, label_bottom), cv2.FONT_HERSHEY_SIMPLEX, 0.5, label_text_color, 1)

        cv2.putText(color_image, fps, (camera_width-170,15), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (38,0,255), 1, cv2.LINE_AA)
        cv2.putText(color_image, detectfps, (camera_width-170,30), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (38,0,255), 1, cv2.LINE_AA)

        cv2.namedWindow('USB Camera', cv2.WINDOW_AUTOSIZE)
        cv2.imshow('USB Camera', color_image)

        if cv2.waitKey(1)&0xFF == ord('q'):
            break

        # FPS calculation
        framecount += 1
        if framecount >= 15:
            fps = "(Playback) {:.1f} FPS".format(time1/15)
            detectfps = "(Detection) {:.1f} FPS".format(detectframecount/time2)
            framecount = 0
            detectframecount = 0
            time1 = 0
            time2 = 0
        t2 = time.perf_counter()
        elapsedTime = t2-t1
        time1 += 1/elapsedTime
        time2 += elapsedTime

if __name__ == '__main__':
    main()
```

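The `--model` and `--label` defaults point at the quantized COCO MobileNet-SSD v2 files; I am assuming they sit next to the script, downloaded from the "Built for the Edge TPU" models page linked in section 3. With that in place the script runs with its defaults spelled out like this:

```bash:Run_sync
$ python3 MobileNet-SSD-TPU-sync.py --model mobilenet_ssd_v2_coco_quant_postprocess_edgetpu.tflite --label coco_labels.txt --usbcamno 0
```
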
## 4-2. MobileNet-SSD v2 + RaspberryPi3 + USB 2.0 + TPU + Async mode (asynchronous multi-process processing)
![ezgif.com-video-to-gif (7).gif](https://qiita-image-store.s3.amazonaws.com/0/194769/47d090f5-d535-cdd6-492d-ef6ae3f0c971.gif)

```python:MobileNet-SSD-TPU-async.py
import argparse
import platform
import numpy as np
import cv2
import time
from PIL import Image
from time import sleep
import multiprocessing as mp
from edgetpu.detection.engine import DetectionEngine

lastresults = None
processes = []
frameBuffer = None
results = None
fps = ""
detectfps = ""
framecount = 0
detectframecount = 0
time1 = 0
time2 = 0
box_color = (255, 128, 0)
box_thickness = 1
label_background_color = (125, 175, 75)
label_text_color = (255, 255, 255)
percentage = 0.0

# Function to read labels from text files.
def ReadLabelFile(file_path):
    with open(file_path, 'r') as f:
        lines = f.readlines()
    ret = {}
    for line in lines:
        pair = line.strip().split(maxsplit=1)
        ret[int(pair[0])] = pair[1].strip()
    return ret


def camThread(label, results, frameBuffer, camera_width, camera_height, vidfps, usbcamno):

    global fps
    global detectfps
    global framecount
    global detectframecount
    global time1
    global time2
    global lastresults
    global cam
    global window_name

    cam = cv2.VideoCapture(usbcamno)
    cam.set(cv2.CAP_PROP_FPS, vidfps)
    cam.set(cv2.CAP_PROP_FRAME_WIDTH, camera_width)
    cam.set(cv2.CAP_PROP_FRAME_HEIGHT, camera_height)
    window_name = "USB Camera"
    cv2.namedWindow(window_name, cv2.WINDOW_AUTOSIZE)

    while True:
        t1 = time.perf_counter()

        ret, color_image = cam.read()
        if not ret:
            continue
        if frameBuffer.full():
            frameBuffer.get()
        frames = color_image
        frameBuffer.put(color_image.copy())
        res = None

        if not results.empty():
            res = results.get(False)
            detectframecount += 1
            imdraw = overlay_on_image(frames, res, label, camera_width, camera_height)
            lastresults = res
        else:
            imdraw = overlay_on_image(frames, lastresults, label, camera_width, camera_height)

        cv2.imshow('USB Camera', imdraw)

        if cv2.waitKey(1)&0xFF == ord('q'):
            break

        # FPS calculation
        framecount += 1
        if framecount >= 15:
            fps = "(Playback) {:.1f} FPS".format(time1/15)
            detectfps = "(Detection) {:.1f} FPS".format(detectframecount/time2)
            framecount = 0
            detectframecount = 0
            time1 = 0
            time2 = 0
        t2 = time.perf_counter()
        elapsedTime = t2-t1
        time1 += 1/elapsedTime
        time2 += elapsedTime



def inferencer(results, frameBuffer, model, camera_width, camera_height):

    engine = DetectionEngine(model)

    while True:

        if frameBuffer.empty():
            continue

        # Run inference.
        color_image = frameBuffer.get()
        prepimg = color_image[:, :, ::-1].copy()
        prepimg = Image.fromarray(prepimg)

        tinf = time.perf_counter()
        ans = engine.DetectWithImage(prepimg, threshold=0.5, keep_aspect_ratio=True, relative_coord=False, top_k=10)
        print(time.perf_counter() - tinf, "sec")
        results.put(ans)



def overlay_on_image(frames, object_infos, label, camera_width, camera_height):

    color_image = frames

    if isinstance(object_infos, type(None)):
        return color_image
    img_cp = color_image.copy()

    for obj in object_infos:
        box = obj.bounding_box.flatten().tolist()
        box_left = int(box[0])
        box_top = int(box[1])
        box_right = int(box[2])
        box_bottom = int(box[3])
        cv2.rectangle(img_cp, (box_left, box_top), (box_right, box_bottom), box_color, box_thickness)

        percentage = int(obj.score * 100)
        label_text = label[obj.label_id] + " (" + str(percentage) + "%)"

        label_size = cv2.getTextSize(label_text, cv2.FONT_HERSHEY_SIMPLEX, 0.5, 1)[0]
        label_left = box_left
        label_top = box_top - label_size[1]
        if (label_top < 1):
            label_top = 1
        label_right = label_left + label_size[0]
        label_bottom = label_top + label_size[1]
        cv2.rectangle(img_cp, (label_left - 1, label_top - 1), (label_right + 1, label_bottom + 1), label_background_color, -1)
        cv2.putText(img_cp, label_text, (label_left, label_bottom), cv2.FONT_HERSHEY_SIMPLEX, 0.5, label_text_color, 1)

    cv2.putText(img_cp, fps, (camera_width-170,15), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (38,0,255), 1, cv2.LINE_AA)
    cv2.putText(img_cp, detectfps, (camera_width-170,30), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (38,0,255), 1, cv2.LINE_AA)

    return img_cp

if __name__ == '__main__':

    parser = argparse.ArgumentParser()
    parser.add_argument("--model", default="mobilenet_ssd_v2_coco_quant_postprocess_edgetpu.tflite", help="Path of the detection model.")
    parser.add_argument("--label", default="coco_labels.txt", help="Path of the labels file.")
    parser.add_argument("--usbcamno", type=int, default=0, help="USB Camera number.")
    args = parser.parse_args()

    model = args.model
    label = ReadLabelFile(args.label)
    usbcamno = args.usbcamno

    camera_width = 320
    camera_height = 240
    vidfps = 30

    try:
        mp.set_start_method('forkserver')
        frameBuffer = mp.Queue(10)
        results = mp.Queue()

        # Start streaming
        p = mp.Process(target=camThread,
                       args=(label, results, frameBuffer, camera_width, camera_height, vidfps, usbcamno),
                       daemon=True)
        p.start()
        processes.append(p)

        # Activation of inferencer
        p = mp.Process(target=inferencer,
                       args=(results, frameBuffer, model, camera_width, camera_height),
                       daemon=True)
        p.start()
        processes.append(p)

        while True:
            sleep(1)

    finally:
        for p in range(len(processes)):
            processes[p].terminate()
```

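A quick note on the structure: `camThread` and `inferencer` run as separate processes. `camThread` grabs frames, puts a copy into `frameBuffer` (an `mp.Queue(10)` that discards the oldest frame when full, so the camera loop never blocks), and overlays either the newest detections pulled from `results` or the previous `lastresults`. `inferencer` simply pulls frames from `frameBuffer`, runs `DetectWithImage()`, and pushes the detections back through `results`. It is launched the same way as the synchronous version:

```bash:Run_async
$ python3 MobileNet-SSD-TPU-async.py --model mobilenet_ssd_v2_coco_quant_postprocess_edgetpu.tflite --label coco_labels.txt --usbcamno 0
```
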
## 4-3. MobileNet-SSD v2 + Core i7 + USB 3.1 + TPU + Async mode (asynchronous multi-process processing)
It is ridiculously fast...
The rate fluctuates between **`60 FPS - 80 FPS`**, but I did not expect that raising the transfer rate alone would make this much of a difference...
It is so fast that I cannot read the frame rate by eye...
![ezgif.com-optimize (25).gif](https://qiita-image-store.s3.amazonaws.com/0/194769/5e5a971f-6382-8211-9f94-a170aab03e66.gif)

# 5. Closing remarks
It may be my implementation, but on the RaspberryPi3 the USB 2.0 image transfer is such a severe bottleneck that it is quite slow.
The NCS2 with 4 parallel inference requests was delivering twice the performance.
The program could probably use some more polishing, but it is clear that the device only shows its true performance over USB 3.0.
For reference, synchronous processing over USB 3.1 reached **`48 FPS`**.
Asynchronous processing over USB 3.1 reached **`60 FPS - 80 FPS`**.