Trying out various ways to stream video with Flask ♬
Directory structure
flask_PJ
├── app.py
├── base_camera.py
├── camera***.py
└── templates
    └── index.html
app.py: the main application
#!/usr/bin/env python
from importlib import import_module
import os
from flask import Flask, render_template, Response

# import camera driver
# from camera_opencv import Camera
# from Camera_meanshift import Camera
# from camera import Camera
from camera_objectDetection import Camera

# Raspberry Pi camera module (requires picamera package)
# from camera_pi import Camera

app = Flask(__name__)


@app.route('/')
def index():
    """Video streaming home page."""
    return render_template('index.html')


def gen(camera):
    """Video streaming generator function."""
    while True:
        frame = camera.get_frame()
        yield (b'--frame\r\n'
               b'Content-Type: image/jpeg\r\n\r\n' + frame + b'\r\n')


@app.route('/video_feed')
def video_feed():
    """Video streaming route. Put this in the src attribute of an img tag."""
    return Response(gen(Camera()),
                    mimetype='multipart/x-mixed-replace; boundary=frame')
if __name__ == '__main__':
    app.run(host="0.0.0.0", debug=True, threaded=True)
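gen() produces an MJPEG stream (multipart/x-mixed-replace with a frame boundary), so it can also be checked from outside the browser. A minimal sketch, assuming the app above is running on localhost:5000 and the requests package is installed (neither is part of the project itself):

# check_stream.py : pull a few JPEG frames off the /video_feed stream
import requests

r = requests.get('http://localhost:5000/video_feed', stream=True)
buf = b''
frames = 0
for chunk in r.iter_content(chunk_size=4096):
    buf += chunk
    start = buf.find(b'\xff\xd8')  # JPEG start-of-image marker
    end = buf.find(b'\xff\xd9')    # JPEG end-of-image marker
    if start != -1 and end != -1 and end > start:
        jpg = buf[start:end + 2]
        buf = buf[end + 2:]
        frames += 1
        print('frame %d: %d bytes' % (frames, len(jpg)))
        if frames >= 10:
            break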
index.html: the display page
Video-Stream / templates / index.html
<html>
  <head>
    <title>Video Streaming Demonstration</title>
  </head>
  <body>
    <h1>Video Streaming Demonstration</h1>
    <img src="{{ url_for('video_feed') }}">
  </body>
</html>
camera.py: the simplest camera app
import time
from base_camera import BaseCamera


class Camera(BaseCamera):
    """An emulated camera implementation that streams a repeated sequence of
    files 1.jpg, 2.jpg and 3.jpg at a rate of one frame per second."""
    imgs = [open(f + '.jpg', 'rb').read() for f in ['1', '2', '3']]

    @staticmethod
    def frames():
        while True:
            time.sleep(1)
            yield Camera.imgs[int(time.time()) % 3]
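Every Camera class above inherits from base_camera.BaseCamera, which is not listed here; it comes from Miguel Grinberg's flask-video-streaming project, which this code is based on. What it provides, roughly: one background thread runs frames(), and every connected client blocks in get_frame() until a fresh frame arrives. A simplified sketch (the original also tracks per-client activity and stops the thread when nobody is watching):

# base_camera.py (simplified sketch)
import threading


class BaseCamera:
    thread = None                      # background frame-reading thread
    frame = None                       # latest frame, shared by all clients
    condition = threading.Condition()  # wakes clients when a frame arrives

    def __init__(self):
        # start the background thread on first use
        if BaseCamera.thread is None:
            BaseCamera.thread = threading.Thread(target=self._thread)
            BaseCamera.thread.daemon = True
            BaseCamera.thread.start()

    def get_frame(self):
        # block until the next frame, then return it
        with BaseCamera.condition:
            BaseCamera.condition.wait()
            return BaseCamera.frame

    @staticmethod
    def frames():
        raise RuntimeError('Must be implemented by subclasses.')

    @classmethod
    def _thread(cls):
        # pull frames from the subclass generator and publish them
        for frame in cls.frames():
            with BaseCamera.condition:
                BaseCamera.frame = frame
                BaseCamera.condition.notify_all()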
camera_pi.py: for the Raspberry Pi camera
import io
import time
import picamera
from base_camera import BaseCamera


class Camera(BaseCamera):
    @staticmethod
    def frames():
        with picamera.PiCamera() as camera:
            # let camera warm up
            time.sleep(2)

            stream = io.BytesIO()
            for _ in camera.capture_continuous(stream, 'jpeg',
                                               use_video_port=True):
                # return current frame
                stream.seek(0)
                yield stream.read()

                # reset stream for next frame
                stream.seek(0)
                stream.truncate()
camera_opencv.py: for a generic USB camera, etc.
Video-Stream / camera_opencv.py
import cv2
from base_camera import BaseCamera


class Camera(BaseCamera):
    video_source = 1

    @staticmethod
    def set_video_source(source):
        Camera.video_source = source

    @staticmethod
    def frames():
        camera = cv2.VideoCapture(Camera.video_source)
        if not camera.isOpened():
            raise RuntimeError('Could not start camera.')

        while True:
            # read current frame
            _, img = camera.read()

            # encode as a jpeg image and return it
            yield cv2.imencode('.jpg', img)[1].tobytes()
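Note that video_source = 1 selects the second camera on the machine; a built-in webcam is usually index 0. Rather than editing the file, the source can be set from whatever code creates the Camera. A sketch using a hypothetical CAMERA_SOURCE environment variable (not part of the code above):

# e.g. near the top of app.py
import os
from camera_opencv import Camera

# CAMERA_SOURCE is a made-up variable name; fall back to device 0
Camera.set_video_source(int(os.environ.get('CAMERA_SOURCE', 0)))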
Camera_meanshift.py: object detection with OpenCV's Meanshift or Camshift
Video-Stream / Camera_meanshift.py
# -*- coding: utf-8 -*-
from base_camera import BaseCamera
import numpy as np
import cv2
import time
from timeit import default_timer as timer
import keras
from keras.models import Model, Input
from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation, Flatten
from keras.applications.vgg16 import VGG16, preprocess_input, decode_predictions
from keras.preprocessing import image
import sys


def cv_fourcc(c1, c2, c3, c4):
    return (ord(c1) & 255) + ((ord(c2) & 255) << 8) + \
        ((ord(c3) & 255) << 16) + ((ord(c4) & 255) << 24)


class Camera(BaseCamera):
    video_source = 0

    @staticmethod
    def set_video_source(source):
        Camera.video_source = source

    @staticmethod
    def frames():
        cap = cv2.VideoCapture(Camera.video_source)
        # position and size of the tracking window
        x = 100
        y = 100
        w = 224
        h = 224
        track_window = (x, y, w, h)

        # grab a frame
        ret, frame = cap.read()
        cv2.waitKey(2)

        # decide on the window to track
        while True:
            ret, frame = cap.read()
            img_dst = cv2.rectangle(frame, (x, y), (x + w, y + h), 255, 2)
            cv2.imshow("SHOW MEANSHIFT IMAGE", img_dst)
            roi = frame[y:y + h, x:x + w]
            yield cv2.imencode('.jpg', img_dst)[1].tobytes()
            if cv2.waitKey(20) > 0:
                txt = yomikomi(roi)  # identify the object
                break

        # crop the inside of the tracking window and convert it to HSV
        hsv_roi = cv2.cvtColor(roi, cv2.COLOR_BGR2HSV)
        ## build the mask image
        img_mask = cv2.inRange(hsv_roi, np.array((0., 60., 32.)), np.array((180., 255., 255.)))
        ## histogram used for normalization
        roi_hist = cv2.calcHist([hsv_roi], [0], img_mask, [180], [0, 180])
        ## min-max normalization
        cv2.normalize(roi_hist, roi_hist, 0, 255, cv2.NORM_MINMAX)
        term_crit = (cv2.TERM_CRITERIA_EPS | cv2.TERM_CRITERIA_COUNT, 10, 1)

        OUT_FILE_NAME = "meanshift_result.mp4"
        FRAME_RATE = 8
        accum_time = 0
        curr_fps = 0
        fps = "FPS: ??"
        prev_time = timer()
        start_time = prev_time
        cv2.namedWindow('SHOW MEANSHIFT IMAGE')
        out = cv2.VideoWriter(OUT_FILE_NAME,
                              cv_fourcc('M', 'P', '4', 'V'),
                              FRAME_RATE,
                              (w, h),
                              True)
        ret, frame = cap.read()
        while True:
            #ret, frame = cap.read()
            if not ret:
                print("Done!")
                return
            # convert the frame to HSV
            hsv = cv2.cvtColor(frame, cv2.COLOR_BGR2HSV)
            # back-project the histogram computed above to get a similarity map
            dst = cv2.calcBackProject([hsv], [0], roi_hist, [0, 180], 1)
            # track the object
            ret, track_window = cv2.meanShift(dst, track_window, term_crit)
            #ret, track_window = cv2.CamShift(dst, track_window, term_crit)
            # draw the tracked window onto the original frame
            x, y, w, h = track_window
            img_dst = cv2.rectangle(frame, (x, y), (x + w, y + h), 255, 2)
            cv2.putText(img_dst, txt, (x + 3, y + 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 0), 1)
            curr_time = timer()
            exec_time = curr_time - prev_time
            prev_time = curr_time
            accum_time = accum_time + exec_time
            curr_fps = curr_fps + 1
            if accum_time > 1:
                accum_time = accum_time - 1
                fps = "FPS: " + str(curr_fps)
                curr_fps = 0
            cv2.putText(img_dst, fps, (30, 50), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 0), 1)
            cv2.imshow('SHOW MEANSHIFT IMAGE', img_dst)
            yield cv2.imencode('.jpg', img_dst)[1].tobytes()
            img_dst = cv2.resize(img_dst, (int(h), w))
            out.write(img_dst)
            # press q to quit
            k = cv2.waitKey(1)
            if k == ord('q'):
                out.release()
                break
            ret, frame = cap.read()
            #yield cv2.imencode('.jpg', img_dst)[1].tobytes()


def yomikomi(img):
    batch_size = 2
    num_classes = 1000
    img_rows, img_cols = img.shape[0], img.shape[1]
    input_tensor = Input((img_rows, img_cols, 3))
    # load a pretrained VGG16;
    # the ImageNet weights are loaded along with the architecture
    model = VGG16(weights='imagenet', include_top=True, input_tensor=input_tensor)
    model.summary()
    #model.load_weights('params_model_epoch_003.hdf5')
    # the crop passed in is 224x224, VGG16's default input size;
    # convert it to an array and add a batch dimension
    x = image.img_to_array(img)
    x = np.expand_dims(x, axis=0)
    #preds = model.predict(preprocess_input(x))
    preds = model.predict(x)
    results = decode_predictions(preds, top=1)[0]
    return str(results[0][1])
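Two details in yomikomi() are worth noting. VGG16 with include_top=True only accepts 224x224 inputs, which is why the tracking window is fixed at w = h = 224. Also, the ImageNet weights were trained on inputs normalized by preprocess_input, so predicting on the raw array (the preprocess_input call is commented out above) will likely degrade the labels, and rebuilding the model on every call is slow. A standalone sketch of the classification step with both points addressed, assuming roi is a 224x224 BGR crop from OpenCV:

# sketch: classify one OpenCV crop with VGG16
import cv2
import numpy as np
from keras.applications.vgg16 import VGG16, preprocess_input, decode_predictions

model = VGG16(weights='imagenet', include_top=True)  # build once, reuse per frame

def classify(roi):
    rgb = cv2.cvtColor(roi, cv2.COLOR_BGR2RGB)  # preprocess_input expects RGB order
    x = np.expand_dims(rgb.astype('float32'), axis=0)
    preds = model.predict(preprocess_input(x))
    return decode_predictions(preds, top=1)[0][0][1]  # top-1 class name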
camera_objectDetection.py: object detection with live video output
Video-Stream / camera_objectDetection.py
# -*- coding: utf-8 -*-
""" A class for testing a SSD model on a video file or webcam """
import time
from base_camera import BaseCamera
import cv2
import keras
from keras.applications.imagenet_utils import preprocess_input
from keras.backend.tensorflow_backend import set_session
from keras.models import Model
from keras.preprocessing import image
import pickle
import numpy as np
from random import shuffle
from scipy.misc import imread, imresize
from timeit import default_timer as timer
import sys
sys.path.append("..")
from ssd_utils import BBoxUtility
from ssd_v2 import SSD300v2 as SSD


def cv_fourcc(c1, c2, c3, c4):
    return (ord(c1) & 255) + ((ord(c2) & 255) << 8) + \
        ((ord(c3) & 255) << 16) + ((ord(c4) & 255) << 24)


class Camera(BaseCamera):
    video_source = 1

    @staticmethod
    def set_video_source(source):
        Camera.video_source = source

    @staticmethod
    def frames():
        video_path = 0
        start_frame = 0
        conf_thresh = 0.6
        input_shape = (480, 300, 3)
        class_names = ["background", "aeroplane", "bicycle", "bird", "boat",
                       "bottle", "bus", "car", "cat", "chair", "cow",
                       "diningtable", "dog", "horse", "motorbike", "person",
                       "pottedplant", "sheep", "sofa", "train", "tvmonitor"]
        NUM_CLASSES = len(class_names)
        num_classes = NUM_CLASSES

        # one distinct color per class, spread over the hue circle
        class_colors = []
        for i in range(0, num_classes):
            hue = 255 * i / num_classes
            col = np.zeros((1, 1, 3)).astype("uint8")
            col[0][0][0] = hue
            col[0][0][1] = 128  # Saturation
            col[0][0][2] = 255  # Value
            cvcol = cv2.cvtColor(col, cv2.COLOR_HSV2BGR)
            col = (int(cvcol[0][0][0]), int(cvcol[0][0][1]), int(cvcol[0][0][2]))
            class_colors.append(col)

        bbox_util = BBoxUtility(num_classes)
        model = SSD(input_shape, num_classes=NUM_CLASSES)
        model.load_weights('weights_SSD300.hdf5')

        INTERVAL = 33    # wait time (ms)
        FRAME_RATE = 20  # fps
        ORG_WINDOW_NAME = "org"
        #GRAY_WINDOW_NAME = "gray"
        #OUT_FILE_NAME = "real_SSD_result.mp4"
        vid = cv2.VideoCapture(Camera.video_source)
        width, height = input_shape[0], input_shape[1]  # input_shape
        """
        out = cv2.VideoWriter(OUT_FILE_NAME,
                              cv_fourcc('M', 'P', '4', 'V'),
                              FRAME_RATE,
                              (width, height),
                              True)
        """
        if not vid.isOpened():
            raise IOError(("Couldn't open video file or webcam. If you're "
                           "trying to open a webcam, make sure video_path is an integer!"))
        vidw = vid.get(cv2.CAP_PROP_FRAME_WIDTH)
        vidh = vid.get(cv2.CAP_PROP_FRAME_HEIGHT)
        vidar = vidw / vidh
        accum_time = 0
        curr_fps = 0
        fps = "FPS: ??"
        prev_time = timer()
        start_time = prev_time
        #cv2.namedWindow(ORG_WINDOW_NAME)

        while True:
            retval, orig_image = vid.read()
            if not retval:
                print("Done!")
                return

            im_size = (input_shape[1], input_shape[0])
            resized = cv2.resize(orig_image, im_size)
            rgb = cv2.cvtColor(resized, cv2.COLOR_BGR2RGB)
            to_draw = cv2.resize(resized, (int(input_shape[1] * vidar), input_shape[0]))

            inputs = [image.img_to_array(rgb)]  # rgb
            tmp_inp = np.array(inputs)
            x = preprocess_input(tmp_inp)
            y = model.predict(x)
            results = bbox_util.detection_out(y)

            if len(results) > 0 and len(results[0]) > 0:
                det_label = results[0][:, 0]
                det_conf = results[0][:, 1]
                det_xmin = results[0][:, 2]
                det_ymin = results[0][:, 3]
                det_xmax = results[0][:, 4]
                det_ymax = results[0][:, 5]
                top_indices = [i for i, conf in enumerate(det_conf) if conf >= conf_thresh]
                top_conf = det_conf[top_indices]
                top_label_indices = det_label[top_indices].tolist()
                top_xmin = det_xmin[top_indices]
                top_ymin = det_ymin[top_indices]
                top_xmax = det_xmax[top_indices]
                top_ymax = det_ymax[top_indices]

                for i in range(top_conf.shape[0]):
                    xmin = int(round(top_xmin[i] * to_draw.shape[1]))
                    ymin = int(round(top_ymin[i] * to_draw.shape[0]))
                    xmax = int(round(top_xmax[i] * to_draw.shape[1]))
                    ymax = int(round(top_ymax[i] * to_draw.shape[0]))
                    class_num = int(top_label_indices[i])
                    cv2.rectangle(to_draw, (xmin, ymin), (xmax, ymax),
                                  class_colors[class_num], 2)  # to_draw
                    text = class_names[class_num] + " " + ('%.2f' % top_conf[i])
                    text_top = (xmin, ymin - 10)
                    text_bot = (xmin + 80, ymin + 5)
                    text_pos = (xmin + 5, ymin)
                    cv2.rectangle(to_draw, text_top, text_bot, class_colors[class_num], -1)  # to_draw
                    cv2.putText(to_draw, text, text_pos, cv2.FONT_HERSHEY_SIMPLEX, 0.35, (0, 0, 0), 1)  # to_draw
                    print(text, " ")

            curr_time = timer()
            exec_time = curr_time - prev_time
            prev_time = curr_time
            accum_time = accum_time + exec_time
            curr_fps = curr_fps + 1
            if accum_time > 1:
                accum_time = accum_time - 1
                fps = "FPS: " + str(curr_fps)
                curr_fps = 0
            cv2.rectangle(to_draw, (0, 0), (50, 17), (255, 255, 255), -1)  # to_draw
            cv2.putText(to_draw, fps, (3, 10), cv2.FONT_HERSHEY_SIMPLEX, 0.35, (0, 0, 0), 1)  # to_draw
            #yield cv2.imencode('.jpg', to_draw)[1].tobytes()
            to_draw = cv2.resize(to_draw, (int(input_shape[0] * 1), input_shape[1]))
            #cv2.imshow(ORG_WINDOW_NAME, to_draw)  # to_draw
            #out.write(to_draw)  # add to_draw
            if cv2.waitKey(INTERVAL) >= 0:  # & 0xFF == ord('q'):
                break
            #elif curr_time - start_time >= 60:
            #    break
            yield cv2.imencode('.jpg', to_draw)[1].tobytes()

        vid.release()  # add
        #out.release()  # add
        cv2.destroyAllWindows()  # add
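This module additionally needs ssd_utils.BBoxUtility, the SSD300v2 model definition (ssd_v2.py) and the pretrained weights_SSD300.hdf5, placed one directory up (hence the sys.path.append("..")); they appear to come from a Keras port of SSD such as rykov8/ssd_keras. Judging from the slicing above, each row returned by detection_out() is [label, confidence, xmin, ymin, xmax, ymax] with coordinates normalized to 0..1, which is why the drawing loop multiplies by the frame size. That unpacking in isolation, as a sketch:

# sketch: scale one normalized detection row to pixel coordinates
import numpy as np

def scale_box(row, frame_w, frame_h):
    label, conf, xmin, ymin, xmax, ymax = row
    return (int(label), float(conf),
            int(round(xmin * frame_w)), int(round(ymin * frame_h)),
            int(round(xmax * frame_w)), int(round(ymax * frame_h)))

row = np.array([15, 0.92, 0.10, 0.20, 0.55, 0.90])  # e.g. class 15 ("person")
print(scale_box(row, 480, 300))  # -> (15, 0.92, 48, 60, 264, 270)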
Summary
・Tried several kinds of video streaming with Flask
・Laid out side by side, they all share the same essential structure
・So it is easy to play around, swapping pieces in and out with little effort