Flask: From Plain Video Streaming to Object-Detection Video Streaming, Side by Side♬

Posted at 2018-06-06

I tried out several ways of streaming video with Flask♬

Directory structure

flask_PJ
├── app.py
├── base_camera.py
├── camera***.py
└── templates
    └── index.html
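
All of the camera***.py modules below subclass BaseCamera from base_camera.py, which appears to come from Miguel Grinberg's flask-video-streaming project and is not reproduced in this article. As a rough guide only, here is a minimal sketch of its contract (the real class adds per-client events and an idle timeout; everything below is a simplified assumption):

import threading

class BaseCamera:
    thread = None  # background thread that pulls frames from the subclass
    frame = None   # latest frame, as JPEG bytes
    event = None   # signals that the first frame has arrived

    def __init__(self):
        if BaseCamera.thread is None:
            BaseCamera.event = threading.Event()
            BaseCamera.thread = threading.Thread(target=self._thread)
            BaseCamera.thread.daemon = True
            BaseCamera.thread.start()
            BaseCamera.event.wait()  # block until frames() has yielded once

    def get_frame(self):
        # hand back whatever the background thread stored last
        return BaseCamera.frame

    @staticmethod
    def frames():
        raise RuntimeError('Must be implemented by subclasses.')

    @classmethod
    def _thread(cls):
        for frame in cls.frames():
            cls.frame = frame
            cls.event.set()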

app.py: the main application

Video-Stream / app.py

app.py
#!/usr/bin/env python
from importlib import import_module
import os
from flask import Flask, render_template, Response

# import camera driver
#from camera_opencv import Camera
#from Camera_meanshift import Camera
#from camera import Camera
from camera_objectDetection import Camera
# Raspberry Pi camera module (requires picamera package)
# from camera_pi import Camera

app = Flask(__name__)

@app.route('/')
def index():
    """Video streaming home page."""
    return render_template('index.html')

def gen(camera):
    """Video streaming generator function."""
    while True:
        frame = camera.get_frame()
        yield (b'--frame\r\n'
               b'Content-Type: image/jpeg\r\n\r\n' + frame + b'\r\n')

@app.route('/video_feed')
def video_feed():
    """Video streaming route. Put this in the src attribute of an img tag."""
    return Response(gen(Camera()),
                    mimetype='multipart/x-mixed-replace; boundary=frame')

if __name__ == '__main__':
    app.run(host="0.0.0.0", debug=True, threaded=True)
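
The /video_feed response is one long multipart/x-mixed-replace stream in which every part is a complete JPEG. As a quick sanity check you can pull frames back out of the stream by scanning for the JPEG start/end markers; this snippet is my addition, not part of the repo, and the URL assumes Flask's default port on the same machine:

import requests

stream = requests.get('http://localhost:5000/video_feed', stream=True)
buf = b''
for chunk in stream.iter_content(chunk_size=1024):
    buf += chunk
    start = buf.find(b'\xff\xd8')  # JPEG start-of-image marker
    end = buf.find(b'\xff\xd9')    # JPEG end-of-image marker
    if start != -1 and end != -1 and end > start:
        jpg = buf[start:end + 2]
        buf = buf[end + 2:]
        print('got a frame of %d bytes' % len(jpg))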

index.html: the display page

Video-Stream / templates / index.html

index.html
<html>
  <head>
    <title>Video Streaming Demonstration</title>
  </head>
  <body>
    <h1>Video Streaming Demonstration</h1>
    <img src="{{ url_for('video_feed') }}">
  </body>
</html>

camera.py: the simplest camera app

Video-Stream / camera.py

camera.py
import time
from base_camera import BaseCamera

class Camera(BaseCamera):
    """An emulated camera implementation that streams a repeated sequence of
    files 1.jpg, 2.jpg and 3.jpg at a rate of one frame per second."""
    imgs = [open(f + '.jpg', 'rb').read() for f in ['1', '2', '3']]

    @staticmethod
    def frames():
        while True:
            time.sleep(1)
            yield Camera.imgs[int(time.time()) % 3]
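
Note that this class reads 1.jpg, 2.jpg, and 3.jpg from the working directory at import time and fails if they are missing. If you have none at hand, a throwaway helper like this (my addition, not part of the repo) generates placeholders:

import numpy as np
import cv2

# write three numbered solid-gray frames as 1.jpg, 2.jpg, 3.jpg
for i in (1, 2, 3):
    img = np.full((240, 320, 3), i * 60, dtype=np.uint8)
    cv2.putText(img, str(i), (140, 140), cv2.FONT_HERSHEY_SIMPLEX, 2, (255, 255, 255), 3)
    cv2.imwrite('%d.jpg' % i, img)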

camera_pi.py: for the Raspberry Pi camera

Video-Stream / camera_pi.py

camera_pi.py
import io
import time
import picamera
from base_camera import BaseCamera

class Camera(BaseCamera):
    @staticmethod
    def frames():
        with picamera.PiCamera() as camera:
            # let camera warm up
            time.sleep(2)

            stream = io.BytesIO()
            for _ in camera.capture_continuous(stream, 'jpeg', use_video_port=True):
                # return current frame
                stream.seek(0)
                yield stream.read()
                # reset stream for next frame
                stream.seek(0)
                stream.truncate()

camera_opencv.py: for an ordinary USB camera, etc.

Video-Stream / camera_opencv.py

camera_opencv.py
import cv2
from base_camera import BaseCamera

class Camera(BaseCamera):
    video_source = 1

    @staticmethod
    def set_video_source(source):
        Camera.video_source = source

    @staticmethod
    def frames():
        camera = cv2.VideoCapture(Camera.video_source)
        if not camera.isOpened():
            raise RuntimeError('Could not start camera.')
        while True:
            # read current frame
            _, img = camera.read()
            # encode as a jpeg image and return it
            yield cv2.imencode('.jpg', img)[1].tobytes()
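
Here video_source = 1 selects the second camera on the machine; on a typical laptop the built-in webcam is index 0. Since set_video_source() is exposed, app.py could switch devices before serving, for example:

from camera_opencv import Camera

# select the built-in webcam (device index 0) instead of the default 1
Camera.set_video_source(0)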

Camera_meanshift.py: object detection with OpenCV Meanshift / Camshift

Video-Stream / Camera_meanshift.py

Camera_meanshift.py
# -*- coding: utf-8 -*-
from base_camera import BaseCamera 
import numpy as np
import cv2
import time
from timeit import default_timer as timer

import keras
from keras.models import Model, Input
from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation, Flatten
from keras.applications.vgg16 import VGG16, preprocess_input, decode_predictions
from keras.preprocessing import image
import sys

def cv_fourcc(c1, c2, c3, c4):
        return (ord(c1) & 255) + ((ord(c2) & 255) << 8) + \
            ((ord(c3) & 255) << 16) + ((ord(c4) & 255) << 24)

class Camera(BaseCamera):
    video_source = 0

    @staticmethod
    def set_video_source(source):
        Camera.video_source = source

    @staticmethod
    def frames():
        cap = cv2.VideoCapture(Camera.video_source)
        # position and size of the window to track
        x = 100
        y = 100
        w = 224
        h = 224
        track_window = (x, y, w, h)

        # grab a frame
        ret,frame = cap.read()
        cv2.waitKey(2) 
        # decide the window to track
        while True:
            ret,frame = cap.read()
            img_dst = cv2.rectangle(frame, (x,y), (x+w, y+h), 255, 2)
            cv2.imshow("SHOW MEANSHIFT IMAGE",img_dst)
            roi = frame[y:y+h, x:x+w]
            yield cv2.imencode('.jpg', img_dst)[1].tobytes()
            if cv2.waitKey(20)>0:
                txt = yomikomi(roi)  # identify the object (VGG16 classification)
                break
        # crop the inside of the tracking window and convert it to HSV
        hsv_roi = cv2.cvtColor(roi, cv2.COLOR_BGR2HSV)
        ## generate the mask image
        img_mask = cv2.inRange(hsv_roi, np.array((0., 60.,32.)), np.array((180.,255.,255.)))
        ## generate the histogram used as the tracking feature
        roi_hist = cv2.calcHist([hsv_roi], [0], img_mask, [180], [0,180])
        ## min-max normalize the histogram
        cv2.normalize(roi_hist, roi_hist, 0, 255, cv2.NORM_MINMAX)
        term_crit = ( cv2.TERM_CRITERIA_EPS | cv2.TERM_CRITERIA_COUNT, 10, 1 )

        OUT_FILE_NAME = "meanshift_result.mp4"
        FRAME_RATE=8
        accum_time = 0
        curr_fps = 0
        fps = "FPS: ??"
        prev_time = timer()
        start_time=prev_time
        cv2.namedWindow('SHOW MEANSHIFT IMAGE')
        out = cv2.VideoWriter(OUT_FILE_NAME, \
                  cv_fourcc('M', 'P', '4', 'V'), \
                  FRAME_RATE, \
                  (w, h), \
                  True)
        ret, frame = cap.read()
        while(True):
            #ret, frame = cap.read()

            if not ret:
                print("Done!")
                return

            # convert the frame to HSV
            hsv = cv2.cvtColor(frame, cv2.COLOR_BGR2HSV)
            # back-project the histogram computed above to get a per-pixel similarity map
            dst = cv2.calcBackProject([hsv],[0],roi_hist,[0,180], 1)
            # detect the object (mean shift)
            ret, track_window = cv2.meanShift(dst, track_window, term_crit)
            #ret, track_window = cv2.CamShift(dst, track_window, term_crit)
            # draw the window found by the detector on the original frame
            x,y,w,h = track_window
            img_dst = cv2.rectangle(frame, (x,y), (x+w, y+h), 255, 2)
            cv2.putText(img_dst, txt, (x+3,y+10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0,0,0), 1)
            curr_time = timer()
            exec_time = curr_time - prev_time
            prev_time = curr_time
            accum_time = accum_time + exec_time
            curr_fps = curr_fps + 1
            if accum_time > 1:
                accum_time = accum_time - 1
                fps = "FPS: " + str(curr_fps)
                curr_fps = 0
            cv2.putText(img_dst, fps, (30,50), cv2.FONT_HERSHEY_SIMPLEX, 1, (0,0,0), 1)
            cv2.imshow('SHOW MEANSHIFT IMAGE', img_dst)
            yield cv2.imencode('.jpg', img_dst)[1].tobytes()
            img_dst = cv2.resize(img_dst, (w, h))  # match the (w, h) frame size the VideoWriter was opened with
            out.write(img_dst)

            # press q to quit
            k = cv2.waitKey(1)
            if k == ord('q'):
                out.release()
                break
            ret, frame = cap.read()
            #yield cv2.imencode('.jpg', img_dst)[1].tobytes()

def yomikomi(img):
    batch_size = 2
    num_classes = 1000
    img_rows, img_cols = img.shape[0], img.shape[1]
    input_tensor = Input((img_rows, img_cols, 3))

    # load the pretrained VGG16
    # both the architecture and the trained weights are loaded
    model = VGG16(weights='imagenet', include_top=True, input_tensor=input_tensor)
    model.summary()
    #model.load_weights('params_model_epoch_003.hdf5')

    # the ROI passed in is already 224x224, VGG16's default input size
    # convert the image to an array and add a batch dimension
    x = image.img_to_array(img)
    x = np.expand_dims(x, axis=0)
    #preds = model.predict(preprocess_input(x))
    preds = model.predict(x)
    results = decode_predictions(preds, top=1)[0]
    return str(results[0][1])
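
Because include_top=True, VGG16 only accepts its default 224x224 input here, which is exactly why the tracking window above is 224x224. decode_predictions returns (class_id, class_name, score) tuples, and the function keeps only the top class name. A hypothetical standalone check, feeding random noise just to see the plumbing work:

import numpy as np

# feed a random 224x224 BGR array and print whatever class VGG16 guesses
dummy = np.random.randint(0, 255, (224, 224, 3), dtype=np.uint8)
print(yomikomi(dummy))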

camera_objectDetection.py: stream the video while running object detection

Video-Stream / camera_objectDetection.py

camera_objectDetection.py
# -*- coding: utf-8 -*-
""" A class for testing a SSD model on a video file or webcam """
import time
from base_camera import BaseCamera
import cv2
import keras
from keras.applications.imagenet_utils import preprocess_input
from keras.backend.tensorflow_backend import set_session
from keras.models import Model
from keras.preprocessing import image 
import pickle
import numpy as np
from random import shuffle
from scipy.misc import imread, imresize
from timeit import default_timer as timer
import sys
sys.path.append("..")
from ssd_utils import BBoxUtility
from ssd_v2 import SSD300v2 as SSD

def cv_fourcc(c1, c2, c3, c4):
        return (ord(c1) & 255) + ((ord(c2) & 255) << 8) + \
            ((ord(c3) & 255) << 16) + ((ord(c4) & 255) << 24)

class Camera(BaseCamera):
    video_source = 1

    @staticmethod
    def set_video_source(source):
        Camera.video_source = source

    @staticmethod
    def frames():
        video_path = 0 
        start_frame = 0 
        conf_thresh = 0.6
        input_shape = (480,300,3)
        class_names = ["background", "aeroplane", "bicycle", "bird", "boat", "bottle", "bus", "car", "cat", "chair", "cow", "diningtable", "dog", "horse", "motorbike", "person", "pottedplant", "sheep", "sofa", "train", "tvmonitor"]
        NUM_CLASSES = len(class_names)
        num_classes=NUM_CLASSES
        class_colors = []
        for i in range(0, num_classes):
            hue = 255*i/num_classes
            col = np.zeros((1,1,3)).astype("uint8")
            col[0][0][0] = hue
            col[0][0][1] = 128 # Saturation
            col[0][0][2] = 255 # Value
            cvcol = cv2.cvtColor(col, cv2.COLOR_HSV2BGR)
            col = (int(cvcol[0][0][0]), int(cvcol[0][0][1]), int(cvcol[0][0][2]))
            class_colors.append(col) 
        bbox_util = BBoxUtility(num_classes)
        model = SSD(input_shape, num_classes=NUM_CLASSES)
        model.load_weights('weights_SSD300.hdf5') 

        INTERVAL = 33    # wait time (ms) for cv2.waitKey
        FRAME_RATE = 20  # fps
        ORG_WINDOW_NAME = "org"
        #GRAY_WINDOW_NAME = "gray"
        #OUT_FILE_NAME = "real_SSD_result.mp4"

        vid = cv2.VideoCapture(Camera.video_source)
        width, height = input_shape[0], input_shape[1]  #input_shape
        """
        out = cv2.VideoWriter(OUT_FILE_NAME, \
                      cv_fourcc('M', 'P', '4', 'V'), \
                      FRAME_RATE, \
                      (width, height), \
                      True)
        """
        if not vid.isOpened():
            raise IOError(("Couldn't open video file or webcam. If you're "
            "trying to open a webcam, make sure your video_path is an integer!"))

        vidw = vid.get(cv2.CAP_PROP_FRAME_WIDTH)
        vidh = vid.get(cv2.CAP_PROP_FRAME_HEIGHT)
        vidar = vidw/vidh
        accum_time = 0
        curr_fps = 0
        fps = "FPS: ??"
        prev_time = timer()
        start_time=prev_time
        #cv2.namedWindow(ORG_WINDOW_NAME)

        while True:
            retval, orig_image = vid.read()
            if not retval:
                print("Done!")
                return

            im_size = (input_shape[1], input_shape[0])  
            resized = cv2.resize(orig_image, im_size)
            rgb = cv2.cvtColor(resized, cv2.COLOR_BGR2RGB)

            to_draw = cv2.resize(resized, (int(input_shape[1]*vidar), input_shape[0]))

            inputs = [image.img_to_array(rgb)]  #rgb
            tmp_inp = np.array(inputs)
            x = preprocess_input(tmp_inp)
            y = model.predict(x)

            results = bbox_util.detection_out(y)

            if len(results) > 0 and len(results[0]) > 0:
                det_label = results[0][:, 0]
                det_conf = results[0][:, 1]
                det_xmin = results[0][:, 2]
                det_ymin = results[0][:, 3]
                det_xmax = results[0][:, 4]
                det_ymax = results[0][:, 5]

                top_indices = [i for i, conf in enumerate(det_conf) if conf >= conf_thresh]

                top_conf = det_conf[top_indices]
                top_label_indices = det_label[top_indices].tolist()
                top_xmin = det_xmin[top_indices]
                top_ymin = det_ymin[top_indices]
                top_xmax = det_xmax[top_indices]
                top_ymax = det_ymax[top_indices]

                for i in range(top_conf.shape[0]):
                    xmin = int(round(top_xmin[i] * to_draw.shape[1]))
                    ymin = int(round(top_ymin[i] * to_draw.shape[0]))
                    xmax = int(round(top_xmax[i] * to_draw.shape[1]))
                    ymax = int(round(top_ymax[i] * to_draw.shape[0]))

                    class_num = int(top_label_indices[i])
                    cv2.rectangle(to_draw, (xmin, ymin), (xmax, ymax), 
                              class_colors[class_num], 2)   #to_draw
                    text = class_names[class_num] + " " + ('%.2f' % top_conf[i])

                    text_top = (xmin, ymin-10)
                    text_bot = (xmin + 80, ymin + 5)
                    text_pos = (xmin + 5, ymin)
                    cv2.rectangle(to_draw, text_top, text_bot, class_colors[class_num], -1)  #to_draw
                    cv2.putText(to_draw, text, text_pos, cv2.FONT_HERSHEY_SIMPLEX, 0.35, (0,0,0), 1)  #to_draw
                    print(text," ")
            curr_time = timer()
            exec_time = curr_time - prev_time
            prev_time = curr_time
            accum_time = accum_time + exec_time
            curr_fps = curr_fps + 1
            if accum_time > 1:
                accum_time = accum_time - 1
                fps = "FPS: " + str(curr_fps)
                curr_fps = 0

            cv2.rectangle(to_draw, (0,0), (50, 17), (255,255,255), -1)  #to_draw
            cv2.putText(to_draw, fps, (3,10), cv2.FONT_HERSHEY_SIMPLEX, 0.35, (0,0,0), 1) #to_draw
            #yield cv2.imencode('.jpg', to_draw)[1].tobytes()
            to_draw = cv2.resize(to_draw, (int(input_shape[0]*1), input_shape[1]))
            #cv2.imshow(ORG_WINDOW_NAME, to_draw)  #to_draw
            #out.write(to_draw)  #add to_draw

            if cv2.waitKey(INTERVAL)>= 0:   # & 0xFF == ord('q'):
                break
            #elif curr_time-start_time>=60:
            #    break
            yield cv2.imencode('.jpg', to_draw)[1].tobytes()
        vid.release()   #add
        #out.release()   #add
        cv2.destroyAllWindows() #add
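
For reference, detection_out returns one array per image whose rows are [label, conf, xmin, ymin, xmax, ymax], with coordinates normalized to [0, 1]; that is why the loop above multiplies them by the drawing size. The same unpacking on a fabricated row (class 15 is "person" in the class_names list above; the values are made up for illustration):

import numpy as np

# one fake detection: "person" at 83% confidence in the left half of the frame
results = [np.array([[15, 0.83, 0.0, 0.1, 0.5, 0.9]])]
det_conf = results[0][:, 1]
keep = [i for i, conf in enumerate(det_conf) if conf >= 0.6]
print(results[0][keep])  # only rows above the confidence threshold survive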

Summary

・Tried out several kinds of video streaming with Flask
・Lined up side by side, the essential structure is the same every time (see the sketch below)
・It shows you can play with all sorts of variations without much effort
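
To make that concrete: every variant above is just a subclass of BaseCamera whose frames() yields one JPEG per frame. A do-nothing sketch of the shared skeleton (streams solid gray frames, my illustration only):

import time
import cv2
import numpy as np
from base_camera import BaseCamera

class Camera(BaseCamera):
    """Streams a solid gray 320x240 frame at roughly 10 fps."""
    @staticmethod
    def frames():
        frame = np.full((240, 320, 3), 128, dtype=np.uint8)
        while True:
            time.sleep(0.1)
            # each yield becomes one multipart part in /video_feed
            yield cv2.imencode('.jpg', frame)[1].tobytes()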
