More than 5 years have passed since last update.

OpenCV の VideoCapture() とカスケード型分類器、Google Cloud Vision API の使用例

Last updated at 2019-04-12 / Posted at 2019-04-08

import sys
import cv2

def test1():
    """Show the live feed of camera 0 until the Enter key is pressed.

    The loop also ends when the camera stops delivering frames (for
    example because it could not be opened), instead of passing an empty
    frame to ``cv2.imshow``.
    """
    cam = cv2.VideoCapture(0)

    try:
        while True:
            ok, img = cam.read()
            if not ok:
                # No frame was grabbed; there is nothing to display.
                break

            cv2.imshow('PUSH ENTER KEY', img)

            # 13 is the key code reported for Enter.
            if cv2.waitKey(1) == 13:
                break
    finally:
        # Always give the device and the window back, even on an error.
        cam.release()
        cv2.destroyAllWindows()

def test2():
    """Show a blurred grayscale feed of camera 0 until Enter is pressed.

    Exits the interpreter via ``sys.exit()`` when the camera cannot be
    opened. The loop ends early when a frame cannot be read.
    """
    cam = cv2.VideoCapture(0)

    if not cam.isOpened():
        sys.exit()

    try:
        while True:
            ok, frame = cam.read()
            if not ok:
                # No frame was grabbed; cvtColor would fail on it.
                break

            gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
            # Kernel size (0, 0) lets OpenCV derive it from sigma (5).
            blur = cv2.GaussianBlur(gray, (0, 0), 5)
            cv2.imshow('PUSH ENTER KEY', blur)

            # 13 is the key code reported for Enter.
            if cv2.waitKey(1) == 13:
                break
    finally:
        cam.release()
        cv2.destroyAllWindows()

def main():
    """Run a simple motion detector on camera 0 until Enter is pressed.

    Three consecutive frames are compared with ``check_image``. When the
    number of changed pixels exceeds the threshold, the newest frame is
    shown and saved as ``<save_path><n>.jpg`` with an increasing counter;
    otherwise the difference mask itself is shown.
    """
    save_path = './'

    # Start capturing from the camera.
    cam = cv2.VideoCapture(0)
    try:
        # Initialise the three-frame window with one and the same frame --- (*1)
        img1 = img2 = img3 = get_image(cam)
        th = 300   # number of changed pixels that counts as motion
        num = 1    # running index used for the saved file names
        while True:
            # Stop when the Enter key (code 13) is pressed.
            if cv2.waitKey(1) == 13:
                break
            # Compute the difference mask --- (*2)
            diff = check_image(img1, img2, img3)
            # More than `th` changed pixels is treated as motion --- (*3)
            cnt = cv2.countNonZero(diff)
            if cnt > th:
                print("カメラに動きを検出")
                cv2.imshow('PUSH ENTER KEY', img3)
                # Save the frame in which motion was detected --- (*4)
                cv2.imwrite('{}{}.jpg'.format(save_path, num), img3)
                num += 1
            else:
                cv2.imshow('PUSH ENTER KEY', diff)
            # Slide the comparison window forward by one frame --- (*5)
            img1, img2, img3 = img2, img3, get_image(cam)
    finally:
        # Clean up. The camera is not reopened afterwards: the original
        # code created a new VideoCapture here that was never released.
        cam.release()
        cv2.destroyAllWindows()
    
# Check whether there was movement between three consecutive frames.
def check_image(img1, img2, img3):
    """Return a binary mask of the pixels that changed across three frames.

    Args:
        img1: Oldest frame (colour image in OpenCV's BGR channel order).
        img2: Middle frame, same size as ``img1``.
        img3: Newest frame, same size as ``img1``.

    Returns:
        A single-channel image in which pixels that differ both between
        ``img1``/``img2`` and between ``img2``/``img3`` are 255 and all
        others are 0, after median filtering.
    """
    # Convert to grayscale --- (*6)
    # Frames read through cv2.VideoCapture are BGR, so use the BGR
    # conversion like the rest of this file (RGB2GRAY would swap the red
    # and blue weights).
    gray1 = cv2.cvtColor(img1, cv2.COLOR_BGR2GRAY)
    gray2 = cv2.cvtColor(img2, cv2.COLOR_BGR2GRAY)
    gray3 = cv2.cvtColor(img3, cv2.COLOR_BGR2GRAY)
    # Absolute differences of neighbouring frames --- (*7)
    diff1 = cv2.absdiff(gray1, gray2)
    diff2 = cv2.absdiff(gray2, gray3)
    # Bitwise AND of the two differences --- (*8)
    diff_and = cv2.bitwise_and(diff1, diff2)
    # Binarise: values above 30 become 255, the rest 0 --- (*9)
    _, diff_wb = cv2.threshold(diff_and, 30, 255, cv2.THRESH_BINARY)
    # Remove speckle noise with a 5x5 median filter --- (*10)
    diff = cv2.medianBlur(diff_wb, 5)
    return diff
   
# Grab one image from the camera.
def get_image(cam):
    """Read one frame from ``cam`` and return it resized to 600x400.

    Args:
        cam: An object with a ``read()`` method returning
            ``(success, frame)``, such as ``cv2.VideoCapture``.

    Returns:
        The frame resized to a width of 600 and a height of 400 pixels.

    Raises:
        RuntimeError: If no frame could be read from ``cam``.
    """
    ok, img = cam.read()
    if not ok or img is None:
        # Fail with a clear message instead of letting cv2.resize choke
        # on an empty frame.
        raise RuntimeError('could not read a frame from the camera')
    return cv2.resize(img, (600, 400))

# Script entry point: runs the motion-detection demo (main).
# test1() and test2() are the simpler camera previews, left disabled.
if __name__ == '__main__':
    #test1()
    #test2()
    main()

import sys
import cv2
from time import sleep
import subprocess

def detect_face1():
    """Report whether a face is found in the still image ``data/temp1.jpg``.

    Prints ``detected`` or ``no face``. Exits via ``sys.exit`` with a
    message when the cascade file or the image cannot be loaded.
    """
    face_cascade_path = 'haarcascade_frontalface_default.xml'
    face_cascade = cv2.CascadeClassifier(face_cascade_path)
    if face_cascade.empty():
        # A missing or unreadable XML file yields an empty classifier.
        sys.exit('cannot load cascade file: ' + face_cascade_path)

    image_path = 'data/temp1.jpg'
    src = cv2.imread(image_path)
    if src is None:
        # imread signals failure by returning None rather than raising.
        sys.exit('cannot read image: ' + image_path)
    src_gray = cv2.cvtColor(src, cv2.COLOR_BGR2GRAY)

    faces = face_cascade.detectMultiScale(src_gray)

    # len() is used on purpose: the result may be a NumPy array, whose
    # truth value is ambiguous.
    if len(faces) == 0:
        print('no face')
    else:
        print('detected')

def detect_face2():
    """Watch camera 0 and print ``detected`` whenever a face is visible.

    Only faces of at least 150x150 pixels are considered. After each
    detection the loop pauses for three seconds. Stop it with Ctrl+C,
    which prints ``ok``. The loop also ends when a frame cannot be read.
    Exits via ``sys.exit`` when the camera or the cascade file cannot be
    opened.
    """
    cam = cv2.VideoCapture(0)

    if not cam.isOpened():
        sys.exit()

    face_cascade_path = 'haarcascade_frontalface_default.xml'
    face_cascade = cv2.CascadeClassifier(face_cascade_path)
    if face_cascade.empty():
        # A missing or unreadable XML file yields an empty classifier.
        cam.release()
        sys.exit('cannot load cascade file: ' + face_cascade_path)

    MIN_SIZE = (150, 150)

    try:
        while True:
            ok, img = cam.read()
            if not ok:
                # No frame was grabbed; stop instead of crashing in cvtColor.
                break
            img_gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

            faces = face_cascade.detectMultiScale(img_gray,
                                                  minSize=MIN_SIZE)

            if len(faces) == 0:
                continue

            print('detected')
            # Throttle the output while the face stays in view.
            sleep(3)

    except KeyboardInterrupt:
        print('ok')
    finally:
        # Release the device on every exit path, not just after Ctrl+C.
        cam.release()
        cv2.destroyAllWindows()

def test():
    """Run the ``dir`` command through the system shell.

    The command's exit status is ignored and nothing is returned.
    """
    command = 'dir'
    subprocess.call(command, shell=True)

    
# Script entry point: runs the webcam face-detection loop (detect_face2).
# detect_face1() (still image) and test() (shell command) are left disabled.
if __name__ == '__main__':
    #detect_face1()
    detect_face2()
    #test()

def test3():
    """Record camera 0 to ``output.avi`` while showing a live preview.

    Recording stops when any key is pressed or when a frame cannot be
    read. The file is written with the XVID codec at 30 fps.
    """
    cap = cv2.VideoCapture(0)
    fps = 30

    # The writer's frame size has to match the frames handed to it, so
    # ask the camera for its size and fall back to 640x480 if it does
    # not report one.
    width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    size = (width, height) if width > 0 and height > 0 else (640, 480)

    # Output settings: XVID goes into an AVI container, which is also the
    # file name test4() reads back.
    fourcc = cv2.VideoWriter_fourcc(*'XVID')   # 'MJPG', 'XVID'
    video = cv2.VideoWriter('output.avi', fourcc, fps, size)

    try:
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                # No frame was grabbed; nothing to show or write.
                break

            # Show the frame.
            cv2.imshow('frame', frame)

            # Append it to the video file.
            video.write(frame)

            # waitKey returns -1 when no key was pressed, so any key stops.
            if cv2.waitKey(1) >= 0:
                break
    finally:
        # Clean up on every exit path so the file is finalised.
        cap.release()
        video.release()
        cv2.destroyAllWindows()

def test4():
    """Play back ``output.avi`` in a window until it ends or ``q`` is pressed."""
    cap = cv2.VideoCapture('output.avi')   # open the video file for reading
    try:
        while cap.isOpened():
            ret, frame = cap.read()   # read the next frame of the file
            if not ret:
                # End of the file (or a read error): stop instead of
                # processing an empty frame.
                break

            # Show the colour frame. For a grayscale view, convert with
            # cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY) and show that instead.
            cv2.imshow('frame', frame)

            if cv2.waitKey(1) & 0xFF == ord('q'):   # leave when 'q' is pressed
                break
    finally:
        cap.release()             # release the capture
        cv2.destroyAllWindows()   # close the window

# Real time face detection
def test5():
    """Show camera 0 with red rectangles around detected faces.

    Every frame is displayed, with or without faces, so the preview keeps
    updating and a key press is always noticed. Any key ends the loop, as
    does a frame that cannot be read.
    """
    face_cascade_path = 'haarcascade_frontalface_default.xml'
    face_cascade = cv2.CascadeClassifier(face_cascade_path)

    cam = cv2.VideoCapture(0)
    color = (0, 0, 255)   # red in BGR order

    try:
        while True:
            ok, img = cam.read()
            if not ok:
                # No frame was grabbed; cvtColor would fail on it.
                break

            img_gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
            faces = face_cascade.detectMultiScale(img_gray)

            # With no faces this loop simply does not run.
            for x, y, w, h in faces:
                cv2.rectangle(img, (x, y), (x + w, y + h),
                              color=color, thickness=2)
            cv2.imshow('Push any key.', img)

            # waitKey returns -1 when no key was pressed.
            if cv2.waitKey(1) >= 0:
                break
    finally:
        cam.release()
        cv2.destroyAllWindows()

# Real time object detection with Google API
def test6():
    """Show camera 0 annotated with Google Cloud Vision object detections.

    Each frame is JPEG-encoded and sent to the object-localization
    endpoint. Every returned object is drawn as a red box with a
    ``name score`` label. The frame is displayed whether or not objects
    were found. Enter ends the loop, as does a frame that cannot be read.
    """
    from google.cloud import vision

    client = vision.ImageAnnotatorClient()
    # google-cloud-vision 2.x exposes Image at package level; older
    # releases only have it under vision.types.
    image_cls = getattr(vision, 'Image', None) or vision.types.Image

    cam = cv2.VideoCapture(0)
    color = (0, 0, 255)   # red in BGR order
    font = cv2.FONT_HERSHEY_SIMPLEX

    try:
        while True:
            ok, img = cam.read()
            if not ok:
                # No frame was grabbed; nothing to encode or show.
                break
            height, width = img.shape[:2]

            # imencode expects OpenCV's native BGR order, so the frame is
            # encoded as it is; converting to RGB first would swap red and
            # blue in the JPEG that is uploaded.
            encoded, enc_img = cv2.imencode(".jpg", img)

            objects = []
            if encoded:
                # tobytes() replaces the deprecated ndarray.tostring().
                image = image_cls(content=enc_img.tobytes())
                objects = client.object_localization(
                    image=image).localized_object_annotations

            for obj in objects:
                # Vertices are normalised to [0, 1]; index 0 and 2 are used
                # as opposite corners of the box.
                vertices = obj.bounding_poly.normalized_vertices
                x1 = int(vertices[0].x * width)
                y1 = int(vertices[0].y * height)
                x2 = int(vertices[2].x * width)
                y2 = int(vertices[2].y * height)
                cv2.rectangle(img, (x1, y1), (x2, y2),
                              color=color, thickness=1)

                label = '{} {:.2f}'.format(obj.name, obj.score)
                cv2.putText(img, label, (x1, y2),
                            font, 0.6, (255, 255, 255), 2, cv2.LINE_AA)

            # Show once per frame, after all boxes have been drawn.
            cv2.imshow('Push Enter Key.', img)

            # 13 is the key code reported for Enter.
            if cv2.waitKey(1) == 13:
                break
    finally:
        cam.release()
        cv2.destroyAllWindows()

You get articles that match your needs
You can efficiently read back useful information
You can use dark theme

What you can do by signing up