More than 5 years have passed since last update.

Pythonistaで動画を撮影する

Posted at 2019-04-06

概要

今回は「AVFoundationを使用して、pythonで動画を撮影しフレーム単位で画像をアウトプットする。」ということを実施しました。
簡単ですが、撮影するプロセスをまとめて、実際に動作確認しました。

動画を取得する流れ

セッションの確立
AVCaptureSessionクラスで、動画や音声をインプットとして取得し、それをアウトプットとして保存、といった一連の作業を行います。
カメラの使用
AVCaptureVideoPreviewLayerクラスで、カメラで撮影した動画を画面に表示します。
動画の取得
AVCaptureMovieFileOutputクラスとAVCaptureVideoDataOutputクラスを使用してカメラから取得した動画を処理します。
最終的に撮影した画像をサーバで解析したいため、今回は動画をフレーム単位で扱えるAVCaptureVideoDataOutputを選択しました。
- AVCaptureMovieFileOutput：取得した動画をmp4などで保存する
- AVCaptureVideoDataOutput：取得した動画で、フレーム単位のアクセスを可能とする
フレームの扱い
以下のように、protocolsでAVCaptureVideoDataOutputSampleBufferDelegateを指定すると、カメラから取得した動画をサンプルバッファ（今回のケースだと動画のフレーム）として、methodsで指定したメソッドに引き渡します。
methodsは自分でコーディングするので、このメソッドの中でフレーム画像を処理できるようになります。
ですので、別のprotocolsを指定すれば、音声のサンプルをmethodsで処理できるようになるのかなと思います。

# Build the Objective-C delegate class: `methods` lists the Python callbacks
# to expose, `protocols` names the Objective-C protocol(s) the class adopts.
sampleBufferDelegate = create_objc_class(
                 'sampleBufferDelegate',
                 methods=[captureOutput_didOutputSampleBuffer_fromConnection_],
                 protocols=['AVCaptureVideoDataOutputSampleBufferDelegate'])

環境

Pythonista3 v3.2
iPad iOS 12.1.4

ソース

from objc_util import *
from ctypes import c_void_p, cast
import ui
import time
import console

# Run the control/processing step on only 1 of every 6 frames (5 fps).
FRAME_INTERVAL = 6  # 30fps / 6 = 5fps
# Position inside the current FRAME_INTERVAL cycle; the capture callback
# processes a frame only when this is 0.
frame_counter = 0
# Start time of the current FPS measurement window, and the number of frames
# counted in it. Both are updated by the capture callback.
last_fps_time = time.time()
fps_counter = 0

# Root ui.View of the app; assigned in main().
main_view = None

# Objective-C classes looked up by name through objc_util.
CIImage = ObjCClass('CIImage')
UIImage = ObjCClass('UIImage')

AVCaptureSession = ObjCClass('AVCaptureSession')
AVCaptureDevice = ObjCClass('AVCaptureDevice')
AVCaptureDeviceInput = ObjCClass('AVCaptureDeviceInput')
AVCaptureVideoDataOutput = ObjCClass('AVCaptureVideoDataOutput')
AVCaptureVideoPreviewLayer = ObjCClass('AVCaptureVideoPreviewLayer')

# C functions reached through objc_util's `c` library handle. Their ctypes
# prototypes are declared explicitly so pointers are passed and returned as
# pointers.
dispatch_get_current_queue = c.dispatch_get_current_queue
dispatch_get_current_queue.restype = c_void_p

CMSampleBufferGetImageBuffer = c.CMSampleBufferGetImageBuffer
CMSampleBufferGetImageBuffer.argtypes = [c_void_p]
CMSampleBufferGetImageBuffer.restype = c_void_p

# NOTE(review): the lock-flags parameter is declared as c_int here; confirm
# the width against the CoreVideo header.
CVPixelBufferLockBaseAddress = c.CVPixelBufferLockBaseAddress
CVPixelBufferLockBaseAddress.argtypes = [c_void_p, c_int]
CVPixelBufferLockBaseAddress.restype = None

CVPixelBufferUnlockBaseAddress = c.CVPixelBufferUnlockBaseAddress
CVPixelBufferUnlockBaseAddress.argtypes = [c_void_p, c_int]
CVPixelBufferUnlockBaseAddress.restype = None

# Capture request flag: while 1, the next processed frame is written to disk
# and shown in the console. The capture callback resets it to 0 and
# button_tapped() sets it back to 1.
debugflg = 1

# Core Image context, created once when the module is loaded.
ciContext = ObjCClass('CIContext')
ctx = ciContext.contextWithOptions_(None)

def write_output(out_ci_img, filename='.output.jpg'):
    """Render a CIImage to a JPEG file and return the file name.

    Args:
        out_ci_img: The CIImage (an objc_util ObjCInstance) to render.
        filename: Destination path of the JPEG file.

    Returns:
        ``filename``, so the result can be handed straight to a viewer.

    Raises:
        RuntimeError: If the image cannot be rendered, JPEG-encoded or
            written to ``filename``.
    """
    # Reuse the module-level CIContext instead of building one per call;
    # context creation is expensive and this may run for many frames.
    cg_img = ctx.createCGImage_fromRect_(out_ci_img, out_ci_img.extent())
    if not cg_img:
        raise RuntimeError('Failed to render CIImage to CGImage')

    c.CGImageRelease.argtypes = [c_void_p]
    c.CGImageRelease.restype = None
    try:
        ui_img = UIImage.imageWithCGImage_(cg_img)
    finally:
        # createCGImage returns an owned reference; release it whether or
        # not wrapping it in a UIImage succeeded.
        c.CGImageRelease(cg_img)
    if not ui_img:
        raise RuntimeError('Failed to create UIImage from CGImage')

    c.UIImageJPEGRepresentation.argtypes = [c_void_p, CGFloat]
    c.UIImageJPEGRepresentation.restype = c_void_p
    jpeg_ptr = c.UIImageJPEGRepresentation(ui_img.ptr, 0.75)
    if not jpeg_ptr:
        raise RuntimeError('Failed to encode image as JPEG')

    data = ObjCInstance(jpeg_ptr)
    if not data.writeToFile_atomically_(filename, True):
        raise RuntimeError('Failed to write {}'.format(filename))

    return filename

def captureOutput_didOutputSampleBuffer_fromConnection_(_self, _cmd, _output, _sample_buffer, _conn):
    """Delegate callback invoked for every captured video frame.

    Updates the FPS label about once per second and, on one frame out of
    every FRAME_INTERVAL, wraps the frame's pixel buffer in a CIImage. While
    ``debugflg`` is 1, that image is also written to disk and shown in the
    console, after which the flag is cleared.

    Args:
        _self: The delegate instance (unused).
        _cmd: The selector being invoked (unused).
        _output: The capture output that produced the frame (unused).
        _sample_buffer: Pointer to the CMSampleBuffer holding the frame.
        _conn: The capture connection (unused).
    """
    global frame_counter, fps_counter, last_fps_time, debugflg
    fps_counter += 1

    now = time.time()

    # Refresh the FPS label whenever the wall clock crosses a whole second.
    if int(now) > int(last_fps_time):
        label_fps.text = '{:5.2f} fps'.format((fps_counter) / (now - last_fps_time))
        last_fps_time = now
        fps_counter = 0

    # Advance the cycle counter up front so that an error while handling
    # this frame cannot pin the counter at 0 and make every frame retry.
    process_this_frame = frame_counter == 0
    frame_counter = (frame_counter + 1) % FRAME_INTERVAL
    if not process_this_frame:
        return

    _imageBuffer = CMSampleBufferGetImageBuffer(_sample_buffer)
    if not _imageBuffer:
        # The sample buffer carries no image data; nothing to process.
        return

    CVPixelBufferLockBaseAddress(_imageBuffer, 0)
    try:
        ciimage = CIImage.imageWithCVPixelBuffer_(ObjCInstance(_imageBuffer))

        print('ciimage:', ciimage)

        if debugflg == 1:
            # Clear the request first so a failing write is not retried on
            # every subsequent processed frame.
            debugflg = 0
            print('image output!')
            outfile = write_output(ciimage)
            console.show_image(outfile)
    finally:
        # Always pair the lock with an unlock, even if processing raised.
        CVPixelBufferUnlockBaseAddress(_imageBuffer, 0)

def button_tapped(sender):
    """Button action: log the tap and request that a frame be captured.

    Args:
        sender: The control that triggered the action (unused).
    """
    global debugflg
    print('tapped')
    # Setting the flag asks the capture callback to save and show a frame.
    debugflg = 1

# Objective-C class named 'sampleBufferDelegate' that exposes the Python
# callback above as a method and declares the
# AVCaptureVideoDataOutputSampleBufferDelegate protocol.
sampleBufferDelegate = create_objc_class(
                            'sampleBufferDelegate',
                            methods=[captureOutput_didOutputSampleBuffer_fromConnection_],
                            protocols=['AVCaptureVideoDataOutputSampleBufferDelegate'])

@on_main_thread
def main():
    """Set up the capture session and UI, then run until the view closes.

    Creates the sample-buffer delegate, wires the default video device into
    an AVCaptureSession with a video data output, and shows a live preview
    with a capture button and an FPS label. Once the view is dismissed the
    session is stopped and the Objective-C objects are released.

    Prints a message and returns early if no video device is available or
    the input/output cannot be attached to the session.
    """
    global main_view, label_fps, session, delegate, output

    delegate = sampleBufferDelegate.new()

    frame_w = 375
    frame_h = 550
    main_view = ui.View(frame=(0, 0, frame_w, frame_h))
    main_view.name = 'SampleBuffer'

    session = AVCaptureSession.alloc().init()
    # 'vide' is the media-type string for video.
    device = AVCaptureDevice.defaultDeviceWithMediaType_('vide')
    if not device:
        print('No video capture device available')
        return

    _input = AVCaptureDeviceInput.deviceInputWithDevice_error_(device, None)

    if not _input:
        print('Failed to create input')
        return
    if not session.canAddInput_(_input):
        print('Failed to add input to session')
        return
    session.addInput_(_input)
    print('succesed!')

    output = AVCaptureVideoDataOutput.alloc().init()
    # Frames are delivered on the queue this function is running on.
    # NOTE(review): dispatch_get_current_queue is deprecated by Apple;
    # consider passing an explicit queue instead.
    queue = ObjCInstance(dispatch_get_current_queue())
    output.setSampleBufferDelegate_queue_(delegate, queue)
    output.alwaysDiscardsLateVideoFrames = True

    if not session.canAddOutput_(output):
        print('Failed to add output to session')
        return
    session.addOutput_(output)

    session.sessionPreset = 'AVCaptureSessionPreset640x480'

    # Live camera preview filling the whole view.
    prev_layer = AVCaptureVideoPreviewLayer.layerWithSession_(session)
    prev_layer.frame = ObjCInstance(main_view).bounds()
    prev_layer.setVideoGravity_('AVLayerVideoGravityResizeAspectFill')
    ObjCInstance(main_view).layer().addSublayer_(prev_layer)

    # Button that requests a capture of the next processed frame.
    button = ui.Button(frame=(0, 0, frame_w, 30), flex='W', name='capture')
    button.background_color = (0, 0, 0, 0.5)
    button.tint_color = 'white'
    button.title = 'capture_start'
    button.action = button_tapped
    button.alignment = ui.ALIGN_CENTER

    main_view.add_subview(button)

    # Label showing the measured frame rate of the video data.
    label_fps = ui.Label(frame=(0, 30, frame_w, 30), flex='W', name='fps')
    label_fps.background_color = (0, 0, 0, 0.5)
    label_fps.text_color = 'white'
    label_fps.text = ''
    label_fps.alignment = ui.ALIGN_CENTER

    main_view.add_subview(label_fps)

    session.startRunning()
    try:
        main_view.present('sheet')
        main_view.wait_modal()
    finally:
        # Stop capturing and release the objects even if presenting failed.
        session.stopRunning()
        delegate.release()
        session.release()
        output.release()

# Start the capture UI only when this file is run as a script.
if __name__ == '__main__':

    main()

実行結果

アプリ起動後

アプリ停止後

consoleに画像が表示されましたが、横向きになってしまいましたね。。。

解説

captureOutput_didOutputSampleBuffer_fromConnection_では、引数の_sample_bufferからCMSampleBufferGetImageBufferで取り出した_imageBufferが、動画のフレームとして取得した画像になっています。
試しにどのような画像になっているか確かめるため、consoleモジュールでコンソール上に画像を表示しています。表示にあたっては、CIImageをUIImageに変換してJPEGファイルに書き出しています。

画像をフレーム単位で取得できたので、ソケット通信等でサーバに送信するといったことができそうです。

※参考
　https://gist.github.com/robo8080/dd1538665994b48972a62966b4369354
　画像クラスの違い(UIImage vs CGImage vs CIImage)

最後に

画像を取得できましたが、現在のソースだとcaptureOutput_didOutputSampleBuffer_fromConnection_の処理が途中で止まってしまう状況です。
※画像を保存しておくキューがいっぱいになると動作しなくなるのかなと思ってますが、まだ原因が分かっていないです。

もし、この辺で詳しい方がいらっしゃいましたらご教授いただけると幸いです。

You get articles that match your needs
You can efficiently read back useful information
You can use dark theme

What you can do with signing up