More than 3 years have passed since last update.

【Vision・Core ML・iOS・Swift】リアルタイム映像のオブジェクトを識別するPART2

Last updated at 2020-11-09 / Posted at 2020-11-08

この記事は何か？

リアルタイム映像のオブジェクトを識別するの続きです。ソースコードを読んでいきます。

カメラからリアルタイム映像をキャプチャするコード

実装を省略して、全体を見てみます。
サンプルコードには、下記のViewControllerクラスを継承した「別のビューコントローラ」があります。実際のところは、そちらで物体認識が行われます。

ViewController全体

/// Base view controller that owns the capture session and acts as the
/// sample-buffer delegate of the video data output.
///
/// Abridged overview: method bodies shown as `{...}` are elided here.
/// `captureOutput(_:didOutput:from:)` is intentionally empty in this class.
class ViewController: UIViewController, AVCaptureVideoDataOutputSampleBufferDelegate {
    
    // Pixel dimensions of the camera's active format; filled in by setupAVCapture()
    var bufferSize: CGSize = .zero
    // Layer of `previewView`; the preview layer is added to it as a sublayer
    var rootLayer: CALayer! = nil
    
    // The only UI view; it hosts the camera preview
    @IBOutlet weak private var previewView: UIView!
    private let session = AVCaptureSession()
    // Created in setupAVCapture(); nil until then and after teardownAVCapture()
    private var previewLayer: AVCaptureVideoPreviewLayer! = nil
    private let videoDataOutput = AVCaptureVideoDataOutput()
    
    // Queue passed to the video data output for delivering delegate callbacks
    private let videoDataOutputQueue = DispatchQueue(label: "VideoDataOutput", 
                                                       qos: .userInitiated, 
                                                attributes: [], 
                                      autoreleaseFrequency: .workItem)
    
    // Called for each captured frame; empty here
    func captureOutput(_ output: AVCaptureOutput, didOutput sampleBuffer: CMSampleBuffer, from connection: AVCaptureConnection) {
    }
    
    override func viewDidLoad() {
        super.viewDidLoad()
        setupAVCapture()
    }
    
    override func didReceiveMemoryWarning() {
        super.didReceiveMemoryWarning()
    }
    
    // Configures the session (input, output) and installs the preview layer
    func setupAVCapture() {...}
    
    // Starts the capture session
    func startCaptureSession() {...}
    
    // Clean up capture setup
    func teardownAVCapture() {...}
    
    // Called when a frame is dropped; does nothing
    func captureOutput(_ captureOutput: AVCaptureOutput, didDrop didDropSampleBuffer: CMSampleBuffer, from connection: AVCaptureConnection) {
        // print("frame dropped")
    }
    
    // Maps the current device orientation to an image orientation
    public func exifOrientationFromDeviceOrientation() -> CGImagePropertyOrientation {...}
}

プロパティ

UIとなるビューは、UIView型のpreviewViewだけです。これが、カメラからの映像を表示します。

メンバープロパティ

    // Pixel dimensions of the camera's active format; filled in by setupAVCapture()
    var bufferSize: CGSize = .zero
    // Layer of `previewView`; the preview layer is added to it as a sublayer
    var rootLayer: CALayer! = nil
    
    // The only UI view; it hosts the camera preview
    @IBOutlet weak private var previewView: UIView!
    // Capture session configured in setupAVCapture()
    private let session = AVCaptureSession()
    // Created in setupAVCapture(); nil until then and after teardownAVCapture()
    private var previewLayer: AVCaptureVideoPreviewLayer! = nil
    // Output that hands captured frames to the sample-buffer delegate
    private let videoDataOutput = AVCaptureVideoDataOutput()
    
    // Queue passed to the video data output for delivering delegate callbacks
    private let videoDataOutputQueue = DispatchQueue(label: "VideoDataOutput", 
                                                       qos: .userInitiated, 
                                                attributes: [], 
                                      autoreleaseFrequency: .workItem)

メソッド

captureOutput(_:didOutput:from:) メソッド

    /// Delegate callback that receives each captured video frame.
    ///
    /// Intentionally empty in this class.
    func captureOutput(_ output: AVCaptureOutput, didOutput sampleBuffer: CMSampleBuffer, from connection: AVCaptureConnection) {
        // Implemented by subclasses
    }

viewDidLoad()メソッドとdidReceiveMemoryWarning()メソッド

ビューコントローラのライフサイクルメソッドです。

    /// Sets up the capture pipeline once the view has loaded.
    /// The session is only configured here; this method does not start it.
    override func viewDidLoad() {
        super.viewDidLoad()
        setupAVCapture()
        // session.startRunning() // To merely display video, the session could presumably be started here
    }
    
    /// Forwards the memory warning to the superclass; no additional cleanup is performed.
    override func didReceiveMemoryWarning() {
        super.didReceiveMemoryWarning()
    }

setupAVCapture()メソッド

事実上、アプリ起動後に呼ばれる最初のメソッドです。

セッション作成、入力と出力の追加、プレビュー設定

/// Builds the capture pipeline and installs the camera preview.
///
/// Steps:
/// 1. Looks up the back wide-angle camera and wraps it in a device input.
/// 2. Inside a `beginConfiguration()` / `commitConfiguration()` pair, sets the
///    session preset and adds the input and the video data output.
/// 3. Records the camera's active-format dimensions in `bufferSize`.
/// 4. Creates the preview layer and adds it to the layer of `previewView`.
///
/// Logs and returns early when no camera is found, the input cannot be created,
/// or the input/output cannot be added to the session. In those cases
/// `previewLayer` and `rootLayer` are left unset.
func setupAVCapture() {
    // Look up the back wide-angle camera. Discovery can return no device
    // (for example on the simulator), so bail out instead of force-unwrapping.
    let discovery = AVCaptureDevice.DiscoverySession(deviceTypes: [.builtInWideAngleCamera],
                                                     mediaType: .video,
                                                     position: .back)
    guard let videoDevice = discovery.devices.first else {
        print("Could not find a back wide-angle camera")
        return
    }

    // Wrap the camera in a capture input
    let deviceInput: AVCaptureDeviceInput
    do {
        deviceInput = try AVCaptureDeviceInput(device: videoDevice)
    } catch {
        print("Could not create video device input: \(error)")
        return
    }

    /* Session configuration starts here */
    session.beginConfiguration()
    session.sessionPreset = .vga640x480 // Slightly larger than the Vision model's input

    // Add the device input to the session
    guard session.canAddInput(deviceInput) else {
        print("Could not add video device input to the session")
        session.commitConfiguration()
        return
    }
    session.addInput(deviceInput)

    // Add the video data output to the session
    guard session.canAddOutput(videoDataOutput) else {
        print("Could not add video data output to the session")
        session.commitConfiguration()
        return
    }
    session.addOutput(videoDataOutput)
    videoDataOutput.alwaysDiscardsLateVideoFrames = true
    videoDataOutput.videoSettings = [kCVPixelBufferPixelFormatTypeKey as String: Int(kCVPixelFormatType_420YpCbCr8BiPlanarFullRange)]
    videoDataOutput.setSampleBufferDelegate(self, queue: videoDataOutputQueue)

    // Always process the frames
    let captureConnection = videoDataOutput.connection(with: .video)
    captureConnection?.isEnabled = true

    do {
        /* Lock the device for configuration while reading its active format */
        try videoDevice.lockForConfiguration()
        let dimensions = CMVideoFormatDescriptionGetDimensions(videoDevice.activeFormat.formatDescription)
        bufferSize.width = CGFloat(dimensions.width)
        bufferSize.height = CGFloat(dimensions.height)
        videoDevice.unlockForConfiguration()
        /* Device unlocked */
    } catch {
        // Locking failed: `bufferSize` keeps its previous value
        print(error)
    }
    session.commitConfiguration()
    /* Session configuration ends here */

    // Preview layer for the session
    previewLayer = AVCaptureVideoPreviewLayer(session: session)
    previewLayer.videoGravity = AVLayerVideoGravity.resizeAspectFill
    rootLayer = previewView.layer           // Use the UI preview view's layer as the root layer
    previewLayer.frame = rootLayer.bounds   // Size the session preview to match the UI
    rootLayer.addSublayer(previewLayer)
}

セッションの設定内容

セッションの設定はbeginConfiguration()メソッドで開始して、commitConfiguration()メソッドで完了します。

setupAVCapture()メソッドより抜粋

    /* Session configuration starts here */
    session.beginConfiguration()
    session.sessionPreset = .vga640x480 // Slightly larger than the Vision model's input
    
    // Add the device input to the session
    guard session.canAddInput(deviceInput) else {
        // If it cannot be added, leave the method
        print("Could not add video device input to the session")
        session.commitConfiguration()
        return
    }
    session.addInput(deviceInput)
    
    // Add the video data output to the session
    if session.canAddOutput(videoDataOutput) {
        session.addOutput(videoDataOutput)
        videoDataOutput.alwaysDiscardsLateVideoFrames = true
        videoDataOutput.videoSettings = [kCVPixelBufferPixelFormatTypeKey as String: Int(kCVPixelFormatType_420YpCbCr8BiPlanarFullRange)]
        videoDataOutput.setSampleBufferDelegate(self, queue: videoDataOutputQueue)
    } else {
        // If it cannot be added, leave the method
        print("Could not add video data output to the session")
        session.commitConfiguration()
        return
    }
    let captureConnection = videoDataOutput.connection(with: .video)
    // Always process the frames
    captureConnection?.isEnabled = true
    do {
        /* Lock the device for configuration from here */
        try videoDevice!.lockForConfiguration()
        let dimensions = CMVideoFormatDescriptionGetDimensions((videoDevice?.activeFormat.formatDescription)!)
        bufferSize.width = CGFloat(dimensions.width)
        bufferSize.height = CGFloat(dimensions.height)
        videoDevice!.unlockForConfiguration()
        /* Device unlocked */
    } catch {
        // Locking failed: print the error
        print(error)
    }
    session.commitConfiguration()
    /* Session configuration ends here */

startCaptureSession()メソッド

セッションを開始します。

    /// Starts the capture session configured by `setupAVCapture()`.
    // NOTE(review): this calls `startRunning()` on whatever thread the caller is on — confirm that is acceptable.
    func startCaptureSession() {
        session.startRunning()
    }

teardownAVCapture()メソッド

AVCaptureのプレビューレイヤーをまっさらな状態にします。

キャプチャ設定をクリーンアップする

    /// Removes the preview layer from its superlayer and releases it.
    ///
    /// Safe to call when no preview layer exists, e.g. when `setupAVCapture()`
    /// returned early or this method has already run.
    func teardownAVCapture() {
        // `previewLayer` is implicitly unwrapped and may still be nil here;
        // optional chaining turns the call into a no-op instead of a crash.
        previewLayer?.removeFromSuperlayer()
        previewLayer = nil
    }

captureOutput(_:didDrop:from:)メソッド

    /// Delegate callback invoked when a video frame is dropped.
    /// Does nothing; the logging line is left commented out.
    func captureOutput(_ captureOutput: AVCaptureOutput, didDrop didDropSampleBuffer: CMSampleBuffer, from connection: AVCaptureConnection) {
        // print("frame dropped")
    }

exifOrientationFromDeviceOrientation() -> CGImagePropertyOrientationメソッド

デバイスの向きに基づいて、適切な画像方向を取得する

    /// Returns the image orientation that corresponds to the device's current
    /// physical orientation, as reported by `UIDevice.current.orientation`.
    ///
    /// - Returns: `.left` for portrait upside down, `.upMirrored` for landscape left,
    ///   `.down` for landscape right, and `.up` for portrait and every other orientation.
    public func exifOrientationFromDeviceOrientation() -> CGImagePropertyOrientation {
        // NOTE(review): `.landscapeLeft` is the only case mapped to a mirrored
        // orientation — confirm this is intended.
        switch UIDevice.current.orientation {
        case .portraitUpsideDown:
            return .left
        case .landscapeLeft:
            return .upMirrored
        case .landscapeRight:
            return .down
        default:
            // `.portrait` and all remaining orientations share the same result
            return .up
        }
    }

You get articles that match your needs
You can efficiently read back useful information
You can use dark theme

What you can do with signing up