ncnnでRobustVideoMattingを動かしてみる

Last updated at 2023-12-10Posted at 2023-12-10

概要

画像や動画から人物の切り抜きができる「RobustVideoMatting」をncnnで動かす方法について紹介します。

PC環境

構築手順に入る前に、今回使用したPC環境を示します。以下のバージョンのUbuntuをインストールしたPC上で環境を構築しました。

M5UnitV2のクロスコンパイル環境を構築する手順

Ubuntu 22.04.3 LTS(x64)
OpenCV ver 4.4.0
ncnn ver 231027

Minicondaのインストール

まず最初に、Minicondaをインストールして、Pythonの環境を作ります。
Minicondaのインストーラは、Minicondaのウェブサイトからダウンロードして取得します。

% wget https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh
% sh Miniconda3-latest-Linux-x86_64.sh

Miniconda上で、pythonの仮想環境を作成します。

% conda create -n _unitv2 python=3.8

作成したcondaの仮想環境をアクティブ化します。アクティブ化の作業は、シェルを立ち上げるごとに必要です。
次に、PyTorchをインストールします。

$ conda activate _unitv2
$ conda install pytorch torchvision torchaudio cpuonly -c pytorch
$ conda install opencv onnx
$ pip install pycocotools torchsummary

pnnxのインストール

PyTorchの学習モデルをncnnの学習モデルへ変換するツールである、pnnxをダウンロードします。
カレントディレクトリ、もしくはパスの通っているところへpnnxのバイナリをコピーします。
https://github.com/pnnx/pnnx/releases

% mkdir RobustVideoMatting
% wget https://github.com/pnnx/pnnx/releases/download/20231127/pnnx-20231127-ubuntu.zip
% unzip pnnx-20231127-ubuntu.zip
% cp ./pnnx-20231127-ubuntu/pnnx ./pnnx

RobustVideoMattingの学習モデルのダウンロード・PNNXでの変換

pythonのスクリプトで、PyTorchのHubからRobustVideoMattingの学習モデルをダウンロードしてきます。
そして、pnnxでの変換を行います。

% gedit rvm_convert.py

rvm_convert.py

# Downloads the RobustVideoMatting model from PyTorch Hub, traces it to
# TorchScript and converts the traced file to ncnn format with pnnx.
import os
import torch

# File name shared by the trace step and the pnnx step; pnnx derives the
# names of its outputs (rvm_ts.ncnn.param / rvm_ts.ncnn.bin) from it.
TRACED_MODEL_PATH = "rvm_ts.pt"

model = torch.hub.load("PeterL1n/RobustVideoMatting", "mobilenetv3")  # or "resnet50"
model.cpu()
model.eval()

# Trace with a fixed 1x3x512x512 input; the same shape is passed to pnnx below.
traced_script_module = torch.jit.trace(model, torch.randn(1, 3, 512, 512), strict=False)
traced_script_module.save(TRACED_MODEL_PATH)

# Convert the file that was just saved (the original passed "ts.pt", which
# does not exist) and stop with an error if pnnx reports a failure.
status = os.system("./pnnx " + TRACED_MODEL_PATH + " inputshape=[1,3,512,512] device=cpu")
if status != 0:
    raise SystemExit("pnnx conversion failed (status {})".format(status))

ncnnのRobustVideoMattingの学習モデルである、rvm_ts.ncnn.binとrvm_ts.ncnn.paramが生成されていることを確認します。

$ ls
convet.py    debug2.param       rvm_ts.pnnx.onnx   rvm_ts_pnnx.py
debug.bin    rvm_ts.ncnn.bin    rvm_ts.pnnx.param
debug.param  rvm_ts.ncnn.param  rvm_ts.pt
debug2.bin   rvm_ts.pnnx.bin    rvm_ts_ncnn.py

ncnn_rvm_main.cpp ファイルの作成

ncnnで、RobustVideoMattingを使って画像を変換するプログラムを作成します。

% gedit ncnn_rvm_main.cpp

// ncnn_rvm_main.cpp 
#include <opencv2/core/core.hpp>
#include <opencv2/highgui/highgui.hpp>
#include <opencv2/imgproc/imgproc.hpp>
// ncnn
#include "net.h"
#include <iostream>
#include <vector>

// Composites the matting result over a solid background color and shows it.
//
// bgr  : original input image (kept for interface compatibility; the
//        composite is drawn over a solid color, not over this image).
// fgr  : foreground image, CV_8UC3, same size as pha.
// pha  : alpha matte, CV_8UC1 (0 = background, 255 = foreground).
// comp : receives the composited image, CV_8UC3, same size as pha.
//
// Opens three windows ("pha", "fgr", "comp") and blocks until a key is pressed.
void draw_objects(const cv::Mat &bgr, const cv::Mat &fgr, const cv::Mat &pha, cv::Mat &comp) {
    (void)bgr;

    // The per-pixel loop below indexes fgr and comp with pha's coordinates,
    // so refuse inputs whose size or type does not match.
    if(pha.empty() || pha.type() != CV_8UC1 || fgr.type() != CV_8UC3 || fgr.size() != pha.size()) {
        std::cerr << "draw_objects: pha/fgr have unexpected size or type" << std::endl;
        return;
    }

    // Every pixel of comp is overwritten below, so only allocation is needed.
    comp.create(pha.size(), CV_8UC3);

    // Background color in B, G, R order.
    const float background[3] = {255.f, 120.f, 120.f};

    for(int i = 0; i < pha.rows; i++) {
        const uchar *pha_row = pha.ptr<uchar>(i);
        const cv::Vec3b *fgr_row = fgr.ptr<cv::Vec3b>(i);
        cv::Vec3b *comp_row = comp.ptr<cv::Vec3b>(i);
        for(int j = 0; j < pha.cols; j++) {
            const float alpha = pha_row[j] / 255.f;
            for(int c = 0; c < 3; c++) {
                // Standard alpha blend: foreground * a + background * (1 - a).
                comp_row[j][c] = static_cast<uchar>(fgr_row[j][c] * alpha + (1 - alpha) * background[c]);
            }
        }
    }

    cv::imshow("pha", pha);   // alpha channel
    cv::imshow("fgr", fgr);   // foreground before compositing
    cv::imshow("comp", comp); // composited image
    cv::waitKey(0);
}

// Runs RobustVideoMatting on a single image.
//
// net : ncnn network whose param/model files have already been loaded.
// bgr : input image, CV_8UC3 in BGR channel order.
// pha : receives the alpha matte as CV_8UC1, resized to the size of bgr.
// fgr : receives the foreground as CV_8UC3 in BGR order, resized to the size of bgr.
//
// Returns 0 on success and -1 when the input is unusable or inference fails.
int detect_rvm(ncnn::Net &net, const cv::Mat &bgr, cv::Mat &pha, cv::Mat &fgr) {
    // Network input size; kept equal to the 512x512 shape the model was
    // traced and converted with.
    const int target_width = 512;
    const int target_height = target_width;

    if(bgr.empty() || bgr.type() != CV_8UC3)
        return -1;

    // from_pixels_resize() is given no row stride here, so make sure the
    // pixel rows are tightly packed (e.g. bgr may be a ROI of a larger image).
    const cv::Mat src = bgr.isContinuous() ? bgr : bgr.clone();

    // Convert the OpenCV image to an ncnn tensor: BGR -> RGB, resize, scale to [0, 1].
    ncnn::Mat ncnn_in1 = ncnn::Mat::from_pixels_resize(src.data, ncnn::Mat::PIXEL_BGR2RGB, src.cols, src.rows, target_width, target_height);
    const float mean_vals[3] = {0.f, 0.f, 0.f};
    const float norm_vals[3] = {1 / 255.f, 1 / 255.f, 1 / 255.f};
    ncnn_in1.substract_mean_normalize(mean_vals, norm_vals);

    // Run inference; "out0" is used as the foreground and "out1" as the alpha matte.
    ncnn::Extractor ex = net.create_extractor();
    if(ex.input("in0", ncnn_in1) != 0)
        return -1;
    ncnn::Mat pha_;
    ncnn::Mat fgr_;
    if(ex.extract("out0", fgr_) != 0 || ex.extract("out1", pha_) != 0)
        return -1;
    if(pha_.empty() || fgr_.empty() || fgr_.c != 3)
        return -1;

    // Repack the planar 3-channel foreground into interleaved pixels so it
    // can be viewed as a CV_32FC3 image.
    ncnn::Mat fgr_pack3;
    ncnn::convert_packing(fgr_, fgr_pack3, 3);
    if(fgr_pack3.empty() || fgr_pack3.elempack != 3)
        return -1;

    // Wrap the ncnn buffers without copying. Sizes come from the actual
    // outputs (the original copied a hard-coded 512*512*3 floats, which
    // overruns the buffers whenever the output is smaller). The views are
    // only read while pha_ and fgr_pack3 are alive.
    const cv::Mat pha_view(pha_.h, pha_.w, CV_32FC1, pha_.data);
    const cv::Mat fgr_view(fgr_.h, fgr_.w, CV_32FC3, fgr_pack3.data);

    // Scale both results back to the input resolution. The interpolation
    // flag is the 6th argument of cv::resize (4th/5th are fx/fy).
    cv::Mat cv_pha;
    cv::Mat cv_fgr;
    cv::resize(pha_view, cv_pha, bgr.size(), 0, 0, cv::INTER_LINEAR);
    cv::resize(fgr_view, cv_fgr, bgr.size(), 0, 0, cv::INTER_LINEAR);

    // Convert [0, 1] floats to 8-bit images for the caller.
    cv_pha.convertTo(pha, CV_8UC1, 255.0, 0);

    cv::Mat fgr8U;
    cv_fgr.convertTo(fgr8U, CV_8UC3, 255.0, 0);
    // The network works in RGB; convert back to OpenCV's BGR order.
    cv::cvtColor(fgr8U, fgr, cv::COLOR_RGB2BGR);

    return 0;
}

// Entry point: loads the RVM ncnn model, mattes the image given on the
// command line and displays the result.
//
// Usage: <program> image_file
// Returns 0 on success, -1 on bad arguments or any load/inference failure.
int main(int argc, char **argv) {
    // Validate the arguments first so a usage error does not depend on the
    // model files being present.
    if(argc != 2) {
        std::cout << "Usage: " << argv[0] << " image_file" << std::endl;
        return -1;
    }

    ncnn::Net net;
    // Both loaders return 0 on success.
    if(net.load_param("rvm_ts.ncnn.param") != 0 || net.load_model("rvm_ts.ncnn.bin") != 0) {
        std::cerr << "load model failed" << std::endl;
        return -1;
    }

    const char *imagepath = argv[1];
    cv::Mat m = cv::imread(imagepath, cv::IMREAD_COLOR);
    if(m.empty()) {
        std::cout << "read image failed" << std::endl;
        return -1;
    }

    cv::Mat fgr, pha, comp;
    // Run RobustVideoMatting (the network must be passed as the first argument).
    if(detect_rvm(net, m, pha, fgr) != 0) {
        std::cerr << "matting failed" << std::endl;
        return -1;
    }
    // Draw the RobustVideoMatting result.
    draw_objects(m, fgr, pha, comp);
    return 0;
}

CMakeLists.txt ファイルの作成

cmakeでコンパイルするために、CMakeLists.txtファイルを作成します。

% gedit CMakeLists.txt

# Build script for the ncnn RobustVideoMatting sample.
# Configure with -DTARGET_COMPILER=arm to cross-compile for ARM;
# any other value (or none) builds for the x64 host.
cmake_minimum_required(VERSION 3.5)

# Toolchain and dependency locations have to be chosen before project(),
# because project() is the command that detects the compilers.
if(TARGET_COMPILER STREQUAL "arm")
    message(STATUS "TARGET_COMPILE STREQUAL arm")
    SET(CMAKE_CXX_COMPILER arm-none-linux-gnueabihf-g++)
    SET(CMAKE_C_COMPILER arm-none-linux-gnueabihf-gcc)
    SET(NCNN_INSTALL_DIR /opt/external/ncnn/install/arm/)
    SET(OpenCV_DIR /opt/external/opencv/install/arm/lib/cmake/opencv4/)
else()
    message(STATUS "TARGET_COMPILE STREQUAL X64")
    SET(NCNN_INSTALL_DIR /opt/external/ncnn/install/x64/)
    SET(OpenCV_DIR /opt/external/opencv/install/x64/lib/cmake/opencv4/)
endif()

# The target name must match the source file name (ncnn_rvm_main.cpp),
# since the executable is built from ${Target}.cpp below.
PROJECT(ncnn_rvm_main)
set(Target ncnn_rvm_main)

if(NOT CMAKE_BUILD_TYPE)
  set(CMAKE_BUILD_TYPE Release)
endif()

# Set after project() so the per-configuration flags are not replaced by
# the defaults that project() initializes.
SET(CMAKE_CXX_FLAGS_DEBUG "$ENV{CXXFLAGS} -O0 -Wall -g2 -ggdb")
SET(CMAKE_CXX_FLAGS_RELEASE "$ENV{CXXFLAGS} -O3 -Wall")

set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${PROJECT_SOURCE_DIR}/bin)
find_package(OpenCV REQUIRED)
include_directories(${OpenCV_INCLUDE_DIRS} )
include_directories(${PROJECT_SOURCE_DIR}/include)
include_directories(${NCNN_INSTALL_DIR}/include/ncnn)
# link_directories() only affects targets created after it.
link_directories(${OpenCV_LIBRARY_DIRS})
add_executable(${Target} ${Target}.cpp)
target_link_libraries(${Target} ${OpenCV_LIBRARIES})
# ncnn is linked statically and needs OpenMP at link time.
target_link_libraries (${Target} ${NCNN_INSTALL_DIR}/lib/libncnn.a -fopenmp)

ビルド

Ubuntu用のバイナリをcmakeでコンパイルして、生成されたバイナリを実行します。

$ cmake .
$ make
$ ./bin/ncnn_rvm_main <画像ファイル>

GitHub

作成したプログラムはGitHubへ格納しました。

参考資料

この記事を作成するにあたり、以下のウェブサイトを参考にしました。

You get articles that match your needs
You can efficiently read back useful information
You can use dark theme

What you can do with signing up