Motion-capture-like animation of a 3D model using just a laptop and a webcam

Posted at 2020-02-27

Overview

I came across this article and figured that with a reasonably lightweight pose-estimation model, something motion-capture-like should be possible with nothing but a laptop and a webcam (the built-in camera), so I gave it a try.
The overall flow is almost the same as the original article.

Source code

Requirements

  • Unity 2018.1.9f2
  • A Python environment

Procedure

Python side

1. Preparing the pose-estimation model

Clone the following repository:
https://github.com/ildoonet/tf-pose-estimation/tree/master
Using this, you can estimate the pose of a person appearing in a 2D image.
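
To confirm the estimator itself works, you can run it on a single webcam frame first. The sketch below assumes you run it from the repository root (the file name check_pose.py is just an illustration); the calls are the same ones server.py uses later.

check_pose.py
import cv2

from tf_pose.estimator import TfPoseEstimator
from tf_pose.networks import get_graph_path, model_wh

w, h = model_wh("432x368")
e = TfPoseEstimator(get_graph_path("mobilenet_thin"), target_size=(432, 368))

cam = cv2.VideoCapture(0)  # built-in webcam
_, image = cam.read()
cam.release()

# humans is a list of detected people, each with its 2D body-part estimates
humans = e.inference(image, resize_to_default=(w > 0 and h > 0), upsample_size=4.0)
print("detected {} person(s)".format(len(humans)))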

2. Recovering 3D information

To do anything motion-capture-like, we need 3D information, but the model from step 1 only gives us 2D coordinates.
So we use the lifting code found in the devel branch of the same repository to recover 3D coordinates.
(It apparently used to live on master but has since been removed.)

Clone the following branch:
https://github.com/ildoonet/tf-pose-estimation/tree/devel
Then move the devel/src/lifting folder into your master checkout, for example as shown below.
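
The directory names here are just an illustration; adjust them to wherever you cloned things.

git clone https://github.com/ildoonet/tf-pose-estimation.git
git clone -b devel https://github.com/ildoonet/tf-pose-estimation.git tf-pose-devel
cp -r tf-pose-devel/src/lifting tf-pose-estimation/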

3. Setting up the WebSocket server

In this system, the Python side handles the pose estimation and related processing, while the Unity side only displays the 3D model.
The data exchange between Python and Unity is implemented with WebSocket.

Run the following command:
pip install git+https://github.com/Pithikos/python-websocket-server

server.py
import logging

import cv2
import json
import numpy as np
from tf_pose import common

from tf_pose.estimator import TfPoseEstimator
from tf_pose.networks import get_graph_path, model_wh
from websocket_server import WebsocketServer
from lifting.prob_model import Prob3dPose

PORT = 5000
HOST = '127.0.0.1'

# logger_setup
logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)
handler = logging.StreamHandler()
handler.setFormatter(logging.Formatter(' %(module)s -  %(asctime)s - %(levelname)s - %(message)s'))
logger.addHandler(handler)


def create_json(pose3d):
    global old_data

    data = {'body_parts': []}

    """
    // 0 :Hip
    // 1 :RHip
    // 2 :RKnee
    // 3 :RFoot
    // 4 :LHip
    // 5 :LKnee
    // 6 :LFoot
    // 7 :Spine
    // 8 :Thorax
    // 9 :Neck/Nose
    // 10:Head
    // 11:LShoulder
    // 12:LElbow
    // 13:LWrist
    // 14:RShoulder
    // 15:RElbow
    // 16:RWrist
    """

    # axes 1 and 2 of the lifted pose are swapped here (and the Unity side later
    # flips the sign of z) so the output lines up with Unity's y-up coordinates
    for i in range(17):
        data['body_parts'].append({'id': i, 'x': pose3d[0][0][i], 'y': pose3d[0][2][i], 'z': pose3d[0][1][i]})

    old_data = data
    return data


def new_client(client, server):
    logger.info('New client {}:{} has joined.'.format(client['address'][0], client['address'][1]))


def client_left(client, server):
    logger.info('Client {}:{} has left.'.format(client['address'][0], client['address'][1]))


def message_received(client, server, message):
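    # every message from Unity triggers one webcam capture, one 2D pose estimate and one 3D lift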
    _, image = cam.read()

    humans = e.inference(image, resize_to_default=(w > 0 and h > 0), upsample_size=4.0)

    pose_2d_mpiis = []
    visibilities = []

    standard_w = 640
    standard_h = 480

    try:
        pose_2d_mpii, visibility = common.MPIIPart.from_coco(humans[0])
        pose_2d_mpiis.append([(int(x * standard_w + 0.5), int(y * standard_h + 0.5)) for x, y in pose_2d_mpii])
        visibilities.append(visibility)
        pose_2d_mpiis = np.array(pose_2d_mpiis)
        visibilities = np.array(visibilities)
        transformed_pose2d, weights = poseLifting.transform_joints(pose_2d_mpiis, visibilities)
        pose_3d = poseLifting.compute_3d(transformed_pose2d, weights)
        print(pose_3d)
        server.send_message(client, json.dumps(create_json(pose_3d)))

    except Exception:
        # e.g. no person detected in this frame: resend the previous pose instead
        server.send_message(client, json.dumps(old_data))


if __name__ == '__main__':
    # main
    w, h = model_wh("432x368")
    e = TfPoseEstimator(get_graph_path("mobilenet_thin"), target_size=(432, 368), trt_bool=False)
    poseLifting = Prob3dPose('lifting/models/prob_model_params.mat')

    cam = cv2.VideoCapture(0)

    old_data = {}

    server = WebsocketServer(port=PORT, host=HOST)
    server.set_fn_new_client(new_client)
    server.set_fn_client_left(client_left)
    server.set_fn_message_received(message_received)
    server.run_forever()

That completes the setup on the Python side.
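
If you want to check the server before involving Unity at all, a small client sketch like the one below works. This assumes the separate websocket-client package (pip install websocket-client), which is not otherwise needed in this article; the file name test_client.py is just an illustration.

test_client.py
import json

from websocket import create_connection

ws = create_connection("ws://127.0.0.1:5000/")
ws.send("")  # an empty message asks server.py for one pose
data = json.loads(ws.recv())
print(len(data.get('body_parts', [])), "joints received")
ws.close()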

Unity side

1. Preparing a 3D model

First, prepare the 3D model you want to animate.
Here I used the "Unity-Chan!" Model from the Asset Store.

2. Installing the required libraries

Clone SAFullBodyIK and move it into your Assets folder.
Also clone https://github.com/sta/websocket-sharp and build it so that Unity can receive WebSocket messages. The following article explains the build steps well:
https://qiita.com/oishihiroaki/items/bb2977c72052f5dd5bd9

3. Unity-side code

The code below is borrowed from the article this post is based on.

IKSetting.cs
using System;
using System.Collections;
using System.Collections.Generic;
using System.IO;
using UnityEngine;
using WebSocketSharp;
using WebSocketSharp.Net;

public class IKSetting : MonoBehaviour {
    private BodyParts bodyParts;
    private string receivedJson;
    private WebSocket ws;

    [SerializeField, Range(10, 120)]
    float FrameRate;
    public List<Transform> BoneList = new List<Transform>();
    GameObject FullbodyIK;
    Vector3[] points = new Vector3[17];
    Vector3[] NormalizeBone = new Vector3[12];
    float[] BoneDistance = new float[12];
    float Timer;
    int[,] joints = new int[,] { { 0, 1 }, { 1, 2 }, { 2, 3 }, { 0, 4 }, { 4, 5 }, { 5, 6 }, { 0, 7 }, { 7, 8 }, { 8, 9 }, { 9, 10 }, { 8, 11 }, { 11, 12 }, { 12, 13 }, { 8, 14 }, { 14, 15 }, { 15, 16 } };
    int[,] BoneJoint = new int[,] { { 0, 2 }, { 2, 3 }, { 0, 5 }, { 5, 6 }, { 0, 9 }, { 9, 10 }, { 9, 11 }, { 11, 12 }, { 12, 13 }, { 9, 14 }, { 14, 15 }, { 15, 16 } };
    int[,] NormalizeJoint = new int[,] { { 0, 1 }, { 1, 2 }, { 0, 3 }, { 3, 4 }, { 0, 5 }, { 5, 6 }, { 5, 7 }, { 7, 8 }, { 8, 9 }, { 5, 10 }, { 10, 11 }, { 11, 12 } };
    int NowFrame = 0;

    float[] x = new float[17];
    float[] y = new float[17];
    float[] z = new float[17];

    bool isReceived = false;

    // Use this for initialization
    void Start () {

        ws = new WebSocket("ws://localhost:5000/");
        ws.OnOpen += (sender, e) =>
        {
            Debug.Log("WebSocket Open");
        };
        ws.OnMessage += (sender, e) =>
        {
            receivedJson = e.Data;
            Debug.Log("Data: " + e.Data);
            isReceived = true;
        };
        ws.OnError += (sender, e) =>
        {
            Debug.Log("WebSocket Error Message: " + e.Message);
        };
        ws.OnClose += (sender, e) =>
        {
            Debug.Log("WebSocket Close");
        };
        ws.Connect();

        ws.Send("");
    }

    // Update is called once per frame
    void Update () {

        Timer += Time.deltaTime;
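        // an empty message asks the Python server to capture a frame and reply with a new pose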
        ws.Send("");

        if (Timer > (1 / FrameRate))
        {
            Timer = 0;
            PointUpdate();
        }
        if (!FullbodyIK)
        {
            IKFind();
        }
        else
        {
            IKSet();
        }
    }

    void OnDestroy()
    {
        ws.Close();
        ws = null;
    }

    void PointUpdate()
    {
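        // parse the latest JSON pose from the server and turn it into normalized bone direction vectors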
        if (NowFrame < 600)
        {
            NowFrame++;
            if (isReceived)
            {
                bodyParts = JsonUtility.FromJson<BodyParts>(receivedJson);
                for (int i = 0; i < 17; i++)
                {
                    x[i] = bodyParts.body_parts[i].x;
                    y[i] = bodyParts.body_parts[i].y;
                    z[i] = bodyParts.body_parts[i].z;
                }

                isReceived = false;
            }

            for (int i = 0; i < 17; i++)
            {
                points[i] = new Vector3(x[i], y[i], -z[i]);
                Debug.Log(points[i]);
            }

            for (int i = 0; i < 12; i++)
            {
                NormalizeBone[i] = (points[BoneJoint[i, 1]] - points[BoneJoint[i, 0]]).normalized;
            }
        }
    }

    void IKFind()
    {
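        // collect the FullBodyIK effector transforms and record the model's rest-pose bone lengths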
        FullbodyIK = GameObject.Find("FullBodyIK");
        if (FullbodyIK)
        {
            for (int i = 0; i < Enum.GetNames(typeof(OpenPoseRef)).Length; i++)
            {
                GameObject obj = GameObject.Find(Enum.GetName(typeof(OpenPoseRef), i));
                if (obj)
                {
                    BoneList.Add(obj.transform);
                }
            }
            for (int i = 0; i < Enum.GetNames(typeof(NormalizeBoneRef)).Length; i++)
            {
                BoneDistance[i] = Vector3.Distance(BoneList[NormalizeJoint[i, 0]].position, BoneList[NormalizeJoint[i, 1]].position);
            }
        }
    }

    void IKSet()
    {
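        // pull each effector toward its parent joint along the captured bone direction,
        // scaled to the model's own bone length; Lerp smooths the motion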
        if (Math.Abs(points[0].x) < 1000 && Math.Abs(points[0].y) < 1000 && Math.Abs(points[0].z) < 1000)
        {
            BoneList[0].position = points[0] * 0.001f + Vector3.up * 0.8f;
        }
        for (int i = 0; i < 12; i++)
        {
            BoneList[NormalizeJoint[i, 1]].position = Vector3.Lerp(
                BoneList[NormalizeJoint[i, 1]].position,
                BoneList[NormalizeJoint[i, 0]].position + BoneDistance[i] * NormalizeBone[i]
                , 0.05f
            );
            DrawLine(BoneList[NormalizeJoint[i, 0]].position, BoneList[NormalizeJoint[i, 1]].position, Color.red);
        }
        for (int i = 0; i < joints.Length / 2; i++)
        {
            DrawLine(points[joints[i, 0]] * 0.001f + new Vector3(-1, 0.8f, 0), points[joints[i, 1]] * 0.001f + new Vector3(-1, 0.8f, 0), Color.blue);
        }
    }

    void DrawLine(Vector3 s, Vector3 e, Color c)
    {
        Debug.DrawLine(s, e, c);
    }
}

enum OpenPoseRef
{
    Hips,
    LeftKnee, LeftFoot,
    RightKnee, RightFoot,
    Neck, Head,
    RightArm, RightElbow, RightWrist,
    LeftArm, LeftElbow, LeftWrist
};

enum NormalizeBoneRef
{
    Hip2LeftKnee, LeftKnee2LeftFoot,
    Hip2RightKnee, RightKnee2RightFoot,
    Hip2Neck, Neck2Head,
    Neck2RightArm, RightArm2RightElbow, RightElbow2RightWrist,
    Neck2LeftArm, LeftArm2LeftElbow, LeftElbow2LeftWrist
};

[System.Serializable]
public class BodyParts
{
    public Position[] body_parts;
}

[System.Serializable]
public class Position
{
    public int id;
    public float x;
    public float y;
    public float z;
}

That completes the setup.

Running it

Run server.py, then press the Play button in Unity.

Summary

Although there is some lag, it moved faster than I expected.
There seem to be even lighter models out there, so there is still room for improvement.
