概要
こちらの記事を見かけて,処理がそこそこ軽い姿勢推定モデルを使ったらノートPCとwebカメラ(内蔵カメラ)だけでモーションキャプチャぽいことができそうなのでやってみました.
流れとしては元記事とほとんど同じです.
ソースコード
必要なもの
- Unity2018.1.9 f2
- Python実行環境
手順
Python側
1.姿勢推定モデルの用意
以下のものをcloneしてください
https://github.com/ildoonet/tf-pose-estimation/tree/master
これを使用することで2次元画像に写る人物の姿勢を推定できます.
2. 3次元情報の復元
モーションキャプチャぽいことをするためには3次元情報が必要になってきます.
1.のモデルは2次元情報しか得られません.
そこで先ほどのリポジトリのdevelブランチにある処理を使用して3次元情報を取得します.
(元々masterにあったぽいけどなくなってた)
以下のものをcloneしてください
https://github.com/ildoonet/tf-pose-estimation/tree/devel
そして devel/src/lifting フォルダを masterに移動させます
3. WebSocketサーバの用意
今回のシステムではPython側で人物の姿勢推定などの処理を行い,Unity側では3Dモデルの表示のみを行います.
PythonとUnityの情報通信部分を今回はWebSocketを用いて実装します.
以下のコマンドを実行
pip install git+https://github.com/Pithikos/python-websocket-server
import logging
import cv2
import json
import numpy as np
import common
from tf_pose.estimator import TfPoseEstimator
from tf_pose.networks import get_graph_path, model_wh
from websocket_server import WebsocketServer
from lifting.prob_model import Prob3dPose
# WebSocket server endpoint (the Unity client connects to ws://HOST:PORT/).
PORT = 5000
HOST = '127.0.0.1'

# Console logger for connection and inference events.
logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)
_stream_handler = logging.StreamHandler()
_stream_handler.setFormatter(
    logging.Formatter(' %(module)s - %(asctime)s - %(levelname)s - %(message)s'))
logger.addHandler(_stream_handler)
def create_json(pose3d):
    """Pack the first estimated 3D pose into a JSON-serializable dict.

    The axes are remapped for Unity's coordinate system: row 2 of the pose
    array becomes 'y' and row 1 becomes 'z'.  The result is also cached in
    the module-level ``old_data`` so it can be re-sent when a later
    inference fails.
    """
    global old_data
    # Joint index reference (MPII-style, 17 joints):
    #  0:Hip       1:RHip      2:RKnee     3:RFoot     4:LHip     5:LKnee
    #  6:LFoot     7:Spine     8:Thorax    9:Neck/Nose 10:Head    11:LShoulder
    # 12:LElbow   13:LWrist   14:RShoulder 15:RElbow  16:RWrist
    pose = pose3d[0]
    data = {
        'body_parts': [
            {'id': i, 'x': pose[0][i], 'y': pose[2][i], 'z': pose[1][i]}
            for i in range(17)
        ]
    }
    old_data = data
    return data
def new_client(client, server):
    """Log a newly connected WebSocket client.

    Bug fix: the original message said "has left." -- a copy-paste from
    ``client_left`` -- which made the connect/disconnect log indistinguishable.
    """
    logger.info('NewClient {}:{} has joined.'.format(client['address'][0], client['address'][1]))
def client_left(client, server):
    """Log a WebSocket client disconnecting."""
    host, port = client['address'][0], client['address'][1]
    logger.info('Client {}:{} has left.'.format(host, port))
def message_received(client, server, message):
    """Handle one poll from Unity: grab a camera frame, estimate the 3D pose,
    and reply with it as JSON.

    If anything fails (no person in frame, camera read error, ...), the last
    successfully computed pose (``old_data``) is re-sent so the client always
    receives a reply.  Fix: the original used a bare ``except:``, which also
    swallows KeyboardInterrupt/SystemExit and hides every error silently.
    """
    _, image = cam.read()
    humans = e.inference(image, resize_to_default=(w > 0 and h > 0), upsample_size=4.0)
    pose_2d_mpiis = []
    visibilities = []
    # 2D joints come back normalized to [0, 1]; scale to a fixed reference frame.
    standard_w = 640
    standard_h = 480
    try:
        # Only the first detected person is tracked (IndexError when none).
        pose_2d_mpii, visibility = common.MPIIPart.from_coco(humans[0])
        pose_2d_mpiis.append([(int(x * standard_w + 0.5), int(y * standard_h + 0.5)) for x, y in pose_2d_mpii])
        visibilities.append(visibility)
        pose_2d_mpiis = np.array(pose_2d_mpiis)
        visibilities = np.array(visibilities)
        # Lift the 2D joints to 3D with the probabilistic lifting model.
        transformed_pose2d, weights = poseLifting.transform_joints(pose_2d_mpiis, visibilities)
        pose_3d = poseLifting.compute_3d(transformed_pose2d, weights)
        print(pose_3d)
        server.send_message(client, json.dumps(create_json(pose_3d)))
    except Exception:
        # Fall back to the last good pose, but log the failure instead of
        # hiding it completely.
        logger.exception('Pose estimation failed; re-sending previous pose.')
        server.send_message(client, json.dumps(old_data))
if __name__ == '__main__':
    # main
    # 2D pose estimator: 432x368 input, MobileNet-thin backbone.
    w, h = model_wh("432x368")
    e = TfPoseEstimator(get_graph_path("mobilenet_thin"), target_size=(432, 368), trt_bool=False)
    # 2D -> 3D lifting model (taken from tf-pose-estimation's devel branch).
    poseLifting = Prob3dPose('lifting/models/prob_model_params.mat')
    # Default webcam (device index 0).
    cam = cv2.VideoCapture(0)
    # Last successfully computed pose; re-sent by message_received on failure.
    old_data = {}
    # WebSocket server: each incoming message triggers one inference cycle.
    server = WebsocketServer(port=PORT, host=HOST)
    server.set_fn_new_client(new_client)
    server.set_fn_client_left(client_left)
    server.set_fn_message_received(message_received)
    server.run_forever()
これでPython側の準備がおっけーです
Unity側
1. 3Dモデルの用意
まず初めに,動かしたい3Dモデルを用意しましょう.
今回はAssetStoreから"Unity-Chan!" Modelを使用させていただきました.
2. 必要なライブラリのインストール
SAFullBodyIKをcloneしてAssetsフォルダに移動させてください.
また,WebSocketの受信ができるように https://github.com/sta/websocket-sharp をcloneしビルドします.ビルド方法は以下のものがわかりやすいです
https://qiita.com/oishihiroaki/items/bb2977c72052f5dd5bd9
3. Unity側コード
本記事の参考元のコードを拝借させていただきました.
using System;
using System.Collections;
using System.Collections.Generic;
using System.IO;
using UnityEngine;
using WebSocketSharp;
using WebSocketSharp.Net;
/// <summary>
/// Receives a 17-joint 3D pose from the Python WebSocket server and drives a
/// FullBodyIK rig so the model mirrors the captured motion.  A debug skeleton
/// is also drawn with Debug.DrawLine.
/// </summary>
public class IKSetting : MonoBehaviour {
    private BodyParts bodyParts;   // last parsed pose payload
    private string receivedJson;   // raw JSON text from the server
    private WebSocket ws;
    [SerializeField, Range(10, 120)]
    float FrameRate;               // pose-update rate (Hz), set in the Inspector
    public List<Transform> BoneList = new List<Transform>();
    GameObject FullbodyIK;
    Vector3[] points = new Vector3[17];         // joint positions from the server
    Vector3[] NormalizeBone = new Vector3[12];  // unit direction of each bone
    float[] BoneDistance = new float[12];       // rest length of each bone on the rig
    float Timer;
    // Joint index pairs (server indices) used only for the debug skeleton.
    int[,] joints = new int[,] { { 0, 1 }, { 1, 2 }, { 2, 3 }, { 0, 4 }, { 4, 5 }, { 5, 6 }, { 0, 7 }, { 7, 8 }, { 8, 9 }, { 9, 10 }, { 8, 11 }, { 11, 12 }, { 12, 13 }, { 8, 14 }, { 14, 15 }, { 15, 16 } };
    // The 12 retargeted bones, as pairs of server joint indices.
    int[,] BoneJoint = new int[,] { { 0, 2 }, { 2, 3 }, { 0, 5 }, { 5, 6 }, { 0, 9 }, { 9, 10 }, { 9, 11 }, { 11, 12 }, { 12, 13 }, { 9, 14 }, { 14, 15 }, { 15, 16 } };
    // The same 12 bones, as pairs of BoneList indices (see OpenPoseRef order).
    int[,] NormalizeJoint = new int[,] { { 0, 1 }, { 1, 2 }, { 0, 3 }, { 3, 4 }, { 0, 5 }, { 5, 6 }, { 5, 7 }, { 7, 8 }, { 8, 9 }, { 5, 10 }, { 10, 11 }, { 11, 12 } };
    int NowFrame = 0;
    float[] x = new float[17];
    float[] y = new float[17];
    float[] z = new float[17];
    bool isReceived = false;

    // Connect to the Python server and request the first pose.
    void Start () {
        ws = new WebSocket("ws://localhost:5000/");
        ws.OnOpen += (sender, e) =>
        {
            Debug.Log("WebSocket Open");
        };
        ws.OnMessage += (sender, e) =>
        {
            receivedJson = e.Data;
            Debug.Log("Data: " + e.Data);
            isReceived = true;
        };
        ws.OnError += (sender, e) =>
        {
            Debug.Log("WebSocket Error Message: " + e.Message);
        };
        ws.OnClose += (sender, e) =>
        {
            Debug.Log("WebSocket Close");
        };
        ws.Connect();
        // Any message triggers one inference cycle on the server side.
        ws.Send("");
    }

    void Update () {
        Timer += Time.deltaTime;
        ws.Send("");
        // Refresh the target points at most FrameRate times per second.
        if (Timer > (1 / FrameRate))
        {
            Timer = 0;
            PointUpdate();
        }
        if (!FullbodyIK)
        {
            IKFind();
        }
        else
        {
            IKSet();
        }
    }

    void OnDestroy()
    {
        ws.Close();
        ws = null;
    }

    // Copy the latest received pose into points[] and recompute the
    // normalized bone directions.
    void PointUpdate()
    {
        if (NowFrame < 600)
        {
            NowFrame++;
            if (isReceived)
            {
                bodyParts = JsonUtility.FromJson<BodyParts>(receivedJson);
                for (int i = 0; i < 17; i++)
                {
                    x[i] = bodyParts.body_parts[i].x;
                    y[i] = bodyParts.body_parts[i].y;
                    z[i] = bodyParts.body_parts[i].z;
                }
                isReceived = false;
            }
            for (int i = 0; i < 17; i++)
            {
                // Mirror the z axis to match Unity's left-handed coordinates.
                points[i] = new Vector3(x[i], y[i], -z[i]);
                Debug.Log(points[i]);
            }
            for (int i = 0; i < 12; i++)
            {
                NormalizeBone[i] = (points[BoneJoint[i, 1]] - points[BoneJoint[i, 0]]).normalized;
            }
        }
    }

    // Locate the FullBodyIK rig and cache its effector transforms and
    // rest bone lengths.
    void IKFind()
    {
        FullbodyIK = GameObject.Find("FullBodyIK");
        if (FullbodyIK)
        {
            for (int i = 0; i < Enum.GetNames(typeof(OpenPoseRef)).Length; i++)
            {
                // Bug fix: the original called GameObject.Find(...).transform
                // BEFORE checking for null, so a missing effector threw a
                // NullReferenceException instead of being skipped.
                GameObject bone = GameObject.Find(Enum.GetName(typeof(OpenPoseRef), i));
                if (bone != null)
                {
                    BoneList.Add(bone.transform);
                }
            }
            for (int i = 0; i < Enum.GetNames(typeof(NormalizeBoneRef)).Length; i++)
            {
                BoneDistance[i] = Vector3.Distance(BoneList[NormalizeJoint[i, 0]].position, BoneList[NormalizeJoint[i, 1]].position);
            }
        }
    }

    // Move the IK effectors toward the captured pose, preserving each
    // bone's rest length.
    void IKSet()
    {
        // Reject obviously bogus root positions (lifting model divergence).
        if (Math.Abs(points[0].x) < 1000 && Math.Abs(points[0].y) < 1000 && Math.Abs(points[0].z) < 1000)
        {
            BoneList[0].position = points[0] * 0.001f + Vector3.up * 0.8f;
        }
        for (int i = 0; i < 12; i++)
        {
            // Lerp for smoothing; 0.05 keeps the motion from jittering.
            BoneList[NormalizeJoint[i, 1]].position = Vector3.Lerp(
                BoneList[NormalizeJoint[i, 1]].position,
                BoneList[NormalizeJoint[i, 0]].position + BoneDistance[i] * NormalizeBone[i]
                , 0.05f
            );
            DrawLine(BoneList[NormalizeJoint[i, 0]].position, BoneList[NormalizeJoint[i, 1]].position, Color.red);
        }
        // joints.Length is the total element count of the 2D array, so /2
        // yields the number of joint pairs.
        for (int i = 0; i < joints.Length / 2; i++)
        {
            DrawLine(points[joints[i, 0]] * 0.001f + new Vector3(-1, 0.8f, 0), points[joints[i, 1]] * 0.001f + new Vector3(-1, 0.8f, 0), Color.blue);
        }
    }

    void DrawLine(Vector3 s, Vector3 e, Color c)
    {
        Debug.DrawLine(s, e, c);
    }
}
// Names of the IK effector GameObjects searched for under the FullBodyIK rig.
// Order matters: each member's integer value is used as an index into
// IKSetting.BoneList, and the NormalizeJoint table refers to these indices.
enum OpenPoseRef
{
    Hips,
    LeftKnee, LeftFoot,
    RightKnee, RightFoot,
    Neck, Head,
    RightArm, RightElbow, RightWrist,
    LeftArm, LeftElbow, LeftWrist
};
// The 12 retargeted bones; only the member COUNT is used (as the loop bound
// for BoneDistance / NormalizeJoint in IKSetting), the names document which
// bone each index represents.
enum NormalizeBoneRef
{
    Hip2LeftKnee, LeftKnee2LeftFoot,
    Hip2RightKnee, RightKnee2RightFoot,
    Hip2Neck, Neck2Head,
    Neck2RightArm, RightArm2RightElbow, RightElbow2RightWrist,
    Neck2LeftArm, LeftArm2LeftElbow, LeftElbow2LeftWrist
};
// Root object of the JSON payload sent by the Python server:
// { "body_parts": [ {id, x, y, z}, ... 17 entries ] }.
// Field name must match the JSON key exactly for JsonUtility.FromJson.
[System.Serializable]
public class BodyParts
{
    public Position[] body_parts;
}
// One joint position from the server. Field names must match the JSON keys
// produced by the Python create_json function.
[System.Serializable]
public class Position
{
    // Joint index 0..16 (Hip .. RWrist); see the comment table in create_json.
    public int id;
    public float x;
    public float y;
    public float z;
}
以上で準備完了です.
実行
server.pyを実行したあとにUnity側のPlayボタンを押してください.
まとめ
多少ラグはあるものの,予想より速く動きました.
もう少し軽いモデルも探したら見つかりそうなのでまだ改良の余地はありそうです.