More than 5 years have passed since last update.

tf-openposeによる姿勢推定をunityに落としこもうとしたけど失敗した話

Last updated at 2018-02-27Posted at 2018-02-21

概要･目的

openposeは一枚の画像で姿勢推定を行うことができるライブラリです。つまりデプスカメラが不要です。
最近流行りのVTuberにはFaceRigかVR、モーションキャプチャー等が用いられますがopenposeでモーションを付けることができるのか試したくなったためやってみました。
openposeの実行にはGPUが必須ですが持ってないのでCPUだけでできるtf-openposeを用いて実装しました。

tf-openposeの導入

導入はtf-openposeの公式
https://github.com/ildoonet/tf-pose-estimation
や
GPU無しで画像のみから人間のボーン推定が出来るtf-openposeを導入する
で丁寧に解説されていますのでそちらからどうぞ。

tf-openposeによる姿勢推定

とりあえず添付されている画像で走らせてみます。

python ./src/run.py --image=./images/p1.jpg

このように姿勢推定ができます。一枚の画像に対しての所要時間はi7-7500Uで約0.42秒ほどでした。
このままだと何のデータも出力されないのでUnityで動かすことができません。
run.pyの中身を見ていきます。

run.py

import argparse
import logging
import time
import ast

import common
import cv2
import numpy as np
from estimator import TfPoseEstimator
from networks import get_graph_path, model_wh

from lifting.prob_model import Prob3dPose
from lifting.draw import plot_pose

logger = logging.getLogger('TfPoseEstimator')
logger.setLevel(logging.DEBUG)
ch = logging.StreamHandler()
ch.setLevel(logging.DEBUG)
formatter = logging.Formatter('[%(asctime)s] [%(name)s] [%(levelname)s] %(message)s')
ch.setFormatter(formatter)
logger.addHandler(ch)


if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='tf-pose-estimation run')
    parser.add_argument('--image', type=str, default='./images/p1.jpg')
    parser.add_argument('--resolution', type=str, default='432x368', help='network input resolution. default=432x368')
    parser.add_argument('--model', type=str, default='mobilenet_thin', help='cmu / mobilenet_thin')
    parser.add_argument('--scales', type=str, default='[None]', help='for multiple scales, eg. [1.0, (1.1, 0.05)]')
    args = parser.parse_args()
    scales = ast.literal_eval(args.scales)

    w, h = model_wh(args.resolution)
    e = TfPoseEstimator(get_graph_path(args.model), target_size=(w, h))

    # estimate human poses from a single image !
    image = common.read_imgfile(args.image, None, None)
    # image = cv2.fastNlMeansDenoisingColored(image, None, 10, 10, 7, 21)
    t = time.time()
    humans = e.inference(image, scales=scales)
    elapsed = time.time() - t

    logger.info('inference image: %s in %.4f seconds.' % (args.image, elapsed))

    image = TfPoseEstimator.draw_humans(image, humans, imgcopy=False)
    # cv2.imshow('tf-pose-estimation result', image)
    # cv2.waitKey()

    import matplotlib.pyplot as plt

    fig = plt.figure()
    a = fig.add_subplot(2, 2, 1)
    a.set_title('Result')
    plt.imshow(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))

    bgimg = cv2.cvtColor(image.astype(np.uint8), cv2.COLOR_BGR2RGB)
    bgimg = cv2.resize(bgimg, (e.heatMat.shape[1], e.heatMat.shape[0]), interpolation=cv2.INTER_AREA)

    # show network output
    a = fig.add_subplot(2, 2, 2)
    plt.imshow(bgimg, alpha=0.5)
    tmp = np.amax(e.heatMat[:, :, :-1], axis=2)
    plt.imshow(tmp, cmap=plt.cm.gray, alpha=0.5)
    plt.colorbar()

    tmp2 = e.pafMat.transpose((2, 0, 1))
    tmp2_odd = np.amax(np.absolute(tmp2[::2, :, :]), axis=0)
    tmp2_even = np.amax(np.absolute(tmp2[1::2, :, :]), axis=0)

    a = fig.add_subplot(2, 2, 3)
    a.set_title('Vectormap-x')
    # plt.imshow(CocoPose.get_bgimg(inp, target_size=(vectmap.shape[1], vectmap.shape[0])), alpha=0.5)
    plt.imshow(tmp2_odd, cmap=plt.cm.gray, alpha=0.5)
    plt.colorbar()

    a = fig.add_subplot(2, 2, 4)
    a.set_title('Vectormap-y')
    # plt.imshow(CocoPose.get_bgimg(inp, target_size=(vectmap.shape[1], vectmap.shape[0])), alpha=0.5)
    plt.imshow(tmp2_even, cmap=plt.cm.gray, alpha=0.5)
    plt.colorbar()
    plt.show()

    import sys
    sys.exit(0)

    logger.info('3d lifting initialization.')
    poseLifting = Prob3dPose('./src/lifting/models/prob_model_params.mat')

    image_h, image_w = image.shape[:2]
    standard_w = 640
    standard_h = 480

    pose_2d_mpiis = []
    visibilities = []
    for human in humans:
        pose_2d_mpii, visibility = common.MPIIPart.from_coco(human)
        pose_2d_mpiis.append([(int(x * standard_w + 0.5), int(y * standard_h + 0.5)) for x, y in pose_2d_mpii])
        visibilities.append(visibility)

    pose_2d_mpiis = np.array(pose_2d_mpiis)
    visibilities = np.array(visibilities)
    transformed_pose2d, weights = poseLifting.transform_joints(pose_2d_mpiis, visibilities)
    pose_3d = poseLifting.compute_3d(transformed_pose2d, weights)

    for i, single_3d in enumerate(pose_3d):
        plot_pose(single_3d)
    plt.show()
    pass

3Dの姿勢推定は途中のsys.exit(0)で中断されているので消してみると以下のグラフが表示されます。

3Dの姿勢データはpose_3dに入っていそうなので出力してみます。

[[[  14.92142874 -108.54245999 -156.17663171  -30.61178154  206.26546395
    348.59836261  428.49344141  -23.72729188  -64.59051193  -58.24184023
    -71.0524189   155.67588092  356.72214471  328.46601474 -285.61299841
   -478.87357764 -628.7609282 ]
  [  57.32913884   32.04597062 -243.41472588 -245.64914378   82.61225533
   -220.41324492 -186.15566744  118.61493944  103.75314056   -4.63428129
     35.24575082  134.54588136  188.1003079    76.72117097  106.81791717
     60.89343418 -102.69358523]
  [-117.75328591  -94.21536506 -355.86725381 -819.16052701  -85.25362693
   -371.75525715 -725.28474816  131.9156541   406.68167084  465.84875355
    557.83256134  387.5281311   139.77284224  -70.33764487  382.73256996
    245.49967563  213.58731629]]]

[1,3,17]次元のベクトルですね。
それぞれ[人数,座標軸,各部位の座標]と対応しています。最初のデータである14.92142874は一人目の腰のx座標というように出力されています。
各部位と17次元の対応は以下のようになっています。

0　腰
1~3 右足
4~6 左足
7 背骨
8 胸
9 首
10 頭
11~13 左腕
14~16 右腕

動画からデータだけ取り出すコードを以下に記述します。

Estimate_from_movie.py

# -*- coding: utf-8 -*-
"""
Created on Tue Feb 20 11:47:50 2018

@author: KEEL
"""
import argparse
import cv2
import logging
import time
import ast
import common
import numpy as np
from estimator import TfPoseEstimator
from networks import get_graph_path, model_wh

from lifting.prob_model import Prob3dPose

logger = logging.getLogger('TfPoseEstimator')
logger.setLevel(logging.DEBUG)
ch = logging.StreamHandler()
ch.setLevel(logging.DEBUG)
formatter = logging.Formatter('[%(asctime)s] [%(name)s] [%(levelname)s] %(message)s')
ch.setFormatter(formatter)
logger.addHandler(ch)



out_dir = './movie/data_Doit/'

def Estimate_3Ddata(image,e,scales):
    t0 = time.time()
    # estimate human poses from a single image !
    t = time.time()
    humans = e.inference(image, scales=scales)
    elapsed = time.time() - t

    logger.info('inference image:%.4f seconds.' % (elapsed))
    logger.info('3d lifting initialization.')

    poseLifting = Prob3dPose('./src/lifting/models/prob_model_params.mat')

    standard_w = 320
    standard_h = 240

    pose_2d_mpiis = []
    visibilities = []
    for human in humans:
        pose_2d_mpii, visibility = common.MPIIPart.from_coco(human)
        pose_2d_mpiis.append([(int(x * standard_w + 0.5), int(y * standard_h + 0.5)) for x, y in pose_2d_mpii])
        visibilities.append(visibility)

    pose_2d_mpiis = np.array(pose_2d_mpiis)
    visibilities = np.array(visibilities)
    if(pose_2d_mpiis.ndim != 3):
        return 0
    transformed_pose2d, weights = poseLifting.transform_joints(pose_2d_mpiis, visibilities)
    pose_3d = poseLifting.compute_3d(transformed_pose2d, weights)

    alltime= time.time() - t0
    logger.info('estimate all time:%.4f seconds.' % (alltime))
    return pose_3d

if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='tf-pose-estimation run')
    parser.add_argument('--movie', type=str, default='./movie/test.mp4')
    parser.add_argument('--dataname',type=str,default='')
    parser.add_argument('--datas', type=str, default='./movie/data/')
    args = parser.parse_args()
    movie = cv2.VideoCapture(args.movie)
    
    w, h = model_wh('432x368')
    e = TfPoseEstimator(get_graph_path('mobilenet_thin'), target_size=(w, h))
    ast_l = ast.literal_eval('[None]')
    frame_count = int(movie.get(7))

    for i in range(frame_count):
        _, frame = movie.read()
        data = Estimate_3Ddata(frame,e,ast_l)
        fw = open(args.datas + args.dataname + str(i)+'.txt','w')
        fw.write(str(data))
        fw.close()

UnityでのFullBodyIK

各部位との対応がわかったのでUnity側で整形して動かしてみます。
全身のIKは公式では対応していないので
Stereoarts SAFullBodyIK
こちらのFullBodyIKを拝借しました。

IKSetting.cs

using System.Collections;
using System.Collections.Generic;
using UnityEngine;
using System;
using System.Linq;
using System.IO;

public class IKSetting : MonoBehaviour
{
    [SerializeField, Range(10, 120)]
    float FrameRate;
    public List<Transform> BoneList = new List<Transform>();
    GameObject FullbodyIK;
    Vector3[] points = new Vector3[17];
    Vector3[] NormalizeBone = new Vector3[12];
    float[] BoneDistance = new float[12];
    float Timer;
    int[,] joints = new int[,] { { 0, 1 }, { 1, 2 }, { 2, 3 }, { 0, 4 }, { 4, 5 }, { 5, 6 }, { 0, 7 }, { 7, 8 }, { 8, 9 }, { 9, 10 }, { 8, 11 }, { 11, 12 }, { 12, 13 }, { 8, 14 }, { 14, 15 }, { 15, 16 } };
    int[,] BoneJoint = new int[,] { { 0, 2 }, { 2, 3 }, { 0, 5 }, { 5, 6 }, { 0, 9 }, { 9, 10 }, { 9, 11 }, { 11, 12 }, { 12, 13 }, { 9, 14 }, { 14, 15 }, { 15, 16 } };
    int[,] NormalizeJoint = new int[,] { { 0, 1 }, { 1, 2 }, { 0, 3 }, { 3, 4 }, { 0, 5 }, { 5, 6 }, { 5, 7 }, { 7, 8 }, { 8, 9 }, { 5, 10 }, { 10, 11 }, { 11, 12 } };
    int NowFrame = 0;
    void Start()
    {
        PointUpdate();
    }
    void Update()
    {
        Timer += Time.deltaTime;
        if (Timer > (1 / FrameRate))
        {
            Timer = 0;
            PointUpdate();
        }
        if (!FullbodyIK)
        {
            IKFind();
        }
        else
        {
            IKSet();
        }
    }
    void PointUpdate()
    {
        StreamReader fi = null;
        if (NowFrame < 600)
        {
            fi = new StreamReader(Application.dataPath + "/datas/" + "3d_data" + NowFrame.ToString() + ".txt");
            NowFrame++;
            string all = fi.ReadToEnd();
            string[] axis = all.Split(']');
            float[] x = axis[0].Replace("[", "").Replace(Environment.NewLine, "").Split(' ').Where(s => s != "").Select(f => float.Parse(f)).ToArray();
            float[] y = axis[2].Replace("[", "").Replace(Environment.NewLine, "").Split(' ').Where(s => s != "").Select(f => float.Parse(f)).ToArray();
            float[] z = axis[1].Replace("[", "").Replace(Environment.NewLine, "").Split(' ').Where(s => s != "").Select(f => float.Parse(f)).ToArray();
            for (int i = 0; i < 17; i++)
            {
                points[i] = new Vector3(x[i], y[i], -z[i]);
            }

            for (int i = 0; i < 12; i++)
            {
                NormalizeBone[i] = (points[BoneJoint[i, 1]] - points[BoneJoint[i, 0]]).normalized;
            }
        }
    }
    void IKFind()
    {
        FullbodyIK = GameObject.Find("FullBodyIK");
        if (FullbodyIK)
        {
            for (int i = 0; i < Enum.GetNames(typeof(OpenPoseRef)).Length; i++)
            {
                Transform obj = GameObject.Find(Enum.GetName(typeof(OpenPoseRef), i)).transform;
                if (obj)
                {
                    BoneList.Add(obj);
                }
            }
            for (int i = 0; i < Enum.GetNames(typeof(NormalizeBoneRef)).Length; i++)
            {
                BoneDistance[i] = Vector3.Distance(BoneList[NormalizeJoint[i, 0]].position, BoneList[NormalizeJoint[i, 1]].position);
            }
        }
    }
    void IKSet()
    {
        if (Math.Abs(points[0].x) < 1000 && Math.Abs(points[0].y) < 1000 && Math.Abs(points[0].z) < 1000)
        {
            BoneList[0].position = points[0] * 0.001f + Vector3.up * 0.8f;
        }
        for (int i = 0; i < 12; i++)
        {
            BoneList[NormalizeJoint[i, 1]].position = Vector3.Lerp(
                BoneList[NormalizeJoint[i, 1]].position,
                BoneList[NormalizeJoint[i, 0]].position + BoneDistance[i] * NormalizeBone[i]
                , 0.05f
            );
            DrawLine(BoneList[NormalizeJoint[i, 0]].position, BoneList[NormalizeJoint[i, 1]].position, Color.red);
        }
        for (int i = 0; i < joints.Length / 2; i++)
        {
            DrawLine(points[joints[i, 0]] * 0.001f + new Vector3(-1, 0.8f, 0), points[joints[i, 1]] * 0.001f + new Vector3(-1, 0.8f, 0), Color.blue);
        }
    }
    void DrawLine(Vector3 s, Vector3 e, Color c)
    {
        Debug.DrawLine(s, e, c);
    }
}
enum OpenPoseRef
{
    Hips,
    LeftKnee, LeftFoot,
    RightKnee, RightFoot,
    Neck, Head,
    RightArm, RightElbow, RightWrist,
    LeftArm, LeftElbow, LeftWrist
};
enum NormalizeBoneRef
{
    Hip2LeftKnee, LeftKnee2LeftFoot,
    Hip2RightKnee, RightKnee2RightFoot,
    Hip2Neck, Neck2Head,
    Neck2RightArm, RightArm2RightElbow, RightElbow2RightWrist,
    Neck2LeftArm, LeftArm2LeftElbow, LeftElbow2LeftWrist
};

先ほどの3DグラフではZ軸が上方向に表示されていましたがUnityではY軸が上なので整形を行いつつPointUpdate内で座標を入れ替えています。
また、手足の長さ等が一致しなかったので各座標の接続を正規化してボーンの長さをかけています。
【まなこ】金曜日のおはよう踊ってみた
こちらの動画から600データほど姿勢を拝借し、Unityで動かした結果が以下です。

tf-openposeで3D姿勢推定してunityに落とし込んだけどうまくいかなかった話です
青線が生データでunityちゃんはそれのLerpで動いてます
精度があまり良くなく感じる pic.twitter.com/iTOle4xhMs
— りうこつ (@jdatmtjp) February 21, 2018

回転も入れて動かしてみた結果が以下のものです。
Unityちゃんの身長に合わせてLerpしたものが赤線のデータです。赤線だけ見ると結構きれいに動いている感じがするのでFullBodyIKの問題かもしれません。

見ればわかりますがtf-openposeで取り出した生データである青線のポーズがかなりバタついています。UnityちゃんのIKにもLerpをかけていますがかなりぶるぶる震えています。これは失敗でしょう。

ボーンの回転による制御

IKで制御するとかなりバタつくのがわかったのでボーンの回転でのモーション再現も試してみました。

IKじゃなくボーンの回転で制御したバージョンです
時々肘があらぬ方向に曲がっているのと首と頭の角度が深すぎるのが気になりますがその辺は調整次第だと思います
とりあえずgitにコードだけ投げておきます pic.twitter.com/o9eItULORL
— りうこつ (@jdatmtjp) February 24, 2018

コードは以下のようになりました。

BoneController.cs

using System.Collections;
using System.Collections.Generic;
using UnityEngine;
using System;
using System.IO;
using System.Linq;
public class BoneController : MonoBehaviour
{
    [SerializeField] Animator animator;
    [SerializeField, Range(10, 120)] float FrameRate;
    [SerializeField] GameObject BoneRoot;
    public List<Transform> BoneList = new List<Transform>();
    Vector3[] points = new Vector3[17];
    Vector3[] DefaultNormalizeBone = new Vector3[12];
    Vector3[] NormalizeBone = new Vector3[12];
    Vector3[] LerpedNormalizeBone = new Vector3[12];

    Quaternion[] DefaultBoneRot = new Quaternion[17];
    Quaternion[] DefaultBoneLocalRot = new Quaternion[17];
    Vector3[] DefaultXAxis = new Vector3[17];
    Vector3[] DefaultYAxis = new Vector3[17];
    Vector3[] DefaultZAxis = new Vector3[17];


    float Timer;
    int[,] joints = new int[,] { { 0, 1 }, { 1, 2 }, { 2, 3 }, { 0, 4 }, { 4, 5 },
                                 { 5, 6 }, { 0, 7 }, { 7, 8 }, { 8, 9 }, { 9, 10},
                                 { 8, 11 }, { 11, 12 }, { 12, 13 }, { 8, 14 }, { 14, 15 },
                                 { 15, 16 } };
    int[,] BoneJoint = new int[,] { { 0, 2 }, { 2, 3 }, { 0, 5 }, { 5, 6 }, { 0, 7 },
                                    { 7, 8 }, { 8, 9 }, { 9, 10 }, { 9, 12 }, { 12, 13 },
                                    { 9, 15 }, { 15, 16 } };
    int NowFrame = 0;
    void Start()
    {
        GetBones();
        PointUpdate();
    }

    void Update()
    {
        PointUpdateByTime();
        SetBoneRot();
    }
    void GetBones()
    {
        BoneList.Add(animator.GetBoneTransform(HumanBodyBones.Hips));
        BoneList.Add(animator.GetBoneTransform(HumanBodyBones.LeftUpperLeg));
        BoneList.Add(animator.GetBoneTransform(HumanBodyBones.LeftLowerLeg));
        BoneList.Add(animator.GetBoneTransform(HumanBodyBones.LeftFoot));
        BoneList.Add(animator.GetBoneTransform(HumanBodyBones.RightUpperLeg));
        BoneList.Add(animator.GetBoneTransform(HumanBodyBones.RightLowerLeg));
        BoneList.Add(animator.GetBoneTransform(HumanBodyBones.RightFoot));
        BoneList.Add(animator.GetBoneTransform(HumanBodyBones.Spine));
        BoneList.Add(animator.GetBoneTransform(HumanBodyBones.Chest));
        BoneList.Add(animator.GetBoneTransform(HumanBodyBones.Neck));
        BoneList.Add(animator.GetBoneTransform(HumanBodyBones.Head));
        BoneList.Add(animator.GetBoneTransform(HumanBodyBones.RightUpperArm));
        BoneList.Add(animator.GetBoneTransform(HumanBodyBones.RightLowerArm));
        BoneList.Add(animator.GetBoneTransform(HumanBodyBones.RightHand));
        BoneList.Add(animator.GetBoneTransform(HumanBodyBones.LeftUpperArm));
        BoneList.Add(animator.GetBoneTransform(HumanBodyBones.LeftLowerArm));
        BoneList.Add(animator.GetBoneTransform(HumanBodyBones.LeftHand));

        for (int i = 0; i < 17; i++)
        {
            var rootT = animator.GetBoneTransform(HumanBodyBones.Hips).root;
            DefaultBoneRot[i] = BoneList[i].rotation;
            DefaultBoneLocalRot[i] = BoneList[i].localRotation;
            DefaultXAxis[i] = new Vector3(
                    Vector3.Dot(BoneList[i].right, rootT.right),
                    Vector3.Dot(BoneList[i].up, rootT.right),
                    Vector3.Dot(BoneList[i].forward, rootT.right)
                    );
            DefaultYAxis[i] = new Vector3(
                    Vector3.Dot(BoneList[i].right, rootT.up),
                    Vector3.Dot(BoneList[i].up, rootT.up),
                    Vector3.Dot(BoneList[i].forward, rootT.up)
                    );
            DefaultZAxis[i] = new Vector3(
                    Vector3.Dot(BoneList[i].right, rootT.forward),
                    Vector3.Dot(BoneList[i].up, rootT.forward),
                    Vector3.Dot(BoneList[i].forward, rootT.forward)
                    );
        }
        for (int i = 0; i < 12; i++)
        {
            DefaultNormalizeBone[i] = (BoneList[BoneJoint[i, 1]].position - BoneList[BoneJoint[i, 0]].position).normalized;
        }
    }
    void PointUpdate()
    {
        if (NowFrame < 600)
        {
            StreamReader fi = new StreamReader(Application.dataPath + "/datas/" + "3d_data" + NowFrame.ToString() + ".txt");
            NowFrame++;
            string all = fi.ReadToEnd();
            string[] axis = all.Split(']');
            float[] x = axis[0].Replace("[", "").Replace(Environment.NewLine, "").Split(' ').Where(s => s != "").Select(f => float.Parse(f)).ToArray();
            float[] y = axis[2].Replace("[", "").Replace(Environment.NewLine, "").Split(' ').Where(s => s != "").Select(f => float.Parse(f)).ToArray();
            float[] z = axis[1].Replace("[", "").Replace(Environment.NewLine, "").Split(' ').Where(s => s != "").Select(f => float.Parse(f)).ToArray();
            for (int i = 0; i < 17; i++)
            {
                points[i] = new Vector3(x[i], y[i], -z[i]);
            }
            for (int i = 0; i < 12; i++)
            {
                NormalizeBone[i] = (points[BoneJoint[i, 1]] - points[BoneJoint[i, 0]]).normalized;
            }
        }
    }
    void PointUpdateByTime()
    {
        Timer += Time.deltaTime;
        if (Timer > (1 / FrameRate))
        {
            Timer = 0;
            PointUpdate();
        }
    }
    Quaternion GetBoneRot(int jointNum)
    {
        Quaternion target = Quaternion.FromToRotation(DefaultNormalizeBone[jointNum], LerpedNormalizeBone[jointNum]);
        return target;
    }
    void SetBoneRot()
    {
        for (int i = 0; i < 12; i++)
        {
            LerpedNormalizeBone[i] = Vector3.Slerp(LerpedNormalizeBone[i], NormalizeBone[i], 0.1f);
        }
        if (Math.Abs(points[0].x) < 1000 && Math.Abs(points[0].y) < 1000 && Math.Abs(points[0].z) < 1000)
        {
            BoneList[0].position = Vector3.Lerp(BoneList[0].position, points[0] * 0.001f + Vector3.up * 0.8f, 0.1f);
            Vector3 hipRot = (NormalizeBone[0] + NormalizeBone[2] + NormalizeBone[4]).normalized;
            BoneRoot.transform.forward = Vector3.Lerp(BoneRoot.transform.forward, new Vector3(hipRot.x, 0, hipRot.z), 0.1f);
        }
        int j = 0;
        for (int i = 1; i < 17; i++)
        {
            if (i != 3 && i != 6 && i != 13 && i != 16)
            {
                float angle;
                Vector3 axis;
                GetBoneRot(j).ToAngleAxis(out angle, out axis);

                Vector3 axisInLocalCoordinate = axis.x * DefaultXAxis[i] + axis.y * DefaultYAxis[i] + axis.z * DefaultZAxis[i];

                Quaternion modifiedRotation = Quaternion.AngleAxis(angle, axisInLocalCoordinate);

                BoneList[i].localRotation = Quaternion.Lerp(BoneList[i].localRotation, DefaultBoneLocalRot[i] * modifiedRotation, 0.1f);
                j++;
            }
        }
        for (int i = 0; i < 16; i++)
        {
            DrawLine(points[joints[i, 0]] * 0.001f + new Vector3(-1, 0.8f, 0), points[joints[i, 1]] * 0.001f + new Vector3(-1, 0.8f, 0), Color.blue);
            DrawRay(points[joints[i, 0]] * 0.001f + new Vector3(-1, 0.8f, 0), BoneList[i].right * 0.1f, Color.magenta);
            DrawRay(points[joints[i, 0]] * 0.001f + new Vector3(-1, 0.8f, 0), BoneList[i].up * 0.1f, Color.green);
            DrawRay(points[joints[i, 0]] * 0.001f + new Vector3(-1, 0.8f, 0), BoneList[i].forward * 0.1f, Color.cyan);
        }
        for (int i = 0; i < 12; i++)
        {
            DrawRay(points[BoneJoint[i, 0]] * 0.001f + new Vector3(1, 0.8f, 0), NormalizeBone[i] * 0.1f, Color.green);
        }
    }
    void DrawLine(Vector3 s, Vector3 e, Color c)
    {
        Debug.DrawLine(s, e, c);
    }
    void DrawRay(Vector3 s, Vector3 d, Color c)
    {
        Debug.DrawRay(s, d, c);
    }
}
enum PointsNum
{
    Hips, RightUpperLeg, RightLowerLeg, RightFoot, LeftUpperLeg, LeftLowerLeg, LeftFoot, Spine, Chest, Neck, Head, LeftUpperArm, LeftLowerArm, LeftHand, RightUpperArm, RightLowerArm, RightHand
}

またこのコードの作成には以下のサイトから座標変換の部分を拝借しました。
Perception Neuron MocapとUnityで通常の3Dモデルキャラを動かす方法

githubに生データ含むプロジェクト丸ごとあげておきます。

まとめ

まだCPUだけでVTuberになるのは難しいようです。それと書いていて気づきましたが被写体の動きが激しすぎるかもしれません。他の動画で試したら追記します。

You get articles that match your needs
You can efficiently read back useful information
You can use dark theme

What you can do with signing up