はじめに
MediaPipeUnityPluginを使って簡易的なVTuberシステムを作ってみました
mediapipeがblendshapeの値出してくれるようになってて簡単なVTuberシステムならすぐに作れて良い pic.twitter.com/mLGK8ie78Y
— metaaa (@meta556) April 7, 2024
環境
- Unity 2022.3.6f1
- MediaPipeUnityPlugin v0.14.1
使用Asset
- FinalIk(VRIK)
- UniVRM(VRM1.0)
- VRoidSampleModel
実装
MediaPipeUnityPluginSampleのSampleにあるFaceDetectorRunner.cs
を参考に改変します
以下のようにOnFaceLandmarkDetectionOutputに任意の処理を追加することでFaceLandmarkerResult
を受け取りました
/// <summary>
/// Callback receiving each new MediaPipe face landmarker result: queues the
/// result for deferred annotation drawing and forwards it to the avatar
/// controller for tracking.
/// </summary>
/// <param name="result">Landmarks and blendshape scores for the current frame.</param>
/// <param name="image">Source image the result was computed from (unused here).</param>
/// <param name="timestamp">Frame timestamp in the stream's time base (unused here).</param>
private void OnFaceLandmarkDetectionOutput(FaceLandmarkerResult result, Image image, long timestamp)
{
// DrawLater defers rendering — NOTE(review): this callback presumably runs off the
// Unity main thread (MediaPipe sample convention); confirm before adding direct Unity API calls.
_faceLandmarkerResultAnnotationController.DrawLater(result);
avatarController.Track(result);
}
顔のトラッキング
MediapipeのFaceLandmarkerはLandmarkとBlendShapeの値を出力してくれるので、この情報をもとにアバターのトラッキングを行います
顔の向きのトラッキング
顔の向きの操作はFinalIkを使用しました
ランドマークから顔の向きを計算し、結果をIkに適用しています
/// <summary>
/// Per-frame smoothing: slerps the IK head target toward the most recently
/// tracked head rotation (_headRot) using ikTrackingLerp as the blend factor.
/// </summary>
public void Update()
{
// NOTE(review): a constant slerp factor per frame is frame-rate dependent;
// consider scaling by Time.deltaTime for consistent smoothing speed.
_ikTargets.Head.localRotation = Quaternion.Slerp(_ikTargets.Head.localRotation, _headRot,
_trackingParameter.ikTrackingLerp);
}
/// <summary>
/// Caches the landmark list of the tracked face and recomputes the target
/// head rotation. Called once per landmarker result.
/// </summary>
/// <param name="faceLandmarkResult">Latest MediaPipe face landmarker output.</param>
public void FaceTrack(FaceLandmarkerResult faceLandmarkResult)
{
if (faceLandmarkResult.faceLandmarks == null) return;
// Guard must be <=: an index equal to Count is also out of range. The
// original `<` allowed faceLandmarks[_landmarkIndex] to throw when
// Count == _landmarkIndex (e.g. an empty list with index 0).
if (faceLandmarkResult.faceLandmarks.Count <= _landmarkIndex) return;
_faceLandmarks = faceLandmarkResult.faceLandmarks[_landmarkIndex].landmarks;
_headRot = GetLookAtRotation();
}
/// <summary>
/// Estimates the head orientation from four face landmarks: the cross product
/// of the across-the-face axis and the up-the-face axis gives the facing
/// direction, and the tilt of the up-axis supplies the roll component.
/// </summary>
/// <returns>Rotation representing the approximate face direction.</returns>
private Quaternion GetLookAtRotation()
{
// Axis running across the face (right side toward left side).
var rightToLeft = (GetFaceLandmarkPosition(LandmarkDefinition.FaceLeft) -
GetFaceLandmarkPosition(LandmarkDefinition.FaceRight)).normalized;
// Axis running up the face (chin toward forehead).
var bottomToTop = (GetFaceLandmarkPosition(LandmarkDefinition.FaceBottom) -
GetFaceLandmarkPosition(LandmarkDefinition.FaceTop)).normalized;
// Roll: angle of the up-axis in the XY plane, offset by 90 degrees so an
// upright face yields zero roll.
var rollRad = Mathf.Atan2(bottomToTop.y, bottomToTop.x) - 90f * Mathf.Deg2Rad;
var rollRotation = Quaternion.Euler(0, 0, rollRad * Mathf.Rad2Deg);
// Facing direction is perpendicular to both face axes.
var facing = Vector3.Cross(rightToLeft, bottomToTop);
return rollRotation * Quaternion.LookRotation(facing);
}
/// <summary>
/// Reads the landmark at <paramref name="index"/> from the cached landmark
/// list and converts it into a Vector3.
/// </summary>
private Vector3 GetFaceLandmarkPosition(int index)
{
// Coordinates are MediaPipe normalized landmark values — presumably
// normalized image space; verify against the landmarker configuration.
var lm = _faceLandmarks[index];
return new Vector3(lm.x, lm.y, lm.z);
}
Landmarkから大体顔の中央を通る横と縦のベクトルを求め、外積を使うことで大まかな向きを求めます
これだけではロール回転（正面を向いたまま頭を左右の肩の方向へ傾ける回転）が考慮されないので、縦方向のベクトルのx・y成分からAtan2を用いてロール角を計算し、外積で求めた回転と合成しています
表情のトラッキング
表情の操作は主にmediapipeが出力するBlendShapeの値をVRM1.0のBlendShapeに適用することで実装しました
(頑張ればLandmarkから値を求めることも出来ます)
/// <summary>
/// Smoothly blends the VRM expression weights and look-at angles toward the
/// most recently tracked values, using faceTrackingLerp as the blend factor.
/// </summary>
public void Update()
{
var t = _trackingParameter.faceTrackingLerp;

// Move the current value a fraction t toward the tracked target.
float Blend(float current, float target) => Mathf.Lerp(current, target, t);

// Snapshot-then-apply: read every current value before writing any,
// preserving the original read-before-write ordering.
var aa = Blend(_vrm10RuntimeExpression.GetWeight(ExpressionKey.Aa), _mouseOpenWeight);
var blinkL = Blend(_vrm10RuntimeExpression.GetWeight(ExpressionKey.BlinkLeft), _blinkWeights.leftBlink);
var blinkR = Blend(_vrm10RuntimeExpression.GetWeight(ExpressionKey.BlinkRight), _blinkWeights.rightBlink);
var yaw = Blend(_vrm10RuntimeLookAt.Yaw, _irisYawPitch.yaw);
var pitch = Blend(_vrm10RuntimeLookAt.Pitch, _irisYawPitch.pitch);

_vrm10RuntimeExpression.SetWeight(ExpressionKey.Aa, aa);
_vrm10RuntimeExpression.SetWeight(ExpressionKey.BlinkLeft, blinkL);
_vrm10RuntimeExpression.SetWeight(ExpressionKey.BlinkRight, blinkR);
_vrm10RuntimeLookAt.SetYawPitchManually(yaw, pitch);
}
/// <summary>
/// Caches mouth, blink, and iris targets derived from the MediaPipe
/// blendshape output; Update() then blends the avatar toward these values.
/// </summary>
/// <param name="faceLandmarkResult">Latest MediaPipe face landmarker output.</param>
public void TrackExpressions(FaceLandmarkerResult faceLandmarkResult)
{
if (faceLandmarkResult.faceBlendshapes == null) return;
// Guard must be <=: an index equal to Count is also out of range. The
// original `<` allowed faceBlendshapes[_landmarkIndex] to throw when
// Count == _landmarkIndex (e.g. an empty list with index 0).
if (faceLandmarkResult.faceBlendshapes.Count <= _landmarkIndex) return;
var faceBlendShapes = faceLandmarkResult.faceBlendshapes[_landmarkIndex].categories;
_mouseOpenWeight = GetMouthOpenWeight(faceBlendShapes);
_blinkWeights = GetBlinkWeight(faceBlendShapes);
_irisYawPitch = GetIrisYawPitch(faceBlendShapes);
}
/// <summary>
/// Maps the MediaPipe "jawOpen" blendshape score through the configurable
/// mouth AnimationCurve to produce the VRM "Aa" expression weight.
/// </summary>
private float GetMouthOpenWeight(List<Category> faceBlendShapes)
=> _trackingParameter.mouseCurve.Evaluate(faceBlendShapes[BlendShapeDefinition.JawOpen].score);
/// <summary>
/// Shapes the per-eye MediaPipe blink scores through the configurable blink
/// AnimationCurve.
/// </summary>
/// <returns>(left blink weight, right blink weight).</returns>
private (float, float) GetBlinkWeight(List<Category> faceBlendShapes)
{
var curve = _trackingParameter.blinkCurve;
var left = curve.Evaluate(faceBlendShapes[BlendShapeDefinition.EyeBlinkLeft].score);
var right = curve.Evaluate(faceBlendShapes[BlendShapeDefinition.EyeBlinkRight].score);
return (left, right);
}
/// <summary>
/// Derives a single (yaw, pitch) gaze pair from MediaPipe's per-eye look
/// blendshapes, scaled into the VRM look-at range settings. The horizontal
/// and vertical branches shared identical "opposing strengths to signed
/// angle" logic, now factored into MapAxis.
/// </summary>
/// <param name="faceBlendShapes">Blendshape categories for the tracked face.</param>
/// <returns>Yaw and pitch in degrees for SetYawPitchManually.</returns>
private (float, float) GetIrisYawPitch(List<Category> faceBlendShapes)
{
float Score(int index) => faceBlendShapes[index].score;

// Average the two eyes into one strength per gaze direction. "In"/"Out"
// are mirrored between eyes, so looking left combines the right eye's
// "in" with the left eye's "out" (and vice versa).
var lookLeft = (Score(BlendShapeDefinition.EyeLookInRight) + Score(BlendShapeDefinition.EyeLookOutLeft)) * 0.5f;
var lookRight = (Score(BlendShapeDefinition.EyeLookInLeft) + Score(BlendShapeDefinition.EyeLookOutRight)) * 0.5f;
var lookUp = (Score(BlendShapeDefinition.EyeLookUpLeft) + Score(BlendShapeDefinition.EyeLookUpRight)) * 0.5f;
var lookDown = (Score(BlendShapeDefinition.EyeLookDownLeft) + Score(BlendShapeDefinition.EyeLookDownRight)) * 0.5f;

// VRM defines separate inner/outer horizontal limits; use their mean as
// the single shared yaw range.
var horizontalRange = (_vrm10ObjectLookAt.HorizontalInner.CurveXRangeDegree +
_vrm10ObjectLookAt.HorizontalOuter.CurveXRangeDegree) * 0.5f;

var yaw = MapAxis(lookRight, lookLeft, _trackingParameter.horizontalIrisCurve,
horizontalRange, horizontalRange);
var pitch = MapAxis(lookUp, lookDown, _trackingParameter.verticalIrisCurve,
_vrm10ObjectLookAt.VerticalUp.CurveXRangeDegree,
_vrm10ObjectLookAt.VerticalDown.CurveXRangeDegree);
return (yaw, pitch);
}

/// <summary>
/// Converts a pair of opposing gaze strengths into a signed angle: the
/// stronger side wins, its strength is shaped by <paramref name="curve"/>,
/// then scaled toward +positiveRange or -negativeRange degrees.
/// </summary>
private static float MapAxis(float positive, float negative, AnimationCurve curve,
float positiveRange, float negativeRange)
{
return positive > negative
? Mathf.Lerp(0, positiveRange, curve.Evaluate(positive))
: Mathf.Lerp(0, negativeRange * -1f, curve.Evaluate(negative));
}
瞬きと口の開き具合
Mediapipeが出力したBlendShapeの値をAnimationCurveで調整して適用しています(簡単)
VRMには各発音?の口の形のBlendShapeがあるので頑張って値を求めてみるのもよいかもしれません
/// <summary>
/// Maps the MediaPipe "jawOpen" blendshape score through the configurable
/// mouth AnimationCurve to produce the VRM "Aa" expression weight.
/// </summary>
private float GetMouthOpenWeight(List<Category> faceBlendShapes)
=> _trackingParameter.mouseCurve.Evaluate(faceBlendShapes[BlendShapeDefinition.JawOpen].score);
/// <summary>
/// Shapes the per-eye MediaPipe blink scores through the configurable blink
/// AnimationCurve.
/// </summary>
/// <returns>(left blink weight, right blink weight).</returns>
private (float, float) GetBlinkWeight(List<Category> faceBlendShapes)
{
var curve = _trackingParameter.blinkCurve;
var left = curve.Evaluate(faceBlendShapes[BlendShapeDefinition.EyeBlinkLeft].score);
var right = curve.Evaluate(faceBlendShapes[BlendShapeDefinition.EyeBlinkRight].score);
return (left, right);
}
瞳のトラッキング
VRM1.0には瞳を動かすためだと思われるBlendShapeが存在していますが、SampleModelだと動作しなかったのでSetYawPitchManually
を使用しました
MediapipeのBlendShapeは各瞳毎の値を出力しますが、SetYawPitchManuallyだと両目とも同じ値になるので以下の処理でいい感じの値を求めました
/// <summary>
/// Derives a single (yaw, pitch) gaze pair from MediaPipe's per-eye look
/// blendshapes, scaled into the VRM look-at range settings. The horizontal
/// and vertical branches shared identical "opposing strengths to signed
/// angle" logic, now factored into MapAxis.
/// </summary>
/// <param name="faceBlendShapes">Blendshape categories for the tracked face.</param>
/// <returns>Yaw and pitch in degrees for SetYawPitchManually.</returns>
private (float, float) GetIrisYawPitch(List<Category> faceBlendShapes)
{
float Score(int index) => faceBlendShapes[index].score;

// Average the two eyes into one strength per gaze direction. "In"/"Out"
// are mirrored between eyes, so looking left combines the right eye's
// "in" with the left eye's "out" (and vice versa).
var lookLeft = (Score(BlendShapeDefinition.EyeLookInRight) + Score(BlendShapeDefinition.EyeLookOutLeft)) * 0.5f;
var lookRight = (Score(BlendShapeDefinition.EyeLookInLeft) + Score(BlendShapeDefinition.EyeLookOutRight)) * 0.5f;
var lookUp = (Score(BlendShapeDefinition.EyeLookUpLeft) + Score(BlendShapeDefinition.EyeLookUpRight)) * 0.5f;
var lookDown = (Score(BlendShapeDefinition.EyeLookDownLeft) + Score(BlendShapeDefinition.EyeLookDownRight)) * 0.5f;

// VRM defines separate inner/outer horizontal limits; use their mean as
// the single shared yaw range.
var horizontalRange = (_vrm10ObjectLookAt.HorizontalInner.CurveXRangeDegree +
_vrm10ObjectLookAt.HorizontalOuter.CurveXRangeDegree) * 0.5f;

var yaw = MapAxis(lookRight, lookLeft, _trackingParameter.horizontalIrisCurve,
horizontalRange, horizontalRange);
var pitch = MapAxis(lookUp, lookDown, _trackingParameter.verticalIrisCurve,
_vrm10ObjectLookAt.VerticalUp.CurveXRangeDegree,
_vrm10ObjectLookAt.VerticalDown.CurveXRangeDegree);
return (yaw, pitch);
}

/// <summary>
/// Converts a pair of opposing gaze strengths into a signed angle: the
/// stronger side wins, its strength is shaped by <paramref name="curve"/>,
/// then scaled toward +positiveRange or -negativeRange degrees.
/// </summary>
private static float MapAxis(float positive, float negative, AnimationCurve curve,
float positiveRange, float negativeRange)
{
return positive > negative
? Mathf.Lerp(0, positiveRange, curve.Evaluate(positive))
: Mathf.Lerp(0, negativeRange * -1f, curve.Evaluate(negative));
}
参考