Three.jsでInstancedSkinnedMeshを使用することで、100体の3Dアバターを同一画面に同時レンダリングできるようになります。

Posted at 2024-07-09

Three.jsを使った製品では、レンダリング性能を向上させるためにオブジェクトをインスタンス化する必要があることはよく知られています。しかし、Three.js公式が提供するInstancedMeshは静的なオブジェクトにしか対応していません。私たちの製品 https://timmerse.com では、数十人から数百人が同じイベントに参加する必要があります。テスト中、アバターが20人を超えるとページのフレームレートが低下し、60fpsの滑らかなレンダリングを維持できないことがわかりました。

私たちは多くの3Dレンダリングの最適化を試みてきました。

ドローコールを削減する

モデルを簡略化し、全体の見た目を損なわない範囲で、シーン内の三角形の数をできるだけ減らします。これは一般的な手法です。元々のシーンがローポリであるため、簡略化後も多くのコンテンツが良好な見た目を維持しつつ、画面内の三角形の数を大幅に削減できました。同時に、CSM（カスケードシャドウマップ）などの高度なレンダリング効果は、高性能マシンでのみ有効にしています。以下はGPU性能を判定する機能の実装例です：

/**
 * Classifies the current GPU into a coarse performance tier
 * ('high' | 'middle' | 'low') by pattern-matching the GPU model string.
 *
 * The tier is computed once in the constructor and exposed through `level`.
 * When no heuristic matches a non-empty GPU string, the tier keeps its
 * initial value, 'high'.
 */
export class GpuDetector {
  /** GPU model string as returned by getGPUModel(). */
  gpu: string;
  /** Detected tier; starts at 'high' and is overwritten by the heuristics. */
  _level: 'high' | 'low' | 'middle' = 'high';

  constructor() {
    this.gpu = getGPUModel();
    // The desktop heuristics always run first; on mobile/cloud the mobile
    // heuristics then run on top and may override the result. (The desktop
    // pass used to be invoked twice on desktop; once is enough because it
    // only depends on `this.gpu`.)
    this.detectPC();
    if (isMobileOrCloud) {
      this.detectMobile();
    }
    console.log('GPU: ', this.gpu, ';level:', this._level);
  }

  /** The detected performance tier. */
  get level(): 'high' | 'low' | 'middle' {
    return this._level;
  }

  /**
   * Mobile heuristics. On iOS the tier is first derived from screen height
   * and device pixel ratio; a recognised Adreno / Mali / PowerVR GPU string
   * then takes precedence.
   */
  detectMobile() {
    if (iOS) {
      this._level = window.screen.height >= 812 && window.devicePixelRatio >= 2 ? 'high' : 'low';
    }
    if (/adreno/i.test(this.gpu)) {
      this._level = this.adrenoGPU();
    } else if (/mali/i.test(this.gpu)) {
      this._level = this.maliGPU();
    } else if (/powervr/i.test(this.gpu)) {
      this._level = this.powerVRGPU();
    }
  }

  /**
   * Desktop heuristics keyed on the vendor name found in the GPU string.
   * An empty GPU string is rated 'low'; an unrecognised vendor leaves the
   * current tier untouched.
   */
  detectPC() {
    if (!this.gpu) {
      this._level = 'low';
      return;
    }
    if (/apple m/i.test(this.gpu)) {
      this._level = 'high';
    } else if (/apple/i.test(this.gpu)) {
      this._level = 'middle';
    } else if (/nvidia/i.test(this.gpu)) {
      this._level = this.nvidiaGPU();
    } else if (/amd/i.test(this.gpu)) {
      this._level = this.amdGPU();
    } else if (/intel/i.test(this.gpu)) {
      this._level = this.intelGPU();
    }
  }

  /** PowerVR: only strings containing "GT8" are rated 'high'. */
  powerVRGPU(): 'high' | 'low' | 'middle' {
    return /GT8/i.test(this.gpu) ? 'high' : 'low';
  }

  /**
   * Adreno: tier by model number (> 640 high, >= 570 middle, otherwise low).
   * The number is taken from the first digit run after "adreno" when the
   * string has text on both sides of it, otherwise from the last
   * space-separated token. An unparsable number (NaN) is rated 'low'.
   */
  adrenoGPU(): 'high' | 'low' | 'middle' {
    const match = /^.+adreno\D+(\d+).+$/i.exec(this.gpu);
    let model: number;
    if (match !== null) {
      model = parseInt(match[1], 10);
    } else {
      const tokens = this.gpu.split(' ');
      model = parseInt(tokens[tokens.length - 1], 10);
    }
    return model > 640 ? 'high' : model >= 570 ? 'middle' : 'low';
  }

  /**
   * Mali: only the "Mali-G" family is tiered; everything else is 'low'.
   * The model number is read from the text after the last "Mali-G".
   */
  maliGPU(): 'high' | 'low' | 'middle' {
    if (!/mali-g/i.test(this.gpu)) {
      return 'low';
    }
    // Split case-insensitively so the split agrees with the test above;
    // a literal 'Mali-G' separator made lower/upper-case strings parse as NaN.
    const parts = this.gpu.split(/mali-g/i);
    const model = parseInt(parts[parts.length - 1], 10);
    if (model > 77) {
      return 'high';
    }
    // NOTE(review): G77 itself falls through to 'low' while G76 is 'middle' —
    // confirm whether the threshold above was meant to be `>= 77`.
    return model === 76 || model === 31 || model === 52 ? 'middle' : 'low';
  }

  /** NVIDIA: RTX / Titan are 'high', GTX is 'middle', anything else 'low'. */
  nvidiaGPU(): 'high' | 'low' | 'middle' {
    return /(rtx|titan)/i.test(this.gpu) ? 'high' : /gtx/i.test(this.gpu) ? 'middle' : 'low';
  }

  /**
   * AMD: never rated 'high' by this method. "Pro" / "Radeon VII" and
   * unrecognised models are 'middle'; RX cards are 'middle' above model 560
   * and 'low' otherwise (including when no number follows "RX ").
   */
  amdGPU(): 'high' | 'low' | 'middle' {
    if (/(pro|radeon vii)/i.test(this.gpu)) {
      return 'middle';
    }
    if (/(rx)/i.test(this.gpu)) {
      // Case-insensitive split to match the case-insensitive test above.
      const parts = this.gpu.split(/rx /i);
      return parseInt(parts[parts.length - 1], 10) > 560 ? 'middle' : 'low';
    }
    return 'middle';
  }

  /**
   * Intel: never rated 'high' by this method. Iris is 'middle' when the
   * string mentions "OpenGL Engine" or the number after "Graphics " is
   * >= 650; HD is 'middle' when the number after "HD " is > 7000.
   * Unparsable numbers (NaN) are rated 'low'.
   */
  intelGPU(): 'high' | 'low' | 'middle' {
    if (/iris/i.test(this.gpu)) {
      if (/opengl engine/i.test(this.gpu)) {
        return 'middle';
      }
      const parts = this.gpu.split(/graphics /i);
      return parseInt(parts[1], 10) >= 650 ? 'middle' : 'low';
    }
    if (/HD/i.test(this.gpu)) {
      const parts = this.gpu.split(/hd /i);
      return parseInt(parts[1], 10) > 7e3 ? 'middle' : 'low';
    }
    return /apple/i.test(this.gpu) ? 'middle' : 'low';
  }
}

テクスチャサイズの縮小

対象となるのは、スカイボックスのテクスチャ、アバターの各パーツのテクスチャ、画像や動画などの素材です。さらに、見落としやすいものとして、ユーザーが自らアップロードしたモデルに含まれるテクスチャがあります。https://gltf.report/ を使用すると、モデルファイル内のテクスチャが占めるGPUメモリ量を具体的に分析できます。私は、ディレクトリ内のglbファイルをスキャンし、モデルの問題を分析して特定するツールを作成しました：

const fs = require('fs');
const path = require('path');
const { Document, NodeIO } = require('@gltf-transform/core');
const { execSync } = require('child_process');
const {
  KHRDracoMeshCompression,
  KHRMaterialsEmissiveStrength,
  KHRMaterialsSpecular,
  KHRMaterialsIOR,
  KHRMaterialsClearcoat,
  KHRMaterialsIridescence,
  KHRMeshQuantization,
  EXTMeshoptCompression,
} = require('@gltf-transform/extensions');
const draco3d = require('draco3dgltf');
const meshopt = require('meshoptimizer');

/**
 * Walks the avatar directory, reads every .glb/.gltf file with glTF Transform
 * and estimates the GPU memory its textures occupy (width * height * 4 bytes,
 * i.e. uncompressed RGBA without mipmaps). The per-file results are printed
 * and written to ./gpu-memory.txt, largest first.
 */
(async () => {
  // One entry per model that could be read: { filePath, megabytes }.
  const results = [];

  // The meshopt decoder is WebAssembly-backed and must finish initialising
  // before it is handed to glTF Transform.
  await meshopt.MeshoptDecoder.ready;

  // Build the reader once and reuse it for every file instead of
  // re-creating the Draco decoder module per model.
  const io = new NodeIO()
    .registerExtensions([
      KHRDracoMeshCompression,
      KHRMaterialsEmissiveStrength,
      KHRMaterialsSpecular,
      KHRMaterialsIOR,
      KHRMaterialsClearcoat,
      KHRMaterialsIridescence,
      KHRMeshQuantization,
      EXTMeshoptCompression,
    ])
    .registerDependencies({
      'draco3d.decoder': await draco3d.createDecoderModule(),
      'meshopt.decoder': meshopt.MeshoptDecoder,
    });

  /**
   * Reads one model and records its estimated texture memory.
   * Files that fail to load are reported on stderr and skipped.
   * @param {string} filePath - Path of the .glb/.gltf file.
   */
  async function processFile(filePath) {
    let document;
    try {
      document = await io.read(filePath);
    } catch (error) {
      console.error(`Error reading ${filePath}:`, error);
      return;
    }

    let fileGPUMemory = 0;
    for (const texture of document.getRoot().listTextures()) {
      const dimensions = texture.getSize();
      // Textures without image data or a readable size are not counted.
      if (texture.getImage() && dimensions) {
        // Assuming 4 bytes per pixel (RGBA)
        fileGPUMemory += dimensions[0] * dimensions[1] * 4;
      }
    }
    results.push({ filePath, megabytes: fileGPUMemory / 1024 / 1024 });
  }

  /**
   * Recursively processes every glTF model below `directoryPath`.
   * @param {string} directoryPath - Directory to scan.
   */
  async function traverseDirectory(directoryPath) {
    for (const file of fs.readdirSync(directoryPath)) {
      const fullPath = path.join(directoryPath, file);
      if (fs.statSync(fullPath).isDirectory()) {
        await traverseDirectory(fullPath);
        continue;
      }
      // Compare case-insensitively so files such as "MODEL.GLB" are included.
      const lowerPath = fullPath.toLowerCase();
      if (lowerPath.endsWith('.glb') || lowerPath.endsWith('.gltf')) {
        await processFile(fullPath);
      }
    }
  }

  await traverseDirectory(path.resolve(__dirname, '../../../avatar'));

  // Sort on the numeric value itself rather than re-parsing formatted text.
  results.sort((a, b) => b.megabytes - a.megabytes);
  const resList = results.map(
    ({ filePath, megabytes }) => `${megabytes} MB texture GPU memory: ${filePath}`
  );
  console.log(resList);
  fs.writeFileSync('./gpu-memory.txt', resList.join('\n'), 'utf-8');
})().catch((error) => {
  // Surface failures (e.g. a missing directory) instead of leaving the
  // promise rejection unhandled.
  console.error(error);
  process.exitCode = 1;
});

スキンメッシュのインスタンス化

上記2つの最適化を完了した後、アバターが少ない場合、PCとモバイルデバイスで大規模なシーンをスムーズに実行できるようになりました。現在のボトルネックはアバターにあります。私たちのアバターには、髪型、顔のパーツ、服など、10以上のパーツがあります。キャラクターは動きをサポートし、多くのスケルタルアニメーションを行えるため、各アバターのスキンメッシュは小さくない性能コストがかかります。20人のキャラクターがいれば、シーン内に200以上のスキンメッシュが存在し、レンダリングのパフォーマンスボトルネックを引き起こします。

インスタンス化されたスキンメッシュについては、three.jsのプルリクエストで議論されています：https://github.com/mrdoob/three.js/pull/22667
ここから核心的なコードを抽出しました。

import * as THREE from 'three'

// Scratch matrices reused by InstancedSkinnedMesh.raycast: the per-instance
// local matrix and its product with the mesh's world matrix.
const _instanceLocalMatrix = /*@__PURE__*/ new THREE.Matrix4()
const _instanceWorldMatrix = /*@__PURE__*/ new THREE.Matrix4()

// Scratch matrix that receives (bone world matrix * bone inverse) while the
// patched skeleton.update flattens bone matrices.
const _offsetMatrix = /*@__PURE__*/ new THREE.Matrix4()
// Used in place of the bone's world matrix when a bone entry is falsy;
// only read, never written, in this module.
const _identityMatrix = /*@__PURE__*/ new THREE.Matrix4()

// Temporary list that collects the raycast hits of a single instance.
const _instanceIntersects = []

// Set to true once the global THREE.ShaderChunk patch has been applied, so
// constructing further InstancedSkinnedMesh objects does not patch again.
let patchedChunks = false

export class InstancedSkinnedMesh extends THREE.SkinnedMesh {
  constructor(geometry, material, count = 1) {
    super(geometry, material)

    this.instanceMatrix = new THREE.InstancedBufferAttribute(
      new Float32Array(count * 16),
      16
    )
    this.instanceColor = null
    this.instanceBones = null

    this.count = count

    this.frustumCulled = false

    this._mesh = null
    this.isInstancedMesh = true

    const bind = this.bind.bind(this)
    this.bind = function (skeleton, bindMatrix) {
      bind(skeleton, bindMatrix)

      this.skeleton.update = (instanceBones, id) => {
        const bones = this.skeleton.bones
        const boneInverses = this.skeleton.boneInverses
        const boneMatrices = instanceBones || this.skeleton.boneMatrices
        const boneTexture = this.skeleton.boneTexture
        const instanceId = id || 0

        // flatten bone matrices to array
        for (let i = 0, il = bones.length; i < il; i++) {
          // compute the offset between the current and the original transform
          const matrix = bones[i] ? bones[i].matrixWorld : _identityMatrix

          _offsetMatrix.multiplyMatrices(matrix, boneInverses[i])
          _offsetMatrix.toArray(
            boneMatrices,
            16 * (i + instanceId * bones.length)
          )
        }

        if (boneTexture !== null) {
          boneTexture.needsUpdate = true
        }
      }

      this.skeleton.computeBoneTexture = this.skeleton.computeInstancedBoneTexture = () => {
        this.skeleton.boneTexture = new THREE.DataTexture(
          this.instanceBones,
          this.skeleton.bones.length * 4,
          this.count,
          THREE.RGBAFormat,
          THREE.FloatType
        )
        this.skeleton.boneTexture.needsUpdate = true
      }
    }

    // Patch three.js skinning shader chunks for points and instanced bones
    if (!patchedChunks) {
      patchedChunks = true

      THREE.ShaderChunk.points_vert = THREE.ShaderChunk.points_vert.replace(
        '#include <clipping_planes_pars_vertex>',
        '#include <clipping_planes_pars_vertex>\n#include <skinning_pars_vertex>'
      )
      THREE.ShaderChunk.points_vert = THREE.ShaderChunk.points_vert.replace(
        '#include <morphtarget_vertex>',
        '#include <skinbase_vertex>\n#include <morphtarget_vertex>\n#include <skinning_vertex>'
      )

      // Update PointsMaterial
      THREE.ShaderLib.points.vertexShader = THREE.ShaderChunk.points_vert

      THREE.ShaderChunk.skinning_pars_vertex = /* glsl */ `
        #ifdef USE_SKINNING

          uniform mat4 bindMatrix;
          uniform mat4 bindMatrixInverse;

          uniform highp sampler2D boneTexture;
          uniform int boneTextureSize;

          mat4 getBoneMatrix( const in float i ) {

          #ifdef USE_INSTANCING
              
              int j = 4 * int(i);
              vec4 v1 = texelFetch(boneTexture, ivec2( j, gl_InstanceID ), 0);
              vec4 v2 = texelFetch(boneTexture, ivec2( j + 1, gl_InstanceID ), 0);
              vec4 v3 = texelFetch(boneTexture, ivec2( j + 2, gl_InstanceID ), 0);
              vec4 v4 = texelFetch(boneTexture, ivec2( j + 3, gl_InstanceID ), 0);
              
          #else

            float j = i * 4.0;
            float x = mod( j, float( boneTextureSize ) );
            float y = floor( j / float( boneTextureSize ) );

            float dx = 1.0 / float( boneTextureSize );
            float dy = 1.0 / float( boneTextureSize );

            y = dy * ( y + 0.5 );

            vec4 v1 = texture2D( boneTexture, vec2( dx * ( x + 0.5 ), y ) );
            vec4 v2 = texture2D( boneTexture, vec2( dx * ( x + 1.5 ), y ) );
            vec4 v3 = texture2D( boneTexture, vec2( dx * ( x + 2.5 ), y ) );
            vec4 v4 = texture2D( boneTexture, vec2( dx * ( x + 3.5 ), y ) );

          #endif

            mat4 bone = mat4( v1, v2, v3, v4 );

            return bone;

          }

        #endif
      `
    }
  }

  copy(source) {
    super.copy(source)

    if (source.isInstancedMesh) {
      this.instanceMatrix.copy(source.instanceMatrix)

      if (source.instanceColor !== null)
        this.instanceColor = source.instanceColor.clone()

      this.count = source.count
    }

    return this
  }

  getColorAt(index, color) {
    color.fromArray(this.instanceColor.array, index * 3)
  }

  getMatrixAt(index, matrix) {
    matrix.fromArray(this.instanceMatrix.array, index * 16)
  }

  raycast(raycaster, intersects) {
    const matrixWorld = this.matrixWorld
    const raycastTimes = this.count

    if (this._mesh === null) {
      this._mesh = new THREE.SkinnedMesh(this.geometry, this.material)
      this._mesh.copy(this)
    }

    const _mesh = this._mesh

    if (_mesh.material === undefined) return

    for (let instanceId = 0; instanceId < raycastTimes; instanceId++) {
      // calculate the world matrix for each instance

      this.getMatrixAt(instanceId, _instanceLocalMatrix)

      _instanceWorldMatrix.multiplyMatrices(matrixWorld, _instanceLocalMatrix)

      // the mesh represents this single instance

      _mesh.matrixWorld = _instanceWorldMatrix

      _mesh.raycast(raycaster, _instanceIntersects)

      // process the result of raycast

      for (let i = 0, l = _instanceIntersects.length; i < l; i++) {
        const intersect = _instanceIntersects[i]
        intersect.instanceId = instanceId
        intersect.object = this
        intersects.push(intersect)
      }

      _instanceIntersects.length = 0
    }
  }

  setColorAt(index, color) {
    if (this.instanceColor === null) {
      this.instanceColor = new THREE.InstancedBufferAttribute(
        new Float32Array(this.instanceMatrix.count * 3),
        3
      )
    }

    color.toArray(this.instanceColor.array, index * 3)
  }

  setMatrixAt(index, matrix) {
    matrix.toArray(this.instanceMatrix.array, index * 16)
  }

  setBonesAt(index, skeleton) {
    skeleton = skeleton || this.skeleton

    const size = skeleton.bones.length * 16

    if (this.instanceBones === null) {
      this.instanceBones = new Float32Array(size * this.count)
    }

    skeleton.update(this.instanceBones, index)
  }

  updateMorphTargets() {}

  dispose() {
    this.dispatchEvent({ type: 'dispose' })
  }
}

ビジネスコードの最適化

上記の3つの主要な性能最適化を完了した後、残りは開発者が意図せずに描画プロセスに影響を与えるコードです。この部分はChromeのデベロッパーツールで比較的簡単に見つけることができるため、詳しくは説明しません。

最適化の効果

上記の一連の最適化を経て、私たちは https://timmerse.com において、100人以上が同じ空間に存在しながら、60fpsを維持することを実現しました。

デモページに入って、さらに体験することができます：

You get articles that match your needs
You can efficiently read back useful information
You can use dark theme

What you can do with signing up