
Turning Everyone into a Super Saiyan with ARKit + Metal

Posted at 2020-10-17

While studying Apple's People Occlusion sample, it looked like I could pull off the effect of going Super Saiyan, so I gave it a try.

Finished result: demo.png / demo.gif
Reference articles:
ARKit3とMetalで人体のみにエフェクトをかける
Swift+Metalで発光エフェクト
MetalKit で GPU を使いこなす

How to create the aura

Searching for images of Super Saiyans, you find many variations of the aura.
This article uses the aura right after Goku first goes Super Saiyan on Namek as the reference (it has no jagged edges and looked easy to do: a yellow aura that gets brighter the closer it is to the body).

The base is this Apple sample.

① Outline the human body
 Enlarge the body region and remove the original body color, leaving only the outline.
② Blur the result of ①
 ・Use a large blur for yellow and a small blur for white, so that it gets brighter closer to the body.
③ Composite ② with the camera capture image

① Outline the human body

To get a thick outline, I enlarge the body image and then erase the original body region. This is handled in a compute shader.

kernel void matteConvert(texture2d<half, access::read> inTexture [[ texture(0) ]],
                         texture2d<half, access::write> outWhiteTexture [[ texture(1) ]],
                         texture2d<half, access::write> outYellowTexture [[ texture(2) ]],
                         uint2 gid [[thread_position_in_grid]]) {
    
    uint2 textureIndex(gid);
    if (inTexture.read(textureIndex).r > 0.1) {
        // No color inside the body region
        outWhiteTexture.write(half4(0.0), gid);
        outYellowTexture.write(half4(0.0), gid);
        return;
    }
    
    // Dilate (enlarge) the body region
    constexpr int scale = 15;
    constexpr int radius = scale / 2;
    half color = 0.0;
    
    for (int i=0; i<scale; i++) {
        for (int j=0; j<scale; j++) {
            uint2 textureIndex(gid.x + (i - radius), gid.y + (j - radius));
            half alpha = inTexture.read(textureIndex).r;
            if (alpha > 0.1) {
                color = 1.0;
                break;
            }
        }
        if (color > 0.0) {
            break;
        }
    }

    outWhiteTexture.write(half4(color, color, color, 1.0), gid);
    outYellowTexture.write(half4(color, color, 0.0, 1.0), gid);
}

For each pixel of the body image, the pixel is marked as "body" if any pixel within 15 px up/down/left/right belongs to the body, which dilates (enlarges) the region.
(This loops 225 (= 15 x 15) times per pixel, so it is very expensive. Since the result gets blurred anyway, it would be better to downscale before processing, but I didn't get around to it. I intend to someday; a rough sketch follows below.)
Pixels that originally belonged to the body are left uncolored (black), so the result ends up as an outline image.
The output goes into two texture buffers, one for white and one for yellow.
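As a rough sketch of that downscaling idea (my own addition, not in the original code; MPSImageBilinearScale, the scaledAlphaTexture name, the .r8Unorm format, and the 0.5 scale factor are all assumptions), the matte could be halved inside the `if let width ... height` block of Renderer.update() before the dilation pass:

// Sketch only (not in the original code): downscale the body matte before the
// expensive dilation pass, then run the compute kernel and blurs on the smaller texture.
let scaledDesc = MTLTextureDescriptor.texture2DDescriptor(pixelFormat: .r8Unorm,
                                                          width: width / 2, height: height / 2,
                                                          mipmapped: false)
scaledDesc.usage = [.shaderRead, .shaderWrite]
let scaledAlphaTexture = device.makeTexture(descriptor: scaledDesc)!

var transform = MPSScaleTransform(scaleX: 0.5, scaleY: 0.5, translateX: 0, translateY: 0)
let scaleKernel = MPSImageBilinearScale(device: device)
withUnsafePointer(to: &transform) { transformPtr in
    scaleKernel.scaleTransform = transformPtr
    scaleKernel.encode(commandBuffer: commandBuffer,
                       sourceTexture: alphaTexture!,
                       destinationTexture: scaledAlphaTexture)
}
// The dilation kernel would then read scaledAlphaTexture instead of alphaTexture.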

Outlining result (checked with Capture GPU Frame in Xcode 12)
edge.png

② Blur the result of ①

The blur uses a Metal Performance Shaders image filter. Metal Performance Shaders can easily be combined with your own shaders.

// Vary the blur size over time
time += 1
// Blur (white)
let whiteIntensity = Int((sin(Float(time)/3) + 2) * 30) | 0x01  // MPSImageTent kernel size must be an odd number
let kernel1 = MPSImageTent(device: device, kernelWidth: whiteIntensity, kernelHeight: whiteIntensity)
kernel1.encode(commandBuffer: commandBuffer,
              inPlaceTexture: &whiteBlurTexture!, fallbackCopyAllocator: nil)
// Blur (yellow)
let yellowIntensity = Int((sin(Float(time)/3) + 2) * 100) | 0x01
let kernel2 = MPSImageTent(device: device, kernelWidth: yellowIntensity, kernelHeight: yellowIntensity)
kernel2.encode(commandBuffer: commandBuffer,
              inPlaceTexture: &yellowBlurTexture!, fallbackCopyAllocator: nil)

Metal Performance Shaders offers a variety of filters; after trying a few, MPSImageTent looked best, so that is what I use.
The white blur is kept small and the yellow blur large, and the blur size also changes over time.
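For reference, swapping in a Gaussian blur would look like the following (an alternative I'm sketching here, not what this article uses; the sigma values are assumptions):

// Alternative sketch (not used in this article): MPSImageGaussianBlur instead of MPSImageTent.
// Sigma values are assumptions; in-place encoding can fail for some kernels, in which case
// a fallbackCopyAllocator would be needed instead of nil.
let whiteBlur = MPSImageGaussianBlur(device: device, sigma: 8.0)
whiteBlur.encode(commandBuffer: commandBuffer,
                 inPlaceTexture: &whiteBlurTexture!, fallbackCopyAllocator: nil)
let yellowBlur = MPSImageGaussianBlur(device: device, sigma: 24.0)
yellowBlur.encode(commandBuffer: commandBuffer,
                  inPlaceTexture: &yellowBlurTexture!, fallbackCopyAllocator: nil)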

③ Composite ② with the camera capture image

Here the camera capture image and the blurred outline images (white and yellow) are simply added together.

fragment half4 compositeImageFragmentShader(CompositeColorInOut in [[ stage_in ]],
                                            texture2d<float, access::sample> capturedImageTextureY [[ texture(0) ]],
                                            texture2d<float, access::sample> capturedImageTextureCbCr [[ texture(1) ]],
                                            texture2d<float, access::sample> whiteColorTexture [[ texture(2) ]],
                                            texture2d<float, access::sample> yellowColorTexture [[ texture(3) ]],
                                            texture2d<float, access::sample> alphaTexture [[ texture(4) ]])
{
    constexpr sampler s(address::clamp_to_edge, filter::linear);

    float2 cameraTexCoord = in.texCoordCamera;

    // Sample Y and CbCr textures to get the YCbCr color at the given texture coordinate.
    float4 rgb = ycbcrToRGBTransform(capturedImageTextureY.sample(s, cameraTexCoord), capturedImageTextureCbCr.sample(s, cameraTexCoord));

    half4 cameraColor = half4(rgb);
    half4 whiteColor = half4(whiteColorTexture.sample(s, cameraTexCoord));
    half4 yellowColor = half4(yellowColorTexture.sample(s, cameraTexCoord)) * 2.0;
        
    return cameraColor + whiteColor + yellowColor;
}

The additive result works fine without clamping.
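If over-bright fringes ever became an issue, the sum could be clamped; a minimal sketch (not part of the original shader):

// Sketch only: clamp the additive result to [0, 1] before returning.
return saturate(cameraColor + whiteColor + yellowColor);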

Full source code

・Swift

ViewController.swift
import UIKit
import MetalKit
import ARKit

class ViewController: UIViewController, MTKViewDelegate {
    
    var session = ARSession()
    var renderer: Renderer!
    
    override func viewDidLoad() {
        super.viewDidLoad()
        
        if let view = self.view as? MTKView {
            view.device = MTLCreateSystemDefaultDevice()
            view.backgroundColor = UIColor.clear
            view.delegate = self
            renderer = Renderer(session: session, metalDevice: view.device!, mtkView: view)
            renderer.drawRectResized(size: view.bounds.size)
        }
    }
    
    override func viewWillAppear(_ animated: Bool) {
        super.viewWillAppear(animated)
        let configuration = ARWorldTrackingConfiguration()
        configuration.frameSemantics = .personSegmentation
        session.run(configuration)
    }
    
    func mtkView(_ view: MTKView, drawableSizeWillChange size: CGSize) {
        renderer.drawRectResized(size: size)
    }
    
    func draw(in view: MTKView) {
        renderer.update()
    }
}
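A small note on the configuration above: person segmentation requires an A12 chip or newer, so it may be worth guarding the frame semantics before running the session. A minimal sketch (my addition, not in the original code):

// Addition (not in the original code): only enable person segmentation when the device supports it.
let configuration = ARWorldTrackingConfiguration()
if ARWorldTrackingConfiguration.supportsFrameSemantics(.personSegmentation) {
    configuration.frameSemantics = .personSegmentation
}
session.run(configuration)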

Renderer.swift
import MetalKit
import ARKit
import CoreVideo
import MetalPerformanceShaders

let kMaxBuffersInFlight: Int = 3

let kImagePlaneVertexData: [Float] = [
    -1.0, -1.0, 0.0, 1.0,
    1.0, -1.0, 1.0, 1.0,
    -1.0, 1.0, 0.0, 0.0,
    1.0, 1.0, 1.0, 0.0
]

class Renderer {
    let session: ARSession
    let matteGenerator: ARMatteGenerator
    let device: MTLDevice
    let inFlightSemaphore = DispatchSemaphore(value: kMaxBuffersInFlight)
    var mtkView: MTKView
    
    var commandQueue: MTLCommandQueue!
    var imagePlaneVertexBuffer: MTLBuffer!
    // Pipeline state for compositing the final image
    var compositePipelineState: MTLRenderPipelineState!
    // Compute pipeline state for dilating (enlarging) the body matte
    var computeState: MTLComputePipelineState!
    // Captured-image textures
    var capturedImageTextureY: CVMetalTexture?
    var capturedImageTextureCbCr: CVMetalTexture?
    var capturedImageTextureCache: CVMetalTextureCache!
    // Body matte textures
    var alphaTexture: MTLTexture?       // body matte
    var whiteBlurTexture: MTLTexture!   // body matte dilated and blurred in white
    var yellowBlurTexture: MTLTexture!  // body matte dilated and blurred in yellow
    // Viewport size
    var viewportSize: CGSize = CGSize()
    var viewportSizeDidChange: Bool = false
    // Threadgroup size for the matte-processing compute shader
    var threadgroupSize = MTLSizeMake(32, 32, 1)
    // Animation counter
    var time = 0
    
    init(session: ARSession, metalDevice device: MTLDevice, mtkView: MTKView) {
        self.session = session
        self.device = device
        self.mtkView = mtkView
        matteGenerator = ARMatteGenerator(device: device, matteResolution: .half)
        loadMetal()
    }
    
    func drawRectResized(size: CGSize) {
        viewportSize = size
        viewportSizeDidChange = true
    }
    
    func update() {
        _ = inFlightSemaphore.wait(timeout: DispatchTime.distantFuture)
        
        let commandBuffer = commandQueue.makeCommandBuffer()!
        // Keep the captured-image textures from being released while rendering
        var textures = [capturedImageTextureY, capturedImageTextureCbCr]
        commandBuffer.addCompletedHandler { [weak self] commandBuffer in
            if let strongSelf = self {
                strongSelf.inFlightSemaphore.signal()
            }
            textures.removeAll()
        }
        // Get the camera capture textures (Y and CbCr planes)
        guard let currentFrame = session.currentFrame else { return }
        let pixelBuffer = currentFrame.capturedImage
        if CVPixelBufferGetPlaneCount(pixelBuffer) < 2 { return }
        capturedImageTextureY = createTexture(fromPixelBuffer: pixelBuffer, pixelFormat: .r8Unorm, planeIndex: 0)
        capturedImageTextureCbCr = createTexture(fromPixelBuffer: pixelBuffer, pixelFormat: .rg8Unorm, planeIndex: 1)
        // Set the UV coordinates according to the viewport size
        if viewportSizeDidChange {
            viewportSizeDidChange = false
            // Update the texture coordinates of our image plane to aspect fill the viewport
            let displayToCameraTransform = currentFrame.displayTransform(for: .portrait, viewportSize: viewportSize).inverted()
            
            let vertexData = imagePlaneVertexBuffer.contents().assumingMemoryBound(to: Float.self)
            for index in 0...3 {
                let textureCoordIndex = 4 * index + 2   // kImagePlaneVertexData is laid out as position (x, y) + UV (u, v), so +2 addresses the UVs
                let textureCoord = CGPoint(x: CGFloat(kImagePlaneVertexData[textureCoordIndex]), y: CGFloat(kImagePlaneVertexData[textureCoordIndex + 1]))
                let transformedCoord = textureCoord.applying(displayToCameraTransform)
                // Captured image
                vertexData[textureCoordIndex] = Float(transformedCoord.x)
                vertexData[textureCoordIndex + 1] = Float(transformedCoord.y)
            }
        }
        // Get the body matte
        alphaTexture = matteGenerator.generateMatte(from: currentFrame, commandBuffer: commandBuffer)
        // Dilate the body matte so the blur extends beyond the body. Also generate white and yellow versions at the same time.
        if let width = alphaTexture?.width, let height = alphaTexture?.height {
            let colorDesc = MTLTextureDescriptor.texture2DDescriptor(pixelFormat: .bgra8Unorm,
                                                                     width: width, height: height, mipmapped: false)
            colorDesc.usage = [.shaderRead, .shaderWrite]
            whiteBlurTexture = device.makeTexture(descriptor: colorDesc)
            yellowBlurTexture = device.makeTexture(descriptor: colorDesc)

            let threadCountW = (width + self.threadgroupSize.width - 1) / self.threadgroupSize.width
            let threadCountH = (height + self.threadgroupSize.height - 1) / self.threadgroupSize.height
            let threadgroupCount = MTLSizeMake(threadCountW, threadCountH, 1)
            
            let computeEncoder = commandBuffer.makeComputeCommandEncoder()!
            
            computeEncoder.setComputePipelineState(computeState)
            computeEncoder.setTexture(alphaTexture, index: 0)
            computeEncoder.setTexture(whiteBlurTexture, index: 1)
            computeEncoder.setTexture(yellowBlurTexture, index: 2)
            computeEncoder.dispatchThreadgroups(threadgroupCount, threadsPerThreadgroup: threadgroupSize)
            computeEncoder.endEncoding()
        }
        // Vary the blur size over time
        time += 1
        // Blur (white)
        let whiteIntensity = Int((sin(Float(time)/3) + 2) * 30) | 0x01  // MPSImageTent kernel size must be an odd number
        let kernel1 = MPSImageTent(device: device, kernelWidth: whiteIntensity, kernelHeight: whiteIntensity)
        kernel1.encode(commandBuffer: commandBuffer,
                      inPlaceTexture: &whiteBlurTexture!, fallbackCopyAllocator: nil)
        // Blur (yellow)
        let yellowIntensity = Int((sin(Float(time)/3) + 2) * 100) | 0x01
        let kernel2 = MPSImageTent(device: device, kernelWidth: yellowIntensity, kernelHeight: yellowIntensity)
        kernel2.encode(commandBuffer: commandBuffer,
                      inPlaceTexture: &yellowBlurTexture!, fallbackCopyAllocator: nil)
        // Composite the captured image with the white/yellow blurs
        guard let renderPassDescriptor = mtkView.currentRenderPassDescriptor, let currentDrawable = mtkView.currentDrawable else { return }
        let compositeRenderEncoder = commandBuffer.makeRenderCommandEncoder(descriptor: renderPassDescriptor)!
        compositeImagesWithEncoder(renderEncoder: compositeRenderEncoder)
        compositeRenderEncoder.endEncoding()
        
        commandBuffer.present(currentDrawable)
        commandBuffer.commit()
    }
    
    func loadMetal() {
        commandQueue = device.makeCommandQueue()

        let imagePlaneVertexDataCount = kImagePlaneVertexData.count * MemoryLayout<Float>.size
        imagePlaneVertexBuffer = device.makeBuffer(bytes: kImagePlaneVertexData, length: imagePlaneVertexDataCount, options: [])
        // Texture cache for the camera capture image
        var textureCache: CVMetalTextureCache?
        CVMetalTextureCacheCreate(nil, nil, device, nil, &textureCache)
        capturedImageTextureCache = textureCache
        // Pipeline for compositing the captured image with the body matte
        let defaultLibrary = device.makeDefaultLibrary()!

        let compositePipelineStateDescriptor = MTLRenderPipelineDescriptor()
        compositePipelineStateDescriptor.sampleCount = 1
        compositePipelineStateDescriptor.vertexFunction = defaultLibrary.makeFunction(name: "compositeImageVertexTransform")!
        compositePipelineStateDescriptor.fragmentFunction = defaultLibrary.makeFunction(name: "compositeImageFragmentShader")!
        compositePipelineStateDescriptor.colorAttachments[0].pixelFormat = .bgra8Unorm
        try! compositePipelineState = device.makeRenderPipelineState(descriptor: compositePipelineStateDescriptor)
        
        // Compute shader for outlining the body
        let edgeShader = defaultLibrary.makeFunction(name: "matteConvert")!
        computeState = try! self.device.makeComputePipelineState(function: edgeShader)
    }

    // Create an MTLTexture from the captured image
    func createTexture(fromPixelBuffer pixelBuffer: CVPixelBuffer, pixelFormat: MTLPixelFormat, planeIndex: Int) -> CVMetalTexture? {
        let width = CVPixelBufferGetWidthOfPlane(pixelBuffer, planeIndex)
        let height = CVPixelBufferGetHeightOfPlane(pixelBuffer, planeIndex)
        
        var texture: CVMetalTexture? = nil
        let status = CVMetalTextureCacheCreateTextureFromImage(nil, capturedImageTextureCache, pixelBuffer, nil, pixelFormat,
                                                               width, height, planeIndex, &texture)
        if status != kCVReturnSuccess {
            texture = nil
        }
        return texture
    }

    func compositeImagesWithEncoder(renderEncoder: MTLRenderCommandEncoder) {
        guard let textureY = capturedImageTextureY, let textureCbCr = capturedImageTextureCbCr else { return }
        renderEncoder.setCullMode(.none)
        renderEncoder.setRenderPipelineState(compositePipelineState)

        renderEncoder.setVertexBuffer(imagePlaneVertexBuffer, offset: 0, index: 0)

        renderEncoder.setFragmentTexture(CVMetalTextureGetTexture(textureY), index: 0)
        renderEncoder.setFragmentTexture(CVMetalTextureGetTexture(textureCbCr), index: 1)
        renderEncoder.setFragmentTexture(whiteBlurTexture, index: 2)
        renderEncoder.setFragmentTexture(yellowBlurTexture, index: 3)
        renderEncoder.setFragmentTexture(alphaTexture, index: 4)
        renderEncoder.drawPrimitives(type: .triangleStrip, vertexStart: 0, vertexCount: 4)
    }
}

・Shaders

Shaders.metal
#include <metal_stdlib>
#include <simd/simd.h>
#import "ShaderTypes.h"   // defines kVertexAttributePosition, kTextureIndexY, etc. (shared header from the Apple sample)

using namespace metal;

typedef struct {
    float2 position [[attribute(kVertexAttributePosition)]];
    float2 texCoord [[attribute(kVertexAttributeTexcoord)]];
} ImageVertex;


typedef struct {
    float4 position [[position]];
    float2 texCoord;
} ImageColorInOut;

vertex ImageColorInOut capturedImageVertexTransform(ImageVertex in [[stage_in]]) {
    ImageColorInOut out;
    out.position = float4(in.position, 0.0, 1.0);
    out.texCoord = in.texCoord;
    return out;
}

// Convert from YCbCr to rgb
float4 ycbcrToRGBTransform(float4 y, float4 CbCr) {
    const float4x4 ycbcrToRGBTransform = float4x4(
      float4(+1.0000f, +1.0000f, +1.0000f, +0.0000f),
      float4(+0.0000f, -0.3441f, +1.7720f, +0.0000f),
      float4(+1.4020f, -0.7141f, +0.0000f, +0.0000f),
      float4(-0.7010f, +0.5291f, -0.8860f, +1.0000f)
    );

    float4 ycbcr = float4(y.r, CbCr.rg, 1.0);
    return ycbcrToRGBTransform * ycbcr;
}

// This defines the captured image fragment function.
fragment float4 capturedImageFragmentShader(ImageColorInOut in [[stage_in]],
                                            texture2d<float, access::sample> capturedImageTextureY [[ texture(kTextureIndexY) ]],
                                            texture2d<float, access::sample> capturedImageTextureCbCr [[ texture(kTextureIndexCbCr) ]]) {
    
    constexpr sampler colorSampler(mip_filter::linear,
                                   mag_filter::linear,
                                   min_filter::linear);
    
    // Sample Y and CbCr textures to get the YCbCr color at the given texture coordinate.
    return ycbcrToRGBTransform(capturedImageTextureY.sample(colorSampler, in.texCoord),
                               capturedImageTextureCbCr.sample(colorSampler, in.texCoord));
}

typedef struct {
    float2 position;
    float2 texCoord;
} CompositeVertex;

typedef struct {
    float4 position [[position]];
    float2 texCoordCamera;
} CompositeColorInOut;

// Composite the image vertex function.
vertex CompositeColorInOut compositeImageVertexTransform(const device CompositeVertex* cameraVertices [[ buffer(0) ]],
                                                         unsigned int vid [[ vertex_id ]]) {
    CompositeColorInOut out;

    const device CompositeVertex& cv = cameraVertices[vid];

    out.position = float4(cv.position, 0.0, 1.0);
    out.texCoordCamera = cv.texCoord;

    return out;
}

// Composite the image fragment function.
fragment half4 compositeImageFragmentShader(CompositeColorInOut in [[ stage_in ]],
                                            texture2d<float, access::sample> capturedImageTextureY [[ texture(0) ]],
                                            texture2d<float, access::sample> capturedImageTextureCbCr [[ texture(1) ]],
                                            texture2d<float, access::sample> whiteColorTexture [[ texture(2) ]],
                                            texture2d<float, access::sample> yellowColorTexture [[ texture(3) ]],
                                            texture2d<float, access::sample> alphaTexture [[ texture(4) ]])
{
    constexpr sampler s(address::clamp_to_edge, filter::linear);

    float2 cameraTexCoord = in.texCoordCamera;

    // Sample Y and CbCr textures to get the YCbCr color at the given texture coordinate.
    float4 rgb = ycbcrToRGBTransform(capturedImageTextureY.sample(s, cameraTexCoord), capturedImageTextureCbCr.sample(s, cameraTexCoord));

    half4 cameraColor = half4(rgb);
    half4 whiteColor = half4(whiteColorTexture.sample(s, cameraTexCoord));
    half4 yellowColor = half4(yellowColorTexture.sample(s, cameraTexCoord)) * 2.0;
        
    return cameraColor + whiteColor + yellowColor;
}

// Build the body outline as (dilated body matte - body matte) and output it as white and yellow textures
kernel void matteConvert(texture2d<half, access::read> inTexture [[ texture(0) ]],
                         texture2d<half, access::write> outWhiteTexture [[ texture(1) ]],
                         texture2d<half, access::write> outYellowTexture [[ texture(2) ]],
                         uint2 gid [[thread_position_in_grid]]) {
    
    uint2 textureIndex(gid);
    if (inTexture.read(textureIndex).r > 0.1) {
        // No color inside the body region
        outWhiteTexture.write(half4(0.0), gid);
        outYellowTexture.write(half4(0.0), gid);
        return;
    }
    
    // Dilate (enlarge) the body region
    constexpr int scale = 15;
    constexpr int radius = scale / 2;
    half color = 0.0;
    
    for (int i=0; i<scale; i++) {
        for (int j=0; j<scale; j++) {
            uint2 textureIndex(gid.x + (i - radius), gid.y + (j - radius));
            half alpha = inTexture.read(textureIndex).r;
            if (alpha > 0.1) {
                color = 1.0;
                break;
            }
        }
        if (color > 0.0) {
            break;
        }
    }

    outWhiteTexture.write(half4(color, color, color, 1.0), gid);
    outYellowTexture.write(half4(color, color, 0.0, 1.0), gid);
}