Delete article

Deleted articles cannot be recovered.

Draft of this article would be also deleted.

Are you sure you want to delete this article?

More than 3 years have passed since last update.

ARKit+Metal で みんな超サイヤ人

Last updated at Posted at 2020-10-17

Apple の People Occlusion サンプルを調べている過程で、超サイヤ人になったときのようなエフェクトが作れそうだったので挑戦した。

MetalKit で GPU を使いこなす







// Builds the aura mask around the person: pixels that are NOT on the person but lie
// within `radius` pixels of it are written as white to outWhiteTexture and as yellow to
// outYellowTexture. Pixels on the person get (0, 0, 0, 0); everything else gets black
// with alpha 1.
//
// inTexture         person matte; a red value above 0.1 counts as "person".
// outWhiteTexture   receives the white band.
// outYellowTexture  receives the yellow band.
// gid               pixel handled by this thread.
kernel void matteConvert(texture2d<half, access::read> inTexture [[ texture(0) ]],
                         texture2d<half, access::write> outWhiteTexture [[ texture(1) ]],
                         texture2d<half, access::write> outYellowTexture [[ texture(2) ]],
                         uint2 gid [[thread_position_in_grid]]) {
    const int width = int(inTexture.get_width());
    const int height = int(inTexture.get_height());
    // The host rounds the grid up to whole threadgroups, so some threads fall outside
    // the texture; they must not read or write anything.
    if (int(gid.x) >= width || int(gid.y) >= height) {
        return;
    }
    if (inTexture.read(gid).r > 0.1) {
        // Pixels on the person itself get no color.
        outWhiteTexture.write(half4(0.0), gid);
        outYellowTexture.write(half4(0.0), gid);
        return;
    }
    // Dilate: light this pixel if any matte pixel in the scale x scale window is set.
    constexpr int scale = 15;
    constexpr int radius = scale / 2;
    half color = 0.0;
    for (int i = 0; i < scale; i++) {
        // Signed arithmetic: the unsigned form wraps around for pixels near the left/top edge.
        const int x = int(gid.x) + (i - radius);
        if (x < 0 || x >= width) {
            continue;
        }
        for (int j = 0; j < scale; j++) {
            const int y = int(gid.y) + (j - radius);
            if (y < 0 || y >= height) {
                continue;
            }
            half alpha = inTexture.read(uint2(x, y)).r;
            if (alpha > 0.1) {
                color = 1.0;
                break;
            }
        }
        if (color > 0.0) {
            break;
        }
    }

    outWhiteTexture.write(half4(color, color, color, 1.0), gid);
    outYellowTexture.write(half4(color, color, 0.0, 1.0), gid);
}


縁取り結果(Xcode12の Capture GPU Frame で確認)


ブラーには Metal Performance Shaders(Image Filter)を利用。Metal Performance Shaders は独自のシェーダーと簡単に組み合わせることができる。

// Advance the animation clock; both blur sizes pulse with it.
time += 1
// sin(...) + 2 stays within [1, 3], so the kernel sizes below are always positive.
let pulse = sin(Float(time) / 3) + 2
// MPSImageTent requires odd kernel dimensions, hence the `| 0x01`.
let whiteKernelSize = Int(pulse * 30) | 0x01
let yellowKernelSize = Int(pulse * 100) | 0x01

// White glow: blur the white texture in place.
let whiteBlur = MPSImageTent(device: device,
                             kernelWidth: whiteKernelSize,
                             kernelHeight: whiteKernelSize)
whiteBlur.encode(commandBuffer: commandBuffer,
                 inPlaceTexture: &whiteBlurTexture!,
                 fallbackCopyAllocator: nil)

// Yellow glow: same filter with a much wider kernel.
let yellowBlur = MPSImageTent(device: device,
                              kernelWidth: yellowKernelSize,
                              kernelHeight: yellowKernelSize)
yellowBlur.encode(commandBuffer: commandBuffer,
                  inPlaceTexture: &yellowBlurTexture!,
                  fallbackCopyAllocator: nil)

Metal Performance Shaders のフィルターは色々あるが、いくつか試したところ MPSImageTent が一番しっくりきたのでこれを採用。



// Fragment stage of the final composite: converts the camera's Y/CbCr planes to RGB and
// adds the white and yellow glow textures on top of it.
// All textures are sampled at in.texCoordCamera with a clamp-to-edge, linear sampler.
// alphaTexture (texture 4) is declared but never sampled in this function.
// NOTE(review): the braces around the body are not visible in this excerpt — confirm
// against the real shader file.
fragment half4 compositeImageFragmentShader(CompositeColorInOut in [[ stage_in ]],
                                            texture2d<float, access::sample> capturedImageTextureY [[ texture(0) ]],
                                            texture2d<float, access::sample> capturedImageTextureCbCr [[ texture(1) ]],
                                            texture2d<float, access::sample> whiteColorTexture [[ texture(2) ]],
                                            texture2d<float, access::sample> yellowColorTexture [[ texture(3) ]],
                                            texture2d<float, access::sample> alphaTexture [[ texture(4) ]])
    constexpr sampler s(address::clamp_to_edge, filter::linear);

    float2 cameraTexCoord = in.texCoordCamera;

    // Sample Y and CbCr textures to get the YCbCr color at the given texture coordinate.
    float4 rgb = ycbcrToRGBTransform(capturedImageTextureY.sample(s, cameraTexCoord), capturedImageTextureCbCr.sample(s, cameraTexCoord));

    half4 cameraColor = half4(rgb);
    half4 whiteColor = half4(whiteColorTexture.sample(s, cameraTexCoord));
    // The yellow glow is weighted twice as strongly as the white one.
    half4 yellowColor = half4(yellowColorTexture.sample(s, cameraTexCoord)) * 2.0;
    // Purely additive blend of camera image and both glows.
    return cameraColor + whiteColor + yellowColor;




/// Hosts the `MTKView`, owns the `ARSession`, and forwards the view's delegate callbacks
/// to `Renderer`.
// NOTE(review): closing braces — and possibly other statements — are missing from this
// listing; verify against the real source before reuse.
class ViewController: UIViewController, MTKViewDelegate {
    var session = ARSession()
    var renderer: Renderer!
    /// Configures the Metal view and creates the renderer for it.
    override func viewDidLoad() {
        if let view = self.view as? MTKView {
            view.device = MTLCreateSystemDefaultDevice()
            view.backgroundColor = UIColor.clear
            view.delegate = self
            // `view.device!` traps if no Metal device was assigned above.
            renderer = Renderer(session: session, metalDevice: view.device!, mtkView: view)
            renderer.drawRectResized(size: view.bounds.size)
    /// Builds a world-tracking configuration with person segmentation enabled.
    // NOTE(review): nothing visible here hands `configuration` to `session` — confirm.
    override func viewWillAppear(_ animated: Bool) {
        let configuration = ARWorldTrackingConfiguration()
        configuration.frameSemantics = .personSegmentation
    /// MTKViewDelegate: passes the new drawable size on to the renderer.
    func mtkView(_ view: MTKView, drawableSizeWillChange size: CGSize) {
        renderer.drawRectResized(size: size)
    /// MTKViewDelegate: per-frame draw callback.
    // NOTE(review): the body is not visible in this listing.
    func draw(in view: MTKView) {

/// Initial value of `Renderer.inFlightSemaphore`, i.e. how many `update()` calls may pass
/// the semaphore before one has to be signalled.
let kMaxBuffersInFlight: Int = 3

/// Vertex data for the full-screen image plane: four vertices, each stored as
/// position (x, y) followed by texture coordinate (u, v).
// NOTE(review): the closing `]` of this array literal is not visible in this listing.
let kImagePlaneVertexData: [Float] = [
    -1.0, -1.0, 0.0, 1.0,
    1.0, -1.0, 1.0, 1.0,
    -1.0, 1.0, 0.0, 0.0,
    1.0, 1.0, 1.0, 0.0

/// Renders the camera image with a white/yellow glow around the segmented person.
class Renderer {
    let session: ARSession
    let matteGenerator: ARMatteGenerator
    let device: MTLDevice
    // Starts at kMaxBuffersInFlight; `update()` waits on it at the top of every call.
    let inFlightSemaphore = DispatchSemaphore(value: kMaxBuffersInFlight)
    var mtkView: MTKView
    var commandQueue: MTLCommandQueue!
    var imagePlaneVertexBuffer: MTLBuffer!
    // Pipeline state for compositing the final image
    var compositePipelineState: MTLRenderPipelineState!
    // Pipeline state for the compute pass that dilates the person matte
    var computeState: MTLComputePipelineState!
    // Captured camera image textures
    var capturedImageTextureY: CVMetalTexture?
    var capturedImageTextureCbCr: CVMetalTexture?
    var capturedImageTextureCache: CVMetalTextureCache!
    // Person matte textures
    var alphaTexture: MTLTexture?       // person matte
    var whiteBlurTexture: MTLTexture!   // person matte turned white, then dilated and blurred
    var yellowBlurTexture: MTLTexture!  // person matte turned yellow, then dilated and blurred
    // Viewport size
    var viewportSize: CGSize = CGSize()
    var viewportSizeDidChange: Bool = false
    // Threadgroup size of the compute shader that processes the person matte
    var threadgroupSize = MTLSizeMake(32, 32, 1)
    // Animation counter
    var time = 0
    /// Stores the session, device and view, and creates the matte generator at half
    /// resolution.
    /// - Parameters:
    ///   - session: AR session whose current frame `update()` reads.
    ///   - device: Metal device used for all resources created by this renderer.
    ///   - mtkView: View that supplies the render pass descriptor and drawable.
    // NOTE(review): no call to `loadMetal()` is visible anywhere in this listing, yet
    // `update()` force-unwraps `commandQueue` — confirm where Metal setup is triggered.
    init(session: ARSession, metalDevice device: MTLDevice, mtkView: MTKView) {
        self.session = session
        self.device = device
        self.mtkView = mtkView
        matteGenerator = ARMatteGenerator(device: device, matteResolution: .half)
    /// Records the new viewport size and raises the flag that makes `update()` recompute
    /// the image plane's texture coordinates.
    /// - Parameter size: New viewport size.
    func drawRectResized(size: CGSize) {
        viewportSize = size
        viewportSizeDidChange = true
    /// Encodes one frame: refreshes the camera textures, generates and dilates the person
    /// matte, blurs the white and yellow glow textures, and starts the composite render pass.
    // NOTE(review): several statements are missing from this listing — closing braces, the
    // body of the completion handler, and any pipeline-state / endEncoding / present /
    // commit calls. Verify against the real source before relying on it.
    func update() {
        // Block until the semaphore (initial value kMaxBuffersInFlight) can be decremented.
        _ = inFlightSemaphore.wait(timeout: DispatchTime.distantFuture)
        let commandBuffer = commandQueue.makeCommandBuffer()!
        // Hold on to the captured camera textures so they are not released while rendering.
        // These are the values stored in the properties before they are reassigned below.
        var textures = [capturedImageTextureY, capturedImageTextureCbCr]
        commandBuffer.addCompletedHandler { [weak self] commandBuffer in
            if let strongSelf = self {
        // Get the camera capture textures (two planes: Y and CbCr)
        // NOTE(review): the two early returns below come after the semaphore wait above and
        // no matching signal is visible on these paths — confirm this cannot stall rendering.
        guard let currentFrame = session.currentFrame else { return }
        let pixelBuffer = currentFrame.capturedImage
        if CVPixelBufferGetPlaneCount(pixelBuffer) < 2 { return }
        capturedImageTextureY = createTexture(fromPixelBuffer: pixelBuffer, pixelFormat: .r8Unorm, planeIndex: 0)
        capturedImageTextureCbCr = createTexture(fromPixelBuffer: pixelBuffer, pixelFormat: .rg8Unorm, planeIndex: 1)
        // Set the uv coordinates according to the viewport size
        if viewportSizeDidChange {
            viewportSizeDidChange = false
            // Update the texture coordinates of our image plane to aspect fill the viewport
            let displayToCameraTransform = currentFrame.displayTransform(for: .portrait, viewportSize: viewportSize).inverted()
            let vertexData = imagePlaneVertexBuffer.contents().assumingMemoryBound(to: Float.self)
            for index in 0...3 {
                let textureCoordIndex = 4 * index + 2   // kImagePlaneVertexData is laid out as position (x, y) + uv (u, v); +2 skips to the uv pair
                let textureCoord = CGPoint(x: CGFloat(kImagePlaneVertexData[textureCoordIndex]), y: CGFloat(kImagePlaneVertexData[textureCoordIndex + 1]))
                let transformedCoord = textureCoord.applying(displayToCameraTransform)
                // Captured image
                vertexData[textureCoordIndex] = Float(transformedCoord.x)
                vertexData[textureCoordIndex + 1] = Float(transformedCoord.y)
        // Get the person matte
        alphaTexture = matteGenerator.generateMatte(from: currentFrame, commandBuffer: commandBuffer)
        // Dilate the person matte so the blur reaches beyond the body; the same pass also
        // produces the white and the yellow texture.
        if let width = alphaTexture?.width, let height = alphaTexture?.height {
            // Two new matte-sized textures are allocated on every call that reaches this point.
            let colorDesc = MTLTextureDescriptor.texture2DDescriptor(pixelFormat: .bgra8Unorm,
                                                                     width: width, height: height, mipmapped: false)
            colorDesc.usage = [.shaderRead, .shaderWrite]
            whiteBlurTexture = device.makeTexture(descriptor: colorDesc)
            yellowBlurTexture = device.makeTexture(descriptor: colorDesc)

            // Round up so the threadgroups cover the whole texture; the grid can therefore
            // be larger than the texture itself.
            let threadCountW = (width + self.threadgroupSize.width - 1) / self.threadgroupSize.width
            let threadCountH = (height + self.threadgroupSize.height - 1) / self.threadgroupSize.height
            let threadgroupCount = MTLSizeMake(threadCountW, threadCountH, 1)
            let computeEncoder = commandBuffer.makeComputeCommandEncoder()!
            // NOTE(review): no setComputePipelineState / endEncoding call is visible for this encoder.
            computeEncoder.setTexture(alphaTexture, index: 0)
            computeEncoder.setTexture(whiteBlurTexture, index: 1)
            computeEncoder.setTexture(yellowBlurTexture, index: 2)
            computeEncoder.dispatchThreadgroups(threadgroupCount, threadsPerThreadgroup: threadgroupSize)
        // Vary the blur size over time
        time += 1
        // Blur (white)
        let whiteIntensity = Int((sin(Float(time)/3) + 2) * 30) | 0x01  // MPSImageTent kernel sizes must be odd
        let kernel1 = MPSImageTent(device: device, kernelWidth: whiteIntensity, kernelHeight: whiteIntensity)
        // The force unwrap traps if whiteBlurTexture is still nil at this point.
        kernel1.encode(commandBuffer: commandBuffer,
                      inPlaceTexture: &whiteBlurTexture!, fallbackCopyAllocator: nil)
        // Blur (yellow)
        let yellowIntensity = Int((sin(Float(time)/3) + 2) * 100) | 0x01
        let kernel2 = MPSImageTent(device: device, kernelWidth: yellowIntensity, kernelHeight: yellowIntensity)
        kernel2.encode(commandBuffer: commandBuffer,
                      inPlaceTexture: &yellowBlurTexture!, fallbackCopyAllocator: nil)
        // Composite the captured image with the white and yellow blurs
        guard let renderPassDescriptor = mtkView.currentRenderPassDescriptor, let currentDrawable = mtkView.currentDrawable else { return }
        let compositeRenderEncoder = commandBuffer.makeRenderCommandEncoder(descriptor: renderPassDescriptor)!
        compositeImagesWithEncoder(renderEncoder: compositeRenderEncoder)
    /// Creates the long-lived Metal objects: command queue, image-plane vertex buffer,
    /// camera texture cache, the composite render pipeline and the matte compute pipeline.
    /// Every `!` / `try!` in here traps if the corresponding object cannot be created.
    func loadMetal() {
        commandQueue = device.makeCommandQueue()

        // Byte length of the vertex data (element count x size of Float).
        let imagePlaneVertexDataCount = kImagePlaneVertexData.count * MemoryLayout<Float>.size
        imagePlaneVertexBuffer = device.makeBuffer(bytes: kImagePlaneVertexData, length: imagePlaneVertexDataCount, options: [])
        // Texture cache for the captured camera image
        // The return code of CVMetalTextureCacheCreate is not checked.
        var textureCache: CVMetalTextureCache?
        CVMetalTextureCacheCreate(nil, nil, device, nil, &textureCache)
        capturedImageTextureCache = textureCache
        // Pipeline that composites the camera image with the person-matte glow
        let defaultLibrary = device.makeDefaultLibrary()!

        let compositePipelineStateDescriptor = MTLRenderPipelineDescriptor()
        compositePipelineStateDescriptor.sampleCount = 1
        compositePipelineStateDescriptor.vertexFunction = defaultLibrary.makeFunction(name: "compositeImageVertexTransform")!
        compositePipelineStateDescriptor.fragmentFunction = defaultLibrary.makeFunction(name: "compositeImageFragmentShader")!
        compositePipelineStateDescriptor.colorAttachments[0].pixelFormat = .bgra8Unorm
        try! compositePipelineState = device.makeRenderPipelineState(descriptor: compositePipelineStateDescriptor)
        // Compute shader that outlines the person
        let edgeShader = defaultLibrary.makeFunction(name: "matteConvert")!
        computeState = try! self.device.makeComputePipelineState(function: edgeShader)

    /// Creates a Metal texture for one plane of the captured image via the texture cache.
    /// - Parameters:
    ///   - pixelBuffer: Captured image buffer.
    ///   - pixelFormat: Metal pixel format to interpret the plane with.
    ///   - planeIndex: Index of the plane inside `pixelBuffer`.
    /// - Returns: The created texture, or `nil` when the cache call does not report success.
    func createTexture(fromPixelBuffer pixelBuffer: CVPixelBuffer, pixelFormat: MTLPixelFormat, planeIndex: Int) -> CVMetalTexture? {
        let width = CVPixelBufferGetWidthOfPlane(pixelBuffer, planeIndex)
        let height = CVPixelBufferGetHeightOfPlane(pixelBuffer, planeIndex)
        var texture: CVMetalTexture? = nil
        let status = CVMetalTextureCacheCreateTextureFromImage(nil, capturedImageTextureCache, pixelBuffer, nil, pixelFormat,
                                                               width, height, planeIndex, &texture)
        // Discard whatever was written to `texture` when the call failed.
        if status != kCVReturnSuccess {
            texture = nil
        return texture

    /// Binds the vertex buffer and the five fragment textures, then draws the image plane
    /// as a four-vertex triangle strip. Does nothing when either camera texture is missing.
    /// - Parameter renderEncoder: Encoder of the composite render pass.
    // NOTE(review): no setRenderPipelineState / endEncoding call is visible in this listing —
    // confirm against the real source.
    func compositeImagesWithEncoder(renderEncoder: MTLRenderCommandEncoder) {
        guard let textureY = capturedImageTextureY, let textureCbCr = capturedImageTextureCbCr else { return }

        renderEncoder.setVertexBuffer(imagePlaneVertexBuffer, offset: 0, index: 0)

        // Texture indices 0-4 match the [[ texture(n) ]] slots of compositeImageFragmentShader.
        renderEncoder.setFragmentTexture(CVMetalTextureGetTexture(textureY), index: 0)
        renderEncoder.setFragmentTexture(CVMetalTextureGetTexture(textureCbCr), index: 1)
        renderEncoder.setFragmentTexture(whiteBlurTexture, index: 2)
        renderEncoder.setFragmentTexture(yellowBlurTexture, index: 3)
        renderEncoder.setFragmentTexture(alphaTexture, index: 4)
        renderEncoder.drawPrimitives(type: .triangleStrip, vertexStart: 0, vertexCount: 4)


// Per-vertex input of capturedImageVertexTransform, fetched through vertex attributes.
struct ImageVertex {
    // 2D position; the vertex function extends it to (x, y, 0, 1).
    float2 position [[attribute(kVertexAttributePosition)]];
    // Texture coordinate passed through to the fragment stage.
    float2 texCoord [[attribute(kVertexAttributeTexcoord)]];
};

// Output of capturedImageVertexTransform and input of capturedImageFragmentShader.
struct ImageColorInOut {
    // Vertex position handed to the rasterizer.
    float4 position [[position]];
    // Texture coordinate used to sample the Y and CbCr planes.
    float2 texCoord;
};

// Vertex function for the plain captured-image pass: extends the 2D position to
// (x, y, 0, 1) and forwards the texture coordinate unchanged.
// NOTE(review): the closing brace is not visible in this listing.
vertex ImageColorInOut capturedImageVertexTransform(ImageVertex in [[stage_in]]) {
    ImageColorInOut out;
    out.position = float4(in.position, 0.0, 1.0);
    out.texCoord = in.texCoord;
    return out;

// Converts a YCbCr sample to RGB.
// y:    sample of the luma texture; only .r is used.
// CbCr: sample of the chroma texture; only .rg is used.
// Returns the 4x4 matrix applied to (Y, Cb, Cr, 1); the matrix's fourth column carries
// the constant offsets.
// NOTE(review): the `);` closing the matrix constructor and the function's closing brace
// are not visible in this listing.
float4 ycbcrToRGBTransform(float4 y, float4 CbCr) {
    const float4x4 ycbcrToRGBTransform = float4x4(
      float4(+1.0000f, +1.0000f, +1.0000f, +0.0000f),
      float4(+0.0000f, -0.3441f, +1.7720f, +0.0000f),
      float4(+1.4020f, -0.7141f, +0.0000f, +0.0000f),
      float4(-0.7010f, +0.5291f, -0.8860f, +1.0000f)

    float4 ycbcr = float4(y.r, CbCr.rg, 1.0);
    return ycbcrToRGBTransform * ycbcr;

// Fragment function for the plain captured-image pass: samples the Y and CbCr planes at
// in.texCoord and returns the color converted to RGB by ycbcrToRGBTransform.
// NOTE(review): the sampler declaration below is cut off in this listing (only its first
// argument is visible) and the function's closing brace is missing.
fragment float4 capturedImageFragmentShader(ImageColorInOut in [[stage_in]],
                                            texture2d<float, access::sample> capturedImageTextureY [[ texture(kTextureIndexY) ]],
                                            texture2d<float, access::sample> capturedImageTextureCbCr [[ texture(kTextureIndexCbCr) ]]) {
    constexpr sampler colorSampler(mip_filter::linear,
    // Sample Y and CbCr textures to get the YCbCr color at the given texture coordinate.
    return ycbcrToRGBTransform(capturedImageTextureY.sample(colorSampler, in.texCoord),
                               capturedImageTextureCbCr.sample(colorSampler, in.texCoord));

// One element of the vertex buffer read by compositeImageVertexTransform.
struct CompositeVertex {
    // 2D position; the vertex function extends it to (x, y, 0, 1).
    float2 position;
    // Texture coordinate forwarded as texCoordCamera.
    float2 texCoord;
};

// Output of compositeImageVertexTransform and input of compositeImageFragmentShader.
struct CompositeColorInOut {
    // Vertex position handed to the rasterizer.
    float4 position [[position]];
    // Coordinate at which the fragment shader samples all of its textures.
    float2 texCoordCamera;
};

// Vertex function of the composite pass: reads vertex `vid` from the buffer bound at
// index 0, extends its 2D position to (x, y, 0, 1) and forwards its texture coordinate.
// NOTE(review): the closing brace is not visible in this listing.
vertex CompositeColorInOut compositeImageVertexTransform(const device CompositeVertex* cameraVertices [[ buffer(0) ]],
                                                         unsigned int vid [[ vertex_id ]]) {
    CompositeColorInOut out;

    const device CompositeVertex& cv = cameraVertices[vid];

    out.position = float4(cv.position, 0.0, 1.0);
    out.texCoordCamera = cv.texCoord;

    return out;

// Fragment function of the composite pass: converts the camera's Y/CbCr planes to RGB and
// adds the white and yellow glow textures on top of it.
// All textures are sampled at in.texCoordCamera with a clamp-to-edge, linear sampler.
// alphaTexture (texture 4) is declared but never sampled in this function.
// NOTE(review): the braces around the body are not visible in this listing — confirm
// against the real shader file.
fragment half4 compositeImageFragmentShader(CompositeColorInOut in [[ stage_in ]],
                                            texture2d<float, access::sample> capturedImageTextureY [[ texture(0) ]],
                                            texture2d<float, access::sample> capturedImageTextureCbCr [[ texture(1) ]],
                                            texture2d<float, access::sample> whiteColorTexture [[ texture(2) ]],
                                            texture2d<float, access::sample> yellowColorTexture [[ texture(3) ]],
                                            texture2d<float, access::sample> alphaTexture [[ texture(4) ]])
    constexpr sampler s(address::clamp_to_edge, filter::linear);

    float2 cameraTexCoord = in.texCoordCamera;

    // Sample Y and CbCr textures to get the YCbCr color at the given texture coordinate.
    float4 rgb = ycbcrToRGBTransform(capturedImageTextureY.sample(s, cameraTexCoord), capturedImageTextureCbCr.sample(s, cameraTexCoord));

    half4 cameraColor = half4(rgb);
    half4 whiteColor = half4(whiteColorTexture.sample(s, cameraTexCoord));
    // The yellow glow is weighted twice as strongly as the white one.
    half4 yellowColor = half4(yellowColorTexture.sample(s, cameraTexCoord)) * 2.0;
    // Purely additive blend of camera image and both glows.
    return cameraColor + whiteColor + yellowColor;

// (dilated person matte - person matte) yields a band around the person; that band is
// written as white to outWhiteTexture and as yellow to outYellowTexture. Pixels on the
// person get (0, 0, 0, 0); everything else gets black with alpha 1.
//
// inTexture         person matte; a red value above 0.1 counts as "person".
// outWhiteTexture   receives the white band.
// outYellowTexture  receives the yellow band.
// gid               pixel handled by this thread.
kernel void matteConvert(texture2d<half, access::read> inTexture [[ texture(0) ]],
                         texture2d<half, access::write> outWhiteTexture [[ texture(1) ]],
                         texture2d<half, access::write> outYellowTexture [[ texture(2) ]],
                         uint2 gid [[thread_position_in_grid]]) {
    const int width = int(inTexture.get_width());
    const int height = int(inTexture.get_height());
    // The host rounds the grid up to whole threadgroups, so some threads fall outside
    // the texture; they must not read or write anything.
    if (int(gid.x) >= width || int(gid.y) >= height) {
        return;
    }
    if (inTexture.read(gid).r > 0.1) {
        // Pixels on the person itself get no color.
        outWhiteTexture.write(half4(0.0), gid);
        outYellowTexture.write(half4(0.0), gid);
        return;
    }
    // Dilate: light this pixel if any matte pixel in the scale x scale window is set.
    constexpr int scale = 15;
    constexpr int radius = scale / 2;
    half color = 0.0;
    for (int i = 0; i < scale; i++) {
        // Signed arithmetic: the unsigned form wraps around for pixels near the left/top edge.
        const int x = int(gid.x) + (i - radius);
        if (x < 0 || x >= width) {
            continue;
        }
        for (int j = 0; j < scale; j++) {
            const int y = int(gid.y) + (j - radius);
            if (y < 0 || y >= height) {
                continue;
            }
            half alpha = inTexture.read(uint2(x, y)).r;
            if (alpha > 0.1) {
                color = 1.0;
                break;
            }
        }
        if (color > 0.0) {
            break;
        }
    }

    outWhiteTexture.write(half4(color, color, color, 1.0), gid);
    outYellowTexture.write(half4(color, color, 0.0, 1.0), gid);
}

Register as a new user and use Qiita more conveniently

  1. You get articles that match your needs
  2. You can efficiently read back useful information
  3. You can use dark theme
What you can do with signing up

Delete article

Deleted articles cannot be recovered.

Draft of this article would be also deleted.

Are you sure you want to delete this article?