More than 5 years have passed since last update.

LLVMでの効率的なUnion型の扱い

LLVM

Last updated at 2018-12-11Posted at 2018-12-11

オレオレ言語作り隊アドベントカレンダー12日目です。

自作言語の紹介にしようと思いましたが、~~時間がない~~恥ずかしいので、LLVM使っててはまったunion型の話をしようと思います。

TL;DR

Union型の表現で迷ったらとりあえずワードの配列で（例えば[4 x i64]）。

導入

Union型

Cでおなじみのunion型はメンバが同じメモリ領域を共有している型です。下の例ではパディング等を考えなければ、foo_tのサイズはsizeof intとsizeof doubleの大きい方になります。barを代入してbazの値を取り出すみたいなこともできます。

union foo_t {
  int bar;
  double baz;
};

Rust等のモダンな言語ではtagged unionというより型安全な形で目にすることが多いです。また、関数型言語における代数的データ型の表現にも用いられます。

というわけで、union型大人気です。

LLVM IRには無い

LLVMの中間表現にはunion型に直接対応するものはありません。なので構造体等を使って頑張って実装します。基本的にはメンバのうち最大サイズのもののメモリをalloca命令やmalloc関数で確保して、そのメモリのアドレスをビットキャストして各メンバを読みます。

メモリ使いたくない

自作言語ツクルンジャーとしては、alloca命令やmalloc関数なんてものは忌み嫌うべき存在です。Union型の中身を解釈し直すだけなら、全部レジスタ内の操作で済ませて欲しいものです。また、union型のサイズが小さい場合はunboxedなまま扱いたいです。

問題

次のような、あるメンバを取り、対応するunboxedなtagged unionを返すコンストラクタ関数を考えます。

define { i64, 何らかの型 } @constructor(double) {
  ...
}

とりあえずメンバ間の共通表現はバイト列でいいだろうと思って、

foo.ll

define { i64, [8 x i8] } @constructor(double) {
  %2 = alloca { i64, [8 x i8] }
  %3 = bitcast { i64, [8 x i8] }* %2 to { i64, double }*
  %4 = getelementptr { i64, double }, { i64, double }* %3, i32 0, i32 0
  store i64 0, i64* %4
  %5 = getelementptr { i64, double }, { i64, double }* %3, i32 0, i32 1
  store double %0, double* %5
  %6 = load { i64, [8 x i8] }, { i64, [8 x i8] }* %2
  ret { i64, [8 x i8] } %6
}

こうしてみました。以下がllcでのコンパイル結果です。

foo.s

        .text
        .file   "foo.ll"
        .globl  constructor             # -- Begin function constructor
        .p2align        4, 0x90
        .type   constructor,@function
constructor:                            # @constructor
        .cfi_startproc
# %bb.0:
        movq    $0, -16(%rsp)
        movsd   %xmm0, -8(%rsp)
        movq    -8(%rsp), %rax
        movq    %rax, 8(%rdi)
        movq    $0, (%rdi)
        movq    %rdi, %rax
        retq
.Lfunc_end0:
        .size   constructor, .Lfunc_end0-constructor
        .cfi_endproc
                                        # -- End function

        .section        ".note.GNU-stack","",@progbits

悪くなさそう。ここで最適化をかけてみます。

opt -O3 foo.ll -S > foo.opt.ll

LLVM IRでの結果。

foo.opt.ll

; ModuleID = 'foo.ll'
source_filename = "foo.ll"

; Function Attrs: norecurse nounwind readnone
define { i64, [8 x i8] } @constructor(double) local_unnamed_addr #0 {
  %2 = bitcast double %0 to i64
  %.sroa.2.8.extract.trunc = trunc i64 %2 to i8
  %.fca.1.0.insert = insertvalue { i64, [8 x i8] } { i64 0, [8 x i8] undef }, i8 %.sroa.2.8.extract.trunc, 1, 0                                                                                              
  %.sroa.2.9.extract.shift = lshr i64 %2, 8
  %.sroa.2.9.extract.trunc = trunc i64 %.sroa.2.9.extract.shift to i8
  %.fca.1.1.insert = insertvalue { i64, [8 x i8] } %.fca.1.0.insert, i8 %.sroa.2.9.extract.trunc, 1, 1
  %.sroa.2.10.extract.shift = lshr i64 %2, 16
  %.sroa.2.10.extract.trunc = trunc i64 %.sroa.2.10.extract.shift to i8
  %.fca.1.2.insert = insertvalue { i64, [8 x i8] } %.fca.1.1.insert, i8 %.sroa.2.10.extract.trunc, 1, 2                                                                                                      
  %.sroa.2.11.extract.shift = lshr i64 %2, 24
  %.sroa.2.11.extract.trunc = trunc i64 %.sroa.2.11.extract.shift to i8
  %.fca.1.3.insert = insertvalue { i64, [8 x i8] } %.fca.1.2.insert, i8 %.sroa.2.11.extract.trunc, 1, 3                                                                                                      
  %.sroa.2.12.extract.shift = lshr i64 %2, 32
  %.sroa.2.12.extract.trunc = trunc i64 %.sroa.2.12.extract.shift to i8
  %.fca.1.4.insert = insertvalue { i64, [8 x i8] } %.fca.1.3.insert, i8 %.sroa.2.12.extract.trunc, 1, 4                                                                                                      
  %.sroa.2.13.extract.shift = lshr i64 %2, 40
  %.sroa.2.13.extract.trunc = trunc i64 %.sroa.2.13.extract.shift to i8
  %.fca.1.5.insert = insertvalue { i64, [8 x i8] } %.fca.1.4.insert, i8 %.sroa.2.13.extract.trunc, 1, 5                                                                                                      
  %.sroa.2.14.extract.shift = lshr i64 %2, 48
  %.sroa.2.14.extract.trunc = trunc i64 %.sroa.2.14.extract.shift to i8
  %.fca.1.6.insert = insertvalue { i64, [8 x i8] } %.fca.1.5.insert, i8 %.sroa.2.14.extract.trunc, 1, 6                                                                                                      
  %.sroa.2.15.extract.shift = lshr i64 %2, 56
  %.sroa.2.15.extract.trunc = trunc i64 %.sroa.2.15.extract.shift to i8
  %.fca.1.7.insert = insertvalue { i64, [8 x i8] } %.fca.1.6.insert, i8 %.sroa.2.15.extract.trunc, 1, 7                                                                                                      
  ret { i64, [8 x i8] } %.fca.1.7.insert
}

attributes #0 = { norecurse nounwind readnone }

???

コンパイル後。

foo.opt.s

        .text
        .file   "foo.ll"
        .globl  constructor             # -- Begin function constructor
        .p2align        4, 0x90
        .type   constructor,@function
constructor:                            # @constructor
# %bb.0:
        movq    %xmm0, %rax
        movq    %rax, %r8
        movq    %rax, %r9
        movq    %rax, %rsi
        movq    %rax, %rcx
        movq    %rax, %rdx
        movb    %ah, 9(%rdi)
        movb    %al, 8(%rdi)
        shrq    $16, %rax
        shrq    $24, %r8
        shrq    $32, %r9
        shrq    $40, %rsi
        shrq    $48, %rcx
        shrq    $56, %rdx
        movb    %dl, 15(%rdi)
        movb    %cl, 14(%rdi)
        movb    %sil, 13(%rdi)
        movb    %r9b, 12(%rdi)
        movb    %r8b, 11(%rdi)
        movb    %al, 10(%rdi)
        movq    $0, (%rdi)
        movq    %rdi, %rax
        retq
.Lfunc_end0:
        .size   constructor, .Lfunc_end0-constructor
                                        # -- End function

        .section        ".note.GNU-stack","",@progbits

1バイトずつご丁寧に読み書きされてます。

解決策

ワード（i32やi64）の配列をunion型の表現として使います。具体的にはtagged union型の[8 x i8]の部分を[1 x i64]で置き換えます。

bar.ll

define { i64, [1 x i64] } @constructor(double) {
  %2 = alloca { i64, [1 x i64] }
  %3 = bitcast { i64, [1 x i64] }* %2 to { i64, double }*
  %4 = getelementptr { i64, double }, { i64, double }* %3, i32 0, i32 0
  store i64 0, i64* %4
  %5 = getelementptr { i64, double }, { i64, double }* %3, i32 0, i32 1
  store double %0, double* %5
  %6 = load { i64, [1 x i64] }, { i64, [1 x i64] }* %2
  ret { i64, [1 x i64] } %6
}

bar.s

        .text
        .file   "bar.ll"
        .globl  constructor             # -- Begin function constructor
        .p2align        4, 0x90
        .type   constructor,@function
constructor:                            # @constructor
        .cfi_startproc
# %bb.0:
        movq    $0, -16(%rsp)
        movsd   %xmm0, -8(%rsp)
        movq    -8(%rsp), %rdx
        xorl    %eax, %eax
        retq
.Lfunc_end0:
        .size   constructor, .Lfunc_end0-constructor
        .cfi_endproc
                                        # -- End function

        .section        ".note.GNU-stack","",@progbits

最適化後。

bar.opt.ll

; ModuleID = 'bar.ll'
source_filename = "bar.ll"

; Function Attrs: norecurse nounwind readnone
define { i64, [1 x i64] } @constructor(double) local_unnamed_addr #0 {
  %2 = bitcast double %0 to i64
  %.fca.1.0.insert = insertvalue { i64, [1 x i64] } { i64 0, [1 x i64] undef }, i64 %2, 1, 0
  ret { i64, [1 x i64] } %.fca.1.0.insert
}

attributes #0 = { norecurse nounwind readnone }

bar.opt.s

        .text
        .file   "bar.ll"
        .globl  constructor             # -- Begin function constructor
        .p2align        4, 0x90
        .type   constructor,@function
constructor:                            # @constructor
# %bb.0:
        movq    %xmm0, %rdx
        xorl    %eax, %eax
        retq
.Lfunc_end0:
        .size   constructor, .Lfunc_end0-constructor
                                        # -- End function

        .section        ".note.GNU-stack","",@progbits

大変良い。

まとめ

Union型の表現はワードの配列にしましょう。生成される命令列が少なく済みます。

ちなみに、LLVMの整数型はiの後に任意の自然数を取ることができます。つまり、今回の例では[16 x i8]で表されるようなunion型をi128にしても良いわけです。この場合についても書こうと思ったのですが、~~時間がなかった~~疲れたため皆さんの宿題にします。答えは、どうでもいいメモリ領域をゼロ埋めする命令が無駄に入るでした。

おまけ（広告）

Ein言語という自作言語を自作中です。名前は昨日決めました。Haskellのような遅延評価による関数型言語です。LLVMで書いていて、最近、関数定義ができるようになりました。ついった。

言語実装Advent Calendar 2018

Day 12

You get articles that match your needs
You can efficiently read back useful information
You can use dark theme

What you can do with signing up