LoginSignup
2
0

More than 5 years have passed since last update.

clang で末尾呼出し最適化を期待した階乗の関数をコンパイル

Posted at

`-O2` を指定したらとんでもないコードを吐くなあ、なんだこれ?

# その内詳しい人が解説してくれるだろうという他力本願

$ clang -v
clang version 4.0.1 (tags/RELEASE_401/final)
Target: x86_64-unknown-windows-cygnus
Thread model: posix
InstalledDir: /usr/bin

$ cat -n factorial.c
     1  int factorial(int n)
     2  {
     3      if (n <= 1) {
     4          return 1;
     5      } else {
     6          return n * factorial(n - 1);
     7      }
     8  }

$ clang -Wall -Wextra -O0 -S factorial.c -o -
        # int factorial(int n) -- unoptimized (-O0) build, GAS / AT&T syntax.
        # The argument n is read from %ecx and the result is returned in %eax.
        # Nothing is kept in registers across statements: every value lives in
        # a stack slot, and the recursion is a real call.
        # Stack slots used below:
        #   48(%rsp) = n
        #   52(%rsp) = return value
        #   44(%rsp) = copy of n kept across the recursive call
        .text
        .def     factorial;
        .scl    2;
        .type   32;
        .endef
        .globl  factorial
        .p2align        4, 0x90
factorial:                              # @factorial
.Lcfi0:
.seh_proc factorial
# BB#0:
        # Reserve a 56-byte frame.
        # NOTE(review): presumably the low 32 bytes are the Win64 shadow space
        # for the callee and the rest holds the locals -- confirm.
        subq    $56, %rsp
.Lcfi1:
        .seh_stackalloc 56
.Lcfi2:
        .seh_endprologue
        # Store n, then test it: if (n > 1) take the recursive branch.
        movl    %ecx, 48(%rsp)
        cmpl    $1, 48(%rsp)
        jg      .LBB0_2
# BB#1:
        # n <= 1: the return value is 1.
        movl    $1, 52(%rsp)
        jmp     .LBB0_3
.LBB0_2:
        # n > 1: compute n * factorial(n - 1).
        # %eax = n (saved to the spill slot), %ecx = n - 1 (argument of the call).
        movl    48(%rsp), %eax
        movl    48(%rsp), %ecx
        subl    $1, %ecx
        movl    %eax, 44(%rsp)          # 4-byte Spill
        callq   factorial
        # %eax = factorial(n - 1); reload n and multiply.
        movl    44(%rsp), %ecx          # 4-byte Reload
        imull   %eax, %ecx
        movl    %ecx, 52(%rsp)
.LBB0_3:
        # Common exit: load the return value, release the frame, return.
        movl    52(%rsp), %eax
        addq    $56, %rsp
        retq
        .seh_handlerdata
        .text
.Lcfi3:
        .seh_endproc



$ clang -Wall -Wextra -O1 -S factorial.c -o -
        # int factorial(int n) -- -O1 build, GAS / AT&T syntax.
        # The recursive call is gone: the function is a single loop that keeps
        # the running product in %eax and the current factor in %ecx.
        # No stack frame is set up and no other registers are used.
        .text
        .def     factorial;
        .scl    2;
        .type   32;
        .endef
        .globl  factorial
        .p2align        4, 0x90
factorial:                              # @factorial
# BB#0:
                                        # kill: %ECX<def> %ECX<kill> %RCX<def>
        # result = 1; if (n < 2) return result.
        movl    $1, %eax
        cmpl    $2, %ecx
        jl      .LBB0_3
# BB#1:
        # %eax already holds 1 from the instruction above; this reload is redundant.
        movl    $1, %eax
        .p2align        4, 0x90
.LBB0_2:                                # =>This Inner Loop Header: Depth=1
        # result *= factor
        imull   %ecx, %eax
        # The compare uses the factor that was just multiplied in. leal then
        # decrements %ecx without modifying the flags, so the jg below still
        # sees the result of the cmpl: loop again while that factor was > 2.
        # The last factor multiplied is therefore 2.
        cmpl    $2, %ecx
        leal    -1(%rcx), %ecx
                                        # kill: %ECX<def> %ECX<kill> %RCX<def>
        jg      .LBB0_2
.LBB0_3:
        retq



$ clang -Wall -Wextra -O2 -S factorial.c -o -
        # -O2 build, part 1: symbol declaration and the read-only vector
        # constants used by factorial. Each .LCPI0_k is one 16-byte-aligned
        # vector of four 32-bit values. Read as signed integers they are small
        # negative steps that the code adds to a vector of factors.
        .text
        .def     factorial;
        .scl    2;
        .type   32;
        .endef
        .section        .rdata,"dr"
        .p2align        4
        # { 0, -1, -2, -3 }: added to a broadcast of n to form { n, n-1, n-2, n-3 }.
.LCPI0_0:
        .long   0                       # 0x0
        .long   4294967295              # 0xffffffff
        .long   4294967294              # 0xfffffffe
        .long   4294967293              # 0xfffffffd
        # { 1, 1, 1, 1 }: initial value of the product accumulators.
.LCPI0_1:
        .long   1                       # 0x1
        .long   1                       # 0x1
        .long   1                       # 0x1
        .long   1                       # 0x1
        # { -4 x 4 }: offset from the first to the second group of four factors.
.LCPI0_2:
        .long   4294967292              # 0xfffffffc
        .long   4294967292              # 0xfffffffc
        .long   4294967292              # 0xfffffffc
        .long   4294967292              # 0xfffffffc
        # { -8 x 4 }: step of the 8-factors-per-iteration loop; also an offset in
        # the unrolled loop.
.LCPI0_3:
        .long   4294967288              # 0xfffffff8
        .long   4294967288              # 0xfffffff8
        .long   4294967288              # 0xfffffff8
        .long   4294967288              # 0xfffffff8
        # { -12 x 4 } through { -28 x 4 }: the remaining offsets used inside the
        # unrolled loop, which handles 32 factors per iteration.
.LCPI0_4:
        .long   4294967284              # 0xfffffff4
        .long   4294967284              # 0xfffffff4
        .long   4294967284              # 0xfffffff4
        .long   4294967284              # 0xfffffff4
.LCPI0_5:
        .long   4294967280              # 0xfffffff0
        .long   4294967280              # 0xfffffff0
        .long   4294967280              # 0xfffffff0
        .long   4294967280              # 0xfffffff0
.LCPI0_6:
        .long   4294967276              # 0xffffffec
        .long   4294967276              # 0xffffffec
        .long   4294967276              # 0xffffffec
        .long   4294967276              # 0xffffffec
.LCPI0_7:
        .long   4294967272              # 0xffffffe8
        .long   4294967272              # 0xffffffe8
        .long   4294967272              # 0xffffffe8
        .long   4294967272              # 0xffffffe8
.LCPI0_8:
        .long   4294967268              # 0xffffffe4
        .long   4294967268              # 0xffffffe4
        .long   4294967268              # 0xffffffe4
        .long   4294967268              # 0xffffffe4
        # { -32 x 4 }: step of the unrolled loop.
.LCPI0_9:
        .long   4294967264              # 0xffffffe0
        .long   4294967264              # 0xffffffe0
        .long   4294967264              # 0xffffffe0
        .long   4294967264              # 0xffffffe0
        # int factorial(int n) -- -O2 build, GAS / AT&T syntax.
        # n is read from %ecx and the result is returned in %eax.
        #
        # The recursion has been turned into a loop and that loop has been
        # vectorized. The factors n, n-1, n-2, ... are multiplied eight at a
        # time into two 4-lane accumulators. The lanes are then multiplied
        # together, and any factors left over go through a scalar loop.
        #
        # Layout of the code:
        #   BB#0  .. BB#2   early exit for n < 2; trip count; choose vector/scalar
        #   BB#3  .. BB#5   build { n, n-1, n-2, n-3 }; set up the accumulators
        #   .LBB0_6         8 factors per iteration, run (steps mod 4) times
        #   .LBB0_9         32 factors per iteration (the loop above unrolled 4x)
        #   .LBB0_10        multiply the two accumulators and all lanes together
        #   .LBB0_12        scalar loop for the remaining factors
        #   .LBB0_13        restore saved registers and return
        #
        # Lane-wise 32-bit multiply idiom used throughout (7 instructions):
        #   pmuludq multiplies lanes 0 and 2 only, giving two 64-bit products.
        #   pshufd $245 copies lanes 1 and 3 into the even positions so that a
        #   second pmuludq can multiply them.
        #   pshufd $232 and punpckldq collect the low 32 bits of the four
        #   products back into lanes 0..3.
        # NOTE(review): presumably this is needed because the baseline target
        # has no packed 32-bit multiply (pmulld is SSE4.1) -- confirm.
        .text
        .globl  factorial
        .p2align        4, 0x90
factorial:                              # @factorial
.Lcfi0:
.seh_proc factorial
# BB#0:
        # Prologue: save %rsi and %xmm6-%xmm14, all of which are overwritten
        # below and are callee-saved in the Microsoft x64 convention.
        # The n < 2 early exit jumps to .LBB0_13 only after these saves, so even
        # that path runs the full save/restore sequence.
        pushq   %rsi
.Lcfi1:
        .seh_pushreg 6
        subq    $144, %rsp
.Lcfi2:
        .seh_stackalloc 144
        movdqa  %xmm14, 128(%rsp)       # 16-byte Spill
.Lcfi3:
        .seh_savexmm 14, 128
        movdqa  %xmm13, 112(%rsp)       # 16-byte Spill
.Lcfi4:
        .seh_savexmm 13, 112
        movdqa  %xmm12, 96(%rsp)        # 16-byte Spill
.Lcfi5:
        .seh_savexmm 12, 96
        movdqa  %xmm11, 80(%rsp)        # 16-byte Spill
.Lcfi6:
        .seh_savexmm 11, 80
        movdqa  %xmm10, 64(%rsp)        # 16-byte Spill
.Lcfi7:
        .seh_savexmm 10, 64
        movdqa  %xmm9, 48(%rsp)         # 16-byte Spill
.Lcfi8:
        .seh_savexmm 9, 48
        movdqa  %xmm8, 32(%rsp)         # 16-byte Spill
.Lcfi9:
        .seh_savexmm 8, 32
        movdqa  %xmm7, 16(%rsp)         # 16-byte Spill
.Lcfi10:
        .seh_savexmm 7, 16
        movdqa  %xmm6, (%rsp)           # 16-byte Spill
.Lcfi11:
        .seh_savexmm 6, 0
.Lcfi12:
        .seh_endprologue
                                        # kill: %ECX<def> %ECX<kill> %RCX<def>
        # result = 1; if (n < 2) return result.
        movl    $1, %eax
        cmpl    $2, %ecx
        jl      .LBB0_13
# BB#1:
        # %r10d = n + 2 + max(~n, -3). On this path n >= 2, so ~n <= -3 and the
        # value is n - 1: the number of factors (n down to 2) to multiply.
        movl    %ecx, %eax
        notl    %eax
        cmpl    $-4, %eax
        movl    $-3, %edx
        cmovgl  %eax, %edx
        leal    2(%rdx,%rcx), %r10d
        # Fewer than 8 factors: do everything in the scalar loop with result = 1.
        movl    $1, %eax
        cmpl    $8, %r10d
        jb      .LBB0_12
# BB#2:
        # %r8d = %r9d = factor count rounded down to a multiple of 8, i.e. the
        # number of factors the vector code will handle.
        movl    %r10d, %r8d
        andl    $-8, %r8d
        movl    %r10d, %r9d
        andl    $-8, %r9d
        je      .LBB0_12
# BB#3:
        # %xmm2 = { n, n-1, n-2, n-3 }: broadcast n, then add { 0, -1, -2, -3 }.
        movd    %ecx, %xmm0
        pshufd  $0, %xmm0, %xmm2        # xmm2 = xmm0[0,0,0,0]
        movabsq $.LCPI0_0, %rax
        paddd   (%rax), %xmm2
        # %r11d = vector factor count - 8.
        # %eax = ((%r11d >> 3) + 1) & 3 = (number of 8-factor steps) mod 4.
        # These leftover steps run in .LBB0_6 before the 4x-unrolled loop.
        leal    -8(%r9), %r11d
        movl    %r11d, %eax
        shrl    $3, %eax
        incl    %eax
        andl    $3, %eax
        # The je below tests the flags of the andl above; movabsq leaves them
        # unchanged. Zero leftover steps -> skip straight to .LBB0_4.
        movabsq $.LCPI0_1, %rsi
        je      .LBB0_4
# BB#5:
        # Both accumulators start as { 1, 1, 1, 1 } (%xmm5 and %xmm6).
        # %eax is negated so the loop can count up to zero.
        # %edx counts the factors consumed by this loop.
        # %xmm0 = { -4 x 4 }, %xmm1 = { -8 x 4 }.
        movdqa  (%rsi), %xmm5
        negl    %eax
        xorl    %edx, %edx
        movabsq $.LCPI0_2, %rsi
        movdqa  (%rsi), %xmm0
        movabsq $.LCPI0_3, %rsi
        movdqa  (%rsi), %xmm1
        movdqa  %xmm5, %xmm6
        .p2align        4, 0x90
.LBB0_6:                                # =>This Inner Loop Header: Depth=1
        # One 8-factor step:
        #   %xmm4 = %xmm2        * %xmm5   (first accumulator)
        #   %xmm3 = (%xmm2 - 4)  * %xmm6   (second accumulator)
        # Each product uses the multiply idiom described in the header comment.
        movdqa  %xmm2, %xmm3
        paddd   %xmm0, %xmm3
        movdqa  %xmm2, %xmm4
        pmuludq %xmm5, %xmm4
        pshufd  $232, %xmm4, %xmm4      # xmm4 = xmm4[0,2,2,3]
        pshufd  $245, %xmm5, %xmm5      # xmm5 = xmm5[1,1,3,3]
        pshufd  $245, %xmm2, %xmm7      # xmm7 = xmm2[1,1,3,3]
        pmuludq %xmm5, %xmm7
        pshufd  $232, %xmm7, %xmm5      # xmm5 = xmm7[0,2,2,3]
        punpckldq       %xmm5, %xmm4    # xmm4 = xmm4[0],xmm5[0],xmm4[1],xmm5[1]
        pshufd  $245, %xmm3, %xmm5      # xmm5 = xmm3[1,1,3,3]
        pmuludq %xmm6, %xmm3
        pshufd  $232, %xmm3, %xmm3      # xmm3 = xmm3[0,2,2,3]
        pshufd  $245, %xmm6, %xmm6      # xmm6 = xmm6[1,1,3,3]
        pmuludq %xmm5, %xmm6
        pshufd  $232, %xmm6, %xmm5      # xmm5 = xmm6[0,2,2,3]
        punpckldq       %xmm5, %xmm3    # xmm3 = xmm3[0],xmm5[0],xmm3[1],xmm5[1]
        # Advance: 8 more factors consumed, factor vector moves down by 8.
        # The jne tests the flags of incl; the movdqa copies that carry the
        # accumulators into the next iteration do not modify flags.
        addl    $8, %edx
        paddd   %xmm1, %xmm2
        incl    %eax
        movdqa  %xmm4, %xmm5
        movdqa  %xmm3, %xmm6
        jne     .LBB0_6
        jmp     .LBB0_7
.LBB0_4:
        # No leftover steps: nothing consumed yet, both accumulators = { 1, 1, 1, 1 }.
        xorl    %edx, %edx
        movdqa  (%rsi), %xmm4
        movdqa  %xmm4, %xmm3
.LBB0_7:
        # Here %xmm4 and %xmm3 are the two accumulators. If (vector factor
        # count - 8) < 24 there were fewer than four 8-factor steps in total and
        # the loop above has already done them all.
        cmpl    $24, %r11d
        jb      .LBB0_10
# BB#8:
        # %eax = factors still to be handled by vector code (decremented by 32
        # per iteration below). Load the offsets -4 .. -28 into %xmm8-%xmm14 and
        # the step -32 into %xmm6.
        movl    %r9d, %eax
        subl    %edx, %eax
        movabsq $.LCPI0_2, %rdx
        movdqa  (%rdx), %xmm8
        movabsq $.LCPI0_3, %rdx
        movdqa  (%rdx), %xmm9
        movabsq $.LCPI0_4, %rdx
        movdqa  (%rdx), %xmm10
        movabsq $.LCPI0_5, %rdx
        movdqa  (%rdx), %xmm11
        movabsq $.LCPI0_6, %rdx
        movdqa  (%rdx), %xmm12
        movabsq $.LCPI0_7, %rdx
        movdqa  (%rdx), %xmm13
        movabsq $.LCPI0_8, %rdx
        movdqa  (%rdx), %xmm14
        movabsq $.LCPI0_9, %rdx
        movdqa  (%rdx), %xmm6
        .p2align        4, 0x90
.LBB0_9:                                # =>This Inner Loop Header: Depth=1
        # Unrolled body: 32 factors per iteration, as four 8-factor steps.
        # On entry and on exit the accumulators are %xmm4 and %xmm3; in between
        # they move through %xmm1/%xmm0, %xmm3/%xmm4 and %xmm0/%xmm1.
        #
        # Step 1: %xmm1 = %xmm2 * %xmm4,  %xmm0 = (%xmm2 - 4) * %xmm3
        movdqa  %xmm2, %xmm0
        paddd   %xmm8, %xmm0
        movdqa  %xmm2, %xmm1
        pmuludq %xmm4, %xmm1
        pshufd  $232, %xmm1, %xmm1      # xmm1 = xmm1[0,2,2,3]
        pshufd  $245, %xmm4, %xmm4      # xmm4 = xmm4[1,1,3,3]
        pshufd  $245, %xmm2, %xmm7      # xmm7 = xmm2[1,1,3,3]
        pmuludq %xmm4, %xmm7
        pshufd  $232, %xmm7, %xmm4      # xmm4 = xmm7[0,2,2,3]
        punpckldq       %xmm4, %xmm1    # xmm1 = xmm1[0],xmm4[0],xmm1[1],xmm4[1]
        pshufd  $245, %xmm0, %xmm4      # xmm4 = xmm0[1,1,3,3]
        pmuludq %xmm3, %xmm0
        pshufd  $232, %xmm0, %xmm0      # xmm0 = xmm0[0,2,2,3]
        pshufd  $245, %xmm3, %xmm3      # xmm3 = xmm3[1,1,3,3]
        pmuludq %xmm4, %xmm3
        pshufd  $232, %xmm3, %xmm3      # xmm3 = xmm3[0,2,2,3]
        punpckldq       %xmm3, %xmm0    # xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1]
        # Step 2: %xmm3 = (%xmm2 - 8) * %xmm1,  %xmm4 = (%xmm2 - 12) * %xmm0
        movdqa  %xmm2, %xmm3
        paddd   %xmm9, %xmm3
        movdqa  %xmm2, %xmm4
        paddd   %xmm10, %xmm4
        pshufd  $245, %xmm3, %xmm7      # xmm7 = xmm3[1,1,3,3]
        pmuludq %xmm1, %xmm3
        pshufd  $232, %xmm3, %xmm3      # xmm3 = xmm3[0,2,2,3]
        pshufd  $245, %xmm1, %xmm1      # xmm1 = xmm1[1,1,3,3]
        pmuludq %xmm7, %xmm1
        pshufd  $232, %xmm1, %xmm1      # xmm1 = xmm1[0,2,2,3]
        punpckldq       %xmm1, %xmm3    # xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1]
        pshufd  $245, %xmm4, %xmm1      # xmm1 = xmm4[1,1,3,3]
        pmuludq %xmm0, %xmm4
        pshufd  $232, %xmm4, %xmm4      # xmm4 = xmm4[0,2,2,3]
        pshufd  $245, %xmm0, %xmm0      # xmm0 = xmm0[1,1,3,3]
        pmuludq %xmm1, %xmm0
        pshufd  $232, %xmm0, %xmm0      # xmm0 = xmm0[0,2,2,3]
        punpckldq       %xmm0, %xmm4    # xmm4 = xmm4[0],xmm0[0],xmm4[1],xmm0[1]
        # Step 3: %xmm0 = (%xmm2 - 16) * %xmm3,  %xmm1 = (%xmm2 - 20) * %xmm4
        movdqa  %xmm2, %xmm0
        paddd   %xmm11, %xmm0
        movdqa  %xmm2, %xmm1
        paddd   %xmm12, %xmm1
        pshufd  $245, %xmm0, %xmm7      # xmm7 = xmm0[1,1,3,3]
        pmuludq %xmm3, %xmm0
        pshufd  $232, %xmm0, %xmm0      # xmm0 = xmm0[0,2,2,3]
        pshufd  $245, %xmm3, %xmm3      # xmm3 = xmm3[1,1,3,3]
        pmuludq %xmm7, %xmm3
        pshufd  $232, %xmm3, %xmm3      # xmm3 = xmm3[0,2,2,3]
        punpckldq       %xmm3, %xmm0    # xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1]
        pshufd  $245, %xmm1, %xmm3      # xmm3 = xmm1[1,1,3,3]
        pmuludq %xmm4, %xmm1
        pshufd  $232, %xmm1, %xmm1      # xmm1 = xmm1[0,2,2,3]
        pshufd  $245, %xmm4, %xmm4      # xmm4 = xmm4[1,1,3,3]
        pmuludq %xmm3, %xmm4
        pshufd  $232, %xmm4, %xmm3      # xmm3 = xmm4[0,2,2,3]
        punpckldq       %xmm3, %xmm1    # xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1]
        # Step 4: %xmm4 = (%xmm2 - 24) * %xmm0,  %xmm3 = (%xmm2 - 28) * %xmm1
        movdqa  %xmm2, %xmm3
        paddd   %xmm13, %xmm3
        movdqa  %xmm2, %xmm7
        paddd   %xmm14, %xmm7
        pshufd  $245, %xmm3, %xmm5      # xmm5 = xmm3[1,1,3,3]
        pmuludq %xmm0, %xmm3
        pshufd  $232, %xmm3, %xmm4      # xmm4 = xmm3[0,2,2,3]
        pshufd  $245, %xmm0, %xmm0      # xmm0 = xmm0[1,1,3,3]
        pmuludq %xmm5, %xmm0
        pshufd  $232, %xmm0, %xmm0      # xmm0 = xmm0[0,2,2,3]
        punpckldq       %xmm0, %xmm4    # xmm4 = xmm4[0],xmm0[0],xmm4[1],xmm0[1]
        pshufd  $245, %xmm7, %xmm0      # xmm0 = xmm7[1,1,3,3]
        pmuludq %xmm1, %xmm7
        pshufd  $232, %xmm7, %xmm3      # xmm3 = xmm7[0,2,2,3]
        pshufd  $245, %xmm1, %xmm1      # xmm1 = xmm1[1,1,3,3]
        pmuludq %xmm0, %xmm1
        pshufd  $232, %xmm1, %xmm0      # xmm0 = xmm1[0,2,2,3]
        punpckldq       %xmm0, %xmm3    # xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1]
        # Move the factor vector down by 32 and loop while factors remain.
        paddd   %xmm6, %xmm2
        addl    $-32, %eax
        jne     .LBB0_9
.LBB0_10:
        # Horizontal reduction.
        # 1) %xmm1 = %xmm3 * %xmm4: combine the two accumulators lane by lane.
        pshufd  $245, %xmm3, %xmm0      # xmm0 = xmm3[1,1,3,3]
        pmuludq %xmm4, %xmm3
        pshufd  $232, %xmm3, %xmm1      # xmm1 = xmm3[0,2,2,3]
        pshufd  $245, %xmm4, %xmm2      # xmm2 = xmm4[1,1,3,3]
        pmuludq %xmm0, %xmm2
        pshufd  $232, %xmm2, %xmm0      # xmm0 = xmm2[0,2,2,3]
        punpckldq       %xmm0, %xmm1    # xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
        # 2) Multiply by a copy with the halves swapped: lane 0 becomes
        #    lane0*lane2 and lane 1 becomes lane1*lane3.
        pshufd  $78, %xmm1, %xmm0       # xmm0 = xmm1[2,3,0,1]
        pshufd  $245, %xmm1, %xmm2      # xmm2 = xmm1[1,1,3,3]
        pmuludq %xmm0, %xmm1
        pshufd  $232, %xmm1, %xmm1      # xmm1 = xmm1[0,2,2,3]
        pshufd  $245, %xmm0, %xmm0      # xmm0 = xmm0[1,1,3,3]
        pmuludq %xmm2, %xmm0
        pshufd  $232, %xmm0, %xmm0      # xmm0 = xmm0[0,2,2,3]
        punpckldq       %xmm0, %xmm1    # xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
        # 3) Multiply lane 0 by lane 1: lane 0 is now the product of all lanes.
        pshufd  $229, %xmm1, %xmm0      # xmm0 = xmm1[1,1,2,3]
        pmuludq %xmm1, %xmm0
        pshufd  $232, %xmm0, %xmm0      # xmm0 = xmm0[0,2,2,3]
        pshufd  $245, %xmm1, %xmm1      # xmm1 = xmm1[1,1,3,3]
        pmuludq %xmm0, %xmm1
        pshufd  $232, %xmm1, %xmm1      # xmm1 = xmm1[0,2,2,3]
        punpckldq       %xmm1, %xmm0    # xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
        # result = lane 0. If the vector code covered every factor, we are done.
        movd    %xmm0, %eax
        cmpl    %r9d, %r10d
        je      .LBB0_13
# BB#11:
        # %ecx = n - (factors handled by the vector code) = first remaining factor.
        subl    %r8d, %ecx
        .p2align        4, 0x90
.LBB0_12:                               # =>This Inner Loop Header: Depth=1
        # Scalar loop: result *= factor. leal decrements %ecx without touching
        # the flags set by cmpl, so the loop repeats while the factor just
        # multiplied was > 2.
        imull   %ecx, %eax
        cmpl    $2, %ecx
        leal    -1(%rcx), %ecx
                                        # kill: %ECX<def> %ECX<kill> %RCX<def>
        jg      .LBB0_12
.LBB0_13:
        # Epilogue: restore %xmm6-%xmm14 and %rsi, release the frame, return %eax.
        movaps  (%rsp), %xmm6           # 16-byte Reload
        movaps  16(%rsp), %xmm7         # 16-byte Reload
        movaps  32(%rsp), %xmm8         # 16-byte Reload
        movaps  48(%rsp), %xmm9         # 16-byte Reload
        movaps  64(%rsp), %xmm10        # 16-byte Reload
        movaps  80(%rsp), %xmm11        # 16-byte Reload
        movaps  96(%rsp), %xmm12        # 16-byte Reload
        movaps  112(%rsp), %xmm13       # 16-byte Reload
        movaps  128(%rsp), %xmm14       # 16-byte Reload
        addq    $144, %rsp
        popq    %rsi
        retq
        .seh_handlerdata
        .text
.Lcfi13:
        .seh_endproc



$
2
0
5

Register as a new user and use Qiita more conveniently

  1. You get articles that match your needs
  2. You can efficiently read back useful information
  3. You can use dark theme
What you can do with signing up
2
0