`-O2' を指定したらとんでもないコード吐くなあ、なんだこれ?
# その内詳しい人が解説してくれるだろうという他力本願
$ clang -v
clang version 4.0.1 (tags/RELEASE_401/final)
Target: x86_64-unknown-windows-cygnus
Thread model: posix
InstalledDir: /usr/bin
$ cat -n factorial.c
1 int factorial(int n) /* recursive n!; yields 1 for every n <= 1, including negatives */
2 {
3 if (n <= 1) { /* base case ends the recursion */
4 return 1;
5 } else {
6 return n * factorial(n - 1); /* multiply happens after the call returns, so this is not a tail call; with a 32-bit int the product overflows from n = 13 upward */
7 }
8 }
$ clang -Wall -Wextra -O0 -S factorial.c -o -
# factorial(int n) as generated at -O0 (AT&T syntax, x86-64 Windows target).
# In:  %ecx = n
# Out: %eax = result
# Nothing stays in a register between statements; the stack slots used are:
#   48(%rsp) = n
#   52(%rsp) = return value
#   44(%rsp) = copy of n preserved across the recursive call
.text
# COFF symbol record for factorial: storage class 2 (external), type 32 (function).
.def factorial;
.scl 2;
.type 32;
.endef
.globl factorial
# Align the entry point to 16 bytes, padding with 0x90 (nop).
.p2align 4, 0x90
factorial: # @factorial
.Lcfi0:
.seh_proc factorial
# BB#0:
# Reserve a 56-byte frame. Only offsets 44..55 are touched below; the low
# 32 bytes are presumably the shadow space the Windows x64 ABI reserves for
# the callee of the recursive call.
subq $56, %rsp
.Lcfi1:
# Record the allocation in the SEH unwind data.
.seh_stackalloc 56
.Lcfi2:
.seh_endprologue
# Store the argument, then test it straight from memory: if (n > 1) recurse.
movl %ecx, 48(%rsp)
cmpl $1, 48(%rsp)
jg .LBB0_2
# BB#1:
# n <= 1: return value = 1.
movl $1, 52(%rsp)
jmp .LBB0_3
.LBB0_2:
# n > 1: %eax = n (left operand of the multiply), %ecx = n - 1 (call argument).
movl 48(%rsp), %eax
movl 48(%rsp), %ecx
subl $1, %ecx
# %eax will be overwritten by the callee's return value, so park n on the stack.
movl %eax, 44(%rsp) # 4-byte Spill
callq factorial
# %ecx = saved n; %ecx *= factorial(n - 1); store as the return value.
movl 44(%rsp), %ecx # 4-byte Reload
imull %eax, %ecx
movl %ecx, 52(%rsp)
.LBB0_3:
# Common exit: load the return value, release the frame, return.
movl 52(%rsp), %eax
addq $56, %rsp
retq
.seh_handlerdata
.text
.Lcfi3:
.seh_endproc
$ clang -Wall -Wextra -O1 -S factorial.c -o -
# factorial(int n) as generated at -O1: the recursion has been replaced by a
# scalar multiply loop, with no call and no stack frame.
# In:  %ecx = n
# Out: %eax = n * (n-1) * ... * 2, or 1 when n < 2
.text
# COFF symbol record for factorial: storage class 2 (external), type 32 (function).
.def factorial;
.scl 2;
.type 32;
.endef
.globl factorial
# Align the entry point to 16 bytes, padding with 0x90 (nop).
.p2align 4, 0x90
factorial: # @factorial
# BB#0:
# kill: %ECX<def> %ECX<kill> %RCX<def>
# result = 1; if (n < 2) return result.
movl $1, %eax
cmpl $2, %ecx
jl .LBB0_3
# BB#1:
# Loop preheader: the accumulator is set to 1 again (it already holds 1 here).
movl $1, %eax
# Align the loop head to 16 bytes.
.p2align 4, 0x90
.LBB0_2: # =>This Inner Loop Header: Depth=1
# result *= n
imull %ecx, %eax
# Compare the current n with 2, then decrement with leal, which leaves the
# flags from cmpl untouched so the jg below still sees that comparison.
cmpl $2, %ecx
leal -1(%rcx), %ecx
# kill: %ECX<def> %ECX<kill> %RCX<def>
# Keep looping while the value just multiplied was greater than 2.
jg .LBB0_2
.LBB0_3:
retq
$ clang -Wall -Wextra -O2 -S factorial.c -o -
# factorial(int n) as generated at -O2: the scalar multiply loop has been
# vectorized with 128-bit SSE2 integer instructions and unrolled.
# In:  %ecx = n
# Out: %eax = result
# Overall shape:
#   1. n < 2                -> return 1
#   2. fewer than 8 factors -> scalar loop only (.LBB0_12)
#   3. otherwise consume 8 factors per step using two 4-lane accumulators:
#      .LBB0_6 runs (steps mod 4) single steps, .LBB0_9 runs the remaining
#      steps four at a time (32 factors per iteration)
#   4. .LBB0_10 multiplies the two accumulators together, then folds the
#      four lanes into one scalar
#   5. factors left over beyond a multiple of 8 go through the scalar loop
# Every lane-wise 32-bit multiply below is the same seven-instruction
# sequence: pmuludq multiplies only lanes 0 and 2 (producing 64-bit
# products), so the odd lanes are shuffled down (pshufd $245) and multiplied
# separately, the low halves of the products are gathered (pshufd $232), and
# punpckldq interleaves them back into lane order.
.text
# COFF symbol record for factorial: storage class 2 (external), type 32 (function).
.def factorial;
.scl 2;
.type 32;
.endef
# Read-only constant pool, 16-byte aligned so movdqa/paddd can use it directly.
.section .rdata,"dr"
.p2align 4
# {0, -1, -2, -3}: added to a broadcast n to form {n, n-1, n-2, n-3}.
.LCPI0_0:
.long 0 # 0x0
.long 4294967295 # 0xffffffff
.long 4294967294 # 0xfffffffe
.long 4294967293 # 0xfffffffd
# {1, 1, 1, 1}: initial value of both product accumulators.
.LCPI0_1:
.long 1 # 0x1
.long 1 # 0x1
.long 1 # 0x1
.long 1 # 0x1
# -4 in every lane: offset from the first to the second group of four factors.
.LCPI0_2:
.long 4294967292 # 0xfffffffc
.long 4294967292 # 0xfffffffc
.long 4294967292 # 0xfffffffc
.long 4294967292 # 0xfffffffc
# -8 in every lane: stride of the single-step loop; third group offset in the unrolled loop.
.LCPI0_3:
.long 4294967288 # 0xfffffff8
.long 4294967288 # 0xfffffff8
.long 4294967288 # 0xfffffff8
.long 4294967288 # 0xfffffff8
# -12, -16, -20, -24, -28 in every lane: offsets of groups four to eight in the unrolled loop.
.LCPI0_4:
.long 4294967284 # 0xfffffff4
.long 4294967284 # 0xfffffff4
.long 4294967284 # 0xfffffff4
.long 4294967284 # 0xfffffff4
.LCPI0_5:
.long 4294967280 # 0xfffffff0
.long 4294967280 # 0xfffffff0
.long 4294967280 # 0xfffffff0
.long 4294967280 # 0xfffffff0
.LCPI0_6:
.long 4294967276 # 0xffffffec
.long 4294967276 # 0xffffffec
.long 4294967276 # 0xffffffec
.long 4294967276 # 0xffffffec
.LCPI0_7:
.long 4294967272 # 0xffffffe8
.long 4294967272 # 0xffffffe8
.long 4294967272 # 0xffffffe8
.long 4294967272 # 0xffffffe8
.LCPI0_8:
.long 4294967268 # 0xffffffe4
.long 4294967268 # 0xffffffe4
.long 4294967268 # 0xffffffe4
.long 4294967268 # 0xffffffe4
# -32 in every lane: stride of the unrolled loop.
.LCPI0_9:
.long 4294967264 # 0xffffffe0
.long 4294967264 # 0xffffffe0
.long 4294967264 # 0xffffffe0
.long 4294967264 # 0xffffffe0
.text
.globl factorial
# Align the entry point to 16 bytes, padding with 0x90 (nop).
.p2align 4, 0x90
factorial: # @factorial
.Lcfi0:
.seh_proc factorial
# BB#0:
# Prologue. %rsi and %xmm6-%xmm14 are callee-saved in the Windows x64 ABI and
# are all used below, so they are saved here and described to the unwinder
# with .seh_* directives. 144 bytes = nine 16-byte XMM save slots.
# Note that this runs even on the n < 2 path that never touches them.
pushq %rsi
.Lcfi1:
.seh_pushreg 6
subq $144, %rsp
.Lcfi2:
.seh_stackalloc 144
movdqa %xmm14, 128(%rsp) # 16-byte Spill
.Lcfi3:
.seh_savexmm 14, 128
movdqa %xmm13, 112(%rsp) # 16-byte Spill
.Lcfi4:
.seh_savexmm 13, 112
movdqa %xmm12, 96(%rsp) # 16-byte Spill
.Lcfi5:
.seh_savexmm 12, 96
movdqa %xmm11, 80(%rsp) # 16-byte Spill
.Lcfi6:
.seh_savexmm 11, 80
movdqa %xmm10, 64(%rsp) # 16-byte Spill
.Lcfi7:
.seh_savexmm 10, 64
movdqa %xmm9, 48(%rsp) # 16-byte Spill
.Lcfi8:
.seh_savexmm 9, 48
movdqa %xmm8, 32(%rsp) # 16-byte Spill
.Lcfi9:
.seh_savexmm 8, 32
movdqa %xmm7, 16(%rsp) # 16-byte Spill
.Lcfi10:
.seh_savexmm 7, 16
movdqa %xmm6, (%rsp) # 16-byte Spill
.Lcfi11:
.seh_savexmm 6, 0
.Lcfi12:
.seh_endprologue
# kill: %ECX<def> %ECX<kill> %RCX<def>
# result = 1; if (n < 2) go straight to the epilogue.
movl $1, %eax
cmpl $2, %ecx
jl .LBB0_13
# BB#1:
# %edx = max(~n, -3); %r10d = n + %edx + 2.
# n >= 2 here, so ~n <= -3 and %r10d = n - 1: the number of factors n, n-1, ..., 2.
movl %ecx, %eax
notl %eax
cmpl $-4, %eax
movl $-3, %edx
cmovgl %eax, %edx
leal 2(%rdx,%rcx), %r10d
# result = 1 again (%eax was used as scratch); fewer than 8 factors -> scalar loop.
movl $1, %eax
cmpl $8, %r10d
jb .LBB0_12
# BB#2:
# %r8d = %r9d = factor count rounded down to a multiple of 8 (the part handled
# by the vector code). The je tests the second andl; after the >= 8 check
# above the value cannot be zero.
movl %r10d, %r8d
andl $-8, %r8d
movl %r10d, %r9d
andl $-8, %r9d
je .LBB0_12
# BB#3:
# %xmm2 = {n, n-1, n-2, n-3}: broadcast n to all lanes, then add the lane offsets.
movd %ecx, %xmm0
pshufd $0, %xmm0, %xmm2 # xmm2 = xmm0[0,0,0,0]
movabsq $.LCPI0_0, %rax
paddd (%rax), %xmm2
# %r11d = vector factor count - 8.
# %eax = ((%r11d >> 3) + 1) & 3 = (number of 8-factor steps) mod 4.
leal -8(%r9), %r11d
movl %r11d, %eax
shrl $3, %eax
incl %eax
andl $3, %eax
# movabsq leaves the flags alone, so the je still tests the andl result:
# a step count divisible by 4 skips the single-step loop.
movabsq $.LCPI0_1, %rsi
je .LBB0_4
# BB#5:
# Set up the single-step loop:
#   %xmm5, %xmm6 = the two accumulators, both {1,1,1,1}
#   %eax  = -(steps mod 4), counted up to zero
#   %edx  = factors consumed so far
#   %xmm0 = -4 per lane, %xmm1 = -8 per lane
movdqa (%rsi), %xmm5
negl %eax
xorl %edx, %edx
movabsq $.LCPI0_2, %rsi
movdqa (%rsi), %xmm0
movabsq $.LCPI0_3, %rsi
movdqa (%rsi), %xmm1
movdqa %xmm5, %xmm6
.p2align 4, 0x90
.LBB0_6: # =>This Inner Loop Header: Depth=1
# One step = 8 factors. %xmm3 = %xmm2 - 4 (the second group of four).
movdqa %xmm2, %xmm3
paddd %xmm0, %xmm3
# %xmm4 = %xmm2 * %xmm5, lane by lane (first accumulator times first group).
movdqa %xmm2, %xmm4
pmuludq %xmm5, %xmm4
pshufd $232, %xmm4, %xmm4 # xmm4 = xmm4[0,2,2,3]
pshufd $245, %xmm5, %xmm5 # xmm5 = xmm5[1,1,3,3]
pshufd $245, %xmm2, %xmm7 # xmm7 = xmm2[1,1,3,3]
pmuludq %xmm5, %xmm7
pshufd $232, %xmm7, %xmm5 # xmm5 = xmm7[0,2,2,3]
punpckldq %xmm5, %xmm4 # xmm4 = xmm4[0],xmm5[0],xmm4[1],xmm5[1]
# %xmm3 = %xmm3 * %xmm6, lane by lane (second accumulator times second group).
pshufd $245, %xmm3, %xmm5 # xmm5 = xmm3[1,1,3,3]
pmuludq %xmm6, %xmm3
pshufd $232, %xmm3, %xmm3 # xmm3 = xmm3[0,2,2,3]
pshufd $245, %xmm6, %xmm6 # xmm6 = xmm6[1,1,3,3]
pmuludq %xmm5, %xmm6
pshufd $232, %xmm6, %xmm5 # xmm5 = xmm6[0,2,2,3]
punpckldq %xmm5, %xmm3 # xmm3 = xmm3[0],xmm5[0],xmm3[1],xmm5[1]
# Advance: 8 more factors consumed, factor vector moves down by 8.
addl $8, %edx
paddd %xmm1, %xmm2
# incl sets the flags for the jne below (the movdqa copies do not touch them).
incl %eax
# Copy the new products back into the accumulator registers; they also stay
# in %xmm4/%xmm3, which is where the code after the loop expects them.
movdqa %xmm4, %xmm5
movdqa %xmm3, %xmm6
jne .LBB0_6
jmp .LBB0_7
.LBB0_4:
# Single-step loop skipped: nothing consumed, both accumulators = {1,1,1,1}.
xorl %edx, %edx
movdqa (%rsi), %xmm4
movdqa %xmm4, %xmm3
.LBB0_7:
# %r11d = vector count - 8, so below 24 means fewer than four steps in total;
# the single-step loop has then already done everything.
cmpl $24, %r11d
jb .LBB0_10
# BB#8:
# %eax = vector factors still to consume (vector count minus %edx).
# %xmm8..%xmm14 = lane offsets -4, -8, ..., -28; %xmm6 = -32 stride
# (%xmm6 is no longer needed as an accumulator; the products are in %xmm4/%xmm3).
movl %r9d, %eax
subl %edx, %eax
movabsq $.LCPI0_2, %rdx
movdqa (%rdx), %xmm8
movabsq $.LCPI0_3, %rdx
movdqa (%rdx), %xmm9
movabsq $.LCPI0_4, %rdx
movdqa (%rdx), %xmm10
movabsq $.LCPI0_5, %rdx
movdqa (%rdx), %xmm11
movabsq $.LCPI0_6, %rdx
movdqa (%rdx), %xmm12
movabsq $.LCPI0_7, %rdx
movdqa (%rdx), %xmm13
movabsq $.LCPI0_8, %rdx
movdqa (%rdx), %xmm14
movabsq $.LCPI0_9, %rdx
movdqa (%rdx), %xmm6
.p2align 4, 0x90
.LBB0_9: # =>This Inner Loop Header: Depth=1
# Four steps per iteration (32 factors). Accumulators enter and leave in
# %xmm4 (first chain) and %xmm3 (second chain); %xmm2 is the base factor vector.
# Step 1: %xmm1 = %xmm2 * %xmm4 ; %xmm0 = (%xmm2 - 4) * %xmm3
movdqa %xmm2, %xmm0
paddd %xmm8, %xmm0
movdqa %xmm2, %xmm1
pmuludq %xmm4, %xmm1
pshufd $232, %xmm1, %xmm1 # xmm1 = xmm1[0,2,2,3]
pshufd $245, %xmm4, %xmm4 # xmm4 = xmm4[1,1,3,3]
pshufd $245, %xmm2, %xmm7 # xmm7 = xmm2[1,1,3,3]
pmuludq %xmm4, %xmm7
pshufd $232, %xmm7, %xmm4 # xmm4 = xmm7[0,2,2,3]
punpckldq %xmm4, %xmm1 # xmm1 = xmm1[0],xmm4[0],xmm1[1],xmm4[1]
pshufd $245, %xmm0, %xmm4 # xmm4 = xmm0[1,1,3,3]
pmuludq %xmm3, %xmm0
pshufd $232, %xmm0, %xmm0 # xmm0 = xmm0[0,2,2,3]
pshufd $245, %xmm3, %xmm3 # xmm3 = xmm3[1,1,3,3]
pmuludq %xmm4, %xmm3
pshufd $232, %xmm3, %xmm3 # xmm3 = xmm3[0,2,2,3]
punpckldq %xmm3, %xmm0 # xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1]
# Step 2: %xmm3 = (%xmm2 - 8) * %xmm1 ; %xmm4 = (%xmm2 - 12) * %xmm0
movdqa %xmm2, %xmm3
paddd %xmm9, %xmm3
movdqa %xmm2, %xmm4
paddd %xmm10, %xmm4
pshufd $245, %xmm3, %xmm7 # xmm7 = xmm3[1,1,3,3]
pmuludq %xmm1, %xmm3
pshufd $232, %xmm3, %xmm3 # xmm3 = xmm3[0,2,2,3]
pshufd $245, %xmm1, %xmm1 # xmm1 = xmm1[1,1,3,3]
pmuludq %xmm7, %xmm1
pshufd $232, %xmm1, %xmm1 # xmm1 = xmm1[0,2,2,3]
punpckldq %xmm1, %xmm3 # xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1]
pshufd $245, %xmm4, %xmm1 # xmm1 = xmm4[1,1,3,3]
pmuludq %xmm0, %xmm4
pshufd $232, %xmm4, %xmm4 # xmm4 = xmm4[0,2,2,3]
pshufd $245, %xmm0, %xmm0 # xmm0 = xmm0[1,1,3,3]
pmuludq %xmm1, %xmm0
pshufd $232, %xmm0, %xmm0 # xmm0 = xmm0[0,2,2,3]
punpckldq %xmm0, %xmm4 # xmm4 = xmm4[0],xmm0[0],xmm4[1],xmm0[1]
# Step 3: %xmm0 = (%xmm2 - 16) * %xmm3 ; %xmm1 = (%xmm2 - 20) * %xmm4
movdqa %xmm2, %xmm0
paddd %xmm11, %xmm0
movdqa %xmm2, %xmm1
paddd %xmm12, %xmm1
pshufd $245, %xmm0, %xmm7 # xmm7 = xmm0[1,1,3,3]
pmuludq %xmm3, %xmm0
pshufd $232, %xmm0, %xmm0 # xmm0 = xmm0[0,2,2,3]
pshufd $245, %xmm3, %xmm3 # xmm3 = xmm3[1,1,3,3]
pmuludq %xmm7, %xmm3
pshufd $232, %xmm3, %xmm3 # xmm3 = xmm3[0,2,2,3]
punpckldq %xmm3, %xmm0 # xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1]
pshufd $245, %xmm1, %xmm3 # xmm3 = xmm1[1,1,3,3]
pmuludq %xmm4, %xmm1
pshufd $232, %xmm1, %xmm1 # xmm1 = xmm1[0,2,2,3]
pshufd $245, %xmm4, %xmm4 # xmm4 = xmm4[1,1,3,3]
pmuludq %xmm3, %xmm4
pshufd $232, %xmm4, %xmm3 # xmm3 = xmm4[0,2,2,3]
punpckldq %xmm3, %xmm1 # xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1]
# Step 4: %xmm4 = (%xmm2 - 24) * %xmm0 ; %xmm3 = (%xmm2 - 28) * %xmm1
movdqa %xmm2, %xmm3
paddd %xmm13, %xmm3
movdqa %xmm2, %xmm7
paddd %xmm14, %xmm7
pshufd $245, %xmm3, %xmm5 # xmm5 = xmm3[1,1,3,3]
pmuludq %xmm0, %xmm3
pshufd $232, %xmm3, %xmm4 # xmm4 = xmm3[0,2,2,3]
pshufd $245, %xmm0, %xmm0 # xmm0 = xmm0[1,1,3,3]
pmuludq %xmm5, %xmm0
pshufd $232, %xmm0, %xmm0 # xmm0 = xmm0[0,2,2,3]
punpckldq %xmm0, %xmm4 # xmm4 = xmm4[0],xmm0[0],xmm4[1],xmm0[1]
pshufd $245, %xmm7, %xmm0 # xmm0 = xmm7[1,1,3,3]
pmuludq %xmm1, %xmm7
pshufd $232, %xmm7, %xmm3 # xmm3 = xmm7[0,2,2,3]
pshufd $245, %xmm1, %xmm1 # xmm1 = xmm1[1,1,3,3]
pmuludq %xmm0, %xmm1
pshufd $232, %xmm1, %xmm0 # xmm0 = xmm1[0,2,2,3]
punpckldq %xmm0, %xmm3 # xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1]
# Move the base factor vector down by 32 and loop until the count reaches zero.
paddd %xmm6, %xmm2
addl $-32, %eax
jne .LBB0_9
.LBB0_10:
# Reduction, part 1: %xmm1 = %xmm3 * %xmm4 (merge the two accumulators).
pshufd $245, %xmm3, %xmm0 # xmm0 = xmm3[1,1,3,3]
pmuludq %xmm4, %xmm3
pshufd $232, %xmm3, %xmm1 # xmm1 = xmm3[0,2,2,3]
pshufd $245, %xmm4, %xmm2 # xmm2 = xmm4[1,1,3,3]
pmuludq %xmm0, %xmm2
pshufd $232, %xmm2, %xmm0 # xmm0 = xmm2[0,2,2,3]
punpckldq %xmm0, %xmm1 # xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
# Part 2: multiply %xmm1 by itself with the 64-bit halves swapped, so lanes 0
# and 1 hold lane0*lane2 and lane1*lane3.
pshufd $78, %xmm1, %xmm0 # xmm0 = xmm1[2,3,0,1]
pshufd $245, %xmm1, %xmm2 # xmm2 = xmm1[1,1,3,3]
pmuludq %xmm0, %xmm1
pshufd $232, %xmm1, %xmm1 # xmm1 = xmm1[0,2,2,3]
pshufd $245, %xmm0, %xmm0 # xmm0 = xmm0[1,1,3,3]
pmuludq %xmm2, %xmm0
pshufd $232, %xmm0, %xmm0 # xmm0 = xmm0[0,2,2,3]
punpckldq %xmm0, %xmm1 # xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
# Part 3: lane 0 of %xmm0 = lane1 * lane0 of %xmm1, i.e. the product of all
# four lanes. Only lane 0 of the result is read afterwards.
pshufd $229, %xmm1, %xmm0 # xmm0 = xmm1[1,1,2,3]
pmuludq %xmm1, %xmm0
pshufd $232, %xmm0, %xmm0 # xmm0 = xmm0[0,2,2,3]
pshufd $245, %xmm1, %xmm1 # xmm1 = xmm1[1,1,3,3]
pmuludq %xmm0, %xmm1
pshufd $232, %xmm1, %xmm1 # xmm1 = xmm1[0,2,2,3]
punpckldq %xmm1, %xmm0 # xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
# result = vector product. If the factor count was an exact multiple of 8
# there is nothing left for the scalar loop.
movd %xmm0, %eax
cmpl %r9d, %r10d
je .LBB0_13
# BB#11:
# %ecx = n - (factors handled by the vector code) = largest factor still pending.
subl %r8d, %ecx
.p2align 4, 0x90
.LBB0_12: # =>This Inner Loop Header: Depth=1
# Scalar loop: result *= n; decrement with leal so the cmpl flags survive
# for the jg; repeat while the value just multiplied was greater than 2.
imull %ecx, %eax
cmpl $2, %ecx
leal -1(%rcx), %ecx
# kill: %ECX<def> %ECX<kill> %RCX<def>
jg .LBB0_12
.LBB0_13:
# Epilogue: restore the saved XMM registers, release the frame, restore %rsi.
movaps (%rsp), %xmm6 # 16-byte Reload
movaps 16(%rsp), %xmm7 # 16-byte Reload
movaps 32(%rsp), %xmm8 # 16-byte Reload
movaps 48(%rsp), %xmm9 # 16-byte Reload
movaps 64(%rsp), %xmm10 # 16-byte Reload
movaps 80(%rsp), %xmm11 # 16-byte Reload
movaps 96(%rsp), %xmm12 # 16-byte Reload
movaps 112(%rsp), %xmm13 # 16-byte Reload
movaps 128(%rsp), %xmm14 # 16-byte Reload
addq $144, %rsp
popq %rsi
retq
.seh_handlerdata
.text
.Lcfi13:
.seh_endproc
$