1
0

Delete article

Deleted articles cannot be recovered.

Draft of this article would be also deleted.

Are you sure you want to delete this article?

More than 3 years have passed since last update.

FreeBSDを超最適化ビルドする

Posted at

FreeBSDのコンパイラは各種最適化オプションを受け付けることができます。

しかし、その最適化オプションによっては動作しないこともあります。

CPUはAMD Ryzen7 3700 メモリはDDR4-3200 64G
環境はMicrosoft Windows 10 Pro Hyper-V です。

ターゲットとなるCPU

# make targets
amd64/amd64
arm/armv6
arm/armv7
arm64/aarch64
i386/i386
mips/mips
mips/mips64
powerpc/powerpc
powerpc/powerpc64
riscv/riscv64
riscv/riscv64sf

左が TARGET= に指定するもの、右が TARGET_ARCH= に指定するものとなります。

x86/x64で最適化できるCPUオプション

clang の -march= に指定できるCPUタイプの一覧です。

#   Intel x86 architecture:
#       (AMD CPUs)      znver2, znver1, bdver4, bdver3, bdver2, bdver1,
#                       btver2, btver1, amdfam10, opteron-sse3, athlon64-sse3,
#                       k8-sse3, opteron, athlon64, athlon-fx, k8, athlon-mp,
#                       athlon-xp, athlon-4, athlon-tbird, athlon, k7, geode,
#                       k6-3, k6-2, k6
#       (Intel CPUs)    tigerlake, cooperlake, cascadelake, tremont,
#                       goldmont-plus, icelake-server, icelake-client,
#                       cannonlake, knm, skylake-avx512, knl, goldmont,
#                       skylake, broadwell, haswell, ivybridge, sandybridge,
#                       westmere, nehalem, silvermont, bonnell, core2, core,
#                       nocona, pentium4m, pentium4, prescott, pentium3m,
#                       pentium3, pentium-m, pentium2, pentiumpro, pentium-mmx,
#                       pentium, i486
#
#       (VIA CPUs)      c7, c3-2, c3
#   ARM architecture:   armv5, armv5te, armv6, armv6t2, arm1176jzf-s, armv7,
#                       armv7-a, armv7ve, generic-armv7-a, cortex-a5,
#                       cortex-a7, cortex-a8, cortex-a9, cortex-a12,
#                       cortex-a15, cortex-a17
#   ARM64 architecture:         cortex-a53, cortex-a57, cortex-a72,
#                               exynos-m1

実際に使用したオプション カーネル用

/etc/make-kernel.conf
CFLAGS+=-O2 -pipe
CXXFLAGS+=
COPTFLAGS= -O3 -fno-strict-aliasing -pipe
/etc/src-kernel.conf
# disable malloc_debug function
MALLOC_PRODUCTION=YES
build-kernel.sh
make -j 16 buildkernel KERNCONF=GENERIC __MAKE_CONF=/etc/make-kernel.conf SRCCONF=/etc/src-kernel.conf

実際に使用したオプション world用

/etc/make-world.conf
CPUTYPE?=znver2
CFLAGS+=-O3 -fno-strict-aliasing -pipe
CXXFLAGS+=-O3 -fno-strict-aliasing -pipe -march=znver2
COPTFLAGS= -O3 -fno-strict-aliasing -pipe -march=znver2
/etc/src-world.conf
# disable malloc_debug function
MALLOC_PRODUCTION=YES
build-world.sh
make -j 16 buildworld __MAKE_CONF=/etc/make-world.conf SRCCONF=/etc/src-world.conf

実際に使用したオプション ports用

/etc/make.conf
CFLAGS+= -O3 -fno-strict-aliasing -pipe -march=znver2 -fPIC
CXXFLAGS+=-O3 -fno-strict-aliasing -pipe -march=znver2 -fPIC
COPTFLAGS+= -O3 -fno-strict-aliasing -pipe -march=znver2 -fPIC

/etc/src.conf
# disable malloc_debug function
MALLOC_PRODUCTION=YES
update_ports.sh
#!/bin/sh
# Upgrade every installed port/package that has a newer version available.
portupgrade -a

VPSやクラウド等、物理サーバーに多数のゲストが存在する場合

より多くのプログラムをL3キャッシュに格納して効率的に動作させられるため、
-O3 オプションよりも -Os オプションのほうが高速に動作する場合があります。

nbench や unixbench 等のツールで、実際にビルドしたシステムの性能をよく確認しながら最適化してみるとよいでしょう。

付録記事 あの strlen.c をコンパイルするとどうなるの?

一部ソースを省略しています。
64bitのみアセンブラソースに変換しています。

strlen.c
# include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

# include <sys/limits.h>
# include <sys/types.h>
# include <string.h>

/*
 * Portable strlen() for 32-bit and 64-bit systems.
 *
 * Rationale: it is generally much more efficient to do word length
 * operations and avoid branches on modern computer systems, as
 * compared to byte-length operations with a lot of branches.
 *
 * The expression:
 *
 *	((x - 0x01....01) & ~x & 0x80....80)
 *
 * would evaluate to a non-zero value iff any of the bytes in the
 * original word is zero.
 *
 * On multi-issue processors, we can divide the above expression into:
 *	a)  (x - 0x01....01)
 *	b) (~x & 0x80....80)
 *	c) a & b
 *
 * Where, a) and b) can be partially computed in parallel.
 *
 * The algorithm above is found on "Hacker's Delight" by
 * Henry S. Warren, Jr.
 */

/*
 * Magic numbers for the algorithm.
 * mask01 has 0x01 in every byte of an unsigned long and mask80 has 0x80
 * in every byte; the width is selected from LONG_BIT.
 */
# if LONG_BIT == 32
static const unsigned long mask01 = 0x01010101;
static const unsigned long mask80 = 0x80808080;
# elif LONG_BIT == 64
static const unsigned long mask01 = 0x0101010101010101;
static const unsigned long mask80 = 0x8080808080808080;
# else
# error Unsupported word size
# endif

/*
 * Low-order address bits that select a byte within a long.  Clearing
 * them (addr & ~LONGPTR_MASK) rounds an address down to a multiple of
 * sizeof(long).
 */
# define	LONGPTR_MASK (sizeof(long) - 1)

/*
 * Helper macro: if byte x of the word at p is the terminating zero
 * byte, return the string length from the enclosing function.
 *
 * The macro expands to a `return` statement and refers to the names
 * `p` and `str`, so it can only be used inside a function that has
 * both in scope.
 */
# define testbyte(x)				\
	do {					\
		if (p[x] == '\0')		\
		    return (p - str + x);	\
	} while (0)

/*
 * strlen -- return the number of bytes in str that precede the first
 * zero byte.
 *
 * The string is read one aligned unsigned long at a time and each word
 * is checked with the mask01/mask80 zero-byte test.  Individual bytes
 * are examined only in a word that the test flags.
 */
size_t
strlen(const char *str)
{
	const char *p;
	const unsigned long *lp;
	long va, vb;

	/*
	 * Before trying the hard (unaligned byte-by-byte access) way
	 * to figure out whether there is a nul character, first check
	 * whether there is one within the aligned word that contains
	 * the start of the string.
	 *
	 * p and (p & ~LONGPTR_MASK) must be equally accessible since
	 * they always fall in the same memory page, as long as the page
	 * size is an integral multiple of the word size.
	 *
	 * That first word may also contain bytes located before str.
	 * A zero among those makes the test fire although the string
	 * continues, which is why the byte loop below starts at str and
	 * simply falls through to the word loop when it finds nothing.
	 */
	lp = (const unsigned long *)((uintptr_t)str & ~LONGPTR_MASK);
	va = (*lp - mask01);
	vb = ((~*lp) & mask80);
	lp++;
	if (va & vb)
		/* Check bytes from str up to the end of the first word */
		for (p = str; p < (const char *)lp; p++)
			if (*p == '\0')
				return (p - str);

	/*
	 * Scan the rest of the string using word sized operations.
	 * The loop has no exit condition of its own; it is left only
	 * through the return inside testbyte().
	 */
	for (; ; lp++) {
		va = (*lp - mask01);
		vb = ((~*lp) & mask80);
		if (va & vb) {
			/* Some byte of *lp is zero: locate it */
			p = (const char *)(lp);
			testbyte(0);
			testbyte(1);
			testbyte(2);
			testbyte(3);
# if (LONG_BIT >= 64)
			testbyte(4);
			testbyte(5);
			testbyte(6);
			testbyte(7);
# endif
		}
	}

	/* NOTREACHED */
	return (0);
}
最適化なし.s
	.file	"strlen.c"
	# strlen() for x86-64 in GNU as (AT&T) syntax; per the article's label
	# this is GCC output built without optimization.
	#   In:  %rdi = str          Out: %rax = length in bytes
	# Every local is kept in a stack slot addressed from %rbp.  %rsp is
	# never lowered, so the slots lie below the stack pointer:
	#   -8(%rbp)  = p      -16(%rbp) = lp     -24(%rbp) = va
	#   -32(%rbp) = vb     -40(%rbp) = str
	.text
# APP
	.ident	"$FreeBSD$"
# NO_APP
	# mask01 (0x01 in every byte) and mask80 (0x80 in every byte) are
	# emitted as read-only objects, but no instruction in this listing
	# references them; the code uses movabsq immediates instead.
	.section	.rodata
	.align 8
	.type	mask01, @object
	.size	mask01, 8
mask01:
	.quad	72340172838076673
	.align 8
	.type	mask80, @object
	.size	mask80, 8
mask80:
	.quad	-9187201950435737472
	.text
	.globl	strlen
	.type	strlen, @function
strlen:
.LFB6:
	.cfi_startproc
	pushq	%rbp
	.cfi_def_cfa_offset 16
	.cfi_offset 6, -16
	movq	%rsp, %rbp
	.cfi_def_cfa_register 6
	# lp = str rounded down to an 8-byte boundary
	movq	%rdi, -40(%rbp)
	movq	-40(%rbp), %rax
	andq	$-8, %rax
	movq	%rax, -16(%rbp)
	# va = *lp - mask01
	movq	-16(%rbp), %rax
	movq	(%rax), %rax
	movabsq	$72340172838076673, %rdx
	subq	%rdx, %rax
	movq	%rax, -24(%rbp)
	# vb = ~*lp & mask80
	movq	-16(%rbp), %rax
	movq	(%rax), %rax
	notq	%rax
	movq	%rax, %rdx
	movabsq	$-9187201950435737472, %rax
	andq	%rdx, %rax
	movq	%rax, -32(%rbp)
	# lp++, then skip the byte scan when (va & vb) == 0
	addq	$8, -16(%rbp)
	movq	-24(%rbp), %rax
	andq	-32(%rbp), %rax
	testq	%rax, %rax
	je	.L15
	# p = str; byte scan of the first word, condition checked at .L3
	movq	-40(%rbp), %rax
	movq	%rax, -8(%rbp)
	jmp	.L3
.L6:
	# *p == 0 ?  if so return p - str
	movq	-8(%rbp), %rax
	movzbl	(%rax), %eax
	testb	%al, %al
	jne	.L4
	movq	-8(%rbp), %rax
	subq	-40(%rbp), %rax
	jmp	.L5
.L4:
	addq	$1, -8(%rbp)
.L3:
	# continue while p < lp (unsigned pointer comparison)
	movq	-8(%rbp), %rax
	cmpq	-16(%rbp), %rax
	jb	.L6
.L15:
	# Word loop head: recompute va and vb for the word at lp
	movq	-16(%rbp), %rax
	movq	(%rax), %rax
	movabsq	$72340172838076673, %rdx
	subq	%rdx, %rax
	movq	%rax, -24(%rbp)
	movq	-16(%rbp), %rax
	movq	(%rax), %rax
	notq	%rax
	movq	%rax, %rdx
	movabsq	$-9187201950435737472, %rax
	andq	%rdx, %rax
	movq	%rax, -32(%rbp)
	movq	-24(%rbp), %rax
	andq	-32(%rbp), %rax
	testq	%rax, %rax
	je	.L7
	# Word flagged: p = lp, then test bytes 0..7 in order.  Each hit
	# returns (p - str) + index through .L5.
	movq	-16(%rbp), %rax
	movq	%rax, -8(%rbp)
	movq	-8(%rbp), %rax
	movzbl	(%rax), %eax
	testb	%al, %al
	jne	.L8
	movq	-8(%rbp), %rax
	subq	-40(%rbp), %rax
	jmp	.L5
.L8:
	movq	-8(%rbp), %rax
	addq	$1, %rax
	movzbl	(%rax), %eax
	testb	%al, %al
	jne	.L9
	movq	-8(%rbp), %rax
	subq	-40(%rbp), %rax
	addq	$1, %rax
	jmp	.L5
.L9:
	movq	-8(%rbp), %rax
	addq	$2, %rax
	movzbl	(%rax), %eax
	testb	%al, %al
	jne	.L10
	movq	-8(%rbp), %rax
	subq	-40(%rbp), %rax
	addq	$2, %rax
	jmp	.L5
.L10:
	movq	-8(%rbp), %rax
	addq	$3, %rax
	movzbl	(%rax), %eax
	testb	%al, %al
	jne	.L11
	movq	-8(%rbp), %rax
	subq	-40(%rbp), %rax
	addq	$3, %rax
	jmp	.L5
.L11:
	movq	-8(%rbp), %rax
	addq	$4, %rax
	movzbl	(%rax), %eax
	testb	%al, %al
	jne	.L12
	movq	-8(%rbp), %rax
	subq	-40(%rbp), %rax
	addq	$4, %rax
	jmp	.L5
.L12:
	movq	-8(%rbp), %rax
	addq	$5, %rax
	movzbl	(%rax), %eax
	testb	%al, %al
	jne	.L13
	movq	-8(%rbp), %rax
	subq	-40(%rbp), %rax
	addq	$5, %rax
	jmp	.L5
.L13:
	movq	-8(%rbp), %rax
	addq	$6, %rax
	movzbl	(%rax), %eax
	testb	%al, %al
	jne	.L14
	movq	-8(%rbp), %rax
	subq	-40(%rbp), %rax
	addq	$6, %rax
	jmp	.L5
.L14:
	movq	-8(%rbp), %rax
	addq	$7, %rax
	movzbl	(%rax), %eax
	testb	%al, %al
	jne	.L7
	movq	-8(%rbp), %rax
	subq	-40(%rbp), %rax
	addq	$7, %rax
	jmp	.L5
.L7:
	# lp++ and go around the word loop again
	addq	$8, -16(%rbp)
	jmp	.L15
.L5:
	# Common exit: the result is already in %rax
	popq	%rbp
	.cfi_def_cfa 7, 8
	ret
	.cfi_endproc
.LFE6:
	.size	strlen, .-strlen
	.ident	"GCC: (FreeBSD Ports Collection) 10.3.0"
	.section	.note.GNU-stack,"",@progbits
gcc-O2.s
	.file	"strlen.c"
	# strlen() for x86-64 in GNU as (AT&T) syntax; labelled gcc-O2 in the
	# article.
	#   In:  %rdi = str (never modified)     Out: %rax = length in bytes
	# %rax doubles as the word pointer lp until the result is formed.
	# The immediate -72340172838076673 is -(0x0101010101010101), so adding
	# it subtracts mask01; -9187201950435737472 is 0x8080808080808080.
	.text
# APP
	.ident	"$FreeBSD$"
# NO_APP
	.p2align 4
	.globl	strlen
	.type	strlen, @function
strlen:
.LFB6:
	.cfi_startproc
	# First aligned word: %rdx = (w - mask01) & ~w, then tested against
	# mask80 held in %rcx.  %rax is advanced to the next word.
	movabsq	$-72340172838076673, %rsi
	movq	%rdi, %rax
	andq	$-8, %rax
	movq	(%rax), %rdx
	addq	$8, %rax
	movq	%rdx, %rcx
	addq	%rsi, %rdx
	notq	%rcx
	andq	%rcx, %rdx
	movabsq	$-9187201950435737472, %rcx
	testq	%rcx, %rdx
	je	.L2
	# Skip the byte scan unless str < lp (unsigned comparison)
	cmpq	%rax, %rdi
	jnb	.L2
	movq	%rdi, %rdx
	jmp	.L5
	.p2align 4,,10
	.p2align 3
.L3:
	# Byte scan of the first word: %rdx = p, stop when p reaches lp
	addq	$1, %rdx
	cmpq	%rdx, %rax
	je	.L2
.L5:
	cmpb	$0, (%rdx)
	jne	.L3
	# Zero byte found: return p - str
	movq	%rdx, %rax
	subq	%rdi, %rax
	ret
	.p2align 4,,10
	.p2align 3
.L2:
	# Word loop setup: %r8 = -mask01, %rsi = mask80
	movabsq	$-72340172838076673, %r8
	movabsq	$-9187201950435737472, %rsi
	jmp	.L20
	.p2align 4,,10
	.p2align 3
.L7:
	# Flagged word whose byte 0 is non-zero: test bytes 1..7 in order
	cmpb	$0, 1(%rax)
	je	.L22
	cmpb	$0, 2(%rax)
	je	.L23
	cmpb	$0, 3(%rax)
	je	.L24
	cmpb	$0, 4(%rax)
	je	.L25
	cmpb	$0, 5(%rax)
	je	.L26
	cmpb	$0, 6(%rax)
	je	.L27
	cmpb	$0, 7(%rax)
	je	.L28
.L6:
	addq	$8, %rax
.L20:
	# Zero-byte test on the word at lp; advance via .L6 when it is clear
	movq	(%rax), %rdx
	movq	%rdx, %rcx
	addq	%r8, %rdx
	notq	%rcx
	andq	%rcx, %rdx
	testq	%rsi, %rdx
	je	.L6
	cmpb	$0, (%rax)
	jne	.L7
	# Byte 0 is the terminator: return lp - str
	subq	%rdi, %rax
	ret
	# .L22 .. .L28: return (lp - str) + k for a terminator at byte k = 1..7
.L22:
	subq	%rdi, %rax
	addq	$1, %rax
	ret
.L23:
	subq	%rdi, %rax
	addq	$2, %rax
	ret
.L24:
	subq	%rdi, %rax
	addq	$3, %rax
	ret
.L25:
	subq	%rdi, %rax
	addq	$4, %rax
	ret
.L26:
	subq	%rdi, %rax
	addq	$5, %rax
	ret
.L27:
	subq	%rdi, %rax
	addq	$6, %rax
	ret
.L28:
	subq	%rdi, %rax
	addq	$7, %rax
	ret
	.cfi_endproc
.LFE6:
	.size	strlen, .-strlen
	.ident	"GCC: (FreeBSD Ports Collection) 10.3.0"
	.section	.note.GNU-stack,"",@progbits
gcc-O2-zen2.s
	.file	"strlen.c"
	# strlen() for x86-64 in GNU as (AT&T) syntax; labelled gcc-O2-zen2 in
	# the article (presumably -O2 with -march=znver2; the flags are not
	# recorded in this listing).
	#   In:  %rdi = str (never modified)     Out: %rax = length in bytes
	# %rax doubles as the word pointer lp until the result is formed.
	# The immediate -72340172838076673 is -(0x0101010101010101), so adding
	# it subtracts mask01; -9187201950435737472 is 0x8080808080808080.
	# "andn A, B, D" computes D = ~B & A.
	.text
# APP
	.ident	"$FreeBSD$"
# NO_APP
	.p2align 4
	.globl	strlen
	.type	strlen, @function
strlen:
.LFB3:
	.cfi_startproc
	# First aligned word w: %rcx = w - mask01, %rdx = ~w & %rcx, then
	# tested against mask80.  %rax is advanced to the next word.
	movq	%rdi, %rax
	movabsq	$-72340172838076673, %rcx
	andq	$-8, %rax
	movq	(%rax), %rdx
	addq	$8, %rax
	addq	%rdx, %rcx
	andn	%rcx, %rdx, %rdx
	movabsq	$-9187201950435737472, %rcx
	testq	%rcx, %rdx
	je	.L2
	# Skip the byte scan unless str < lp (unsigned comparison)
	cmpq	%rax, %rdi
	jnb	.L2
	movq	%rdi, %rdx
	jmp	.L5
	.p2align 4
	.p2align 3
.L3:
	# Byte scan of the first word: %rdx = p, stop when p reaches lp
	incq	%rdx
	cmpq	%rdx, %rax
	je	.L2
.L5:
	cmpb	$0, (%rdx)
	jne	.L3
	# Zero byte found: return p - str
	movq	%rdx, %rax
	subq	%rdi, %rax
	ret
	.p2align 4
	.p2align 3
.L2:
	# Word loop setup: %r8 = -mask01, %rsi = mask80
	movabsq	$-72340172838076673, %r8
	movabsq	$-9187201950435737472, %rsi
	jmp	.L20
	.p2align 4
	.p2align 3
.L7:
	# Flagged word whose byte 0 is non-zero: test bytes 1..7 in order
	cmpb	$0, 1(%rax)
	je	.L22
	cmpb	$0, 2(%rax)
	je	.L23
	cmpb	$0, 3(%rax)
	je	.L24
	cmpb	$0, 4(%rax)
	je	.L25
	cmpb	$0, 5(%rax)
	je	.L26
	cmpb	$0, 6(%rax)
	je	.L27
	cmpb	$0, 7(%rax)
	je	.L28
.L6:
	addq	$8, %rax
.L20:
	# Zero-byte test on the word at lp: leaq forms w - mask01 without a
	# separate copy, andn folds the complement and the AND together.
	movq	(%rax), %rdx
	leaq	(%rdx,%r8), %rcx
	andn	%rcx, %rdx, %rdx
	testq	%rsi, %rdx
	je	.L6
	cmpb	$0, (%rax)
	jne	.L7
	# Byte 0 is the terminator: return lp - str
	subq	%rdi, %rax
	ret
	# .L22 .. .L28: return (lp - str) + k for a terminator at byte k = 1..7
.L22:
	subq	%rdi, %rax
	incq	%rax
	ret
.L23:
	subq	%rdi, %rax
	addq	$2, %rax
	ret
.L24:
	subq	%rdi, %rax
	addq	$3, %rax
	ret
.L25:
	subq	%rdi, %rax
	addq	$4, %rax
	ret
.L26:
	subq	%rdi, %rax
	addq	$5, %rax
	ret
.L27:
	subq	%rdi, %rax
	addq	$6, %rax
	ret
.L28:
	subq	%rdi, %rax
	addq	$7, %rax
	ret
	.cfi_endproc
.LFE3:
	.size	strlen, .-strlen
	.ident	"GCC: (FreeBSD Ports Collection) 10.3.0"
	.section	.note.GNU-stack,"",@progbits
gcc-O3.s
	.file	"strlen.c"
	# strlen() for x86-64 in GNU as (AT&T) syntax; labelled gcc-O3 in the
	# article.
	#   In:  %rdi = str (never modified)     Out: %rax = length in bytes
	# %rax doubles as the word pointer lp until the result is formed.
	# The immediate -72340172838076673 is -(0x0101010101010101), so adding
	# it subtracts mask01; -9187201950435737472 is 0x8080808080808080.
	.text
# APP
	.ident	"$FreeBSD$"
# NO_APP
	.p2align 4
	.globl	strlen
	.type	strlen, @function
strlen:
.LFB6:
	.cfi_startproc
	# First aligned word: %rdx = (w - mask01) & ~w, then tested against
	# mask80 held in %rcx.  %rax is advanced to the next word.
	movabsq	$-72340172838076673, %rsi
	movq	%rdi, %rax
	andq	$-8, %rax
	movq	(%rax), %rdx
	addq	$8, %rax
	movq	%rdx, %rcx
	addq	%rsi, %rdx
	notq	%rcx
	andq	%rcx, %rdx
	movabsq	$-9187201950435737472, %rcx
	testq	%rcx, %rdx
	je	.L2
	# Skip the byte scan unless str < lp (unsigned comparison)
	cmpq	%rax, %rdi
	jnb	.L2
	movq	%rdi, %rdx
	jmp	.L5
	.p2align 4,,10
	.p2align 3
.L3:
	# Byte scan of the first word: %rdx = p, stop when p reaches lp
	addq	$1, %rdx
	cmpq	%rax, %rdx
	je	.L2
.L5:
	cmpb	$0, (%rdx)
	jne	.L3
	# Zero byte found: return p - str
	movq	%rdx, %rax
	subq	%rdi, %rax
	ret
	.p2align 4,,10
	.p2align 3
.L2:
	# Word loop setup: %r8 = -mask01, %rsi = mask80
	movabsq	$-72340172838076673, %r8
	movabsq	$-9187201950435737472, %rsi
	jmp	.L20
	.p2align 4,,10
	.p2align 3
.L7:
	# Flagged word whose byte 0 is non-zero: test bytes 1..7 in order
	cmpb	$0, 1(%rax)
	je	.L22
	cmpb	$0, 2(%rax)
	je	.L23
	cmpb	$0, 3(%rax)
	je	.L24
	cmpb	$0, 4(%rax)
	je	.L25
	cmpb	$0, 5(%rax)
	je	.L26
	cmpb	$0, 6(%rax)
	je	.L27
	cmpb	$0, 7(%rax)
	je	.L28
.L6:
	addq	$8, %rax
.L20:
	# Zero-byte test on the word at lp; advance via .L6 when it is clear
	movq	(%rax), %rdx
	movq	%rdx, %rcx
	addq	%r8, %rdx
	notq	%rcx
	andq	%rcx, %rdx
	testq	%rsi, %rdx
	je	.L6
	cmpb	$0, (%rax)
	jne	.L7
	# Byte 0 is the terminator: return lp - str
	subq	%rdi, %rax
	ret
	# .L22 .. .L28: return (lp - str) + k for a terminator at byte k = 1..7
.L22:
	subq	%rdi, %rax
	addq	$1, %rax
	ret
.L23:
	subq	%rdi, %rax
	addq	$2, %rax
	ret
.L24:
	subq	%rdi, %rax
	addq	$3, %rax
	ret
.L25:
	subq	%rdi, %rax
	addq	$4, %rax
	ret
.L26:
	subq	%rdi, %rax
	addq	$5, %rax
	ret
.L27:
	subq	%rdi, %rax
	addq	$6, %rax
	ret
.L28:
	subq	%rdi, %rax
	addq	$7, %rax
	ret
	.cfi_endproc
.LFE6:
	.size	strlen, .-strlen
	.ident	"GCC: (FreeBSD Ports Collection) 10.3.0"
	.section	.note.GNU-stack,"",@progbits
gcc-O3-zen2.s
	.file	"strlen.c"
	# strlen() for x86-64 in GNU as (AT&T) syntax; labelled gcc-O3-zen2 in
	# the article (presumably -O3 with -march=znver2; the flags are not
	# recorded in this listing).
	#   In:  %rdi = str (never modified)     Out: %rax = length in bytes
	# %rax doubles as the word pointer lp until the result is formed.
	# The immediate -72340172838076673 is -(0x0101010101010101), so adding
	# it subtracts mask01; -9187201950435737472 is 0x8080808080808080.
	# "andn A, B, D" computes D = ~B & A.
	.text
# APP
	.ident	"$FreeBSD$"
# NO_APP
	.p2align 4
	.globl	strlen
	.type	strlen, @function
strlen:
.LFB3:
	.cfi_startproc
	# First aligned word w: %rcx = w - mask01, %rdx = ~w & %rcx, then
	# tested against mask80.  %rax is advanced to the next word.
	movq	%rdi, %rax
	movabsq	$-72340172838076673, %rcx
	andq	$-8, %rax
	movq	(%rax), %rdx
	addq	$8, %rax
	addq	%rdx, %rcx
	andn	%rcx, %rdx, %rdx
	movabsq	$-9187201950435737472, %rcx
	testq	%rcx, %rdx
	je	.L2
	# Skip the byte scan unless str < lp (unsigned comparison)
	cmpq	%rax, %rdi
	jnb	.L2
	movq	%rdi, %rdx
	jmp	.L5
	.p2align 4
	.p2align 3
.L3:
	# Byte scan of the first word: %rdx = p, stop when p reaches lp
	incq	%rdx
	cmpq	%rax, %rdx
	je	.L2
.L5:
	cmpb	$0, (%rdx)
	jne	.L3
	# Zero byte found: return p - str
	movq	%rdx, %rax
	subq	%rdi, %rax
	ret
	.p2align 4
	.p2align 3
.L2:
	# Word loop setup: %r8 = -mask01, %rsi = mask80
	movabsq	$-72340172838076673, %r8
	movabsq	$-9187201950435737472, %rsi
	jmp	.L20
	.p2align 4
	.p2align 3
.L7:
	# Flagged word whose byte 0 is non-zero: test bytes 1..7 in order
	cmpb	$0, 1(%rax)
	je	.L22
	cmpb	$0, 2(%rax)
	je	.L23
	cmpb	$0, 3(%rax)
	je	.L24
	cmpb	$0, 4(%rax)
	je	.L25
	cmpb	$0, 5(%rax)
	je	.L26
	cmpb	$0, 6(%rax)
	je	.L27
	cmpb	$0, 7(%rax)
	je	.L28
.L6:
	addq	$8, %rax
.L20:
	# Zero-byte test on the word at lp: leaq forms w - mask01 without a
	# separate copy, andn folds the complement and the AND together.
	movq	(%rax), %rdx
	leaq	(%rdx,%r8), %rcx
	andn	%rcx, %rdx, %rdx
	testq	%rsi, %rdx
	je	.L6
	cmpb	$0, (%rax)
	jne	.L7
	# Byte 0 is the terminator: return lp - str
	subq	%rdi, %rax
	ret
	# .L22 .. .L28: return (lp - str) + k for a terminator at byte k = 1..7
.L22:
	subq	%rdi, %rax
	incq	%rax
	ret
.L23:
	subq	%rdi, %rax
	addq	$2, %rax
	ret
.L24:
	subq	%rdi, %rax
	addq	$3, %rax
	ret
.L25:
	subq	%rdi, %rax
	addq	$4, %rax
	ret
.L26:
	subq	%rdi, %rax
	addq	$5, %rax
	ret
.L27:
	subq	%rdi, %rax
	addq	$6, %rax
	ret
.L28:
	subq	%rdi, %rax
	addq	$7, %rax
	ret
	.cfi_endproc
.LFE3:
	.size	strlen, .-strlen
	.ident	"GCC: (FreeBSD Ports Collection) 10.3.0"
	.section	.note.GNU-stack,"",@progbits
gcc-Os.s
	.file	"strlen.c"
	# strlen() for x86-64 in GNU as (AT&T) syntax; labelled gcc-Os in the
	# article.  No .p2align padding is emitted anywhere in this variant.
	#   In:  %rdi = str (never modified)     Out: %rax = length in bytes
	# %rax doubles as the word pointer lp until the result is formed.
	# The immediate -72340172838076673 is -(0x0101010101010101), so adding
	# it subtracts mask01; -9187201950435737472 is 0x8080808080808080.
	.text
# APP
	.ident	"$FreeBSD$"
# NO_APP
	.globl	strlen
	.type	strlen, @function
strlen:
.LFB6:
	.cfi_startproc
	# First aligned word: %rdx = (w - mask01) & ~w, then tested against
	# mask80 held in %rcx.  %rax is advanced to the next word.
	movabsq	$-72340172838076673, %rsi
	movq	%rdi, %rax
	andq	$-8, %rax
	movq	(%rax), %rdx
	addq	$8, %rax
	movq	%rdx, %rcx
	addq	%rsi, %rdx
	notq	%rcx
	andq	%rcx, %rdx
	movabsq	$-9187201950435737472, %rcx
	testq	%rcx, %rdx
	je	.L3
	movq	%rdi, %rdx
.L2:
	# Byte scan of the first word: %rdx = p, loop while p < lp (unsigned)
	cmpq	%rax, %rdx
	jnb	.L3
	cmpb	$0, (%rdx)
	jne	.L4
	# Zero byte found: move p into %rax and share the return at .L20
	movq	%rdx, %rax
	jmp	.L20
.L4:
	incq	%rdx
	jmp	.L2
.L3:
	# Word loop setup: %rsi = -mask01, %r8 = mask80
	movabsq	$-72340172838076673, %rsi
	movabsq	$-9187201950435737472, %r8
.L19:
	# Zero-byte test on the word at lp; advance via .L7 when it is clear
	movq	(%rax), %rdx
	movq	%rdx, %rcx
	addq	%rsi, %rdx
	notq	%rcx
	andq	%rcx, %rdx
	testq	%r8, %rdx
	je	.L7
	cmpb	$0, (%rax)
	jne	.L8
.L20:
	# Shared return: %rax - str
	subq	%rdi, %rax
	ret
	# .L8 .. .L14: test bytes 1..7 of the flagged word in order; a hit
	# returns (lp - str) + k, a miss falls to the next label.
.L8:
	cmpb	$0, 1(%rax)
	jne	.L9
	subq	%rdi, %rax
	incq	%rax
	ret
.L9:
	cmpb	$0, 2(%rax)
	jne	.L10
	subq	%rdi, %rax
	addq	$2, %rax
	ret
.L10:
	cmpb	$0, 3(%rax)
	jne	.L11
	subq	%rdi, %rax
	addq	$3, %rax
	ret
.L11:
	cmpb	$0, 4(%rax)
	jne	.L12
	subq	%rdi, %rax
	addq	$4, %rax
	ret
.L12:
	cmpb	$0, 5(%rax)
	jne	.L13
	subq	%rdi, %rax
	addq	$5, %rax
	ret
.L13:
	cmpb	$0, 6(%rax)
	jne	.L14
	subq	%rdi, %rax
	addq	$6, %rax
	ret
.L14:
	cmpb	$0, 7(%rax)
	jne	.L7
	subq	%rdi, %rax
	addq	$7, %rax
	ret
.L7:
	# lp++ and go around the word loop again
	addq	$8, %rax
	jmp	.L19
	.cfi_endproc
.LFE6:
	.size	strlen, .-strlen
	.ident	"GCC: (FreeBSD Ports Collection) 10.3.0"
	.section	.note.GNU-stack,"",@progbits
gcc-Os-zen2.s
	.file	"strlen.c"
	# strlen() for x86-64 in GNU as (AT&T) syntax; labelled gcc-Os-zen2 in
	# the article (presumably -Os with -march=znver2; the flags are not
	# recorded in this listing).  No .p2align padding is emitted.
	#   In:  %rdi = str (never modified)     Out: %rax = length in bytes
	# %rax doubles as the word pointer lp until the result is formed.
	# The immediate -72340172838076673 is -(0x0101010101010101), so adding
	# it subtracts mask01; -9187201950435737472 is 0x8080808080808080.
	# "andn A, B, D" computes D = ~B & A.
	.text
# APP
	.ident	"$FreeBSD$"
# NO_APP
	.globl	strlen
	.type	strlen, @function
strlen:
.LFB3:
	.cfi_startproc
	# First aligned word w: %rcx = w - mask01, %rdx = ~w & %rcx, then
	# tested against mask80.  %rax is advanced to the next word.
	movq	%rdi, %rax
	movabsq	$-72340172838076673, %rcx
	andq	$-8, %rax
	movq	(%rax), %rdx
	addq	$8, %rax
	addq	%rdx, %rcx
	andn	%rcx, %rdx, %rdx
	movabsq	$-9187201950435737472, %rcx
	testq	%rcx, %rdx
	je	.L3
	movq	%rdi, %rdx
.L2:
	# Byte scan of the first word: %rdx = p, loop while p < lp (unsigned)
	cmpq	%rax, %rdx
	jnb	.L3
	cmpb	$0, (%rdx)
	jne	.L4
	# Zero byte found: move p into %rax and share the return at .L20
	movq	%rdx, %rax
	jmp	.L20
.L4:
	incq	%rdx
	jmp	.L2
.L3:
	# Word loop setup: %rcx = -mask01, %rsi = mask80
	movabsq	$-72340172838076673, %rcx
	movabsq	$-9187201950435737472, %rsi
.L19:
	# Zero-byte test on the word at lp: %r8 = w - mask01 via leaq,
	# %rdx = ~w & %r8 via andn; advance via .L7 when it is clear.
	movq	(%rax), %rdx
	leaq	(%rdx,%rcx), %r8
	andn	%r8, %rdx, %rdx
	testq	%rsi, %rdx
	je	.L7
	cmpb	$0, (%rax)
	jne	.L8
.L20:
	# Shared return: %rax - str
	subq	%rdi, %rax
	ret
	# .L8 .. .L14: test bytes 1..7 of the flagged word in order; a hit
	# returns (lp - str) + k, a miss falls to the next label.
.L8:
	cmpb	$0, 1(%rax)
	jne	.L9
	subq	%rdi, %rax
	incq	%rax
	ret
.L9:
	cmpb	$0, 2(%rax)
	jne	.L10
	subq	%rdi, %rax
	addq	$2, %rax
	ret
.L10:
	cmpb	$0, 3(%rax)
	jne	.L11
	subq	%rdi, %rax
	addq	$3, %rax
	ret
.L11:
	cmpb	$0, 4(%rax)
	jne	.L12
	subq	%rdi, %rax
	addq	$4, %rax
	ret
.L12:
	cmpb	$0, 5(%rax)
	jne	.L13
	subq	%rdi, %rax
	addq	$5, %rax
	ret
.L13:
	cmpb	$0, 6(%rax)
	jne	.L14
	subq	%rdi, %rax
	addq	$6, %rax
	ret
.L14:
	cmpb	$0, 7(%rax)
	jne	.L7
	subq	%rdi, %rax
	addq	$7, %rax
	ret
.L7:
	# lp++ and go around the word loop again
	addq	$8, %rax
	jmp	.L19
	.cfi_endproc
.LFE3:
	.size	strlen, .-strlen
	.ident	"GCC: (FreeBSD Ports Collection) 10.3.0"
	.section	.note.GNU-stack,"",@progbits
1
0
0

Register as a new user and use Qiita more conveniently

  1. You get articles that match your needs
  2. You can efficiently read back useful information
  3. You can use dark theme
What you can do with signing up
1
0

Delete article

Deleted articles cannot be recovered.

Draft of this article would be also deleted.

Are you sure you want to delete this article?