More than 5 years have passed since last update.

switch文のケースに使われているリテラル値で最適化はしてくれるのか？

Last updated at 2016-02-14Posted at 2016-02-14

switch文のcaseラベルってリテラルで指定するし、その数字使って処理する場合は最適化かけてくれてコンパイル時にconst値にしてくれたりしないのかな？という疑問があったので実験。

コード

測定される側

DWORD値とビット位置を受け取って、DWORD値のビット位置の真/偽を返す。
ただし、ビット位置は1～8の範囲。(0bit目指定は1という扱い)

//maskの右辺値はコンパイル時に決められるから計算コスト不要であることを期待
class Select0
{
public:
	bool operator()(int bitPos, uint32_t dword)
	{
		uint32_t mask = 0;
		switch (bitPos)
		{
		case 1:	 mask = 1<<0; break;
		case 2:	 mask = 1<<1; break;
		case 3:	 mask = 1<<2; break;
		case 4:	 mask = 1<<3; break;
		case 5:	 mask = 1<<4; break;
		case 6:	 mask = 1<<5; break;
		case 7:	 mask = 1<<6; break;
		case 8:	 mask = 1<<7; break;
		default: return false;
		}
		return (dword & mask) != 0;
	}
};
//Select0の派生。Select0が最適化されていたらこれと同じ速度になるはず。
class Select00
{
public:
	bool operator()(int bitPos, uint32_t dword)
	{
		switch (bitPos)
		{
		case 1:	 return (dword & (1 << 0)) != 0;
		case 2:	 return (dword & (1 << 1)) != 0;
		case 3:	 return (dword & (1 << 2)) != 0;
		case 4:	 return (dword & (1 << 3)) != 0;
		case 5:	 return (dword & (1 << 4)) != 0;
		case 6:	 return (dword & (1 << 5)) != 0;
		case 7:	 return (dword & (1 << 6)) != 0;
		case 8:	 return (dword & (1 << 7)) != 0;
		default: return false;
		}
	}
};
//Select0の派生。Select0が最適化されていたらこれと同じ速度になるはず。
class Select01
{
public:
	bool operator()(int bitPos, uint32_t dword)
	{
		switch (bitPos)
		{
		case 1:	 return (dword & (1  )) != 0;
		case 2:	 return (dword & (2  )) != 0;
		case 3:	 return (dword & (4  )) != 0;
		case 4:	 return (dword & (8  )) != 0;
		case 5:	 return (dword & (16 )) != 0;
		case 6:	 return (dword & (32 )) != 0;
		case 7:	 return (dword & (64 )) != 0;
		case 8:	 return (dword & (128)) != 0;
		default: return false;
		}
	}
};
//最適化が賢ければSelect0はこう書いても同じになるはず。
class Select1
{
public:
	bool operator()(int bitPos, uint32_t dword)
	{
		switch (bitPos)
		{
		case 1:	case 2:	case 3:	case 4:	case 5:	case 6:	case 7:	case 8:
		{
			const uint32_t mask = 1 << (bitPos - 1);
			return (dword & mask) != 0;
		}
		default: return false;
		}
	}
};
//if文で普通に処理するパターン
class Select2
{

public:
	bool operator()(int bitPos, uint32_t dword)
	{
		if (bitPos < 1 || 8 < bitPos)
		{
			return false;
		}
		return ((dword >> (bitPos - 1)) & 1) == 1;
	}

};
//if文で範囲外をはじいてからテーブル処理するパターン。テーブル内のシフト演算はコンパイル時に確定するはず。
class Select30
{
public:
	bool operator()(int bitPos, uint32_t dword)
	{
		if (bitPos < 1 || 8 < bitPos)
		{
			return false;
		}
		static const uint32_t mask[] = { 0, 1, 1 << 1, 1 << 2, 1 << 3, 1 << 4, 1 << 5, 1 << 6, 1 << 7};
		return (dword & mask[bitPos]) != 0;
	}

};
//Select30の亜種。範囲外はすべて0番目の配列で処理
class Select31
{
public:
	bool operator()(int bitPos, uint32_t dword)
	{
		static const uint32_t mask[] = { 0, 1, 1 << 1, 1 << 2, 1 << 3, 1 << 4, 1 << 5, 1 << 6, 1 << 7 };
		return (dword & mask[8<bitPos ? 0 : bitPos]) != 0;
	}

};

測定する側

# include <iostream>
# include "Timer.hpp"

template<typename T>
class Measure
{
	static const uint64_t rep = 100000000;
	int count_;
public:
	Measure() :count_()
	{}
	int count()
	{
		return count_;
	}
	double execute(uint32_t dword)
	{
		Timer<TimerType::HighAccuracy> timer;
		for (uint64_t i = 0; i < rep; ++i)
		{
			if (T()(i % 8 + 1, dword))
			{
				++count_;
			}
		}
		return timer.elapsed();
	}

};
# include <iomanip>
# define measure(n,dw) \
{ \
	Measure<Select##n> a; \
	volatile double r = a.execute(dw); \
	int c = a.count(); \
	cout << #n":\t" << /*fixed << setprecision(6) << */ r << ", " << c << endl; \
}
//setprecisionをつけるとなぜかSelect2が遅くなる
int main(int argv, char* argc[])
{
	static const int testNum = 2;
	//最適化効いたらいやなので標準入力から32という数値を放り込む
	uint32_t in;
	cin >> in;

	volatile uint32_t dw = in;

	measure(0, dw);
	measure(00, dw);
	measure(01, dw);
	measure(1, dw);
	measure(2, dw);
	measure(30, dw);
	measure(31, dw);

	return 0;
}

環境

Visual Studio 2013 express, Releaseビルド

結果

0:      0.276666, 12500000
00:     0.216566, 12500000
01:     0.213629, 12500000
1:      0.143589, 12500000
2:      0.113139, 12500000
30:     0.115278, 12500000
31:     0.144108, 12500000
続行するには何かキーを押してください . . .

[測定対象名]:[時間], [trueになった回数]で出力している。
（trueになった回数はロジック間違えてないかの確認用）

考察

よくわかんない。

Select0系が遅い理由

ジャンプテーブル化はされているっぽいがそれがそもそも遅いのか？

Select01のアセンブラ

$LL4@execute:
	mov	eax, ecx
	and	eax, 7
	cmp	eax, 7
	ja	SHORT $LN3@execute
	jmp	DWORD PTR $LN45@execute[eax*4]
$LN23@execute:
	mov	al, bl
	and	eax, -255				; ffffff01H
	jmp	SHORT $LN24@execute
$LN22@execute:
	mov	eax, ebx
	shr	eax, 1
	jmp	SHORT $LN40@execute
$LN21@execute:
	mov	eax, ebx
	shr	eax, 2
	jmp	SHORT $LN40@execute
$LN20@execute:
	mov	eax, ebx
	shr	eax, 3
	jmp	SHORT $LN40@execute
$LN19@execute:
	mov	eax, ebx
	shr	eax, 4
	jmp	SHORT $LN40@execute
$LN18@execute:
	mov	eax, ebx
	shr	eax, 5
	jmp	SHORT $LN40@execute
$LN17@execute:
	mov	eax, ebx
	shr	eax, 6
	jmp	SHORT $LN40@execute
$LN16@execute:
	mov	eax, ebx
	shr	eax, 7
$LN40@execute:
	and	al, 1
$LN24@execute:
	test	al, al
	je	SHORT $LN3@execute; Line 23
	inc	DWORD PTR [edi]

ジャンプテーブルっぽいもの

$LN45@execute:
	DD	$LN23@execute
	DD	$LN22@execute
	DD	$LN21@execute
	DD	$LN20@execute
	DD	$LN19@execute
	DD	$LN18@execute
	DD	$LN17@execute
	DD	$LN16@execute

Select1が速い理由

Select01よりアセンブラレベルですっきりしてる・・・から？

_TEXT	SEGMENT
_dword$ = 8						; size = 4
$T1 = 12						; size = 4
??RSelect1@@QAE_NHI@Z PROC				; Select1::operator(), COMDAT
	push	ebp
	mov	ebp, esp
	cmp	DWORD PTR $T1[ebp], 8
	jle	SHORT $LN2@operator
	xor	al, al
	pop	ebp
	ret	8
$LN2@operator:
	dec	ecx
	mov	eax, 1
	shl	eax, cl
	and	eax, DWORD PTR _dword$[ebp]
	neg	eax
	sbb	eax, eax
	neg	eax
	pop	ebp
	ret	8
??RSelect1@@QAE_NHI@Z ENDP				; Select1::operator()
_TEXT	ENDS

Select2が速い理由

アイエエエエ!ミジカイ!?ミジカイナンデ!?
アセンブラレベルで見れば確かに速そう。

	cmp	ecx, 7
	ja	SHORT $LN3@execute
	mov	eax, ebx
	shr	eax, cl
	and	al, 1

You get articles that match your needs
You can efficiently read back useful information
You can use dark theme

What you can do with signing up