std::byteswap は C++23 から使えるようですが、C++23 より前でも使いたいので自前の実装を書いておきます。
動作確認しているコンパイラは以下の二つです。
- Visual Studio 2022 Version 17.10.1
  - Microsoft Visual C++ 2022
- Apple clang version 15.0.0 (clang-1500.3.9.4)
  - x86_64-apple-darwin23.5.0
専用関数を使わない
byteswap に相当する CPU 命令は随分前(Intel486™ プロセッサ(80486)の bswap は約35年前)からありますが、以前はコンパイラの専用関数やインライン アセンブラ等で記述する必要がありました。今ではコンパイラの最適化性能が向上したため、専用関数を使わなくても同じ結果を期待できます。
sample1.cpp
// Pull in <bit> only when the language level is C++20 or later, so that
// __cpp_lib_byteswap becomes visible if the library already ships
// std::byteswap.  _MSVC_LANG is tested as well because MSVC can report
// __cplusplus as 199711 regardless of the selected language level.
#if __cplusplus >= 202002L || _MSVC_LANG >= 202002L
#include <bit>
#endif
// An undefined __cpp_lib_byteswap evaluates to 0 in #if, so everything up to
// the matching #endif is compiled whenever the library lacks std::byteswap.
#if __cpp_lib_byteswap < 202110L
// NOTE(review): <cstdlib> is included for MSVC only; other toolchains appear
// to rely on it being reached through another header -- confirm.
#ifdef _MSC_VER
#include <cstdlib>
#endif /* MSVC */
// Compilers without __has_builtin get a stub that reports every builtin as
// unavailable.
#ifndef __has_builtin
#define __has_builtin(x) 0
#endif
// Below C++11 the constexpr and noexcept keywords do not exist; define them
// away so the function declaration that follows still compiles.
#if !(__cplusplus >= 201103L || _MSVC_LANG >= 201103L)
# ifndef constexpr
# define constexpr
# endif
# ifndef noexcept
# define noexcept
# endif
#endif
namespace std
{
#ifdef _MSC_VER
#pragma warning(push)
#pragma warning(disable: 4293 4333 26452)
#endif /* MSVC */
// Stand-in for std::byteswap: returns x with the order of its bytes reversed.
//
// 2-, 4- and 8-byte types are handled; a value of any other size (1 byte
// included) is returned unchanged.  The body is a single return statement so
// that it stays a valid C++11 constexpr function, and each width is written as
// a plain shift/mask/or chain with no compiler-specific intrinsic.
//
// All three chains are compiled for every T, so the ones not selected by
// sizeof(T) can contain shift counts wider than T -- presumably the reason
// the MSVC warnings are disabled around this function.
template <typename T>
inline constexpr T byteswap(T x) noexcept
{
    return sizeof(T) == 8
        ? T((((x >>  0) & 0xff) << 56) |
            (((x >>  8) & 0xff) << 48) |
            (((x >> 16) & 0xff) << 40) |
            (((x >> 24) & 0xff) << 32) |
            (((x >> 32) & 0xff) << 24) |
            (((x >> 40) & 0xff) << 16) |
            (((x >> 48) & 0xff) <<  8) |
            (((x >> 56) & 0xff) <<  0))
        : sizeof(T) == 4
        ? T((((x >>  0) & 0xff) << 24) |
            (((x >>  8) & 0xff) << 16) |
            (((x >> 16) & 0xff) <<  8) |
            (((x >> 24) & 0xff) <<  0))
        : sizeof(T) == 2
        ? T((((x >>  0) & 0xff) <<  8) |
            (((x >>  8) & 0xff) <<  0))
        : x;
}
#ifdef _MSC_VER
#pragma warning(pop)
#endif /* MSVC */
} // std
#endif /* __cpp_lib_byteswap < 202110L */
// ----------------------------------------------------------------------------
#include <cstdint>
#include <iostream>
#include <string>
// Formats x as upper-case hexadecimal, one two-digit group per byte, most
// significant byte first, with a single space between groups
// (e.g. a 16-bit 0x1234 becomes "12 34").
template <typename T>
std::string hexstr(T x)
{
    static const char digits[] = "0123456789ABCDEF";
    std::string out;
    // Walk the bytes from the most significant one down to byte 0.
    for (size_t remaining = sizeof(T); remaining > 0; --remaining)
    {
        const size_t low_bit = (remaining - 1) * 8; // bit offset of this byte
        if (!out.empty())
            out += ' ';
        out += digits[(x >> (low_bit + 4)) & 15];   // high nibble
        out += digits[(x >> low_bit) & 15];         // low nibble
    }
    return out;
}
// Byte-swaps x and prints the value before ("x = ") and after ("y = ") the
// swap as hexadecimal byte groups, one per line, on standard output.
template <typename T>
void test(T x)
{
    const T swapped = std::byteswap(x);
    std::cout << "x = " << hexstr(x) << std::endl;
    std::cout << "y = " << hexstr(swapped) << std::endl;
}
// Entry point.  Prints whichever of the language/library version macros are
// defined, then treats every command-line argument as a hexadecimal number
// and shows it before and after a byte swap, using the narrowest of the
// 8/16/32/64-bit unsigned types that can hold the parsed value.
// The arguments are not validated before conversion.
// NOTE(review): strtoull is declared in <cstdlib>; confirm that header is
// included on every toolchain this is built with.
int main(int argc, char** argv)
{
#ifdef __cplusplus
    std::cout << "__cplusplus = " << __cplusplus << std::endl;
#endif
#ifdef _MSVC_LANG
    std::cout << "_MSVC_LANG = " << _MSVC_LANG << std::endl;
#endif
#ifdef __cpp_lib_byteswap
    std::cout << "__cpp_lib_byteswap = " << __cpp_lib_byteswap << std::endl;
#endif
    for (int i = 1; i < argc; ++i)
    {
        const uint64_t value = strtoull(argv[i], NULL, 16);
        std::cout << std::endl; // blank line between results
        // Pick the width by looking for set bits above each size boundary.
        if ((value >> 32) != 0)
            test(value);
        else if ((value >> 16) != 0)
            test<uint32_t>(uint32_t(value));
        else if ((value >> 8) != 0)
            test<uint16_t>(uint16_t(value));
        else
            test<uint8_t>(uint8_t(value));
    }
    return 0;
}
実行結果(MSVC)
__cplusplus = 199711
_MSVC_LANG = 201402
x = 12
y = 12
x = 12 34
y = 34 12
x = 12 34 56 78
y = 78 56 34 12
x = 12 34 56 78 9A BC DE F0
y = F0 DE BC 9A 78 56 34 12
MSVC での最適化
debug ビルドでプロジェクトのオプション
- 構成プロパティ → C/C++
- コード生成 → 基本ランタイム チェック
- 「既定」に変更
- 最適化 → 最適化
- 「最適化(速度を優先)(/Ox)」に変更
- コード生成 → 基本ランタイム チェック
でビルドして逆アセンブルを表示すると
byteswap(unsigned char x)
00007FF668C32880 push rbx
00007FF668C32882 sub rsp,20h
00007FF668C32886 movzx ebx,cl <<< ebx = x
00007FF668C32889 lea rcx,[__D9F3C95A_byteswap@cpp (07FF668C44067h)]
00007FF668C32890 call __CheckForDebuggerJustMyCode (07FF668C31541h)
00007FF668C32895 movzx eax,bl <<< return bl
00007FF668C32898 add rsp,20h
00007FF668C3289C pop rbx
00007FF668C3289D ret
byteswap(unsigned short x)
00007FF668C328B0 push rbx
00007FF668C328B2 sub rsp,20h
00007FF668C328B6 movzx ebx,cx <<< ebx = x
00007FF668C328B9 lea rcx,[__D9F3C95A_byteswap@cpp (07FF668C44067h)]
00007FF668C328C0 call __CheckForDebuggerJustMyCode (07FF668C31541h)
00007FF668C328C5 ror bx,8 <<< bx = byteswap(bx)
00007FF668C328C9 movzx eax,bx <<< return bx
00007FF668C328CC add rsp,20h
00007FF668C328D0 pop rbx
00007FF668C328D1 ret
byteswap(unsigned int x)
00007FF668C328E0 push rbx
00007FF668C328E2 sub rsp,20h
00007FF668C328E6 mov ebx,ecx <<< ebx = x
00007FF668C328E8 lea rcx,[__D9F3C95A_byteswap@cpp (07FF668C44067h)]
00007FF668C328EF call __CheckForDebuggerJustMyCode (07FF668C31541h)
00007FF668C328F4 bswap ebx <<< ebx = byteswap(ebx)
00007FF668C328F6 mov eax,ebx <<< return ebx
00007FF668C328F8 add rsp,20h
00007FF668C328FC pop rbx
00007FF668C328FD ret
byteswap(unsigned __int64 x)
00007FF668C32910 push rbx
00007FF668C32912 sub rsp,20h
00007FF668C32916 mov rbx,rcx <<< rbx = x
00007FF668C32919 lea rcx,[__D9F3C95A_byteswap@cpp (07FF668C44067h)]
00007FF668C32920 call __CheckForDebuggerJustMyCode (07FF668C31541h)
00007FF668C32925 bswap rbx <<< rbx = byteswap(rbx)
00007FF668C32928 mov rax,rbx <<< return rbx
00007FF668C3292B add rsp,20h
00007FF668C3292F pop rbx
00007FF668C32930 ret
を確認できます。(<<< 部分は追記)
最適化なし(/Od)の場合
byteswap(unsigned __int64 x)
00007FF770983850 mov qword ptr [rsp+8],rcx
00007FF770983855 push rbp
00007FF770983856 push rdi
00007FF770983857 sub rsp,108h
00007FF77098385E lea rbp,[rsp+20h]
00007FF770983863 lea rcx,[__D9F3C95A_byteswap@cpp (07FF77099A067h)]
00007FF77098386A call __CheckForDebuggerJustMyCode (07FF770981604h)
00007FF77098386F nop
00007FF770983870 xor eax,eax
00007FF770983872 cmp eax,1
00007FF770983875 je std::byteswap<unsigned __int64>+0F3h (07FF770983943h)
00007FF77098387B mov rax,qword ptr [x]
00007FF770983882 and rax,0FFh
00007FF770983888 shl rax,38h
00007FF77098388C mov rcx,qword ptr [x]
00007FF770983893 shr rcx,8
00007FF770983897 and rcx,0FFh
00007FF77098389E shl rcx,30h
00007FF7709838A2 or rax,rcx
00007FF7709838A5 mov rcx,qword ptr [x]
00007FF7709838AC shr rcx,10h
00007FF7709838B0 and rcx,0FFh
00007FF7709838B7 shl rcx,28h
00007FF7709838BB or rax,rcx
00007FF7709838BE mov rcx,qword ptr [x]
00007FF7709838C5 shr rcx,18h
00007FF7709838C9 and rcx,0FFh
00007FF7709838D0 shl rcx,20h
00007FF7709838D4 or rax,rcx
00007FF7709838D7 mov rcx,qword ptr [x]
00007FF7709838DE shr rcx,20h
00007FF7709838E2 and rcx,0FFh
00007FF7709838E9 shl rcx,18h
00007FF7709838ED or rax,rcx
00007FF7709838F0 mov rcx,qword ptr [x]
00007FF7709838F7 shr rcx,28h
00007FF7709838FB and rcx,0FFh
00007FF770983902 shl rcx,10h
00007FF770983906 or rax,rcx
00007FF770983909 mov rcx,qword ptr [x]
00007FF770983910 shr rcx,30h
00007FF770983914 and rcx,0FFh
00007FF77098391B shl rcx,8
00007FF77098391F or rax,rcx
00007FF770983922 mov rcx,qword ptr [x]
00007FF770983929 shr rcx,38h
00007FF77098392D and rcx,0FFh
00007FF770983934 or rax,rcx
00007FF770983937 mov qword ptr [rbp+0C0h],rax
00007FF77098393E jmp std::byteswap<unsigned __int64>+1B9h (07FF770983A09h)
00007FF770983943 xor eax,eax
00007FF770983945 test eax,eax
00007FF770983947 je std::byteswap<unsigned __int64>+15Ah (07FF7709839AAh)
00007FF770983949 mov rax,qword ptr [x]
00007FF770983950 and rax,0FFh
00007FF770983956 shl rax,18h
00007FF77098395A mov rcx,qword ptr [x]
00007FF770983961 shr rcx,8
00007FF770983965 and rcx,0FFh
00007FF77098396C shl rcx,10h
00007FF770983970 or rax,rcx
00007FF770983973 mov rcx,qword ptr [x]
00007FF77098397A shr rcx,10h
00007FF77098397E and rcx,0FFh
00007FF770983985 shl rcx,8
00007FF770983989 or rax,rcx
00007FF77098398C mov rcx,qword ptr [x]
00007FF770983993 shr rcx,18h
00007FF770983997 and rcx,0FFh
00007FF77098399E or rax,rcx
00007FF7709839A1 mov qword ptr [rbp+0C8h],rax
00007FF7709839A8 jmp std::byteswap<unsigned __int64>+1ABh (07FF7709839FBh)
00007FF7709839AA xor eax,eax
00007FF7709839AC test eax,eax
00007FF7709839AE je std::byteswap<unsigned __int64>+18Fh (07FF7709839DFh)
00007FF7709839B0 mov rax,qword ptr [x]
00007FF7709839B7 and rax,0FFh
00007FF7709839BD shl rax,8
00007FF7709839C1 mov rcx,qword ptr [x]
00007FF7709839C8 shr rcx,8
00007FF7709839CC and rcx,0FFh
00007FF7709839D3 or rax,rcx
00007FF7709839D6 mov qword ptr [rbp+0D0h],rax
00007FF7709839DD jmp std::byteswap<unsigned __int64>+19Dh (07FF7709839EDh)
00007FF7709839DF mov rax,qword ptr [x]
00007FF7709839E6 mov qword ptr [rbp+0D0h],rax
00007FF7709839ED mov rax,qword ptr [rbp+0D0h]
00007FF7709839F4 mov qword ptr [rbp+0C8h],rax
00007FF7709839FB mov rax,qword ptr [rbp+0C8h]
00007FF770983A02 mov qword ptr [rbp+0C0h],rax
00007FF770983A09 mov rax,qword ptr [rbp+0C0h]
00007FF770983A10 lea rsp,[rbp+0E8h]
00007FF770983A17 pop rdi
00007FF770983A18 pop rbp
00007FF770983A19 ret
Clang での最適化
以下の方法でアセンブラ ファイルを作ります。
sample1.s生成
$ clang++ -O -S sample1.cpp
sample1.s の中の bswap を検索すると
sample1.s(抜粋)
〜〜〜(略)〜〜〜
movq %rax, %r14
bswapl %ebx
leaq -48(%rbp), %rdi
〜〜〜(略)〜〜〜
movq %rax, %r14
bswapq %rbx
leaq -48(%rbp), %rdi
〜〜〜(略)〜〜〜
を確認できます。
参考:専用関数を使用
sample2.cpp
// Pull in <bit> only when the language level is C++20 or later, so that
// __cpp_lib_byteswap becomes visible if the library already ships
// std::byteswap.  _MSVC_LANG is tested as well because MSVC can report
// __cplusplus as 199711 regardless of the selected language level.
#if __cplusplus >= 202002L || _MSVC_LANG >= 202002L
#include <bit>
#endif
// An undefined __cpp_lib_byteswap evaluates to 0 in #if, so everything up to
// the matching #endif is compiled whenever the library lacks std::byteswap.
#if __cpp_lib_byteswap < 202110L
// <cstdlib> is included on MSVC, where the _byteswap_* functions are used.
#ifdef _MSC_VER
#include <cstdlib>
#endif /* MSVC */
// Compilers without __has_builtin get a stub that reports every builtin as
// unavailable, which routes them to the portable shift/mask code.
#ifndef __has_builtin
#define __has_builtin(x) 0
#endif
// Below C++11 the constexpr and noexcept keywords do not exist; define them
// away so the function declaration that follows still compiles.
#if !(__cplusplus >= 201103L || _MSVC_LANG >= 201103L)
# ifndef constexpr
# define constexpr
# endif
# ifndef noexcept
# define noexcept
# endif
#endif
// Build-time switch: 1 (the default) prefers compiler byte-swap
// intrinsics/builtins; define it as 0 to force the portable code.
#ifndef USE_BUILTIN_BSWAP
#define USE_BUILTIN_BSWAP 1
#endif
namespace std
{
#ifdef _MSC_VER
#pragma warning(push)
#pragma warning(disable: 4067 4293 4333 26452)
#endif /* MSVC */
// Stand-in for std::byteswap: returns x with the order of its bytes reversed.
//
// 2-, 4- and 8-byte types are handled; a value of any other size (1 byte
// included) is returned unchanged.  For each width the implementation is
// chosen at preprocessing time:
//   1. the MSVC _byteswap_* function when USE_BUILTIN_BSWAP is set on MSVC,
//   2. the matching __builtin_bswapNN when USE_BUILTIN_BSWAP is set and
//      __has_builtin reports it,
//   3. otherwise a plain shift/mask/or chain.
// The body is kept as a single return statement.
// NOTE(review): the function is declared constexpr, but the MSVC _byteswap_*
// functions are ordinary library calls -- confirm constant evaluation is not
// needed on that path.
template <typename T>
inline constexpr T byteswap(T x) noexcept
{
    return (sizeof(T) == 8 ?
#if USE_BUILTIN_BSWAP && defined(_MSC_VER)
        T(_byteswap_uint64((unsigned __int64)x))
#elif USE_BUILTIN_BSWAP && __has_builtin(__builtin_bswap64)
        T(__builtin_bswap64(x))
#else /* portable shift/mask code */
        (((x >>  0) & 0xff) << 56) |
        (((x >>  8) & 0xff) << 48) |
        (((x >> 16) & 0xff) << 40) |
        (((x >> 24) & 0xff) << 32) |
        (((x >> 32) & 0xff) << 24) |
        (((x >> 40) & 0xff) << 16) |
        (((x >> 48) & 0xff) <<  8) |
        (((x >> 56) & 0xff) <<  0)
#endif /* 8-byte case */
        : sizeof(T) == 4 ?
#if USE_BUILTIN_BSWAP && defined(_MSC_VER)
        T(_byteswap_ulong((unsigned long)x))
#elif USE_BUILTIN_BSWAP && __has_builtin(__builtin_bswap32)
        T(__builtin_bswap32(x))
#else /* portable shift/mask code */
        (((x >>  0) & 0xff) << 24) |
        (((x >>  8) & 0xff) << 16) |
        (((x >> 16) & 0xff) <<  8) |
        (((x >> 24) & 0xff) <<  0)
#endif /* 4-byte case */
        : sizeof(T) == 2 ?
#if USE_BUILTIN_BSWAP && defined(_MSC_VER)
        T(_byteswap_ushort((unsigned short)x))
#elif USE_BUILTIN_BSWAP && __has_builtin(__builtin_bswap16)
        T(__builtin_bswap16(x))
#else /* portable shift/mask code */
        (((x >>  0) & 0xff) <<  8) |
        (((x >>  8) & 0xff) <<  0)
#endif /* 2-byte case */
        : x);
}
#ifdef _MSC_VER
#pragma warning(pop)
#endif /* MSVC */
} // std
#endif /* __cpp_lib_byteswap < 202110L */
// ----------------------------------------------------------------------------
#include <cstdint>
#include <iostream>
#include <string>
// Formats x as upper-case hexadecimal, one two-digit group per byte, most
// significant byte first, with a single space between groups
// (e.g. a 16-bit 0x1234 becomes "12 34").
template <typename T>
std::string hexstr(T x)
{
    static const char digits[] = "0123456789ABCDEF";
    std::string out;
    // Walk the bytes from the most significant one down to byte 0.
    for (size_t remaining = sizeof(T); remaining > 0; --remaining)
    {
        const size_t low_bit = (remaining - 1) * 8; // bit offset of this byte
        if (!out.empty())
            out += ' ';
        out += digits[(x >> (low_bit + 4)) & 15];   // high nibble
        out += digits[(x >> low_bit) & 15];         // low nibble
    }
    return out;
}
// Byte-swaps x and prints the value before ("x = ") and after ("y = ") the
// swap as hexadecimal byte groups, one per line, on standard output.
template <typename T>
void test(T x)
{
    const T swapped = std::byteswap(x);
    std::cout << "x = " << hexstr(x) << std::endl;
    std::cout << "y = " << hexstr(swapped) << std::endl;
}
// Entry point.  Prints whichever of the language/library version macros are
// defined, then treats every command-line argument as a hexadecimal number
// and shows it before and after a byte swap, using the narrowest of the
// 8/16/32/64-bit unsigned types that can hold the parsed value.
// The arguments are not validated before conversion.
// NOTE(review): strtoull is declared in <cstdlib>; confirm that header is
// included on every toolchain this is built with.
int main(int argc, char** argv)
{
#ifdef __cplusplus
    std::cout << "__cplusplus = " << __cplusplus << std::endl;
#endif
#ifdef _MSVC_LANG
    std::cout << "_MSVC_LANG = " << _MSVC_LANG << std::endl;
#endif
#ifdef __cpp_lib_byteswap
    std::cout << "__cpp_lib_byteswap = " << __cpp_lib_byteswap << std::endl;
#endif
    for (int i = 1; i < argc; ++i)
    {
        const uint64_t value = strtoull(argv[i], NULL, 16);
        std::cout << std::endl; // blank line between results
        // Pick the width by looking for set bits above each size boundary.
        if ((value >> 32) != 0)
            test(value);
        else if ((value >> 16) != 0)
            test<uint32_t>(uint32_t(value));
        else if ((value >> 8) != 0)
            test<uint16_t>(uint16_t(value));
        else
            test<uint8_t>(uint8_t(value));
    }
    return 0;
}