LoginSignup
1
1

std::byteswap を C++23 でなくても使いたい

Last updated at Posted at 2024-06-08

std::byteswap は C++23 で標準ライブラリに追加されましたが、C++23 より前の環境でも使いたいので、代替実装を書いておきます。

動作確認しているコンパイラは以下の二つです。

  • Visual Studio 2022 Version 17.10.1
    • Microsoft Visual C++ 2022
  • Apple clang version 15.0.0 (clang-1500.3.9.4)
    • x86_64-apple-darwin23.5.0

専用関数を使わない

byteswap に相当する CPU 命令は随分前からあります(Intel® プロセッサの bswap 命令は 1989 年発売の 80486 で追加されたもので、約35年前)。ただし、以前はこの命令を使うためにコンパイラの専用関数(組み込み関数)やインライン アセンブラ等で記述する必要がありました。今ではコンパイラの最適化性能が向上したため、専用関数を使わなくても同じ結果を期待できます。

sample1.cpp
#if __cplusplus >= 202002L || _MSVC_LANG >= 202002L
#include <bit>
#endif

#if __cpp_lib_byteswap < 202110L

#ifdef _MSC_VER
#include <cstdlib>
#endif /* MSVC */

#ifndef __has_builtin
#define __has_builtin(x)  0
#endif

#if !(__cplusplus >= 201103L || _MSVC_LANG >= 201103L)
#  ifndef constexpr
#    define constexpr
#  endif
#  ifndef noexcept
#    define noexcept
#  endif
#endif

namespace std
{
#ifdef _MSC_VER
#pragma warning(push)
#pragma warning(disable: 4293 4333 26452)
#endif /* MSVC */

    template <typename T>
    inline constexpr T byteswap(T x) noexcept
    {
        return (sizeof(T) == 8
                ? T((((x >> 000) & 0xff) << 070) |
                    (((x >> 010) & 0xff) << 060) |
                    (((x >> 020) & 0xff) << 050) |
                    (((x >> 030) & 0xff) << 040) |
                    (((x >> 040) & 0xff) << 030) |
                    (((x >> 050) & 0xff) << 020) |
                    (((x >> 060) & 0xff) << 010) |
                    (((x >> 070) & 0xff) << 000))
                : sizeof(T) == 4
                ? T((((x >> 000) & 0xff) << 030) |
                    (((x >> 010) & 0xff) << 020) |
                    (((x >> 020) & 0xff) << 010) |
                    (((x >> 030) & 0xff) << 000))
                : sizeof(T) == 2
                ? T((((x >> 000) & 0xff) << 010) |
                    (((x >> 010) & 0xff) << 000))
                : x);
    }

#ifdef _MSC_VER
#pragma warning(pop)
#endif /* MSVC */
} // std

#endif /* __cpp_lib_byteswap < 202110L */

// ----------------------------------------------------------------------------

#include <cstdint>
#include <iostream>
#include <string>

template <typename T>
std::string hexstr(T x)
{
    static const char hex[] = "0123456789ABCDEF";
    std::string s;

    for (size_t i = 0; i < sizeof(T); ++i)
    {
        if (i)
            s += ' ';
        s += hex[(x >> ((sizeof(T) - i) * 8 - 4)) & 15];
        s += hex[(x >> ((sizeof(T) - i) * 8 - 8)) & 15];
    }
    return s;
}

template <typename T>
void test(T x)
{

    T y = std::byteswap(x);

    std::cout << "x = " << hexstr(x) << std::endl
              << "y = " << hexstr(y) << std::endl;
}

int main(int argc, char** argv)
{
#ifdef __cplusplus
    std::cout << "__cplusplus = " << __cplusplus << std::endl;
#endif
#ifdef _MSVC_LANG
    std::cout << "_MSVC_LANG = " << _MSVC_LANG << std::endl;
#endif
#ifdef __cpp_lib_byteswap
    std::cout << "__cpp_lib_byteswap = " << __cpp_lib_byteswap << std::endl;
#endif

    for (int i = 1; i < argc; ++i)
    {
        uint64_t x = strtoull(argv[i], NULL, 16);

        std::cout << std::endl;

        if (x < (UINT64_C(1) << 8))
            test<uint8_t>(uint8_t(x));
        else if (x < (UINT64_C(1) << 16))
            test<uint16_t>(uint16_t(x));
        else if (x < (UINT64_C(1) << 32))
            test<uint32_t>(uint32_t(x));
        else
            test(x);
    }
    return 0;
}
実行結果(MSVC)
__cplusplus = 199711
_MSVC_LANG = 201402

x = 12
y = 12

x = 12 34
y = 34 12

x = 12 34 56 78
y = 78 56 34 12

x = 12 34 56 78 9A BC DE F0
y = F0 DE BC 9A 78 56 34 12

MSVC での最適化

debug ビルドでプロジェクトのオプション

  • 構成プロパティ → C/C++
    • コード生成 → 基本ランタイム チェック
      • 「既定」に変更
    • 最適化 → 最適化
      • 「最適化(速度を優先)(/Ox)」に変更

でビルドして逆アセンブルを表示すると

byteswap(unsigned char x)
00007FF668C32880  push        rbx
00007FF668C32882  sub         rsp,20h
00007FF668C32886  movzx       ebx,cl  <<< ebx = x
00007FF668C32889  lea         rcx,[__D9F3C95A_byteswap@cpp (07FF668C44067h)]
00007FF668C32890  call        __CheckForDebuggerJustMyCode (07FF668C31541h)
00007FF668C32895  movzx       eax,bl  <<< return bl
00007FF668C32898  add         rsp,20h
00007FF668C3289C  pop         rbx
00007FF668C3289D  ret
byteswap(unsigned short x)
00007FF668C328B0  push        rbx
00007FF668C328B2  sub         rsp,20h
00007FF668C328B6  movzx       ebx,cx  <<< ebx = x
00007FF668C328B9  lea         rcx,[__D9F3C95A_byteswap@cpp (07FF668C44067h)]
00007FF668C328C0  call        __CheckForDebuggerJustMyCode (07FF668C31541h)
00007FF668C328C5  ror         bx,8    <<< bx = byteswap(bx)
00007FF668C328C9  movzx       eax,bx  <<< return bx
00007FF668C328CC  add         rsp,20h
00007FF668C328D0  pop         rbx
00007FF668C328D1  ret
byteswap(unsigned int x)
00007FF668C328E0  push        rbx
00007FF668C328E2  sub         rsp,20h
00007FF668C328E6  mov         ebx,ecx  <<< ebx = x
00007FF668C328E8  lea         rcx,[__D9F3C95A_byteswap@cpp (07FF668C44067h)]
00007FF668C328EF  call        __CheckForDebuggerJustMyCode (07FF668C31541h)
00007FF668C328F4  bswap       ebx      <<< ebx = byteswap(ebx)
00007FF668C328F6  mov         eax,ebx  <<< return ebx
00007FF668C328F8  add         rsp,20h
00007FF668C328FC  pop         rbx
00007FF668C328FD  ret
byteswap(unsigned __int64 x)
00007FF668C32910  push        rbx
00007FF668C32912  sub         rsp,20h
00007FF668C32916  mov         rbx,rcx  <<< rbx = x
00007FF668C32919  lea         rcx,[__D9F3C95A_byteswap@cpp (07FF668C44067h)]
00007FF668C32920  call        __CheckForDebuggerJustMyCode (07FF668C31541h)
00007FF668C32925  bswap       rbx      <<< rbx = byteswap(rbx)
00007FF668C32928  mov         rax,rbx  <<< return rbx
00007FF668C3292B  add         rsp,20h
00007FF668C3292F  pop         rbx
00007FF668C32930  ret

を確認できます。(<<< 部分は追記)

最適化なし(/Od)の場合
byteswap(unsigned __int64 x)
00007FF770983850  mov         qword ptr [rsp+8],rcx
00007FF770983855  push        rbp
00007FF770983856  push        rdi
00007FF770983857  sub         rsp,108h
00007FF77098385E  lea         rbp,[rsp+20h]
00007FF770983863  lea         rcx,[__D9F3C95A_byteswap@cpp (07FF77099A067h)]
00007FF77098386A  call        __CheckForDebuggerJustMyCode (07FF770981604h)
00007FF77098386F  nop
00007FF770983870  xor         eax,eax
00007FF770983872  cmp         eax,1
00007FF770983875  je          std::byteswap<unsigned __int64>+0F3h (07FF770983943h)
00007FF77098387B  mov         rax,qword ptr [x]
00007FF770983882  and         rax,0FFh
00007FF770983888  shl         rax,38h
00007FF77098388C  mov         rcx,qword ptr [x]
00007FF770983893  shr         rcx,8
00007FF770983897  and         rcx,0FFh
00007FF77098389E  shl         rcx,30h
00007FF7709838A2  or          rax,rcx
00007FF7709838A5  mov         rcx,qword ptr [x]
00007FF7709838AC  shr         rcx,10h
00007FF7709838B0  and         rcx,0FFh
00007FF7709838B7  shl         rcx,28h
00007FF7709838BB  or          rax,rcx
00007FF7709838BE  mov         rcx,qword ptr [x]
00007FF7709838C5  shr         rcx,18h
00007FF7709838C9  and         rcx,0FFh
00007FF7709838D0  shl         rcx,20h
00007FF7709838D4  or          rax,rcx
00007FF7709838D7  mov         rcx,qword ptr [x]
00007FF7709838DE  shr         rcx,20h
00007FF7709838E2  and         rcx,0FFh
00007FF7709838E9  shl         rcx,18h
00007FF7709838ED  or          rax,rcx
00007FF7709838F0  mov         rcx,qword ptr [x]
00007FF7709838F7  shr         rcx,28h
00007FF7709838FB  and         rcx,0FFh
00007FF770983902  shl         rcx,10h
00007FF770983906  or          rax,rcx
00007FF770983909  mov         rcx,qword ptr [x]
00007FF770983910  shr         rcx,30h
00007FF770983914  and         rcx,0FFh
00007FF77098391B  shl         rcx,8
00007FF77098391F  or          rax,rcx
00007FF770983922  mov         rcx,qword ptr [x]
00007FF770983929  shr         rcx,38h
00007FF77098392D  and         rcx,0FFh
00007FF770983934  or          rax,rcx
00007FF770983937  mov         qword ptr [rbp+0C0h],rax
00007FF77098393E  jmp         std::byteswap<unsigned __int64>+1B9h (07FF770983A09h)
00007FF770983943  xor         eax,eax
00007FF770983945  test        eax,eax
00007FF770983947  je          std::byteswap<unsigned __int64>+15Ah (07FF7709839AAh)
00007FF770983949  mov         rax,qword ptr [x]
00007FF770983950  and         rax,0FFh
00007FF770983956  shl         rax,18h
00007FF77098395A  mov         rcx,qword ptr [x]
00007FF770983961  shr         rcx,8
00007FF770983965  and         rcx,0FFh
00007FF77098396C  shl         rcx,10h
00007FF770983970  or          rax,rcx
00007FF770983973  mov         rcx,qword ptr [x]
00007FF77098397A  shr         rcx,10h
00007FF77098397E  and         rcx,0FFh
00007FF770983985  shl         rcx,8
00007FF770983989  or          rax,rcx
00007FF77098398C  mov         rcx,qword ptr [x]
00007FF770983993  shr         rcx,18h
00007FF770983997  and         rcx,0FFh
00007FF77098399E  or          rax,rcx
00007FF7709839A1  mov         qword ptr [rbp+0C8h],rax
00007FF7709839A8  jmp         std::byteswap<unsigned __int64>+1ABh (07FF7709839FBh)
00007FF7709839AA  xor         eax,eax
00007FF7709839AC  test        eax,eax
00007FF7709839AE  je          std::byteswap<unsigned __int64>+18Fh (07FF7709839DFh)
00007FF7709839B0  mov         rax,qword ptr [x]
00007FF7709839B7  and         rax,0FFh
00007FF7709839BD  shl         rax,8
00007FF7709839C1  mov         rcx,qword ptr [x]
00007FF7709839C8  shr         rcx,8
00007FF7709839CC  and         rcx,0FFh
00007FF7709839D3  or          rax,rcx
00007FF7709839D6  mov         qword ptr [rbp+0D0h],rax
00007FF7709839DD  jmp         std::byteswap<unsigned __int64>+19Dh (07FF7709839EDh)
00007FF7709839DF  mov         rax,qword ptr [x]
00007FF7709839E6  mov         qword ptr [rbp+0D0h],rax
00007FF7709839ED  mov         rax,qword ptr [rbp+0D0h]
00007FF7709839F4  mov         qword ptr [rbp+0C8h],rax
00007FF7709839FB  mov         rax,qword ptr [rbp+0C8h]
00007FF770983A02  mov         qword ptr [rbp+0C0h],rax
00007FF770983A09  mov         rax,qword ptr [rbp+0C0h]
00007FF770983A10  lea         rsp,[rbp+0E8h]
00007FF770983A17  pop         rdi
00007FF770983A18  pop         rbp
00007FF770983A19  ret

Clang での最適化

以下の方法でアセンブラ ファイルを作ります。

sample1.s生成
$ clang++ -O -S sample1.cpp

sample1.s の中のbswapを検索すると

sample1.s(抜粋)
〜〜〜(略)〜〜〜
	movq	%rax, %r14
	bswapl	%ebx
	leaq	-48(%rbp), %rdi
〜〜〜(略)〜〜〜
	movq	%rax, %r14
	bswapq	%rbx
	leaq	-48(%rbp), %rdi
〜〜〜(略)〜〜〜

を確認できます。

参考:専用関数を使用

sample2.cpp
#if __cplusplus >= 202002L || _MSVC_LANG >= 202002L
#include <bit>
#endif

#if __cpp_lib_byteswap < 202110L

#ifdef _MSC_VER
#include <cstdlib>
#endif /* MSVC */

#ifndef __has_builtin
#define __has_builtin(x)  0
#endif

#if !(__cplusplus >= 201103L || _MSVC_LANG >= 201103L)
#  ifndef constexpr
#    define constexpr
#  endif
#  ifndef noexcept
#    define noexcept
#  endif
#endif

#ifndef USE_BUILTIN_BSWAP
#define USE_BUILTIN_BSWAP  1
#endif

namespace std
{
#ifdef _MSC_VER
#pragma warning(push)
#pragma warning(disable: 4067 4293 4333 26452)
#endif /* MSVC */

    template <typename T>
    inline constexpr T byteswap(T x) noexcept
    {
        return (sizeof(T) == 8 ?
#if USE_BUILTIN_BSWAP && defined(_MSC_VER)
                T(_byteswap_uint64((unsigned __int64)x))
#elif USE_BUILTIN_BSWAP && __has_builtin(__builtin_bswap64)
                T(__builtin_bswap64(x))
#else /* C */
                (((x >> 000) & 0xff) << 070) |
                (((x >> 010) & 0xff) << 060) |
                (((x >> 020) & 0xff) << 050) |
                (((x >> 030) & 0xff) << 040) |
                (((x >> 040) & 0xff) << 030) |
                (((x >> 050) & 0xff) << 020) |
                (((x >> 060) & 0xff) << 010) |
                (((x >> 070) & 0xff) << 000)
#endif /* C */
                : sizeof(T) == 4 ?
#if USE_BUILTIN_BSWAP && defined(_MSC_VER)
                T(_byteswap_ulong((unsigned long)x))
#elif USE_BUILTIN_BSWAP && __has_builtin(__builtin_bswap32)
                T(__builtin_bswap32(x))
#else /* C */
                (((x >> 000) & 0xff) << 030) |
                (((x >> 010) & 0xff) << 020) |
                (((x >> 020) & 0xff) << 010) |
                (((x >> 030) & 0xff) << 000)
#endif /* C */
                : sizeof(T) == 2 ?
#if USE_BUILTIN_BSWAP && defined(_MSC_VER)
                T(_byteswap_ushort((unsigned short)x))
#elif USE_BUILTIN_BSWAP && __has_builtin(__builtin_bswap16)
                T(__builtin_bswap16(x))
#else  /* C */
                (((x >> 000) & 0xff) << 010) |
                (((x >> 010) & 0xff) << 000)
#endif /* C */
                : x);
    }

#ifdef _MSC_VER
#pragma warning(pop)
#endif /* MSVC */
} // std

#endif /* __cpp_lib_byteswap < 202110L */

// ----------------------------------------------------------------------------

#include <cstdint>
#include <iostream>
#include <string>

template <typename T>
std::string hexstr(T x)
{
    static const char hex[] = "0123456789ABCDEF";
    std::string s;

    for (size_t i = 0; i < sizeof(T); ++i)
    {
        if (i)
            s += ' ';
        s += hex[(x >> ((sizeof(T) - i) * 8 - 4)) & 15];
        s += hex[(x >> ((sizeof(T) - i) * 8 - 8)) & 15];
    }
    return s;
}

template <typename T>
void test(T x)
{

    T y = std::byteswap(x);

    std::cout << "x = " << hexstr(x) << std::endl
              << "y = " << hexstr(y) << std::endl;
}

int main(int argc, char** argv)
{
#ifdef __cplusplus
    std::cout << "__cplusplus = " << __cplusplus << std::endl;
#endif
#ifdef _MSVC_LANG
    std::cout << "_MSVC_LANG = " << _MSVC_LANG << std::endl;
#endif
#ifdef __cpp_lib_byteswap
    std::cout << "__cpp_lib_byteswap = " << __cpp_lib_byteswap << std::endl;
#endif

    for (int i = 1; i < argc; ++i)
    {
        uint64_t x = strtoull(argv[i], NULL, 16);

        std::cout << std::endl;

        if (x < (UINT64_C(1) << 8))
            test<uint8_t>(uint8_t(x));
        else if (x < (UINT64_C(1) << 16))
            test<uint16_t>(uint16_t(x));
        else if (x < (UINT64_C(1) << 32))
            test<uint32_t>(uint32_t(x));
        else
            test(x);
    }
    return 0;
}
1
1
0

Register as a new user and use Qiita more conveniently

  1. You get articles that match your needs
  2. You can efficiently read back useful information
  3. You can use dark theme
What you can do with signing up
1
1