1
1

More than 1 year has passed since last update.

Python 3.10 で追加された int 型の bit_count と同じ機能を 3.9 以前で使うための拡張モジュール

Last updated at Posted at 2022-03-16

int 型の bit_count()

Python 3.10 以降には int 型に bit_count() が追加されましたが、それより前の Python にはありません。

xはint型
bin(x).count('1') # 3.9 以前
x.bit_count()     # 3.10 で追加

拡張モジュールの訓練材料としは良い例だと思うので bit_count のみのモジュールを作ってみました。

xはint型
bit_count.bit_count(x)

動作確認している環境

OS
  macOS 12.2.1

C コンパイラ (Xcode 13.2.1)

  Apple clang version 13.0.0 (clang-1300.0.29.30)
  Target: x86_64-apple-darwin21.3.0

Python : /usr/bin/python3

  Python 3.8.9 (default, Oct 26 2021, 07:25:54)
  [Clang 13.0.0 (clang-1300.0.29.30)] on darwin

bit_count.c
#ifndef PY_SSIZE_T_CLEAN
#define PY_SSIZE_T_CLEAN
#endif /* PY_SSIZE_T_CLEAN */

#include <Python.h>

#if (defined(i386) ||       \
     defined(__i386) ||     \
     defined(__i386__) ||   \
     defined(__i486__) ||   \
     defined(__i586__) ||   \
     defined(__i686__) ||   \
     defined(__amd64)   ||  \
     defined(__amd64__) ||  \
     defined(__x86_64) ||   \
     defined(__x86_64__) || \
     0)
#define ISA_X86  1
#else
#define ISA_X86  0
#endif

#ifndef USE_POPCOUNT
#  if defined(__clang__) && defined(__has_builtin) && \
    __has_builtin(__builtin_popcount)
#    define HAS_POPCOUNT  1
#    define USE_POPCOUNT  1
#  endif
#else
#  define HAS_POPCOUNT  0
#endif

/*
 * Method: bit_count
 */

#if PyLong_SHIFT <= 16

inline static digit
stdc_bit_count(digit x)
{
    x = (x & 0x5555) + ((x >> 1) & 0x5555);
    x = (x & 0x3333) + ((x >> 2) & 0x3333);
    x = (x & 0x0f0f) + ((x >> 4) & 0x0f0f);
    x = (x & 0x00ff) + ((x >> 8) & 0x00ff);
    return x;
}

#elif PyLong_SHIFT <= 32

inline static digit
stdc_bit_count(digit x)
{
    x = (x & 0x55555555) + ((x >>  1) & 0x55555555);
    x = (x & 0x33333333) + ((x >>  2) & 0x33333333);
    x = (x & 0x0f0f0f0f) + ((x >>  4) & 0x0f0f0f0f);
    x = (x & 0x00ff00ff) + ((x >>  8) & 0x00ff00ff);
    x = (x & 0x0000ffff) + ((x >> 16) & 0x0000ffff);
    return x;
}

#else
#error "unknown PyLong_SHIFT"
#endif

#if HAS_POPCOUNT

static int has_popcnt = 1;
inline static digit
extc_bit_count(digit x)
{
    return __builtin_popcount(x);
}

#if ISA_X86
#define HAS_CHECK_POPCNT  1

typedef struct cpuid_result {
    unsigned int eax; /* reg:00 */
    unsigned int ecx; /* reg:01 */
    unsigned int edx; /* reg:10 */
    unsigned int ebx; /* reg:11 */
} cpuid_result;

inline static void
check_popcnt()
{
    cpuid_result res;

    res.eax = 1;
    res.ecx = 0;
    res.edx = 0;
    res.ebx = 0;

    __asm__ volatile (
        "mov %0,%%eax;"
        "mov %1,%%ecx;"
        "xor %%edx,%%edx;"
        "xor %%ebx,%%ebx;"
        "cpuid;"
        "mov %%eax,%0;"
        "mov %%ecx,%1;"
        "mov %%edx,%2;"
        "mov %%ebx,%3;"
        : "+m" (res.eax),
          "+m" (res.ecx),
          "+m" (res.edx),
          "+m" (res.ebx)
        : /**/
        : "eax", "ecx", "edx", "ebx");

    has_popcnt = (res.ecx >> 23) & 1;
}

#endif /* ISA_X86 */

#else  /* !HAS_POPCOUNT */

static int has_popcnt = 0;

inline static digit
extc_bit_count(digit x)
{
    return stdc_bit_count(x);
}

#endif /* HAS_POPCOUNT */

static PyObject *
method_bit_count(PyObject *unused, PyObject *const *args, Py_ssize_t size)
{
    PyLongObject *x;
    Py_ssize_t xsize;
    Py_ssize_t i;
    unsigned long count;

    (void) unused;

    if (size != 1) {
        PyErr_SetString(PyExc_TypeError, "bit_count expected at 1 argument");
        return NULL;
    }
    if (!PyLong_CheckExact(args[0])) {
        PyErr_SetObject(PyExc_TypeError, args[0]);
        return NULL;
    }

    x = (PyLongObject *) args[0];
    xsize = Py_ABS(Py_SIZE(x));

    count = 0;
    if (has_popcnt) {
        for (i = 0; i < xsize; i++)
            count += extc_bit_count(x->ob_digit[i]);
    }
    else {
        for (i = 0; i < xsize; i++)
            count += stdc_bit_count(x->ob_digit[i]);
    }
    return PyLong_FromUnsignedLong(count);
}

/*
 * Module: bit_count
 */

static PyMethodDef bit_count_methods[] = {
    {"bit_count", (PyCFunction) method_bit_count, METH_FASTCALL,
     "bit_count(x: int) -> int: same as bin(x).count('1')\n"},
    {NULL, NULL, 0, NULL}, /* end */
};

static PyModuleDef bit_count_def = {
    PyModuleDef_HEAD_INIT,
    .m_name = "bit_count",
    .m_doc = NULL,
    .m_size = -1,
    .m_methods = bit_count_methods,
};

PyMODINIT_FUNC
PyInit_bit_count(void)
{
#if HAS_CHECK_POPCNT
    check_popcnt();
#endif /* HAS_CHECK_POPCNT */
#ifndef NDEBUG
    printf("has_popcnt: %d\n", has_popcnt);
#endif
    return PyModule_Create(&bit_count_def);
}

ビルドとテスト

ビルドは面倒の少ない distutils の setup.py 形式を使います。

setup.py
#!/usr/bin/env python3

import os
from distutils.core import setup, Extension

def getenv(name, defval=None):
    if name in os.environ:
        return os.environ[name]
    return defval

DEBUG = getenv('DEBUG') in ('true', 'yes')

MAJOR_VERSION = 0
MINOR_VERSION = 1
DEBUG_VERSION = 0
VERSION = '%d.%d.%d' % (MAJOR_VERSION, MINOR_VERSION, DEBUG_VERSION)

DEFINE_MACROS = [
    ('MAJOR_VERSION', MAJOR_VERSION),
    ('MINOR_VERSION', MINOR_VERSION),
    ('DEBUG_VERSION', DEBUG_VERSION),
]
UNDEF_MACROS = []

EXTRA_COMPILE_ARGS = [
    '-W',
    '-Wall',
    '-Wno-invalid-offsetof',
    '-Wno-deprecated-declarations',
]

if DEBUG:
    DEFINE_MACROS.append(('DEBUG', 1))
    UNDEF_MACROS.append('NDEBUG')
    EXTRA_COMPILE_ARGS.append('-O0')
    pass

setup(name='bit_count',
      version=VERSION,
      description='',
      ext_modules=[Extension(
          name='bit_count',
          define_macros=DEFINE_MACROS,
          undef_macros=UNDEF_MACROS,
          extra_compile_args=EXTRA_COMPILE_ARGS,
          sources=['bit_count.c'])])
ターミナル上でのビルドとテスト
$ python3 setup.py build
running build
running build_ext
building 'bit_count' extension
creating build
creating build/temp.macosx-10.14-x86_64-3.8
clang -Wno-unused-result -Wsign-compare -Wunreachable-code -fno-common -dynamic -DNDEBUG -g -fwrapv -O3 -Wall -iwithsysroot/System/Library/Frameworks/System.framework/PrivateHeaders -iwithsysroot/Applications/Xcode.app/Contents/Developer/Library/Frameworks/Python3.framework/Versions/3.8/Headers -arch arm64 -arch x86_64 -Werror=implicit-function-declaration -DMAJOR_VERSION=0 -DMINOR_VERSION=1 -DDEBUG_VERSION=0 -I/Applications/Xcode.app/Contents/Developer/Library/Frameworks/Python3.framework/Versions/3.8/include/python3.8 -c bit_count.c -o build/temp.macosx-10.14-x86_64-3.8/bit_count.o -W -Wall -Wno-invalid-offsetof -Wno-deprecated-declarations
creating build/lib.macosx-10.14-x86_64-3.8
clang -bundle -undefined dynamic_lookup -arch arm64 -arch x86_64 -Wl,-headerpad,0x1000 build/temp.macosx-10.14-x86_64-3.8/bit_count.o -o build/lib.macosx-10.14-x86_64-3.8/bit_count.cpython-38-darwin.so
$
$ (cd build/lib.macosx-10.14-x86_64-3.8; python3)
Python 3.8.9 (default, Feb 18 2022, 07:45:34)
[Clang 13.1.6 (clang-1316.0.21.2)] on darwin
Type "help", "copyright", "credits" or "license" for more information.
>>> from bit_count import bit_count
>>> bit_count(0b1)
1
>>> bit_count(0b11)
2
>>> bit_count(0b110)
2
>>> bit_count(0b1101)
3
>>> bit_count(0b01101)
3
>>> bit_count(0b011011)
4
>>> bit_count(0b0110110)
4
>>> bit_count(0b01101101)
5
>>> bit_count(0b011011011)
6
>>> bit_count(0b0110110111)
7
>>> bit_count(0b011011011100)
7
>>> bit_count(0b01101101110011)
9
>>> bit_count(0b001101101110011)
9
>>> bit_count(0b1001101101110011)
10
1
1
1

Register as a new user and use Qiita more conveniently

  1. You get articles that match your needs
  2. You can efficiently read back useful information
  3. You can use dark theme
What you can do with signing up
1
1