int 型の bit_count()
Python 3.10 以降には int 型に bit_count() が追加されましたが、それより前の Python にはありません。
xはint型
bin(x).count('1') # 3.9 以前
x.bit_count() # 3.10 で追加
拡張モジュールの訓練材料としは良い例だと思うので bit_count のみのモジュールを作ってみました。
xはint型
bit_count.bit_count(x)
動作確認している環境
OS
macOS 12.2.1
C コンパイラ (Xcode 13.2.1)
Apple clang version 13.0.0 (clang-1300.0.29.30)
Target: x86_64-apple-darwin21.3.0
Python : /usr/bin/python3
Python 3.8.9 (default, Oct 26 2021, 07:25:54)
[Clang 13.0.0 (clang-1300.0.29.30)] on darwin
bit_count.c
#ifndef PY_SSIZE_T_CLEAN
#define PY_SSIZE_T_CLEAN
#endif /* PY_SSIZE_T_CLEAN */
#include <Python.h>
#if (defined(i386) || \
defined(__i386) || \
defined(__i386__) || \
defined(__i486__) || \
defined(__i586__) || \
defined(__i686__) || \
defined(__amd64) || \
defined(__amd64__) || \
defined(__x86_64) || \
defined(__x86_64__) || \
0)
#define ISA_X86 1
#else
#define ISA_X86 0
#endif
#ifndef USE_POPCOUNT
# if defined(__clang__) && defined(__has_builtin) && \
__has_builtin(__builtin_popcount)
# define HAS_POPCOUNT 1
# define USE_POPCOUNT 1
# endif
#else
# define HAS_POPCOUNT 0
#endif
/*
* Method: bit_count
*/
#if PyLong_SHIFT <= 16
inline static digit
stdc_bit_count(digit x)
{
x = (x & 0x5555) + ((x >> 1) & 0x5555);
x = (x & 0x3333) + ((x >> 2) & 0x3333);
x = (x & 0x0f0f) + ((x >> 4) & 0x0f0f);
x = (x & 0x00ff) + ((x >> 8) & 0x00ff);
return x;
}
#elif PyLong_SHIFT <= 32
inline static digit
stdc_bit_count(digit x)
{
x = (x & 0x55555555) + ((x >> 1) & 0x55555555);
x = (x & 0x33333333) + ((x >> 2) & 0x33333333);
x = (x & 0x0f0f0f0f) + ((x >> 4) & 0x0f0f0f0f);
x = (x & 0x00ff00ff) + ((x >> 8) & 0x00ff00ff);
x = (x & 0x0000ffff) + ((x >> 16) & 0x0000ffff);
return x;
}
#else
#error "unknown PyLong_SHIFT"
#endif
#if HAS_POPCOUNT
static int has_popcnt = 1;
inline static digit
extc_bit_count(digit x)
{
return __builtin_popcount(x);
}
#if ISA_X86
#define HAS_CHECK_POPCNT 1
typedef struct cpuid_result {
unsigned int eax; /* reg:00 */
unsigned int ecx; /* reg:01 */
unsigned int edx; /* reg:10 */
unsigned int ebx; /* reg:11 */
} cpuid_result;
inline static void
check_popcnt()
{
cpuid_result res;
res.eax = 1;
res.ecx = 0;
res.edx = 0;
res.ebx = 0;
__asm__ volatile (
"mov %0,%%eax;"
"mov %1,%%ecx;"
"xor %%edx,%%edx;"
"xor %%ebx,%%ebx;"
"cpuid;"
"mov %%eax,%0;"
"mov %%ecx,%1;"
"mov %%edx,%2;"
"mov %%ebx,%3;"
: "+m" (res.eax),
"+m" (res.ecx),
"+m" (res.edx),
"+m" (res.ebx)
: /**/
: "eax", "ecx", "edx", "ebx");
has_popcnt = (res.ecx >> 23) & 1;
}
#endif /* ISA_X86 */
#else /* !HAS_POPCOUNT */
static int has_popcnt = 0;
inline static digit
extc_bit_count(digit x)
{
return stdc_bit_count(x);
}
#endif /* HAS_POPCOUNT */
static PyObject *
method_bit_count(PyObject *unused, PyObject *const *args, Py_ssize_t size)
{
PyLongObject *x;
Py_ssize_t xsize;
Py_ssize_t i;
unsigned long count;
(void) unused;
if (size != 1) {
PyErr_SetString(PyExc_TypeError, "bit_count expected at 1 argument");
return NULL;
}
if (!PyLong_CheckExact(args[0])) {
PyErr_SetObject(PyExc_TypeError, args[0]);
return NULL;
}
x = (PyLongObject *) args[0];
xsize = Py_ABS(Py_SIZE(x));
count = 0;
if (has_popcnt) {
for (i = 0; i < xsize; i++)
count += extc_bit_count(x->ob_digit[i]);
}
else {
for (i = 0; i < xsize; i++)
count += stdc_bit_count(x->ob_digit[i]);
}
return PyLong_FromUnsignedLong(count);
}
/*
* Module: bit_count
*/
static PyMethodDef bit_count_methods[] = {
{"bit_count", (PyCFunction) method_bit_count, METH_FASTCALL,
"bit_count(x: int) -> int: same as bin(x).count('1')\n"},
{NULL, NULL, 0, NULL}, /* end */
};
static PyModuleDef bit_count_def = {
PyModuleDef_HEAD_INIT,
.m_name = "bit_count",
.m_doc = NULL,
.m_size = -1,
.m_methods = bit_count_methods,
};
PyMODINIT_FUNC
PyInit_bit_count(void)
{
#if HAS_CHECK_POPCNT
check_popcnt();
#endif /* HAS_CHECK_POPCNT */
#ifndef NDEBUG
printf("has_popcnt: %d\n", has_popcnt);
#endif
return PyModule_Create(&bit_count_def);
}
ビルドとテスト
ビルドは面倒の少ない distutils の setup.py 形式を使います。
setup.py
#!/usr/bin/env python3
import os
from distutils.core import setup, Extension
def getenv(name, defval=None):
if name in os.environ:
return os.environ[name]
return defval
DEBUG = getenv('DEBUG') in ('true', 'yes')
MAJOR_VERSION = 0
MINOR_VERSION = 1
DEBUG_VERSION = 0
VERSION = '%d.%d.%d' % (MAJOR_VERSION, MINOR_VERSION, DEBUG_VERSION)
DEFINE_MACROS = [
('MAJOR_VERSION', MAJOR_VERSION),
('MINOR_VERSION', MINOR_VERSION),
('DEBUG_VERSION', DEBUG_VERSION),
]
UNDEF_MACROS = []
EXTRA_COMPILE_ARGS = [
'-W',
'-Wall',
'-Wno-invalid-offsetof',
'-Wno-deprecated-declarations',
]
if DEBUG:
DEFINE_MACROS.append(('DEBUG', 1))
UNDEF_MACROS.append('NDEBUG')
EXTRA_COMPILE_ARGS.append('-O0')
pass
setup(name='bit_count',
version=VERSION,
description='',
ext_modules=[Extension(
name='bit_count',
define_macros=DEFINE_MACROS,
undef_macros=UNDEF_MACROS,
extra_compile_args=EXTRA_COMPILE_ARGS,
sources=['bit_count.c'])])
ターミナル上でのビルドとテスト
$ python3 setup.py build
running build
running build_ext
building 'bit_count' extension
creating build
creating build/temp.macosx-10.14-x86_64-3.8
clang -Wno-unused-result -Wsign-compare -Wunreachable-code -fno-common -dynamic -DNDEBUG -g -fwrapv -O3 -Wall -iwithsysroot/System/Library/Frameworks/System.framework/PrivateHeaders -iwithsysroot/Applications/Xcode.app/Contents/Developer/Library/Frameworks/Python3.framework/Versions/3.8/Headers -arch arm64 -arch x86_64 -Werror=implicit-function-declaration -DMAJOR_VERSION=0 -DMINOR_VERSION=1 -DDEBUG_VERSION=0 -I/Applications/Xcode.app/Contents/Developer/Library/Frameworks/Python3.framework/Versions/3.8/include/python3.8 -c bit_count.c -o build/temp.macosx-10.14-x86_64-3.8/bit_count.o -W -Wall -Wno-invalid-offsetof -Wno-deprecated-declarations
creating build/lib.macosx-10.14-x86_64-3.8
clang -bundle -undefined dynamic_lookup -arch arm64 -arch x86_64 -Wl,-headerpad,0x1000 build/temp.macosx-10.14-x86_64-3.8/bit_count.o -o build/lib.macosx-10.14-x86_64-3.8/bit_count.cpython-38-darwin.so
$
$ (cd build/lib.macosx-10.14-x86_64-3.8; python3)
Python 3.8.9 (default, Feb 18 2022, 07:45:34)
[Clang 13.1.6 (clang-1316.0.21.2)] on darwin
Type "help", "copyright", "credits" or "license" for more information.
>>> from bit_count import bit_count
>>> bit_count(0b1)
1
>>> bit_count(0b11)
2
>>> bit_count(0b110)
2
>>> bit_count(0b1101)
3
>>> bit_count(0b01101)
3
>>> bit_count(0b011011)
4
>>> bit_count(0b0110110)
4
>>> bit_count(0b01101101)
5
>>> bit_count(0b011011011)
6
>>> bit_count(0b0110110111)
7
>>> bit_count(0b011011011100)
7
>>> bit_count(0b01101101110011)
9
>>> bit_count(0b001101101110011)
9
>>> bit_count(0b1001101101110011)
10