1
0

More than 1 year has passed since last update.

dict の派生クラスを C 言語のモジュールとして作る

Last updated at Posted at 2022-03-07

dict の派生クラス remdict

こちらの記事の C 言語版(モジュール)です。

辞書の全てのキーが int 型で、$0$ 以上 $2^{31}$ 未満ならば、自動判定した除数の剰余による逆引き表(配列)を使って、キーに対応する値を取り出します。除数は記事「規則性の低い数列表に対する無検索逆引き」の方法を使ってを求めます。

辞書へのアクセスを監視して、更新目的のメソッドが呼び出された後に、読み出し目的のメソッドが呼び出されるとき、全てのキーが int 型で、$0$ 以上 $2^{31}$ 未満ならば、剰余による逆引き表(配列)を生成します。 逆引き表の生成はかなり遅い (「リードオンリー辞書 ModDict」は逆引き表生成を高速化)です。逆引き表があれば、キーの剰余を使って対応する値を取り出します。

更新と読み出しを繰り返すと、その度に逆引き表の生成(遅い)が発生するので効率が良くありません。よって、完成した辞書を使って繰り返しアクセスする処理(読み出し専用)に適しています。

追加メソッド
object.divisor()          # キーに対する除数(逆引き表が生成されていない場合は None)
object.modkeys()          # 剰余に対するキーの表
object.mkvalues()         # 剰余に対する辞書の値の表
object.remainder_index()  # 剰余に対する keys(), values(), items() へのインデックス表
object.suspended()        # 逆引き表の生成を中断している場合は true
object.suspend()          # 逆引き表の生成を中断
object.resume()           # 逆引き表の生成を再開 {suspend()した場合}
@classmethod
remdict.forindex(keys)    # keys に対して配列番号を 0 から割り当ててオブジェクトを作る

動作確認している環境

OS
  macOS 12.2.1

C コンパイラ (Xcode 13.2.1)

  Apple clang version 13.0.0 (clang-1300.0.29.30)
  Target: x86_64-apple-darwin21.3.0

Python : /usr/bin/python3

  Python 3.8.9 (default, Oct 26 2021, 07:25:54)
  [Clang 13.0.0 (clang-1300.0.29.30)] on darwin

です。

C 言語による Python モジュール

Python スクリプトではないので、C コンパイラでコンパイルする必要があります。

remdict モジュールのソース (やや長いので折りたたみ)
remdict.c
#ifndef PY_SSIZE_T_CLEAN
#define PY_SSIZE_T_CLEAN
#endif /* PY_SSIZE_T_CLEAN */

#include <Python.h>

#if PY_VERSION_HEX >= 0x03090000
#include <genericaliasobject.h>
#endif
#ifndef Py_TPFLAGS_MAPPING
#define Py_TPFLAGS_MAPPING 0
#endif

#define REMDICT_USE_SUPER       0
#define REMDICT_USE_LONGOBJECT  1

/*
 *
 */

#if !defined(NDEBUG) && (_DEBUG || DEBUG)
#define REMDICT_DEBUG  1
#define REMDICT_PRINT(x)  printf x
#define REMDICT_LOG(x)    { printf("%s(%d): ", __FILE__, __LINE__); printf x; putchar('\n'); }
#else
#define REMDICT_DEBUG  0
#define REMDICT_PRINT(x)
#define REMDICT_LOG(x)
#endif

#define UNUSED(x)  ((void)x)

inline static int
IsNull(PyObject *object)
{
    return !object || (object == Py_None);
}

#define SetNull(p) SetNULL((void **) (p))
inline static void
SetNULL(void **buffer)
{
    void *p = *buffer;
    buffer = NULL;

    PyMem_RawFree(p);
}

inline static PyObject *
IncRef(PyObject *object)
{
    Py_INCREF(object);
    return object;
}

inline static void
ClearObject(PyObject **object)
{
    PyObject *p = *object;

    *object = NULL;
    Py_XDECREF(p);
}

inline static PyObject *
SetObject(PyObject **oldobj, PyObject *newobj)
{
    PyObject *old = *oldobj;

    Py_INCREF(newobj);
    Py_XDECREF(old);
    *oldobj = newobj;
    return newobj;
}

inline static PyObject *
SetNone(PyObject **oldobj)
{
    return SetObject(oldobj, Py_None);
}

inline static PyObject *
NewNone()
{
    return IncRef(Py_None);
}

inline static PyObject *
SuperDesc(PyObject *self, const char *name)
{
    PyTypeObject *type = (PyType_Check(self) ? (PyTypeObject *) self : Py_TYPE(self));
    return PyDict_GetItemString(type->tp_base->tp_dict, name);
}

static PyObject *
SuperF(PyObject *self, const char *name, PyObject *const *args, Py_ssize_t size)
{
    PyObject *desc = SuperDesc(self, name);
    PyMethodDef *methdef;
    _PyCFunctionFast func;

    if (!desc)
        return NULL;
    methdef = ((PyMethodDescrObject *) desc)->d_method;
    func = ((_PyCFunctionFast) methdef->ml_meth);
    return func(self, args, size);
}

#if 0
static PyObject *
SuperFK(PyObject *self, const char *name, PyObject *const *args, Py_ssize_t size, PyObject *kwargs)
{
    PyObject *desc = SuperDesc(self, name);
    PyMethodDef *methdef;
    _PyCFunctionFastWithKeywords func;

    if (!desc)
        return NULL;
    methdef = ((PyMethodDescrObject *) desc)->d_method;
    func = ((_PyCFunctionFastWithKeywords) methdef->ml_meth);
    return func(self, args, size, kwargs);
}
#endif

#define SuperO  SuperV
#define SuperN  SuperV
static PyObject *
SuperV(PyObject *self, const char *name, PyObject *args)
{
    PyObject *desc = SuperDesc(self, name);
    PyMethodDef *methdef;
    PyCFunction func;

    if (!desc)
        return NULL;
    methdef = ((PyMethodDescrObject *) desc)->d_method;
    func = ((PyCFunction) methdef->ml_meth);
    return func(self, args);
}

static PyObject *
SuperVK(PyObject *self, const char *name, PyObject *args, PyObject *kwargs)
{
    PyObject *desc = SuperDesc(self, name);
    PyMethodDef *methdef;
    PyCFunctionWithKeywords func;

    if (!desc)
        return NULL;
    methdef = ((PyMethodDescrObject *) desc)->d_method;
    func = ((PyCFunctionWithKeywords) methdef->ml_meth);
    return func(self, args, kwargs);
}

/*
 * Object: remdict
 */

typedef struct RemDictObject {
    PyDictObject super;
    uint8_t suspend;
    uint8_t modified;
    digit divisor;
    PyObject *divisor_;
    PyObject *keys;
    PyObject *values;
    digit *remainder;
    digit *key_cache;
} RemDictObject;

enum {
    REMDICT_KEY_ERROR = -2,
    REMDICT_KEY_FAILED = -1,
};

/* ******** */

static PyTypeObject *get_remdict_type();

static int
remdict_create_table(RemDictObject *self)
{
    PyObject *dict_keys = NULL, *dict_vals = NULL;
    PyObject *mod_keys = NULL, *mod_vals = NULL;
    PyObject *mod_key = NULL;
    PyObject *key = NULL, *val = NULL;
    PyObject *divisor_ = NULL;

    digit divisor, divmax, digmax;
    digit *mod_table = NULL;
    digit *remainder = NULL;
    digit *key_cache = NULL;
    Py_ssize_t dict_size;
    Py_ssize_t dict_pos, rem_pos, key_pos, mod_count;
    long long key_num;
    int overflow, key_overflow;
    int injective;
    int res = -1;

    digmax = 0x7fffffff;

    self->modified = 0;
    self->divisor = 0;
    SetNone(&self->divisor_);
    SetNone(&self->keys);
    SetNone(&self->values);
    SetNull(&self->remainder);
    SetNull(&self->key_cache);

    if (!(dict_size = PyDict_Size((PyObject *) self)))
        return 0;
    if (dict_size > digmax)
        return 0;
    if (!(dict_keys = PyTuple_New(dict_size)))
        goto error;
    if (!(dict_vals = PyTuple_New(dict_size)))
        goto error;
    if (!(mod_table = PyMem_RawMalloc(dict_size * sizeof(digit))))
        goto error;

    overflow = 0;
    divmax = dict_size;

    dict_pos = rem_pos = 0;
    while (PyDict_Next((PyObject *) self, &dict_pos, &key, &val)) {
        if (!PyLong_CheckExact(key)) {
            res = 0;
            goto error;
        }
        if (!overflow) {
            key_num = PyLong_AsLongLongAndOverflow(key, &key_overflow);
            if (key_overflow > 0)
                goto key_overflow;
            if ((key_overflow < 0) || (key_num < 0)) {
                res = 0;
                goto error;
            }
            if (key_num <= digmax) {
                key_num = Py_MIN(key_num, digmax);
                divmax = Py_MAX(divmax, ((digit) key_num));
            } else {
            key_overflow:
                overflow = !0;
                divmax = digmax;
            }
        }

        assert(rem_pos < dict_size);
        PyTuple_SET_ITEM(dict_keys, rem_pos, IncRef(key));
        PyTuple_SET_ITEM(dict_vals, rem_pos, IncRef(val));
        rem_pos++;

        key = val = NULL;
    }
    if (!divmax) {
        res = 0;
        goto error;
    }

    injective = 0;
    for (divisor = (digit) dict_size; divisor <= divmax; divisor++) {
        ClearObject(&divisor_);
        if (!(divisor_ = PyLong_FromUnsignedLong(divisor)))
            goto error;

        ClearObject(&mod_keys);
        if (!(mod_keys = PyTuple_New(divisor)))
            goto error;
        for (key_pos = 0; key_pos < dict_size; key_pos++) {
            key = IncRef(PyTuple_GET_ITEM(dict_keys, key_pos));
            if (!(mod_key = PyNumber_Remainder(key, divisor_)))
                goto error;
            mod_table[key_pos] = rem_pos = PyLong_AsUnsignedLong(mod_key);
            PyTuple_SET_ITEM(mod_keys, rem_pos, IncRef(key));
            ClearObject(&key);
            ClearObject(&mod_key);
        }

        mod_count = 0;
        for (key_pos = 0; key_pos < divisor; key_pos++)
            if (PyTuple_GET_ITEM(mod_keys, key_pos))
                mod_count++;
        if ((injective = (mod_count == dict_size)))
            break;
    }
    if (!injective) {
        res = 0;
        goto error;
    }

    if (!(mod_vals = PyTuple_New(divisor)))
        goto error;
    if (!(remainder = PyMem_RawMalloc(divisor * sizeof(digit))))
        goto error;
    for (rem_pos = 0; rem_pos < divisor; rem_pos++)
        remainder[rem_pos] = divisor;
    for (key_pos = 0; key_pos < dict_size; key_pos++) {
        rem_pos = mod_table[key_pos];
        val = PyTuple_GET_ITEM(dict_vals, key_pos);
        PyTuple_SET_ITEM(mod_vals, rem_pos, IncRef(val));
        remainder[rem_pos] = key_pos;
    }
    val = NULL;

    if (!overflow) {
        if (!(key_cache = PyMem_RawMalloc(divisor * sizeof(digit))))
            goto error;
        for (rem_pos = 0; rem_pos < divisor; rem_pos++)
            key_cache[rem_pos] = ~0;
        for (key_pos = 0; key_pos < dict_size; key_pos++)
            key_cache[mod_table[key_pos]] = PyLong_AsLongLong(PyTuple_GET_ITEM(dict_keys, key_pos));
    }

    self->divisor = divisor;
    self->divisor_ = divisor_;
    self->keys = mod_keys;
    self->values = mod_vals;
    self->remainder = remainder;
    self->key_cache = key_cache;

    res = 0;
    goto success;

error:
    Py_XDECREF(divisor_);
    Py_XDECREF(mod_keys);
    Py_XDECREF(mod_vals);
    PyMem_RawFree(remainder);
    PyMem_RawFree(key_cache);
success:
    Py_XDECREF(dict_keys);
    Py_XDECREF(dict_vals);
    Py_XDECREF(mod_key);
    Py_XDECREF(key);
    Py_XDECREF(val);

    PyMem_RawFree(mod_table);
    return res;
}

inline static int
remdict_update_table(RemDictObject *self)
{
    return !self->modified ? 0 : remdict_create_table(self);
}

static int
remdict_check_remainder(RemDictObject *self, PyObject *key)
{
    PyObject *mod;
    PyObject *mod_key;
    digit rem, lkey;
    long long ikey;
    int overflow;

    if (self->key_cache) {
        if (REMDICT_USE_LONGOBJECT) {
            if (Py_SIZE(key) < 1)
                return REMDICT_KEY_FAILED;
            if (Py_SIZE(key) > 2)
                return REMDICT_KEY_FAILED;
            lkey = ((PyLongObject *) key)->ob_digit[0];
            if (Py_SIZE(key) == 2) {
                ikey = lkey | (((PyLongObject *) key)->ob_digit[1] << PYLONG_BITS_IN_DIGIT);
                if ((ikey >> 32))
                    return REMDICT_KEY_FAILED;
                lkey = (digit) ikey;
            }
            rem = lkey % self->divisor;
            if (lkey != self->key_cache[rem])
                return REMDICT_KEY_FAILED;
        }
        else {
            ikey = PyLong_AsLongLongAndOverflow(key, &overflow);
            if (overflow)
                return REMDICT_KEY_FAILED;
            rem = ikey % self->divisor;
            if (ikey != self->key_cache[rem])
                return REMDICT_KEY_FAILED;
        }
        return (int) rem;
    }
    else {
        if (!(mod = PyNumber_Remainder(key, self->divisor_)))
            return REMDICT_KEY_ERROR;
        rem = PyLong_AsUnsignedLong(mod);
        Py_DECREF(mod);

        mod_key = PyTuple_GET_ITEM(self->keys, rem);
        if (IsNull(mod_key))
            return REMDICT_KEY_FAILED;
        if (!PyObject_RichCompareBool(key, mod_key, Py_EQ))
            return REMDICT_KEY_FAILED;
        return (int) rem;
    }
}

inline static int
remdict_check_key(RemDictObject *self, PyObject *key)
{
    if (!PyLong_CheckExact(key))
        return REMDICT_KEY_ERROR;
    if (self->suspend)
        return REMDICT_KEY_ERROR;
    if (remdict_update_table(self) < 0)
        return REMDICT_KEY_ERROR;
    if (!self->divisor)
        return REMDICT_KEY_ERROR;
    return remdict_check_remainder(self, key);
}

inline static PyObject *
remdict_get_remainder_value(RemDictObject *self, int rem)
{
    return IncRef(PyTuple_GET_ITEM(self->values, rem));
}

static PyObject *
remdict_from_dict(PyObject *dict)
{
    PyTypeObject *type = get_remdict_type();
    PyObject *newobj = NULL;
    PyObject *kwargs = NULL;
    PyObject *args = NULL;

    if (!dict)
        return NULL;
    if (!(args = PyTuple_New(1)))
        goto error;
    if (!(kwargs = PyDict_New()))
        goto error;
    PyTuple_SET_ITEM(args, 0, IncRef(dict));
    if (!(newobj = (type->tp_new)(type, args, kwargs)))
        goto error;
    if ((Py_TYPE(newobj)->tp_init)((PyObject *) newobj, args, kwargs) != 0)
        goto error;
    Py_XDECREF(args);
    Py_XDECREF(kwargs);
    return IncRef(newobj);

error:
    Py_XDECREF(args);
    Py_XDECREF(kwargs);
    Py_XDECREF(newobj);
    return NULL;
}

static PyObject *
remdict_new()
{
    PyTypeObject *type = get_remdict_type();
    PyObject *obj = NULL;
    PyObject *self = NULL;
    PyObject *args = NULL;
    PyObject *kwargs = NULL;

    if (!(args = PyTuple_New(0)))
        goto error;
    if (!(kwargs = PyDict_New()))
        goto error;
    if (!(self = (type->tp_new)(type, args, kwargs)))
        goto error;
    if ((type->tp_init)(self, args, kwargs) < 0)
        goto error;
    obj = self;
    goto success;
error:
    Py_XDECREF(self);
success:
    Py_XDECREF(args);
    Py_XDECREF(kwargs);
    return obj;
}

/* ******** */

static int
remdict_init(RemDictObject *self, PyObject *args, PyObject *kwargs)
{
    int result;

    self->suspend = 0;
    self->modified = !0;
    self->divisor = 0;
    self->divisor_ = NewNone();
    self->keys = NewNone();
    self->values = NewNone();
    self->remainder = NULL;
    self->key_cache = NULL;
    result = (Py_TYPE(self)->tp_base->tp_init)((PyObject *) self, args, kwargs);
    if (result >= 0)
        result = remdict_update_table(self);
    return result;
}

static void
remdict_finalize(RemDictObject *self)
{
    Py_XDECREF(self->divisor_);
    Py_XDECREF(self->keys);
    Py_XDECREF(self->values);
    PyMem_RawFree(self->remainder);
    PyMem_RawFree(self->key_cache);
}

/* ******** */

static PyObject *
remdict_divisor(RemDictObject *self)
{
    remdict_update_table(self);
    return IncRef(self->divisor_);
}

static PyObject *
remdict_create_mkvtable(PyObject *table, PyObject *defval)
{
    PyObject *rval = NULL;
    PyObject *obj = NULL;
    Py_ssize_t size, pos;

    size = PyTuple_Size(table);
    if (!(rval = PyTuple_New(size)))
        return NULL;
    for (pos = 0; pos < size; pos++) {
        obj = PyTuple_GET_ITEM(table, pos);
        if (IsNull(obj))
            obj = defval;
        PyTuple_SET_ITEM(rval, pos, IncRef(obj));
    }
    return rval;
}

static PyObject *
remdict_modkeys(RemDictObject *self, PyObject *args)
{
    PyObject *defval = Py_None;

    if (!PyArg_ParseTuple(args, "|O", &defval))
        return NULL;
    if ((remdict_update_table(self) < 0) || !self->divisor)
        return PyTuple_New(0);
    return remdict_create_mkvtable(self->keys, defval);
}

static PyObject *
remdict_mkvalues(RemDictObject *self, PyObject *args)
{
    PyObject *defval = Py_None;

    if (!PyArg_ParseTuple(args, "|O", &defval))
        return NULL;
    if ((remdict_update_table(self) < 0) || !self->divisor)
        return PyTuple_New(0);
    return remdict_create_mkvtable(self->values, defval);
}

static PyObject *
remdict_remainder_index(RemDictObject *self, PyObject *args)
{
    PyObject *defval = Py_None;
    PyObject *rval = NULL;
    PyObject *remainder;
    Py_ssize_t rem_pos;
    digit index;

    if (!PyArg_ParseTuple(args, "|O", &defval))
        return NULL;
    if ((remdict_update_table(self) < 0) || !self->divisor)
        return PyTuple_New(0);
    if (!(rval = PyTuple_New(self->divisor)))
        goto error;
    for (rem_pos = 0; rem_pos < self->divisor; rem_pos++) {
        index = self->remainder[rem_pos];
        if (index >= self->divisor)
            remainder = IncRef(defval);
        else if (!(remainder = PyLong_FromUnsignedLong(index)))
            goto error;
        PyTuple_SET_ITEM(rval, rem_pos, remainder);
    }
    return rval;

error:
    Py_XDECREF(rval);
    return NULL;
}

static PyObject *
remdict_suspended(RemDictObject *self)
{
    return PyBool_FromLong(self->suspend);
}

static PyObject *
remdict_suspend(RemDictObject *self)
{
    self->suspend = !0;
    return NewNone();
}

static PyObject *
remdict_resume(RemDictObject *self)
{
    self->suspend = 0;
    return NewNone();
}

static PyObject *
remdict_forindex(PyObject *klass, PyObject *iterable)
{
    PyObject *obj = NULL;
    PyObject *self = NULL;
    PyObject *it = NULL;
    PyObject *key = NULL;
    PyObject *val = NULL;
    Py_ssize_t pos;

    UNUSED(klass);

    if (!(self = remdict_new()))
        goto error;
    if (!(it = PyObject_GetIter(iterable)))
        goto error;
    for (pos = 0; (key = PyIter_Next(it)); pos++) {
        if (!(val = PyLong_FromSsize_t(pos)))
            goto error;
        if (PyDict_SetItem(self, key, val) < 0)
            goto error;
        ClearObject(&key);
        ClearObject(&val);
    }
    ((RemDictObject *) self)->modified = !0;
    obj = self;
    goto success;
error:
    Py_XDECREF(self);
success:
    Py_XDECREF(key);
    Py_XDECREF(val);
    Py_XDECREF(it);
    return obj;
}

/* override */

static PyObject *
remdict___contains__(RemDictObject *self, PyObject *key)
{
    int res = remdict_check_key(self, key);
    if (res >= 0)
        return IncRef(Py_True);
    if (res == REMDICT_KEY_FAILED)
        return IncRef(Py_False);
    if (REMDICT_USE_SUPER)
        return SuperO((PyObject *) self, "__contains__", key);
    res = PyDict_Contains((PyObject *) self, key);
    return (res >= 0) ? IncRef(res ? Py_True : Py_False) : NULL;
}

static PyObject *
remdict___getitem__(RemDictObject *self, PyObject *key)
{
    int rem = remdict_check_key(self, key);

    if (rem >= 0)
        return remdict_get_remainder_value(self, rem);
    if (rem == REMDICT_KEY_FAILED) {
        PyErr_SetObject(PyExc_KeyError, key);
        return NULL;
    }
    if (REMDICT_USE_SUPER)
        return SuperO((PyObject *) self, "__getitem__", key);
    return PyDict_GetItem((PyObject *) self, key);
}

static PyObject *
remdict_operator_or(RemDictObject *self, PyObject *other)
{ /* __or__ */
    PyNumberMethods *nb;
    binaryfunc op;

    if ((nb = Py_TYPE(self)->tp_base->tp_as_number) && (op = nb->nb_or))
        return remdict_from_dict(op((PyObject *) self, other));
    PyErr_SetString(PyExc_TypeError, "unsupported operand type for |: remdict(dict)");
    return NULL;
}

static PyObject *
remdict_operator_ior(RemDictObject *self, PyObject *other)
{ /* __ior__ */
    PyNumberMethods *nb;
    binaryfunc op;

    if ((nb = Py_TYPE(self)->tp_base->tp_as_number) && (op = nb->nb_inplace_or)) {
        self->modified = !0;
        return op((PyObject *) self, other);
    }
    PyErr_SetString(PyExc_TypeError, "unsupported operand type for |=: remdict(dict)");
    return NULL;
}

static int
remdict_sequence_contains(RemDictObject *self, PyObject *key)
{ /* __[key] */
    PySequenceMethods *sq;
    objobjproc proc;
    int res;

    if ((res = remdict_check_key(self, key)) != REMDICT_KEY_ERROR)
        return (res >= 0);
    if ((sq = Py_TYPE(self)->tp_base->tp_as_sequence) && (proc = sq->sq_contains))
        return proc((PyObject *) self, key);
    PyErr_SetString(PyExc_TypeError, "'remdict(dict)' object is not subscriptable");
    return 0;
}

static Py_ssize_t
remdict_length(RemDictObject *self)
{
    return (Py_TYPE(self)->tp_base->tp_as_mapping->mp_length)((PyObject *) self);
}

static PyObject *
remdict_subscript(RemDictObject *self, PyObject *key)
{
    int rem = remdict_check_key(self, key);

    if (rem >= 0)
        return remdict_get_remainder_value(self, rem);
    if (rem == REMDICT_KEY_FAILED) {
        PyErr_SetObject(PyExc_KeyError, key);
        return NULL;
    }
    return (Py_TYPE(self)->tp_base->tp_as_mapping->mp_subscript)((PyObject *) self, key);
}

static int
remdict_ass_subscript(RemDictObject *self, PyObject *key, PyObject *item)
{
    self->modified = !0;
    return (Py_TYPE(self)->tp_base->tp_as_mapping->mp_ass_subscript)((PyObject *) self, key, item);
}

static PyObject *
remdict_clear(RemDictObject *self, PyObject *args)
{
    self->modified = !0;
    if (REMDICT_USE_SUPER)
        return SuperN((PyObject *) self, "clear", args);
    PyDict_Clear((PyObject *) self);
    return NewNone();
}

static PyObject *
remdict_get(RemDictObject *self, PyObject *args)
{
    PyObject *key = NULL;
    PyObject *defval = Py_None;
    PyObject *item;
    int rem;

    if (!PyArg_ParseTuple(args, "O|O", &key, &defval))
        return NULL;
    if ((rem = remdict_check_key(self, key)) >= 0)
        return remdict_get_remainder_value(self, rem);
    if (rem == REMDICT_KEY_FAILED)
        return IncRef(defval);
    if (!(item = PyDict_GetItem((PyObject *) self, key)))
        item = defval;
    return IncRef(item);
}

static PyObject *
remdict_pop(RemDictObject *self, PyObject *const *args, Py_ssize_t size)
{
    self->modified = !0;
    return SuperF((PyObject *) self, "pop", args, size);
}

static PyObject *
remdict_popitem(RemDictObject *self, PyObject *args)
{
    self->modified = !0;
    return SuperN((PyObject *) self, "popitem", args);
}

static PyObject *
remdict_setdefault(RemDictObject *self, PyObject *const *args, Py_ssize_t size)
{
    PyObject *key = Py_None;
    PyObject *val = Py_None;
    int rem;

    if (size == 0) {
        PyErr_SetString(PyExc_TypeError, "setdefault expected at least 1 argument");
        return NULL;
    }
    if (size > 2) {
        PyErr_SetString(PyExc_TypeError, "setdefault expected at most 2 arguments");
        return NULL;
    }

    if ((rem = remdict_check_key(self, args[0])) >= 0)
        return remdict_get_remainder_value(self, rem);

    self->modified = !0;
    if (REMDICT_USE_SUPER)
        return SuperF((PyObject *) self, "setdefault", args, size);
    key = args[0];
    if (size > 1)
        val = args[1];
    return PyDict_SetDefault((PyObject *) self, key, val);
}

static PyObject *
remdict_update(RemDictObject *self, PyObject *args, PyObject *kwargs)
{
    self->modified = !0;
    return SuperVK((PyObject *) self, "update", args, kwargs);
}

static PyObject *
remdict_copy(RemDictObject *self, PyObject *args)
{
    return remdict_from_dict(SuperN((PyObject *) self, "copy", args));
}

static PyObject *
remdict_fromkeys(PyObject *klass, PyObject *const *args, Py_ssize_t size)
{
    PyObject *self;

    if ((self = SuperF(klass, "fromkeys", args, size)))
        ((RemDictObject *) self)->modified = !0;
    return self;
}

/*
 * Type: remdict
 */

static PyNumberMethods remdict_as_number = {
    .nb_or = (binaryfunc) remdict_operator_or,
    .nb_inplace_or = (binaryfunc) remdict_operator_ior,
};

static PySequenceMethods remdict_as_sequence = {
    .sq_contains = (objobjproc) remdict_sequence_contains,
};

static PyMappingMethods remdict_as_mapping = {
    (lenfunc) remdict_length,
    (binaryfunc) remdict_subscript,
    (objobjargproc) remdict_ass_subscript,
};

static PyMethodDef remdict_methods[] = {
    {"__contains__", (PyCFunction) remdict___contains__, METH_O, NULL},
    {"__getitem__", (PyCFunction) remdict___getitem__, METH_O, NULL},

    {"divisor", (PyCFunction) remdict_divisor, METH_NOARGS, NULL},
    {"modkeys", (PyCFunction) remdict_modkeys, METH_VARARGS, NULL},
    {"mkvalues", (PyCFunction) remdict_mkvalues, METH_VARARGS, NULL},
    {"remainder_index", (PyCFunction) remdict_remainder_index, METH_VARARGS, NULL},
    {"suspended", (PyCFunction) remdict_suspended, METH_NOARGS, NULL},
    {"suspend", (PyCFunction) remdict_suspend, METH_NOARGS, NULL},
    {"resume", (PyCFunction) remdict_resume, METH_NOARGS, NULL},
    {"forindex", (PyCFunction) remdict_forindex, METH_O | METH_CLASS, NULL},

    {"clear", (PyCFunction) remdict_clear, METH_NOARGS, NULL},
    {"get", (PyCFunction) remdict_get, METH_VARARGS, NULL},
    {"pop", (PyCFunction) remdict_pop, METH_FASTCALL, NULL},
    {"popitem", (PyCFunction) remdict_popitem, METH_NOARGS, NULL},
    {"setdefault", (PyCFunction) remdict_setdefault, METH_FASTCALL, NULL},
    {"update", (PyCFunction) remdict_update, METH_VARARGS | METH_KEYWORDS, NULL},
    {"copy", (PyCFunction) remdict_copy, METH_NOARGS, NULL},
    {"fromkeys", (PyCFunction) remdict_fromkeys, METH_FASTCALL | METH_CLASS, NULL},

#if PY_VERSION_HEX >= 0x03090000
    {"__class_getitem__", (PyCFunction) Py_GenericAlias, METH_O | METH_CLASS, NULL},
#endif

    {NULL, NULL, 0, NULL}, /* end */
};

static PyTypeObject RemDictType = {
    PyVarObject_HEAD_INIT(NULL, 0)
    .tp_name = "remdict.remdict",
    .tp_doc = "remdict object",
    .tp_basicsize = sizeof(RemDictObject),
    .tp_itemsize = 0,
    .tp_flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_DICT_SUBCLASS | Py_TPFLAGS_MAPPING,
    .tp_base = &PyDict_Type,

    .tp_init = (initproc) remdict_init,
    .tp_finalize = (destructor) remdict_finalize,
    .tp_as_number = &remdict_as_number,
    .tp_as_sequence = &remdict_as_sequence,
    .tp_as_mapping = &remdict_as_mapping,
    .tp_methods = remdict_methods,
};

inline static PyTypeObject *
get_remdict_type()
{
    return &RemDictType;
}


/*
 * Module: remdict
 */

typedef struct RemDictState {
    PyObject *error;
} RemDictState;

static PyModuleDef remdict_def = {
    PyModuleDef_HEAD_INIT,
    .m_name = "remdict",
    .m_doc = "extension module for the dict subclass.",
    .m_size = -1,
};

PyMODINIT_FUNC
PyInit_remdict(void)
{
    PyObject *module;

    if (PyType_Ready(get_remdict_type()) < 0)
        return NULL;
    if (!(module = PyModule_Create(&remdict_def)))
        return NULL;

    Py_INCREF(&RemDictType);
    if (PyModule_AddObject(module, "remdict", (PyObject *) &RemDictType) < 0) {
        Py_DECREF(module);
        Py_DECREF(&RemDictType);
        return NULL;
    }
    return module;
}

/*
 * Local Variables:
 * c-file-style: "PEP7"
 * End:
 */
.emacs(PEP7向け:真面目に作ってません。良いのがあったら誰か教えて)
(defconst my-c-style-PEP7
  '((indent-tabs-mode . nil)
    (tab-width . 4)
    (c-basic-offset . 4)
    (c-offsets-alist
     . ((substatement-open . 0)
	(statement-case-open . 4)
	(label . 0)))
    (c-hanging-braces-alist
     . ((namespace-open . (after))
	(class-open . (after))
	(brace-list-open . (after))
	(substatement-open . (after))
	))
    (c-cleanup-list
     . (defun-close-semi)))
  "Python PEP7 C Style.")

(c-add-style "PEP7" my-c-style-PEP7)

ビルド: distutils の setup.py を使う

あれこれインストールが必要がない setup.py を使います。内容は以下のとおり。

setup.py
setup.py
#!/usr/bin/env python3

import os
from distutils.core import setup, Extension


def getenv(name, defval=None):
    if name in os.environ:
        return os.environ[name]
    return defval


DEBUG = getenv('DEBUG') in ('true', 'yes')

MAJOR_VERSION = 0
MINOR_VERSION = 1
DEBUG_VERSION = 0
VERSION = '%d.%d.%d' % (MAJOR_VERSION, MINOR_VERSION, DEBUG_VERSION)

DEFINE_MACROS = [
    ('MAJOR_VERSION', MAJOR_VERSION),
    ('MINOR_VERSION', MINOR_VERSION),
    ('DEBUG_VERSION', DEBUG_VERSION),
]
UNDEF_MACROS = []

if DEBUG:
    DEFINE_MACROS.append(('DEBUG', 1))
    UNDEF_MACROS.append('NDEBUG')

EXTRA_COMPILE_ARGS = [
    '-W',
    '-Wall',
    '-Wno-invalid-offsetof',
    '-Wno-deprecated-declarations',
]

setup(name='remdict',
      version=VERSION,
      description='',
      ext_modules=[Extension(
          name='remdict',
          define_macros=DEFINE_MACROS,
          undef_macros=UNDEF_MACROS,
          extra_compile_args=EXTRA_COMPILE_ARGS,
          sources=['remdict.c'])])

環境変数 DEBUG=true でデバッグ版を生成します。

ビルド
$ python3 setup.py build
running build
running build_ext
building 'remdict' extension
creating build
creating build/temp.macosx-10.14-x86_64-3.8
clang -Wno-unused-result -Wsign-compare -Wunreachable-code -fno-common -dynamic -DNDEBUG -g -fwrapv -O3 -Wall -iwithsysroot/System/Library/Frameworks/System.framework/PrivateHeaders -iwithsysroot/Applications/Xcode.app/Contents/Developer/Library/Frameworks/Python3.framework/Versions/3.8/Headers -arch arm64 -arch x86_64 -Werror=implicit-function-declaration -DMAJOR_VERSION=0 -DMINOR_VERSION=1 -DDEBUG_VERSION=0 -I/Applications/Xcode.app/Contents/Developer/Library/Frameworks/Python3.framework/Versions/3.8/include/python3.8 -c remdict.c -o build/temp.macosx-10.14-x86_64-3.8/remdict.o -W -Wall -Wno-invalid-offsetof -Wno-deprecated-declarations
creating build/lib.macosx-10.14-x86_64-3.8
clang -bundle -undefined dynamic_lookup -arch arm64 -arch x86_64 -Wl,-headerpad,0x1000 build/temp.macosx-10.14-x86_64-3.8/remdict.o -o build/lib.macosx-10.14-x86_64-3.8/remdict.cpython-38-darwin.so
実行
$ (cd build/lib.macosx-10.14-x86_64-3.8; python3)
Python 3.8.9 (default, Oct 26 2021, 07:25:54)
[Clang 13.0.0 (clang-1300.0.29.30)] on darwin
Type "help", "copyright", "credits" or "license" for more information.
>>> from remdict import remdict
>>> m = remdict({ 2:'A', 3:'B', 5:'C', 7:'D', 11:'E', 13:'F' })
>>> m.divisor()
7
>>> m.modkeys()
(0, None, 2, 3, 4, 5, 6)
>>> m.mkvalues()
('D', None, 'A', 'B', 'E', 'C', 'F')
>>> m.remainder_index()
(3, None, 0, 1, 4, 2, 5)
>>> m[2]
'A'
>>> m[3]
'B'
>>> m[5]
'C'
>>> m[7]
'D'
>>> m[11]
'E'
>>> m[13]
'F'
>>> m[0]
Traceback (most recent call last):
  File "<stdin>", line 1, in <module>
KeyError: 0

速度測定

Pythonスクリプト版
>>> 
>>> import random
>>> from timeit import timeit
>>> from remdict import remdict
>>> 
>>> test_data = tuple(random.choice(range(1<<30)) for _ in range(100))
>>> dr = remdict.forindex(test_data)
>>> dm = dict(dr)
>>> print('divisor:', dr.divisor()) # divisor() アクセスでテーブル生成(遅い…)
divisor: 954
>>> 
>>> test = lambda obj: [obj[key] for key in obj]
>>> dict_test = lambda: test(dm)
>>> remdict_test = lambda: test(dr)
>>> 
>>> num = 100000
>>> print(timeit(dict_test, number=num))
0.4390303929999999
>>> print(timeit(remdict_test, number=num))
5.6698670149999995
>>> print(timeit(dict_test, number=num))
0.44125425200000024
>>> print(timeit(remdict_test, number=num))
5.732929080000002
>>> print(timeit(dict_test, number=num))
0.4732762600000022
>>> print(timeit(remdict_test, number=num))
5.765764815999997
Cモジュール版
>>> import random
>>> from timeit import timeit
>>> from remdict import remdict
>>> 
>>> test_data = tuple(random.choice(range(1<<30)) for _ in range(100))
>>> dr = remdict.forindex(test_data)
>>> dm = dict(dr)
>>> print('divisor:', dr.divisor()) # divisor() アクセスでテーブル生成(遅い…)
divisor: 1332
>>> 
>>> test = lambda obj: [obj[key] for key in obj]
>>> dict_test = lambda: test(dm)
>>> remdict_test = lambda: test(dr)
>>> 
>>> num = 100000
>>> print(timeit(dict_test, number=num))
0.44294175899995025
>>> print(timeit(remdict_test, number=num))
0.4583264450002389
>>> print(timeit(dict_test, number=num))
0.43910693699945114
>>> print(timeit(remdict_test, number=num))
0.4496970539994436
>>> print(timeit(dict_test, number=num))
0.43823496400000295
>>> print(timeit(remdict_test, number=num))
0.4501975190005396

スクリプト版より圧倒的に速くなるのは当然として、dict よりやや劣る。各所のオーバーヘッドや Python/C API の呼び出しが問題なのだろうけど、組み込みと同じ方法(モジュール的には行儀が悪い)でのアクセスは避けたい。

テーブル生成は divisor を単純インクリメントでなく、キーとの関係を調べれてスキップすれば改善しそうだけど面倒臭い…


久しぶりにモジュールを作ったのでイロイロ忘れている。訓練材料としては良かった。

1
0
1

Register as a new user and use Qiita more conveniently

  1. You get articles that match your needs
  2. You can efficiently read back useful information
  3. You can use dark theme
What you can do with signing up
1
0