はじめに
最速のハッシュテーブルを求めて以降, アルゴリズム的に新しいものを実装してきませんでしたが, そろそろ新しいものを取り入れていきたいと思い, SwissTables(Swiss Tables Design Notes)を実装してみました.
Swiss Tables
アルゴリズム概要
戦略は単純で, キーのハッシュ値のうち7bitを切り出し, 別のテーブルに保存しておきます. SIMDのレジスタが128bitの場合16個のハッシュ値の断片を一度に比較できるので, 探索が高速になるのでは?という戦略です.
実装
いいかげんにコピペです. SIMDで比較する以外に特筆すべきことがないのです.
SwissTable
namespace hashmap
{
//--- SwissTable
//-------------------------------------------------------
template<class Key, class Value, class MemoryAllocator=DefaultAllocator>
class SwissTable
{
public:
inline static constexpr u32 Invalid = 0xFFFF'FFFFUL;
inline static constexpr u32 Align = 4;
inline static constexpr u32 AlignMask = Align - 1;
inline static constexpr u32 Block = 16;
inline static constexpr u32 BlockMask = Block - 1;
inline static constexpr u32 Expand = 128;
typedef Key key_type;
typedef Value value_type;
typedef MemoryAllocator memory_allocator;
typedef SwissTable<Key, Value, MemoryAllocator> this_type;
typedef u32 size_type;
typedef size_type iterator;
typedef hash_detail::type_traits<value_type> value_traits;
typedef typename value_traits::pointer value_pointer;
typedef typename value_traits::const_pointer const_value_pointer;
typedef typename value_traits::reference value_reference;
typedef typename value_traits::const_reference const_value_reference;
typedef typename value_traits::pointer pointer;
typedef typename value_traits::const_pointer const_pointer;
typedef typename value_traits::reference reference;
typedef typename value_traits::const_reference const_reference;
typedef typename value_traits::param_type value_param_type;
typedef typename value_traits::const_param_type const_value_param_type;
typedef typename value_traits::lvalue value_lvalue_type;
typedef hash_detail::type_traits<key_type> key_traits;
typedef typename key_traits::pointer key_pointer;
typedef typename key_traits::const_pointer const_key_pointer;
typedef typename key_traits::reference key_reference;
typedef typename key_traits::const_reference const_key_reference;
typedef typename key_traits::param_type key_param_type;
typedef typename key_traits::const_param_type const_key_param_type;
typedef typename key_traits::lvalue key_lvalue_type;
struct Control
{
inline bool isOccupied() const
{
return 0 == (0x80U & control_);
}
uint8_t control_;
};
SwissTable();
explicit SwissTable(size_type capacity);
~SwissTable();
void initialize(size_type capacity);
size_type capacity() const;
size_type size() const;
void clear();
bool valid(size_type pos) const;
size_type find(const_key_param_type key) const;
bool insert(const_key_param_type key, const_value_param_type value);
void erase(const_key_param_type key);
void eraseAt(size_type pos);
void swap(this_type& rhs);
iterator begin() const;
iterator end() const;
iterator next(iterator pos) const;
reference getValue(size_type pos);
const_reference getValue(size_type pos) const;
key_reference getKey(size_type pos);
const_key_reference getKey(size_type pos) const;
private:
SwissTable(const SwissTable&) = delete;
SwissTable& operator=(const SwissTable&) = delete;
size_type calcHash_(const_key_param_type key) const;
static inline size_type align(size_type x, size_type mask);
void expand(size_type capacity);
size_type find_(const_key_param_type key, size_type hash) const;
void insert_(value_lvalue_type key, key_lvalue_type value);
void create(size_type capacity);
void destroy();
static u32 leastSignificantBit(u32 x);
static u32 next_pos(u32 x);
size_type capacity_;
size_type size_;
Control* controls_;
key_type* keys_;
value_type* values_;
};
template<class Key, class Value, class MemoryAllocator>
SwissTable<Key, Value, MemoryAllocator>::SwissTable()
:capacity_(0)
, size_(0)
, controls_(nullptr)
, keys_(nullptr)
, values_(nullptr)
{}
template<class Key, class Value, class MemoryAllocator>
SwissTable<Key, Value, MemoryAllocator>::SwissTable(size_type capacity)
:capacity_(0)
, size_(0)
, controls_(nullptr)
, keys_(nullptr)
, values_(nullptr)
{
create(capacity);
}
template<class Key, class Value, class MemoryAllocator>
SwissTable<Key, Value, MemoryAllocator>::~SwissTable()
{
destroy();
}
template<class Key, class Value, class MemoryAllocator>
void SwissTable<Key, Value, MemoryAllocator>::initialize(size_type capacity)
{
destroy();
create(capacity);
}
template<class Key, class Value, class MemoryAllocator>
SwissTable<Key, Value, MemoryAllocator>::size_type SwissTable<Key, Value, MemoryAllocator>::capacity() const
{
return capacity_;
}
template<class Key, class Value, class MemoryAllocator>
SwissTable<Key, Value, MemoryAllocator>::size_type SwissTable<Key, Value, MemoryAllocator>::size() const
{
return size_;
}
template<class Key, class Value, class MemoryAllocator>
void SwissTable<Key, Value, MemoryAllocator>::clear()
{
for (size_type i = 0; i < capacity_; ++i) {
if (controls_[i].isOccupied()) {
keys_[i].~key_type();
values_[i].~value_type();
}
}
size_ = 0;
::memset(controls_, 0, sizeof(Control) * capacity_);
}
template<class Key, class Value, class MemoryAllocator>
bool SwissTable<Key, Value, MemoryAllocator>::valid(size_type pos) const
{
return (pos < capacity_);
}
template<class Key, class Value, class MemoryAllocator>
SwissTable<Key, Value, MemoryAllocator>::size_type SwissTable<Key, Value, MemoryAllocator>::find(const_key_param_type key) const
{
return (0 < capacity_) ? find_(key, calcHash_(key)) : end();
}
template<class Key, class Value, class MemoryAllocator>
bool SwissTable<Key, Value, MemoryAllocator>::insert(const_key_param_type key, const_value_param_type value)
{
u32 hash = calcHash_(key);
if (0 < capacity_ && find_(key, hash) != end()) {
return false;
}
{
if (capacity_ <= size_) {
expand(capacity_);
}
HASSERT(size_<capacity_);
}
{
#if 0
u32 h = hash & 0x7FUL;
u32 start = (hash & (~BlockMask)) % capacity_;
__m128i vh = _mm_set1_epi8((char)0x80);
u32 pos = start;
do {
HASSERT(0 == (pos & BlockMask));
__m128i x = _mm_loadu_si128((const __m128i*)(controls_ + pos));
__m128i c = _mm_and_epi32(x, vh);
for (u32 i = static_cast<u32>(_mm_movemask_epi8(c)); i; i = next_pos(i)) {
u32 p = pos + leastSignificantBit(i);
if (!controls_[p].isOccupied()) {
controls_[p].control_ = static_cast<u8>(h);
construct(&keys_[p], std::move(key));
construct(&values_[p], std::move(value));
++size_;
return true;
}
}
pos += Block;
pos = (pos < capacity_) ? pos : pos - capacity_;
} while (start != pos);
#else
u32 h = hash & 0x7FUL;
u32 start = hash % capacity_;
u32 pos = start;
do {
if (!controls_[pos].isOccupied()) {
controls_[pos].control_ = static_cast<u8>(h);
construct(&keys_[pos], std::move(key));
construct(&values_[pos], std::move(value));
++size_;
return true;
}
++pos;
pos = (pos < capacity_) ? pos : pos - capacity_;
} while (start != pos);
#endif
}
return false;
}
template<class Key, class Value, class MemoryAllocator>
void SwissTable<Key, Value, MemoryAllocator>::erase(const_key_param_type key)
{
u32 pos = find(key);
if(pos == end()){
return;
}
eraseAt(pos);
}
template<class Key, class Value, class MemoryAllocator>
void SwissTable<Key, Value, MemoryAllocator>::eraseAt(size_type pos)
{
HASSERT(0<size_);
HASSERT(pos<capacity_);
HASSERT(controls_[pos].isOccupied());
keys_[pos].~key_type();
values_[pos].~value_type();
controls_[pos].control_ = 0xFEU;
--size_;
}
template<class Key, class Value, class MemoryAllocator>
void SwissTable<Key, Value, MemoryAllocator>::swap(this_type& rhs)
{
(std::swap)(capacity_, rhs.capacity_);
(std::swap)(size_, rhs.size_);
(std::swap)(controls_, rhs.controls_);
(std::swap)(keys_, rhs.keys_);
(std::swap)(values_, rhs.values_);
}
template<class Key, class Value, class MemoryAllocator>
SwissTable<Key, Value, MemoryAllocator>::iterator SwissTable<Key, Value, MemoryAllocator>::begin() const
{
for(u32 i=0; i<capacity_; ++i){
if(controls_[i].isOccupied()){
return i;
}
}
return end();
}
template<class Key, class Value, class MemoryAllocator>
SwissTable<Key, Value, MemoryAllocator>::iterator SwissTable<Key, Value, MemoryAllocator>::end() const
{
return Invalid;
}
template<class Key, class Value, class MemoryAllocator>
SwissTable<Key, Value, MemoryAllocator>::iterator SwissTable<Key, Value, MemoryAllocator>::next(iterator pos) const
{
for(u32 i=pos+1; i<capacity_; ++i){
if(controls_[i].isOccupied()){
return i;
}
}
return end();
}
template<class Key, class Value, class MemoryAllocator>
SwissTable<Key, Value, MemoryAllocator>::reference SwissTable<Key, Value, MemoryAllocator>::getValue(size_type pos)
{
HASSERT(0 <= pos && pos < capacity_);
return values_[pos];
}
template<class Key, class Value, class MemoryAllocator>
SwissTable<Key, Value, MemoryAllocator>::const_reference SwissTable<Key, Value, MemoryAllocator>::getValue(size_type pos) const
{
HASSERT(0 <= pos && pos < capacity_);
return values_[pos];
}
template<class Key, class Value, class MemoryAllocator>
SwissTable<Key, Value, MemoryAllocator>::key_reference SwissTable<Key, Value, MemoryAllocator>::getKey(size_type pos)
{
HASSERT(0 <= pos && pos < capacity_);
return keys_[pos];
}
template<class Key, class Value, class MemoryAllocator>
SwissTable<Key, Value, MemoryAllocator>::const_key_reference SwissTable<Key, Value, MemoryAllocator>::getKey(size_type pos) const
{
HASSERT(0 <= pos && pos < capacity_);
return keys_[pos];
}
template<class Key, class Value, class MemoryAllocator>
SwissTable<Key, Value, MemoryAllocator>::size_type SwissTable<Key, Value, MemoryAllocator>::calcHash_(const_key_param_type key) const
{
return hash_detail::calcHash(key);
}
template<class Key, class Value, class MemoryAllocator>
inline SwissTable<Key, Value, MemoryAllocator>::size_type SwissTable<Key, Value, MemoryAllocator>::align(size_type x, size_type mask)
{
return (x + mask) & (~mask);
}
template<class Key, class Value, class MemoryAllocator>
void SwissTable<Key, Value, MemoryAllocator>::expand(size_type capacity)
{
u32 newCapacity = hash_detail::next_prime(capacity);
this_type tmp;
tmp.create(newCapacity);
for (size_type i = 0; i < capacity_; ++i) {
if (controls_[i].isOccupied()) {
tmp.insert_(std::move(keys_[i]), std::move(values_[i]));
}
}
tmp.swap(*this);
}
template<class Key, class Value, class MemoryAllocator>
SwissTable<Key, Value, MemoryAllocator>::size_type SwissTable<Key, Value, MemoryAllocator>::find_(const_key_param_type key, size_type hash) const
{
HASSERT(0 == (capacity_ & BlockMask));
u32 h = hash & 0x7FUL;
u32 start = (hash & (~BlockMask))%capacity_;
__m128i vh = _mm_set1_epi8(static_cast<int8_t>(h));
u32 pos = start;
do {
HASSERT(0 == (pos & BlockMask));
__m128i x = _mm_loadu_si128((const __m128i*)(controls_ + pos));
__m128i c = _mm_cmpeq_epi8(x, vh);
for (u32 i = static_cast<u32>(_mm_movemask_epi8(c)); i; i = next_pos(i)) {
u32 p = pos + leastSignificantBit(i);
if (key == keys_[p]) {
return p;
}
}
pos += Block;
pos = (pos < capacity_) ? pos : pos - capacity_;
} while (start != pos);
return end();
}
template<class Key, class Value, class MemoryAllocator>
void SwissTable<Key, Value, MemoryAllocator>::insert_(value_lvalue_type key, key_lvalue_type value)
{
#if 0
u32 hash = calcHash_(key);
HASSERT(find_(key, hash) == end());
HASSERT(size_ < capacity_);
u32 h = hash & 0x7FUL;
u32 start = (hash & (~BlockMask)) % capacity_;
__m128i vh = _mm_set1_epi8((char)0x80);
u32 pos = start;
do {
HASSERT(0 == (pos & BlockMask));
__m128i x = _mm_loadu_si128((const __m128i*)(controls_ + pos));
__m128i c = _mm_and_epi32(x, vh);
for (u32 i = static_cast<u32>(_mm_movemask_epi8(c)); i; i = next_pos(i)) {
u32 p = pos + leastSignificantBit(i);
if (!controls_[p].isOccupied()) {
controls_[p].control_ = static_cast<u8>(h);
construct(&keys_[p], std::move(key));
construct(&values_[p], std::move(value));
++size_;
return;
}
}
pos += Block;
pos = (pos < capacity_) ? pos : pos - capacity_;
} while (start != pos);
return;
#else
u32 hash = calcHash_(key);
HASSERT(find_(key, hash) == end());
HASSERT(size_ < capacity_);
u32 h = hash & 0x7FUL;
u32 start = hash % capacity_;
u32 pos = start;
do {
if (!controls_[pos].isOccupied()) {
controls_[pos].control_ = static_cast<u8>(h);
construct(&keys_[pos], std::move(key));
construct(&values_[pos], std::move(value));
++size_;
return;
}
++pos;
pos = (pos < capacity_) ? pos : pos - capacity_;
} while (start != pos);
return;
#endif
}
template<class Key, class Value, class MemoryAllocator>
void SwissTable<Key, Value, MemoryAllocator>::create(size_type capacity)
{
HASSERT(capacity_ <= 0);
HASSERT(size_ <= 0);
HASSERT(nullptr == controls_);
HASSERT(nullptr == keys_);
HASSERT(nullptr == values_);
capacity = align(capacity, BlockMask);
u64 size_controls = capacity * sizeof(Control);
u64 size_keys = capacity * sizeof(key_type);
u64 size_values = capacity * sizeof(value_type);
u64 total_size = size_controls + size_keys + size_values;
u8* memory = reinterpret_cast<u8*>(HALLOCATOR_MALLOC(memory_allocator, total_size));
::memset(memory, 0, total_size);
controls_ = reinterpret_cast<Control*>(memory);
keys_ = reinterpret_cast<key_type*>(memory + size_controls);
values_ = reinterpret_cast<value_type*>(memory + size_controls + size_keys);
::memset(controls_, 0x80, size_controls);
capacity_ = capacity;
}
template<class Key, class Value, class MemoryAllocator>
void SwissTable<Key, Value, MemoryAllocator>::destroy()
{
for (u32 i = 0; i < capacity_; ++i) {
if (controls_[i].isOccupied()) {
values_[i].~value_type();
keys_[i].~key_type();
}
}
HALLOCATOR_FREE(memory_allocator, controls_);
capacity_ = 0;
size_ = 0;
controls_ = nullptr;
keys_ = nullptr;
values_ = nullptr;
}
template<class Key, class Value, class MemoryAllocator>
u32 SwissTable<Key, Value, MemoryAllocator>::leastSignificantBit(u32 x)
{
#if defined(_MSC_VER)
unsigned long index;
return _BitScanForward(&index, x) ? (u32)index : 0;
#elif defined(__GNUC__)
return (0 != x) ? (__builtin_ctzl(x)) : 0;
#else
if (0 == v) {
return 0U;
}
u32 count = (x & 0xAAAAAAAAUL) ? 0x01UL : 0;
if (x & 0xCCCCCCCCU) {
count |= 0x02UL;
}
if (x & 0xF0F0F0F0U) {
count |= 0x04UL;
}
if (x & 0xFF00FF00U) {
count |= 0x08UL;
}
return 31UL - ((x & 0xFFFF0000UL) ? count | 0x10UL : count);
#endif
}
template<class Key, class Value, class MemoryAllocator>
u32 SwissTable<Key, Value, MemoryAllocator>::next_pos(u32 x)
{
return x & (x - 1);
}
}
結果
環境 | |
---|---|
OS | Windows 11 Pro 23H2 |
CPU | CPU: Intel Core i5-14500 2.60 GHz |
Memory | 128GB (DDR5) |
ランダムな文字列をキーと値にした連想配列でテストします, STLではstd::unordered_map<std::string, std::string>
とした場合です.
100000個のサンプルで10回の平均をとります.
find0
は挿入したキーを全て検索した結果, find1
は半分のキーを削除した後で, サンプルのキーを全て検索した結果になります. 時間の単位はナノセカンドです.
HashMap | Hopscotch | RobinHood | SwissTable | std::unordered_map | |
---|---|---|---|---|---|
insert | 0.0343334 | 0.0443963 | 0.0894508 | 1.10724 | 0.0274962 |
find0 | 0.00319431 | 0.00406003 | 0.00412159 | 0.00332366 | 0.00480711 |
erase | 0.00377636 | 0.00634865 | 0.0107115 | 0.00580226 | 0.00553653 |
find1 | 0.00221175 | 0.00308796 | 0.00601705 | 0.948603 | 0.00337689 |
まとめ
挿入が非常に遅いのは実装が悪いと思います. 存在するものを検索することは速いですが, ないものを検索する場合の対策はないので遅いです. RobinHoodのように剰余演算対策をすれば, より速くなるかと思います.