More than 3 years have passed since last update.

[outdated] golangのgenericsでB+ Treeを書いてみた

Last updated at 2021-06-22Posted at 2020-06-19

2020/06/19時点での情報です。
仕様が変更されたため、以下のコードは参考にしないでください

golangのgenerics

先日、golangのgenericsに進展が有りました。

contractは無くなりinterfaceでconstraintを指定するように変更

https://tenntenn.dev/ja/posts/contract-interface/
こちらのブログによると以前からcontractを無くすという話はあったようですね。

go2向けgo playground でgenericsが試せるように

お手軽なので、試しに書きたいという方におすすめ

go2をgo1にコンパイルするツール

goのgitレポジトリのdev.go2goブランチからビルドしたgoバイナリを使用するとgo2をgo1にコンパイルできます。
そのまま実行することも可能です。

# 実行
go tool go2go run x.go2

# go1へコンパイル
go tool go2go translate x.go2

B+ Treeを書く

個人的にgenericsが欲しい場面としてarray,map以外のデータ構造を使う時を考えていたので、B+ Treeを書いてみることにしました。
B+ Treeをチョイスした理由としてはDBのインデックスでいつも使っているのに一度も書いたことないのどうなんだと前々から思っていたためです。

B+ Treeとは

詳細な説明は省きますが、B+ TreeはB Treeの亜種です。
BTreeは二分探索木の各ノードが2本以上の枝を持つもので、B+ TreeはBTreeの内部ノードにはキーのみを保存し、実データは全て葉ノードに保存します。
この葉ノードを連結リストでつなぐことにより、範囲検索が効率的に行えるという特徴が有ります。

B+木のノードが配列として構成される場合、挿入や削除で配列の要素をずらす必要が生じ、性能が悪くなる。そのため、ノード内の要素は2分木やB+木で構成するのが望ましい。

wikipediaにはこのように書いてありますが、簡単のため今回はsliceを使って実装します

B+ Treeのコード

上述したようにB+ Treeを書くのは初なので、間違っていたらごめんなさい。
また、削除は実装していません。

// tree.go2
package main

type comperable(type T) interface {
	LessThan(a T) bool
	Equal(a T) bool
}

type node(type K comperable, V interface{}) struct { // 一部の型引数だけconstraintを持つことは出来ない
	key K // 子ノードのkeyで最大のものが入るようにする。

	children []*node(K, V)
	value    V
	prev     *node(K, V)
	next     *node(K, V)
}

type BPTree(type K comperable, V interface{}) struct {
	order int
	root  *node(K, V)
}

func (n *node(K, V)) isLeaf() bool { // 型パラメータが引数などに未登場の場合でも指定しないといけない
	return len(n.children) == 0
}

func (n *node(K, V)) fixKey() {
	if len(n.children) == 0 { // 正常であれば呼ばれないはず
		return
	}
	n.key = n.children[len(n.children)-1].key
}

func linkNode(type K comperable, V interface{})(prev, middle, next *node(K, V)) {
	if prev != nil {
		prev.next = middle
	}
	middle.prev = prev
	middle.next = next
	if next != nil {
		next.prev = middle
	}
}

func NewBPTree(type K comperable, V interface{})(order int) *BPTree(K, V) {
	if !(order > 1) {
		panic("order must be greater than 1")
	}
	return &BPTree(K, V){
		order: order,
	}
}

func (t *BPTree(K, V)) Find(key K) (V, bool) {
	var zero V // ゼロ値を作るsyntaxはない。*new(V)とか
	if t.root == nil {
		return zero, false
	}
	leaf, found := t.findNearestLeaf(key, t.root)
	if found && leaf.key.Equal(key) {
		return leaf.value, true
	}
	return zero, false
}

// 指定したキー以上の最小元を返す
func (t *BPTree(K, V)) findNearestLeaf(key K, current *node(K, V)) (*node(K, V), bool) {
	if current.isLeaf() {
		return current, true
	}

	for _, child := range current.children {
		if key.LessThan(child.key) || key.Equal(child.key) {
			return t.findNearestLeaf(key, child) // 末尾再帰最適化されてほしいなぁ
		}
	}

	return nil, false
}

func (t *BPTree(K, V)) Between(from, to K) []V {
	result := make([]V, 0)
	if t.root == nil {
		return result
	}
	if to.LessThan(from) {
	  from, to = to, from
	}

	current, found := t.findNearestLeaf(from, t.root)
	if !found {
	  return result
	}
	for current != nil && !to.LessThan(current.key) { // !(to < current) <=> current <= to
		result = append(result, current.value)
		current = current.next
	}
	return result
}

func (t *BPTree(K, V)) Put(key K, value V) {
	if t.root == nil {
		newNode := &node(K, V){key: key, value: value}
		t.root = &node(K, V){
			key: key,
			children: []*(node(K, V)){ // []*node(K, V) だとエラーになる。これだとどういう解釈になるんだ？
				newNode,
			},
		}
	}

	var inserted *node(K, V)
	if t.root.children[len(t.root.children)-1].key.LessThan(key) { // 末尾に追加する場合は別処理
		inserted = t.putLast(key, value, t.root)
	} else {
		inserted = t.put(key, value, t.root)
	}

	if inserted != nil {
		t.root = &node(K, V){
			key: t.root.key,
			children: []*(node(K, V)){
				t.root,
			},
		}
		t.putInNode(inserted, t.root)
	}

}

func (t *BPTree(K, V)) putLast(key K, value V, current *node(K, V)) (newInserted *node(K, V)) {
	if current.isLeaf() {
		newNode := &node(K, V){key: key, value: value}
		linkNode(current, newNode, nil)
		return newNode
	}
	current.key = key

	inserted := t.putLast(key, value, current.children[len(current.children)-1])
	if inserted == nil {
		return nil
	}

	current.children = append(current.children, inserted)
	if len(current.children) <= t.order {
		return nil
	}

	middleIndex := (t.order + 1) / 2

	newNode := &node(K, V){
		children: current.children[middleIndex:],
		key:      key,
	}

	current.children = current.children[:middleIndex]
	current.fixKey()

	return newNode
}

func (t *BPTree(K, V)) put(key K, value V, current *node(K, V)) (newInserted *node(K, V)) {
	if current.isLeaf() {
		if current.key.Equal(key) {
			current.value = value
			return nil
		}

		newNode := &node(K, V){key: key, value: value}
		linkNode(current.prev, newNode, current)
		return newNode
	}

	for _, next := range current.children {
		if key.LessThan(next.key) || key.Equal(next.key) {
			inserted := t.put(key, value, next)
			if inserted != nil {
				inserted = t.putInNode(inserted, current)
			}
			return inserted
		}
	}

	panic("bug") // 末尾に追加する場合なので来ない
}

func (t *BPTree(K, V)) putInNode(inserted, current *node(K, V)) (newInserted *node(K, V)) {
	newChildren := make([]*(node(K, V)), 0, len(current.children)+1)

	isInserted := false
	for _, n := range current.children {
		if !isInserted && inserted.key.LessThan(n.key) {
			newChildren = append(newChildren, inserted)
			isInserted = true
		}
		newChildren = append(newChildren, n)
	}
	if !isInserted {
		newChildren = append(newChildren, inserted)
	}

	if len(newChildren) <= t.order {
		current.children = newChildren
		current.fixKey()
		return nil
	}

	middleIndex := (t.order + 1) / 2
	current.children = newChildren[:middleIndex]
	current.fixKey()

	newNode := &node(K, V){
		children: newChildren[middleIndex:],
	}
	newNode.fixKey()

	return newNode
}

type IntKey int

func (k IntKey) LessThan(v IntKey) bool {
	return int(k) < int(v)
}

func (k IntKey) Equal(v IntKey) bool {
	return int(k) == int(v)
}

type IntBPTree = BPTree(IntKey, int)

func NewIntBPTree(order int) *IntBPTree {
  return NewBPTree(IntKey, int)(order)
}

実行方法

せっかくなので、go2のまま実行するのではなく、go1からBPTreeを使うようにしてみます。

// main.go
package main

import (
	"fmt"
	"log"
	"math/rand"
)

func assertTrue(v bool, msg string, vs ...interface{}) {
	if !v {
		panic(fmt.Sprintf(msg, vs...))
	}
}

func main() {
	const (
		defaultOrder       = 10
		insertCount        = 1000
		randomAttemptCount = 10
	)

	for order := 2; order <= defaultOrder; order++ {
		// ランダムにたくさん入れる
		for i := 0; i < randomAttemptCount; i++ {
			t := NewIntBPTree(order)
			rand.Seed(int64(i))
			inserted := make([]int, insertCount)
			for i := range inserted {
				x := rand.Int()
				t.Put(IntKey(x), x)
				inserted[i] = x
			}

			for _, x := range inserted {
				v, found := t.Find(IntKey(x))
				assertTrue(found, "random; %d not found", x)
				assertTrue(v == x, "random; %d wrong", x)
			}
		}
	}
	log.Println("success")
}

go tool go2go translate tree.go2
go run tree.go main.go
// -> 2020/06/19 14:28:27 success

ハマったところ、注意点等

上のコード中にも書かれていますが、抜き出してみます

一部の型引数だけconstraintを持つことは出来ない

制約が何もない場合は interface{} を指定する必要が有ります。

型パラメータが引数などに未登場の場合でも指定しないといけない

ちょっと面倒くさいですね。

[]*node(K, V) だとエラーになる。

[]*(node(K, V)) にしないといけないみたいです。
これだとどういう解釈になるんですかね。

ゼロ値を作るsyntaxは（今の所）ない

パラメータ化された型のゼロ値を作るsyntaxは今のところ無いので、var zero T や *new(T) などと書く必要が有ります。
the-zero-value

感想

正直contractはinterfaceとの違いはすぐには分からなかったので、interfaceをconstraintとして使う現在の実装の方が分かりやすいなと思いました。
一方で、type制約（今回は使用しませんでした。後述）を書いたinterfaceは元々の用途（変数の型等）には使えないようなので、若干混乱する人も居るかもしれません。
type-lists-in-interface-types

丸括弧の登場機会が多いため若干見づらい（カリー化されてるように見える）気もしますが、そこはsyntax highlightに頑張ってもらう感じでしょうか。
何はともあれ、とてもいい感じなので（ボキャ貧）正式リリースが待ち遠しいです。

type制約

// 上のページより抜粋
type Ordered interface {
	type int, int8, int16, int32, int64,
		uint, uint8, uint16, uint32, uint64, uintptr,
		float32, float64,
		string
}

このようなinterfaceを用いることで、Ordered制約が付いた型パラメータ（正確にはその大元の型）はリスト中の型のどれかでないといけないことを記述できます。
これを用いると、リスト中の型に共通の演算子を使用することができます。（<や==など）
なお、Orderedを通常のinterfaceのように変数の型に指定したりはできません。

You get articles that match your needs
You can efficiently read back useful information
You can use dark theme

What you can do with signing up