More than 1 year has passed since last update.

goで文字列比較の速度を検証

Last updated at 2022-08-25Posted at 2022-08-25

はじめに

goで文字列比較をする際にどの方法が早いのか気になったので検証してみました。

比較条件は以下とします

文字列の長さを固定
完全に一致する2つの文字列を比較
10,000,000回(1千万回)繰り返した時の実行時間の平均を測定（go test benchコマンド）
正規表現は含まない

比較結果

結果：どれを使ってもあまり変わらないことが分かりました
※ 完全に一致する文字列で検証した場合

凡例は以下の比較方法を表しています。
具体的な実装は各リンク先をご参照ください。

①　Equal : 文字列A == 文字列B
②　Byte : bytes.Compare(文字列A, 文字列B)
③　HasPrefix : strings.HasPrefix(文字列A, 文字列B)
④　HasSuffix : strings.HasSuffix(文字列A, 文字列B)
⑤　Map : map[string]struct{}{文字列A}[文字列B]

測定結果の表

文字列の長さに対して比較の実行時間[ns/op]を示しています。

	Equal	Byte	HasPrefix	HasSuffix	Map
10 文字	6	4	6	6	9
25 文字	9	5	7	7	10
50 文字	8	7	9	8	15
75 文字	7	6	7	8	12
100 文字	8	8	8	11	16
250 文字	12	12	12	14	19
500 文字	19	19	16	18	23
750 文字	21	27	20	21	27
1000 文字	29	31	29	32	40
2500 文字	59	74	63	60	64
5000 文字	114	136	107	108	120
7500 文字	156	195	179	162	158
10000 文字	229	245	206	202	212
25000 文字	636	760	635	720	788
50000 文字	1332	1479	1357	1288	1286
75000 文字	2040	2280	2028	2043	2157
100000 文字	2507	2966	2669	2425	2576

なお、参考までに各文字列の長さに対するgo test benchの出力例を示します。

D:\strmatch>go test -bench . -benchmem -benchtime 10000000x -cpu 1
goos: windows
goarch: amd64
pkg: strmatch
cpu: Intel(R) Core(TM) i7-10870H CPU @ 2.20GHz
Benchmark_string_Equal          10000000              2507 ns/op               0 B/op          0 allocs/op
Benchmark_byte_Equal            10000000              2966 ns/op               0 B/op          0 allocs/op
Benchmark_string_HasPrefix      10000000              2669 ns/op               0 B/op          0 allocs/op
Benchmark_string_HasSuffix      10000000              2425 ns/op               0 B/op          0 allocs/op
Benchmark_map                   10000000              2576 ns/op               0 B/op          0 allocs/op
PASS
ok      strmatch        132.331s

比較用コード

前提

テスト対象の文字列は以下のようにして生成します。
コンパイル時の最適化が走らないように文字列は実行時に生成し、strings.Clone()を使用して比較する2つの文字列が同一のオブジェクトを参照しないようにしています。
STR_LENGTHは文字列の長さであり、テスト毎に変更しています。

var (
	STR_LENGTH = 75000
	test_str   = RandomStr(STR_LENGTH)
	target_str = strings.Clone(test_str)
)

func RandomStr(digit int) string {
	b := make([]byte, digit)
	if _, err := rand.Read(b); err != nil {
		return ""
	}
	var result string
	for _, v := range b {
		result += string(v%byte(94) + 33)
	}
	return result
}

また、比較結果を代入した変数が使われていないというgoのエラーを抑制するために以下の関数を定義しています。

func discard(s any) {
}

①　文字列A == 文字列B

func Benchmark_string_Equal(b *testing.B) {
	result := false
	b.ResetTimer()
	for i := 0; i < b.N; i++ {
		result = (test_str == target_str)
	}
	b.StopTimer()
	discard(result)
}

②　bytes.Compare(文字列A, 文字列B)

func Benchmark_byte_Equal(b *testing.B) {
	result := 1
	target_str_byte := []byte(target_str)
	test_str_byte := []byte(test_str)
	b.ResetTimer()
	for i := 0; i < b.N; i++ {
		result = bytes.Compare(target_str_byte, test_str_byte)
	}
	b.StopTimer()
	discard(result)
}

③　strings.HasPrefix(文字列A, 文字列B)

func Benchmark_string_HasPrefix(b *testing.B) {
	result := false
	b.ResetTimer()
	for i := 0; i < b.N; i++ {
		result = strings.HasPrefix(test_str, target_str)
	}
	b.StopTimer()
	discard(result)
}

④　strings.HasSuffix(文字列A, 文字列B)

func Benchmark_string_HasSuffix(b *testing.B) {
	result := false
	b.ResetTimer()
	for i := 0; i < b.N; i++ {
		strings.HasSuffix(test_str, target_str)
	}
	b.StopTimer()
	discard(result)
}

⑤　map[string]struct{}{文字列A}[文字列B]

func Benchmark_map(b *testing.B) {
	result := false
	dict := map[string]struct{}{
		target_str: struct{}{},
	}
	b.ResetTimer()
	for i := 0; i < b.N; i++ {
		_, result = dict[test_str]
	}
	b.StopTimer()
	discard(result)
}

テストコード全体

package main

import (
	"bytes"
	"crypto/rand"
	"strings"
	"testing"
)

var (
	STR_LENGTH = 75000
	test_str   = RandomStr(STR_LENGTH)
	target_str = strings.Clone(test_str)
)

func discard(s any) {
}

func RandomStr(digit int) string {
	b := make([]byte, digit)
	if _, err := rand.Read(b); err != nil {
		return ""
	}
	var result string
	for _, v := range b {
		result += string(v%byte(94) + 33)
	}
	return result
}

func Benchmark_string_Equal(b *testing.B) {
	result := false
	b.ResetTimer()
	for i := 0; i < b.N; i++ {
		result = (test_str == target_str)
	}
	b.StopTimer()
	discard(result)
}

func Benchmark_byte_Equal(b *testing.B) {
	result := 1
	target_str_byte := []byte(target_str)
	test_str_byte := []byte(test_str)
	b.ResetTimer()
	for i := 0; i < b.N; i++ {
		result = bytes.Compare(target_str_byte, test_str_byte)
	}
	b.StopTimer()
	discard(result)
}

func Benchmark_string_HasPrefix(b *testing.B) {
	result := false
	b.ResetTimer()
	for i := 0; i < b.N; i++ {
		result = strings.HasPrefix(test_str, target_str)
	}
	b.StopTimer()
	discard(result)
}

func Benchmark_string_HasSuffix(b *testing.B) {
	result := false
	b.ResetTimer()
	for i := 0; i < b.N; i++ {
		strings.HasSuffix(test_str, target_str)
	}
	b.StopTimer()
	discard(result)
}

func Benchmark_map(b *testing.B) {
	result := false
	dict := map[string]struct{}{
		target_str: struct{}{},
	}
	b.ResetTimer()
	for i := 0; i < b.N; i++ {
		_, result = dict[test_str]
	}
	b.StopTimer()
	discard(result)
}

You get articles that match your needs
You can efficiently read back useful information
You can use dark theme

What you can do with signing up

goで文字列比較の速度を検証

はじめに

比較結果

比較用コード

前提

① 文字列A == 文字列B

② bytes.Compare(文字列A, 文字列B)

③ strings.HasPrefix(文字列A, 文字列B)

④ strings.HasSuffix(文字列A, 文字列B)

⑤ map[string]struct{}{文字列A}[文字列B]