MySQL

MySQLの照合順序

More than 5 years have passed since last update.


やってみた

MySQLの照合順序、UTF-8の日本語で使える照合順序はいくつかあるけど、

実際にどんなマッチの仕方をするのかわからなかったので、ちょっとやってみた。

ちなみに、MySQLのUTF-8で日本語を扱う場合に使用可能な照合順序はしたの3つ。


  • utf8_bin

  • utf8_general_ci

  • utf8_unicode_ci

確認したかったのは、半角・全角、小文字・大文字がどんな感じでマッチするのかという事。


ながーいので、結果を


  • utf8_bin

    キャラクターコードが完全に一致するもののみマッチする。


  • utf8_general_ci

    アルファベットの大文字・小文字は区別せずにマッチする。

    ただし、区別しない文字は、半角は半角の大文字・小文字、全角は全角の大文字・小文字のみ。

    半角小文字と全角小文字同士はマッチしない。



  • utf8_unicode_ci

    下記がマッチする。


    • アルファベットの大文字・小文字(全半角混合)

    • ひらがなの大文字・小文字とカタカナの大文字・小文字 (例えば、'あ'で検索すると、'ぁ', 'ア', 'ァ'もマッチする)




テーブル定義

テーブル定義はこんな感じ。


ddl.sql

drop table if exists `utf8_bin`;

create table `utf8_bin`
( `id` int(11) NOT NULL auto_increment,
`str` varchar(255),
`num` int(11),
primary key (`id`)
)
engine=InnoDB
default character set utf8 collate utf8_bin
;

drop table if exists `utf8_general_ci`;
create table `utf8_general_ci`
( `id` int(11) NOT NULL auto_increment,
`str` varchar(255),
`num` int(11),
primary key (`id`)
)
engine=InnoDB
default character set utf8 collate utf8_general_ci
;

drop table if exists `utf8_unicode_ci`;
create table `utf8_unicode_ci`
( `id` int(11) NOT NULL auto_increment,
`str` varchar(255),
`num` int(11),
primary key (`id`)
)
engine=InnoDB
default character set utf8 collate utf8_unicode_ci
;



テストデータ

テストデータはこんな感じ。

とりあえず、気になってた文字の比較。


dml_testdata.sql

insert into `utf8_bin`(`str`, `num`)

values
('a', null),
('a', null),
('A', null),
('A', null),
('あ', null),
('ぁ', null),
('ア', null),
('ァ', null),
('高橋', null),
('髙橋', null),
('-', null),
('−', null),
('+', null),
('+', null),
('0', null),
('0', null),
('1', null),
('1', null),
(null, 0),
(null, 1),
(null, 9),
(null, 100)
;

insert into `utf8_general_ci`(`str`, `num`) select `str`, `num` from `utf8_bin`;

insert into `utf8_unicode_ci`(`str`, `num`) select `str`, `num` from `utf8_bin`;



確認してみた

これ、途中で気づいたんだけど、プロシージャで書けばよかった…

思いつきで書き始めたけど、なんか長くなってしまった。


dml_test.sql

select '## str = a(半角)' as '実行内容';

select 'utf8_bin' as 'table', `id`, `str` from `utf8_bin` where `str` = 'a'
union select 'utf8_general_ci' as 'table', `id`, `str` from `utf8_general_ci` where `str` = 'a'
union select 'utf8_unicode_ci' as 'table', `id`, `str` from `utf8_unicode_ci` where `str` = 'a'
;
show warnings;

select '## str = A(半角)' as '実行内容';
select 'utf8_bin' as 'table', `id`, `str` from `utf8_bin` where `str` = 'A'
union select 'utf8_general_ci' as 'table', `id`, `str` from `utf8_general_ci` where `str` = 'A'
union select 'utf8_unicode_ci' as 'table', `id`, `str` from `utf8_unicode_ci` where `str` = 'A'
;
show warnings;

select '## str = a(全角)' as '実行内容';
select 'utf8_bin' as 'table', `id`, `str` from `utf8_bin` where `str` = 'a'
union select 'utf8_general_ci' as 'table', `id`, `str` from `utf8_general_ci` where `str` = 'a'
union select 'utf8_unicode_ci' as 'table', `id`, `str` from `utf8_unicode_ci` where `str` = 'a'
;
show warnings;

select '## str = A(全角)' as '実行内容';
select 'utf8_bin' as 'table', `id`, `str` from `utf8_bin` where `str` = 'A'
union select 'utf8_general_ci' as 'table', `id`, `str` from `utf8_general_ci` where `str` = 'A'
union select 'utf8_unicode_ci' as 'table', `id`, `str` from `utf8_unicode_ci` where `str` = 'A'
;
show warnings;

select '## str = あ(大文字)' as '実行内容';
select 'utf8_bin' as 'table', `id`, `str` from `utf8_bin` where `str` = 'あ'
union select 'utf8_general_ci' as 'table', `id`, `str` from `utf8_general_ci` where `str` = 'あ'
union select 'utf8_unicode_ci' as 'table', `id`, `str` from `utf8_unicode_ci` where `str` = 'あ'
;
show warnings;

select '## str = ぁ(小文字)' as '実行内容';
select 'utf8_bin' as 'table', `id`, `str` from `utf8_bin` where `str` = 'ぁ'
union select 'utf8_general_ci' as 'table', `id`, `str` from `utf8_general_ci` where `str` = 'ぁ'
union select 'utf8_unicode_ci' as 'table', `id`, `str` from `utf8_unicode_ci` where `str` = 'ぁ'
;
show warnings;

select '## str = ア(大文字)' as '実行内容';
select 'utf8_bin' as 'table', `id`, `str` from `utf8_bin` where `str` = 'ア'
union select 'utf8_general_ci' as 'table', `id`, `str` from `utf8_general_ci` where `str` = 'ア'
union select 'utf8_unicode_ci' as 'table', `id`, `str` from `utf8_unicode_ci` where `str` = 'ア'
;
show warnings;

select '## str = ァ(小文字)' as '実行内容';
select 'utf8_bin' as 'table', `id`, `str` from `utf8_bin` where `str` = 'ァ'
union select 'utf8_general_ci' as 'table', `id`, `str` from `utf8_general_ci` where `str` = 'ァ'
union select 'utf8_unicode_ci' as 'table', `id`, `str` from `utf8_unicode_ci` where `str` = 'ァ'
;
show warnings;

select '## str = 高橋' as '実行内容';
select 'utf8_bin' as 'table', `id`, `str` from `utf8_bin` where `str` = '高橋'
union select 'utf8_general_ci' as 'table', `id`, `str` from `utf8_general_ci` where `str` = '高橋'
union select 'utf8_unicode_ci' as 'table', `id`, `str` from `utf8_unicode_ci` where `str` = '高橋'
;
show warnings;

select '## str = 髙橋' as '実行内容';
select 'utf8_bin' as 'table', `id`, `str` from `utf8_bin` where `str` = '髙橋'
union select 'utf8_general_ci' as 'table', `id`, `str` from `utf8_general_ci` where `str` = '髙橋'
union select 'utf8_unicode_ci' as 'table', `id`, `str` from `utf8_unicode_ci` where `str` = '髙橋'
;
show warnings;

select '## str = -(半角)' as '実行内容';
select 'utf8_bin' as 'table', `id`, `str` from `utf8_bin` where `str` = '-'
union select 'utf8_general_ci' as 'table', `id`, `str` from `utf8_general_ci` where `str` = '-'
union select 'utf8_unicode_ci' as 'table', `id`, `str` from `utf8_unicode_ci` where `str` = '-'
;
show warnings;

select '## str = −(全角)' as '実行内容';
select 'utf8_bin' as 'table', `id`, `str` from `utf8_bin` where `str` = '−'
union select 'utf8_general_ci' as 'table', `id`, `str` from `utf8_general_ci` where `str` = '−'
union select 'utf8_unicode_ci' as 'table', `id`, `str` from `utf8_unicode_ci` where `str` = '−'
;
show warnings;

select '## str = +(半角)' as '実行内容';
select 'utf8_bin' as 'table', `id`, `str` from `utf8_bin` where `str` = '+'
union select 'utf8_general_ci' as 'table', `id`, `str` from `utf8_general_ci` where `str` = '+'
union select 'utf8_unicode_ci' as 'table', `id`, `str` from `utf8_unicode_ci` where `str` = '+'
;
show warnings;

select '## str = +(全角)' as '実行内容';
select 'utf8_bin' as 'table', `id`, `str` from `utf8_bin` where `str` = '+'
union select 'utf8_general_ci' as 'table', `id`, `str` from `utf8_general_ci` where `str` = '+'
union select 'utf8_unicode_ci' as 'table', `id`, `str` from `utf8_unicode_ci` where `str` = '+'
;
show warnings;

select '## str = 0(半角)' as '実行内容';
select 'utf8_bin' as 'table', `id`, `str` from `utf8_bin` where `str` = '0'
union select 'utf8_general_ci' as 'table', `id`, `str` from `utf8_general_ci` where `str` = '0'
union select 'utf8_unicode_ci' as 'table', `id`, `str` from `utf8_unicode_ci` where `str` = '0'
;
show warnings;

select '## str = 0(全角)' as '実行内容';
select 'utf8_bin' as 'table', `id`, `str` from `utf8_bin` where `str` = '0'
union select 'utf8_general_ci' as 'table', `id`, `str` from `utf8_general_ci` where `str` = '0'
union select 'utf8_unicode_ci' as 'table', `id`, `str` from `utf8_unicode_ci` where `str` = '0'
;
show warnings;

select '## str = 1(半角)' as '実行内容';
select 'utf8_bin' as 'table', `id`, `str` from `utf8_bin` where `str` = '1'
union select 'utf8_general_ci' as 'table', `id`, `str` from `utf8_general_ci` where `str` = '1'
union select 'utf8_unicode_ci' as 'table', `id`, `str` from `utf8_unicode_ci` where `str` = '1'
;
show warnings;

select '## str = 1(全角)' as '実行内容';
select 'utf8_bin' as 'table', `id`, `str` from `utf8_bin` where `str` = '1'
union select 'utf8_general_ci' as 'table', `id`, `str` from `utf8_general_ci` where `str` = '1'
union select 'utf8_unicode_ci' as 'table', `id`, `str` from `utf8_unicode_ci` where `str` = '1'
;
show warnings;

select '## str like \'高%\'' as '実行内容';
select 'utf8_bin' as 'table', `id`, `str` from `utf8_bin` where `str` like '高%'
union select 'utf8_general_ci' as 'table', `id`, `str` from `utf8_general_ci` where `str` like '高%'
union select 'utf8_unicode_ci' as 'table', `id`, `str` from `utf8_unicode_ci` where `str` like '高%'
;
show warnings;

select '## str like \'髙%\'' as '実行内容';
select 'utf8_bin' as 'table', `id`, `str` from `utf8_bin` where `str` like '髙%'
union select 'utf8_general_ci' as 'table', `id`, `str` from `utf8_general_ci` where `str` like '髙%'
union select 'utf8_unicode_ci' as 'table', `id`, `str` from `utf8_unicode_ci` where `str` like '髙%'
;
show warnings;

select '## num = 0' as '実行内容';
select 'utf8_bin' as 'table', `id`, `num` from `utf8_bin` where `num` = 0
union select 'utf8_general_ci' as 'table', `id`, `num` from `utf8_general_ci` where `num` = 0
union select 'utf8_unicode_ci' as 'table', `id`, `num` from `utf8_unicode_ci` where `num` = 0
;
show warnings;

select '## num = a(半角)' as '実行内容';
select 'utf8_bin' as 'table', `id`, `num` from `utf8_bin` where `num` = 'a'
union select 'utf8_general_ci' as 'table', `id`, `num` from `utf8_general_ci` where `num` = 'a'
union select 'utf8_unicode_ci' as 'table', `id`, `num` from `utf8_unicode_ci` where `num` = 'a'
;
show warnings;

select '## num = A(半角)' as '実行内容';
select 'utf8_bin' as 'table', `id`, `num` from `utf8_bin` where `num` = 'A'
union select 'utf8_general_ci' as 'table', `id`, `num` from `utf8_general_ci` where `num` = 'A'
union select 'utf8_unicode_ci' as 'table', `id`, `num` from `utf8_unicode_ci` where `num` = 'A'
;
show warnings;

select '## num = AAA(半角)' as '実行内容';
select 'utf8_bin' as 'table', `id`, `num` from `utf8_bin` where `num` = 'AAA'
union select 'utf8_general_ci' as 'table', `id`, `num` from `utf8_general_ci` where `num` = 'AAA'
union select 'utf8_unicode_ci' as 'table', `id`, `num` from `utf8_unicode_ci` where `num` = 'AAA'
;
show warnings;

select '## num = a(全角)' as '実行内容';
select 'utf8_bin' as 'table', `id`, `num` from `utf8_bin` where `num` = 'a'
union select 'utf8_general_ci' as 'table', `id`, `num` from `utf8_general_ci` where `num` = 'a'
union select 'utf8_unicode_ci' as 'table', `id`, `num` from `utf8_unicode_ci` where `num` = 'a'
;
show warnings;

select '## num = A(全角)' as '実行内容';
select 'utf8_bin' as 'table', `id`, `num` from `utf8_bin` where `num` = 'A'
union select 'utf8_general_ci' as 'table', `id`, `num` from `utf8_general_ci` where `num` = 'A'
union select 'utf8_unicode_ci' as 'table', `id`, `num` from `utf8_unicode_ci` where `num` = 'A'
;
show warnings;

select '## num like \'1%\'' as '実行内容';
select 'utf8_bin' as 'table', `id`, `num` from `utf8_bin` where `num` like '1%'
union select 'utf8_general_ci' as 'table', `id`, `num` from `utf8_general_ci` where `num` like '1%'
union select 'utf8_unicode_ci' as 'table', `id`, `num` from `utf8_unicode_ci` where `num` like '1%'
;
show warnings;

select '## num like \'a%\'' as '実行内容';
select 'utf8_bin' as 'table', `id`, `num` from `utf8_bin` where `num` like 'a%'
union select 'utf8_general_ci' as 'table', `id`, `num` from `utf8_general_ci` where `num` like 'a%'
union select 'utf8_unicode_ci' as 'table', `id`, `num` from `utf8_unicode_ci` where `num` like 'a%'
;
show warnings;



テスト結果

これもまただらだらいきます(;´∀`)


str = a(半角)

table
id
str

utf8_bin
1
a

utf8_general_ci
1
a

utf8_general_ci
3
A

utf8_unicode_ci
1
a

utf8_unicode_ci
2

utf8_unicode_ci
3
A

utf8_unicode_ci
4


  • エラー無し


str = A(半角)

table
id
str

utf8_bin
3
A

utf8_general_ci
1
a

utf8_general_ci
3
A

utf8_unicode_ci
1
a

utf8_unicode_ci
2

utf8_unicode_ci
3
A

utf8_unicode_ci
4


  • エラー無し


str = a(全角)

table
id
str

utf8_bin
2

utf8_general_ci
2

utf8_general_ci
4

utf8_unicode_ci
1
a

utf8_unicode_ci
2

utf8_unicode_ci
3
A

utf8_unicode_ci
4


  • エラー無し


str = A(全角)

table
id
str

utf8_bin
4

utf8_general_ci
2

utf8_general_ci
4

utf8_unicode_ci
1
a

utf8_unicode_ci
2

utf8_unicode_ci
3
A

utf8_unicode_ci
4


  • エラー無し


str = あ(大文字)

table
id
str

utf8_bin
5

utf8_general_ci
5

utf8_unicode_ci
5

utf8_unicode_ci
6

utf8_unicode_ci
7

utf8_unicode_ci
8


  • エラー無し


str = ぁ(小文字)

table
id
str

utf8_bin
6

utf8_general_ci
6

utf8_unicode_ci
5

utf8_unicode_ci
6

utf8_unicode_ci
7

utf8_unicode_ci
8


  • エラー無し


str = ア(大文字)

table
id
str

utf8_bin
7

utf8_general_ci
7

utf8_unicode_ci
5

utf8_unicode_ci
6

utf8_unicode_ci
7

utf8_unicode_ci
8


  • エラー無し


str = ァ(小文字)

table
id
str

utf8_bin
8

utf8_general_ci
8

utf8_unicode_ci
5

utf8_unicode_ci
6

utf8_unicode_ci
7

utf8_unicode_ci
8


  • エラー無し


str = 高橋

table
id
str

utf8_bin
9
高橋

utf8_general_ci
9
高橋

utf8_unicode_ci
9
高橋


  • エラー無し


str = 髙橋

table
id
str

utf8_bin
10
髙橋

utf8_general_ci
10
髙橋

utf8_unicode_ci
10
髙橋


  • エラー無し


str = -(半角)

table
id
str

utf8_bin
11
-

utf8_general_ci
11
-

utf8_unicode_ci
11
-


  • エラー無し


str = −(全角)

table
id
str

utf8_bin
12

utf8_general_ci
12

utf8_unicode_ci
12


  • エラー無し


str = +(半角)

table
id
str

utf8_bin
13
+

utf8_general_ci
13
+

utf8_unicode_ci
13
+

utf8_unicode_ci
14


  • エラー無し


str = +(全角)

table
id
str

utf8_bin
14

utf8_general_ci
14

utf8_unicode_ci
13
+

utf8_unicode_ci
14


  • エラー無し


str = 0(半角)

table
id
str

utf8_bin
15
0

utf8_general_ci
15
0

utf8_unicode_ci
15
0

utf8_unicode_ci
16


  • エラー無し


str = 0(全角)

table
id
str

utf8_bin
16

utf8_general_ci
16

utf8_unicode_ci
15
0

utf8_unicode_ci
16


  • エラー無し


str = 1(半角)

table
id
str

utf8_bin
17
1

utf8_general_ci
17
1

utf8_unicode_ci
17
1

utf8_unicode_ci
18


  • エラー無し


str = 1(全角)

table
id
str

utf8_bin
18

utf8_general_ci
18

utf8_unicode_ci
17
1

utf8_unicode_ci
18


  • エラー無し


str like '高%'

table
id
str

utf8_bin
9
高橋

utf8_general_ci
9
高橋

utf8_unicode_ci
9
高橋


  • エラー無し


str like '髙%'

table
id
str

utf8_bin
10
髙橋

utf8_general_ci
10
髙橋

utf8_unicode_ci
10
髙橋


  • エラー無し


num = 0

table
id
num

utf8_bin
19
0

utf8_general_ci
19
0

utf8_unicode_ci
19
0


  • エラー無し


num = a(半角)

table
id
num

utf8_bin
19
0

utf8_general_ci
19
0

utf8_unicode_ci
19
0

show warnings;

Level
Code
Message

Warning
1292
Truncated incorrect DOUBLE value: 'a'

Warning
1292
Truncated incorrect DOUBLE value: 'a'

Warning
1292
Truncated incorrect DOUBLE value: 'a'


num = A(半角)

table
id
num

utf8_bin
19
0

utf8_general_ci
19
0

utf8_unicode_ci
19
0

show warnings;

Level
Code
Message

Warning
1292
Truncated incorrect DOUBLE value: 'A'

Warning
1292
Truncated incorrect DOUBLE value: 'A'

Warning
1292
Truncated incorrect DOUBLE value: 'A'


num = AAA(半角)

table
id
num

utf8_bin
19
0

utf8_general_ci
19
0

utf8_unicode_ci
19
0

show warnings;

Level
Code
Message --

Warning
1292
Truncated incorrect DOUBLE value: 'AAA'

Warning
1292
Truncated incorrect DOUBLE value: 'AAA'

Warning
1292
Truncated incorrect DOUBLE value: 'AAA'


num = a(全角)

table
id
num

utf8_bin
19
0

utf8_general_ci
19
0

utf8_unicode_ci
19
0

show warnings;

Level
Code
Message --

Warning
1292
Truncated incorrect DOUBLE value: 'a'

Warning
1292
Truncated incorrect DOUBLE value: 'a'

Warning
1292
Truncated incorrect DOUBLE value: 'a'


num = A(全角)

table
id
num

utf8_bin
19
0

utf8_general_ci
19
0

utf8_unicode_ci
19
0

show warnings;

Level
Code
Message --

Warning
1292
Truncated incorrect DOUBLE value: 'A'

Warning
1292
Truncated incorrect DOUBLE value: 'A'

Warning
1292
Truncated incorrect DOUBLE value: 'A'


num like '1%'

table
id
num

utf8_bin
20
1

utf8_bin
22
100

utf8_general_ci
20
1

utf8_general_ci
22
100

utf8_unicode_ci
20
1

utf8_unicode_ci
22
100


  • エラー無し


num like 'a%'


  • マッチせず

  • エラー無し


思ったこと

utf8_bin / utf8_general_ciはそんなに違いは無いのね。

大文字・小文字を判別するかしないか、くらいっぽい。

utf8_unicode_ciはヒトが考えるマッチ条件に似ているかな。

全半角をゴチャッと検索できるのはうれしいかも。システム的には厳密じゃないけど。

理解して使い分けて行かないと思わぬバグを生産しそうだけど、

とっても便利だという事がわかりました。