LoginSignup
177
162

More than 5 years have passed since last update.

MySQLの照合順序

Last updated at Posted at 2013-09-11

やってみた

MySQLの照合順序、UTF-8の日本語で使える照合順序はいくつかあるけど、
実際にどんなマッチの仕方をするのかわからなかったので、ちょっとやってみた。

ちなみに、MySQLのUTF-8で日本語を扱う場合に使用可能な照合順序はしたの3つ。

  • utf8_bin
  • utf8_general_ci
  • utf8_unicode_ci

確認したかったのは、半角・全角、小文字・大文字がどんな感じでマッチするのかという事。

ながーいので、結果を

  • utf8_bin
    キャラクターコードが完全に一致するもののみマッチする。

  • utf8_general_ci
    アルファベットの大文字・小文字は区別せずにマッチする。
    ただし、区別しない文字は、半角は半角の大文字・小文字、全角は全角の大文字・小文字のみ。
    半角小文字と全角小文字同士はマッチしない。

  • utf8_unicode_ci
    下記がマッチする。

    • アルファベットの大文字・小文字(全半角混合)
    • ひらがなの大文字・小文字とカタカナの大文字・小文字 (例えば、'あ'で検索すると、'ぁ', 'ア', 'ァ'もマッチする)

テーブル定義

テーブル定義はこんな感じ。

ddl.sql
drop table if exists `utf8_bin`;
create table `utf8_bin`
( `id` int(11) NOT NULL auto_increment,
  `str` varchar(255),
  `num` int(11),
  primary key (`id`)
)
engine=InnoDB
default character set utf8 collate utf8_bin
;

drop table if exists `utf8_general_ci`;
create table `utf8_general_ci`
( `id` int(11) NOT NULL auto_increment,
  `str` varchar(255),
  `num` int(11),
  primary key (`id`)
)
engine=InnoDB
default character set utf8 collate utf8_general_ci
;

drop table if exists `utf8_unicode_ci`;
create table `utf8_unicode_ci`
( `id` int(11) NOT NULL auto_increment,
  `str` varchar(255),
  `num` int(11),
  primary key (`id`)
)
engine=InnoDB
default character set utf8 collate utf8_unicode_ci
;

テストデータ

テストデータはこんな感じ。
とりあえず、気になってた文字の比較。

dml_testdata.sql
insert into `utf8_bin`(`str`, `num`)
values
('a', null),
('a', null),
('A', null),
('A', null),
('あ', null),
('ぁ', null),
('ア', null),
('ァ', null),
('高橋', null),
('髙橋', null),
('-', null),
('−', null),
('+', null),
('+', null),
('0', null),
('0', null),
('1', null),
('1', null),
(null, 0),
(null, 1),
(null, 9),
(null,  100)
;

insert into `utf8_general_ci`(`str`, `num`) select `str`, `num` from `utf8_bin`;

insert into `utf8_unicode_ci`(`str`, `num`) select `str`, `num` from `utf8_bin`;

確認してみた

これ、途中で気づいたんだけど、プロシージャで書けばよかった…
思いつきで書き始めたけど、なんか長くなってしまった。

dml_test.sql
select '## str = a(半角)' as '実行内容';
select 'utf8_bin' as 'table', `id`, `str` from `utf8_bin` where `str` = 'a'
union select 'utf8_general_ci' as 'table', `id`, `str` from `utf8_general_ci` where `str` = 'a'
union select 'utf8_unicode_ci' as 'table', `id`, `str` from `utf8_unicode_ci` where `str` = 'a'
;
show warnings;

select '## str = A(半角)' as '実行内容';
select 'utf8_bin' as 'table', `id`, `str` from `utf8_bin` where `str` = 'A'
union select 'utf8_general_ci' as 'table', `id`, `str` from `utf8_general_ci` where `str` = 'A'
union select 'utf8_unicode_ci' as 'table', `id`, `str` from `utf8_unicode_ci` where `str` = 'A'
;
show warnings;

select '## str = a(全角)' as '実行内容';
select 'utf8_bin' as 'table', `id`, `str` from `utf8_bin` where `str` = 'a'
union select 'utf8_general_ci' as 'table', `id`, `str` from `utf8_general_ci` where `str` = 'a'
union select 'utf8_unicode_ci' as 'table', `id`, `str` from `utf8_unicode_ci` where `str` = 'a'
;
show warnings;

select '## str = A(全角)' as '実行内容';
select 'utf8_bin' as 'table', `id`, `str` from `utf8_bin` where `str` = 'A'
union select 'utf8_general_ci' as 'table', `id`, `str` from `utf8_general_ci` where `str` = 'A'
union select 'utf8_unicode_ci' as 'table', `id`, `str` from `utf8_unicode_ci` where `str` = 'A'
;
show warnings;

select '## str = あ(大文字)' as '実行内容';
select 'utf8_bin' as 'table', `id`, `str` from `utf8_bin` where `str` = 'あ'
union select 'utf8_general_ci' as 'table', `id`, `str` from `utf8_general_ci` where `str` = 'あ'
union select 'utf8_unicode_ci' as 'table', `id`, `str` from `utf8_unicode_ci` where `str` = 'あ'
;
show warnings;

select '## str = ぁ(小文字)' as '実行内容';
select 'utf8_bin' as 'table', `id`, `str` from `utf8_bin` where `str` = 'ぁ'
union select 'utf8_general_ci' as 'table', `id`, `str` from `utf8_general_ci` where `str` = 'ぁ'
union select 'utf8_unicode_ci' as 'table', `id`, `str` from `utf8_unicode_ci` where `str` = 'ぁ'
;
show warnings;

select '## str = ア(大文字)' as '実行内容';
select 'utf8_bin' as 'table', `id`, `str` from `utf8_bin` where `str` = 'ア'
union select 'utf8_general_ci' as 'table', `id`, `str` from `utf8_general_ci` where `str` = 'ア'
union select 'utf8_unicode_ci' as 'table', `id`, `str` from `utf8_unicode_ci` where `str` = 'ア'
;
show warnings;

select '## str = ァ(小文字)' as '実行内容';
select 'utf8_bin' as 'table', `id`, `str` from `utf8_bin` where `str` = 'ァ'
union select 'utf8_general_ci' as 'table', `id`, `str` from `utf8_general_ci` where `str` = 'ァ'
union select 'utf8_unicode_ci' as 'table', `id`, `str` from `utf8_unicode_ci` where `str` = 'ァ'
;
show warnings;

select '## str = 高橋' as '実行内容';
select 'utf8_bin' as 'table', `id`, `str` from `utf8_bin` where `str` = '高橋'
union select 'utf8_general_ci' as 'table', `id`, `str` from `utf8_general_ci` where `str` = '高橋'
union select 'utf8_unicode_ci' as 'table', `id`, `str` from `utf8_unicode_ci` where `str` = '高橋'
;
show warnings;

select '## str = 髙橋' as '実行内容';
select 'utf8_bin' as 'table', `id`, `str` from `utf8_bin` where `str` = '髙橋'
union select 'utf8_general_ci' as 'table', `id`, `str` from `utf8_general_ci` where `str` = '髙橋'
union select 'utf8_unicode_ci' as 'table', `id`, `str` from `utf8_unicode_ci` where `str` = '髙橋'
;
show warnings;

select '## str = -(半角)' as '実行内容';
select 'utf8_bin' as 'table', `id`, `str` from `utf8_bin` where `str` = '-'
union select 'utf8_general_ci' as 'table', `id`, `str` from `utf8_general_ci` where `str` = '-'
union select 'utf8_unicode_ci' as 'table', `id`, `str` from `utf8_unicode_ci` where `str` = '-'
;
show warnings;

select '## str = −(全角)' as '実行内容';
select 'utf8_bin' as 'table', `id`, `str` from `utf8_bin` where `str` = '−'
union select 'utf8_general_ci' as 'table', `id`, `str` from `utf8_general_ci` where `str` = '−'
union select 'utf8_unicode_ci' as 'table', `id`, `str` from `utf8_unicode_ci` where `str` = '−'
;
show warnings;

select '## str = +(半角)' as '実行内容';
select 'utf8_bin' as 'table', `id`, `str` from `utf8_bin` where `str` = '+'
union select 'utf8_general_ci' as 'table', `id`, `str` from `utf8_general_ci` where `str` = '+'
union select 'utf8_unicode_ci' as 'table', `id`, `str` from `utf8_unicode_ci` where `str` = '+'
;
show warnings;

select '## str = +(全角)' as '実行内容';
select 'utf8_bin' as 'table', `id`, `str` from `utf8_bin` where `str` = '+'
union select 'utf8_general_ci' as 'table', `id`, `str` from `utf8_general_ci` where `str` = '+'
union select 'utf8_unicode_ci' as 'table', `id`, `str` from `utf8_unicode_ci` where `str` = '+'
;
show warnings;

select '## str = 0(半角)' as '実行内容';
select 'utf8_bin' as 'table', `id`, `str` from `utf8_bin` where `str` = '0'
union select 'utf8_general_ci' as 'table', `id`, `str` from `utf8_general_ci` where `str` = '0'
union select 'utf8_unicode_ci' as 'table', `id`, `str` from `utf8_unicode_ci` where `str` = '0'
;
show warnings;

select '## str = 0(全角)' as '実行内容';
select 'utf8_bin' as 'table', `id`, `str` from `utf8_bin` where `str` = '0'
union select 'utf8_general_ci' as 'table', `id`, `str` from `utf8_general_ci` where `str` = '0'
union select 'utf8_unicode_ci' as 'table', `id`, `str` from `utf8_unicode_ci` where `str` = '0'
;
show warnings;

select '## str = 1(半角)' as '実行内容';
select 'utf8_bin' as 'table', `id`, `str` from `utf8_bin` where `str` = '1'
union select 'utf8_general_ci' as 'table', `id`, `str` from `utf8_general_ci` where `str` = '1'
union select 'utf8_unicode_ci' as 'table', `id`, `str` from `utf8_unicode_ci` where `str` = '1'
;
show warnings;

select '## str = 1(全角)' as '実行内容';
select 'utf8_bin' as 'table', `id`, `str` from `utf8_bin` where `str` = '1'
union select 'utf8_general_ci' as 'table', `id`, `str` from `utf8_general_ci` where `str` = '1'
union select 'utf8_unicode_ci' as 'table', `id`, `str` from `utf8_unicode_ci` where `str` = '1'
;
show warnings;

select '## str like \'高%\'' as '実行内容';
select 'utf8_bin' as 'table', `id`, `str` from `utf8_bin` where `str` like '高%'
union select 'utf8_general_ci' as 'table', `id`, `str` from `utf8_general_ci` where `str` like '高%'
union select 'utf8_unicode_ci' as 'table', `id`, `str` from `utf8_unicode_ci` where `str` like '高%'
;
show warnings;

select '## str like \'髙%\'' as '実行内容';
select 'utf8_bin' as 'table', `id`, `str` from `utf8_bin` where `str` like '髙%'
union select 'utf8_general_ci' as 'table', `id`, `str` from `utf8_general_ci` where `str` like '髙%'
union select 'utf8_unicode_ci' as 'table', `id`, `str` from `utf8_unicode_ci` where `str` like '髙%'
;
show warnings;

select '## num = 0' as '実行内容';
select 'utf8_bin' as 'table', `id`, `num` from `utf8_bin` where `num` = 0
union select 'utf8_general_ci' as 'table', `id`, `num` from `utf8_general_ci` where `num` = 0
union select 'utf8_unicode_ci' as 'table', `id`, `num` from `utf8_unicode_ci` where `num` = 0
;
show warnings;

select '## num = a(半角)' as '実行内容';
select 'utf8_bin' as 'table', `id`, `num` from `utf8_bin` where `num` = 'a'
union select 'utf8_general_ci' as 'table', `id`, `num` from `utf8_general_ci` where `num` = 'a'
union select 'utf8_unicode_ci' as 'table', `id`, `num` from `utf8_unicode_ci` where `num` = 'a'
;
show warnings;

select '## num = A(半角)' as '実行内容';
select 'utf8_bin' as 'table', `id`, `num` from `utf8_bin` where `num` = 'A'
union select 'utf8_general_ci' as 'table', `id`, `num` from `utf8_general_ci` where `num` = 'A'
union select 'utf8_unicode_ci' as 'table', `id`, `num` from `utf8_unicode_ci` where `num` = 'A'
;
show warnings;

select '## num = AAA(半角)' as '実行内容';
select 'utf8_bin' as 'table', `id`, `num` from `utf8_bin` where `num` = 'AAA'
union select 'utf8_general_ci' as 'table', `id`, `num` from `utf8_general_ci` where `num` = 'AAA'
union select 'utf8_unicode_ci' as 'table', `id`, `num` from `utf8_unicode_ci` where `num` = 'AAA'
;
show warnings;

select '## num = a(全角)' as '実行内容';
select 'utf8_bin' as 'table', `id`, `num` from `utf8_bin` where `num` = 'a'
union select 'utf8_general_ci' as 'table', `id`, `num` from `utf8_general_ci` where `num` = 'a'
union select 'utf8_unicode_ci' as 'table', `id`, `num` from `utf8_unicode_ci` where `num` = 'a'
;
show warnings;

select '## num = A(全角)' as '実行内容';
select 'utf8_bin' as 'table', `id`, `num` from `utf8_bin` where `num` = 'A'
union select 'utf8_general_ci' as 'table', `id`, `num` from `utf8_general_ci` where `num` = 'A'
union select 'utf8_unicode_ci' as 'table', `id`, `num` from `utf8_unicode_ci` where `num` = 'A'
;
show warnings;

select '## num like \'1%\'' as '実行内容';
select 'utf8_bin' as 'table', `id`, `num` from `utf8_bin` where `num` like '1%'
union select 'utf8_general_ci' as 'table', `id`, `num` from `utf8_general_ci` where `num` like '1%'
union select 'utf8_unicode_ci' as 'table', `id`, `num` from `utf8_unicode_ci` where `num` like '1%'
;
show warnings;

select '## num like \'a%\'' as '実行内容';
select 'utf8_bin' as 'table', `id`, `num` from `utf8_bin` where `num` like 'a%'
union select 'utf8_general_ci' as 'table', `id`, `num` from `utf8_general_ci` where `num` like 'a%'
union select 'utf8_unicode_ci' as 'table', `id`, `num` from `utf8_unicode_ci` where `num` like 'a%'
;
show warnings;

テスト結果

これもまただらだらいきます(;´∀`)

str = a(半角)

table id str
utf8_bin 1 a
utf8_general_ci 1 a
utf8_general_ci 3 A
utf8_unicode_ci 1 a
utf8_unicode_ci 2
utf8_unicode_ci 3 A
utf8_unicode_ci 4
  • エラー無し

str = A(半角)

table id str
utf8_bin 3 A
utf8_general_ci 1 a
utf8_general_ci 3 A
utf8_unicode_ci 1 a
utf8_unicode_ci 2
utf8_unicode_ci 3 A
utf8_unicode_ci 4
  • エラー無し

str = a(全角)

table id str
utf8_bin 2
utf8_general_ci 2
utf8_general_ci 4
utf8_unicode_ci 1 a
utf8_unicode_ci 2
utf8_unicode_ci 3 A
utf8_unicode_ci 4
  • エラー無し

str = A(全角)

table id str
utf8_bin 4
utf8_general_ci 2
utf8_general_ci 4
utf8_unicode_ci 1 a
utf8_unicode_ci 2
utf8_unicode_ci 3 A
utf8_unicode_ci 4
  • エラー無し

str = あ(大文字)

table id str
utf8_bin 5
utf8_general_ci 5
utf8_unicode_ci 5
utf8_unicode_ci 6
utf8_unicode_ci 7
utf8_unicode_ci 8
  • エラー無し

str = ぁ(小文字)

table id str
utf8_bin 6
utf8_general_ci 6
utf8_unicode_ci 5
utf8_unicode_ci 6
utf8_unicode_ci 7
utf8_unicode_ci 8
  • エラー無し

str = ア(大文字)

table id str
utf8_bin 7
utf8_general_ci 7
utf8_unicode_ci 5
utf8_unicode_ci 6
utf8_unicode_ci 7
utf8_unicode_ci 8
  • エラー無し

str = ァ(小文字)

table id str
utf8_bin 8
utf8_general_ci 8
utf8_unicode_ci 5
utf8_unicode_ci 6
utf8_unicode_ci 7
utf8_unicode_ci 8
  • エラー無し

str = 高橋

table id str
utf8_bin 9 高橋
utf8_general_ci 9 高橋
utf8_unicode_ci 9 高橋
  • エラー無し

str = 髙橋

table id str
utf8_bin 10 髙橋
utf8_general_ci 10 髙橋
utf8_unicode_ci 10 髙橋
  • エラー無し

str = -(半角)

table id str
utf8_bin 11 -
utf8_general_ci 11 -
utf8_unicode_ci 11 -
  • エラー無し

str = −(全角)

table id str
utf8_bin 12
utf8_general_ci 12
utf8_unicode_ci 12
  • エラー無し

str = +(半角)

table id str
utf8_bin 13 +
utf8_general_ci 13 +
utf8_unicode_ci 13 +
utf8_unicode_ci 14
  • エラー無し

str = +(全角)

table id str
utf8_bin 14
utf8_general_ci 14
utf8_unicode_ci 13 +
utf8_unicode_ci 14
  • エラー無し

str = 0(半角)

table id str
utf8_bin 15 0
utf8_general_ci 15 0
utf8_unicode_ci 15 0
utf8_unicode_ci 16
  • エラー無し

str = 0(全角)

table id str
utf8_bin 16
utf8_general_ci 16
utf8_unicode_ci 15 0
utf8_unicode_ci 16
  • エラー無し

str = 1(半角)

table id str
utf8_bin 17 1
utf8_general_ci 17 1
utf8_unicode_ci 17 1
utf8_unicode_ci 18
  • エラー無し

str = 1(全角)

table id str
utf8_bin 18
utf8_general_ci 18
utf8_unicode_ci 17 1
utf8_unicode_ci 18
  • エラー無し

str like '高%'

table id str
utf8_bin 9 高橋
utf8_general_ci 9 高橋
utf8_unicode_ci 9 高橋
  • エラー無し

str like '髙%'

table id str
utf8_bin 10 髙橋
utf8_general_ci 10 髙橋
utf8_unicode_ci 10 髙橋
  • エラー無し

num = 0

table id num
utf8_bin 19 0
utf8_general_ci 19 0
utf8_unicode_ci 19 0
  • エラー無し

num = a(半角)

table id num
utf8_bin 19 0
utf8_general_ci 19 0
utf8_unicode_ci 19 0

show warnings;

Level Code Message
Warning 1292 Truncated incorrect DOUBLE value: 'a'
Warning 1292 Truncated incorrect DOUBLE value: 'a'
Warning 1292 Truncated incorrect DOUBLE value: 'a'

num = A(半角)

table id num
utf8_bin 19 0
utf8_general_ci 19 0
utf8_unicode_ci 19 0

show warnings;

Level Code Message
Warning 1292 Truncated incorrect DOUBLE value: 'A'
Warning 1292 Truncated incorrect DOUBLE value: 'A'
Warning 1292 Truncated incorrect DOUBLE value: 'A'

num = AAA(半角)

table id num
utf8_bin 19 0
utf8_general_ci 19 0
utf8_unicode_ci 19 0

show warnings;

Level Code Message --
Warning 1292 Truncated incorrect DOUBLE value: 'AAA'
Warning 1292 Truncated incorrect DOUBLE value: 'AAA'
Warning 1292 Truncated incorrect DOUBLE value: 'AAA'

num = a(全角)

table id num
utf8_bin 19 0
utf8_general_ci 19 0
utf8_unicode_ci 19 0

show warnings;

Level Code Message --
Warning 1292 Truncated incorrect DOUBLE value: 'a'
Warning 1292 Truncated incorrect DOUBLE value: 'a'
Warning 1292 Truncated incorrect DOUBLE value: 'a'

num = A(全角)

table id num
utf8_bin 19 0
utf8_general_ci 19 0
utf8_unicode_ci 19 0

show warnings;

Level Code Message --
Warning 1292 Truncated incorrect DOUBLE value: 'A'
Warning 1292 Truncated incorrect DOUBLE value: 'A'
Warning 1292 Truncated incorrect DOUBLE value: 'A'

num like '1%'

table id num
utf8_bin 20 1
utf8_bin 22 100
utf8_general_ci 20 1
utf8_general_ci 22 100
utf8_unicode_ci 20 1
utf8_unicode_ci 22 100
  • エラー無し

num like 'a%'

  • マッチせず
  • エラー無し

思ったこと

utf8_bin / utf8_general_ciはそんなに違いは無いのね。
大文字・小文字を判別するかしないか、くらいっぽい。

utf8_unicode_ciはヒトが考えるマッチ条件に似ているかな。
全半角をゴチャッと検索できるのはうれしいかも。システム的には厳密じゃないけど。

理解して使い分けて行かないと思わぬバグを生産しそうだけど、
とっても便利だという事がわかりました。

177
162
2

Register as a new user and use Qiita more conveniently

  1. You get articles that match your needs
  2. You can efficiently read back useful information
  3. You can use dark theme
What you can do with signing up
177
162