1. Qiita
  2. 投稿
  3. MySQL

MySQLの照合順序

  • 134
    いいね
  • 2
    コメント
この記事は最終更新日から1年以上が経過しています。

やってみた

MySQLの照合順序、UTF-8の日本語で使える照合順序はいくつかあるけど、
実際にどんなマッチの仕方をするのかわからなかったので、ちょっとやってみた。

ちなみに、MySQLのUTF-8で日本語を扱う場合に使用可能な照合順序はしたの3つ。

  • utf8_bin
  • utf8_general_ci
  • utf8_unicode_ci

確認したかったのは、半角・全角、小文字・大文字がどんな感じでマッチするのかという事。

ながーいので、結果を

  • utf8_bin
    キャラクターコードが完全に一致するもののみマッチする。

  • utf8_general_ci
    アルファベットの大文字・小文字は区別せずにマッチする。
    ただし、区別しない文字は、半角は半角の大文字・小文字、全角は全角の大文字・小文字のみ。
    半角小文字と全角小文字同士はマッチしない。

  • utf8_unicode_ci
    下記がマッチする。

    • アルファベットの大文字・小文字(全半角混合)
    • ひらがなの大文字・小文字とカタカナの大文字・小文字 (例えば、'あ'で検索すると、'ぁ', 'ア', 'ァ'もマッチする)

テーブル定義

テーブル定義はこんな感じ。

ddl.sql
drop table if exists `utf8_bin`;
create table `utf8_bin`
( `id` int(11) NOT NULL auto_increment,
  `str` varchar(255),
  `num` int(11),
  primary key (`id`)
)
engine=InnoDB
default character set utf8 collate utf8_bin
;

drop table if exists `utf8_general_ci`;
create table `utf8_general_ci`
( `id` int(11) NOT NULL auto_increment,
  `str` varchar(255),
  `num` int(11),
  primary key (`id`)
)
engine=InnoDB
default character set utf8 collate utf8_general_ci
;

drop table if exists `utf8_unicode_ci`;
create table `utf8_unicode_ci`
( `id` int(11) NOT NULL auto_increment,
  `str` varchar(255),
  `num` int(11),
  primary key (`id`)
)
engine=InnoDB
default character set utf8 collate utf8_unicode_ci
;

テストデータ

テストデータはこんな感じ。
とりあえず、気になってた文字の比較。

dml_testdata.sql
insert into `utf8_bin`(`str`, `num`)
values
('a', null),
('a', null),
('A', null),
('A', null),
('あ', null),
('ぁ', null),
('ア', null),
('ァ', null),
('高橋', null),
('髙橋', null),
('-', null),
('−', null),
('+', null),
('+', null),
('0', null),
('0', null),
('1', null),
('1', null),
(null, 0),
(null, 1),
(null, 9),
(null,  100)
;

insert into `utf8_general_ci`(`str`, `num`) select `str`, `num` from `utf8_bin`;

insert into `utf8_unicode_ci`(`str`, `num`) select `str`, `num` from `utf8_bin`;

確認してみた

これ、途中で気づいたんだけど、プロシージャで書けばよかった…
思いつきで書き始めたけど、なんか長くなってしまった。

dml_test.sql
select '## str = a(半角)' as '実行内容';
select 'utf8_bin' as 'table', `id`, `str` from `utf8_bin` where `str` = 'a'
union select 'utf8_general_ci' as 'table', `id`, `str` from `utf8_general_ci` where `str` = 'a'
union select 'utf8_unicode_ci' as 'table', `id`, `str` from `utf8_unicode_ci` where `str` = 'a'
;
show warnings;

select '## str = A(半角)' as '実行内容';
select 'utf8_bin' as 'table', `id`, `str` from `utf8_bin` where `str` = 'A'
union select 'utf8_general_ci' as 'table', `id`, `str` from `utf8_general_ci` where `str` = 'A'
union select 'utf8_unicode_ci' as 'table', `id`, `str` from `utf8_unicode_ci` where `str` = 'A'
;
show warnings;

select '## str = a(全角)' as '実行内容';
select 'utf8_bin' as 'table', `id`, `str` from `utf8_bin` where `str` = 'a'
union select 'utf8_general_ci' as 'table', `id`, `str` from `utf8_general_ci` where `str` = 'a'
union select 'utf8_unicode_ci' as 'table', `id`, `str` from `utf8_unicode_ci` where `str` = 'a'
;
show warnings;

select '## str = A(全角)' as '実行内容';
select 'utf8_bin' as 'table', `id`, `str` from `utf8_bin` where `str` = 'A'
union select 'utf8_general_ci' as 'table', `id`, `str` from `utf8_general_ci` where `str` = 'A'
union select 'utf8_unicode_ci' as 'table', `id`, `str` from `utf8_unicode_ci` where `str` = 'A'
;
show warnings;

select '## str = あ(大文字)' as '実行内容';
select 'utf8_bin' as 'table', `id`, `str` from `utf8_bin` where `str` = 'あ'
union select 'utf8_general_ci' as 'table', `id`, `str` from `utf8_general_ci` where `str` = 'あ'
union select 'utf8_unicode_ci' as 'table', `id`, `str` from `utf8_unicode_ci` where `str` = 'あ'
;
show warnings;

select '## str = ぁ(小文字)' as '実行内容';
select 'utf8_bin' as 'table', `id`, `str` from `utf8_bin` where `str` = 'ぁ'
union select 'utf8_general_ci' as 'table', `id`, `str` from `utf8_general_ci` where `str` = 'ぁ'
union select 'utf8_unicode_ci' as 'table', `id`, `str` from `utf8_unicode_ci` where `str` = 'ぁ'
;
show warnings;

select '## str = ア(大文字)' as '実行内容';
select 'utf8_bin' as 'table', `id`, `str` from `utf8_bin` where `str` = 'ア'
union select 'utf8_general_ci' as 'table', `id`, `str` from `utf8_general_ci` where `str` = 'ア'
union select 'utf8_unicode_ci' as 'table', `id`, `str` from `utf8_unicode_ci` where `str` = 'ア'
;
show warnings;

select '## str = ァ(小文字)' as '実行内容';
select 'utf8_bin' as 'table', `id`, `str` from `utf8_bin` where `str` = 'ァ'
union select 'utf8_general_ci' as 'table', `id`, `str` from `utf8_general_ci` where `str` = 'ァ'
union select 'utf8_unicode_ci' as 'table', `id`, `str` from `utf8_unicode_ci` where `str` = 'ァ'
;
show warnings;

select '## str = 高橋' as '実行内容';
select 'utf8_bin' as 'table', `id`, `str` from `utf8_bin` where `str` = '高橋'
union select 'utf8_general_ci' as 'table', `id`, `str` from `utf8_general_ci` where `str` = '高橋'
union select 'utf8_unicode_ci' as 'table', `id`, `str` from `utf8_unicode_ci` where `str` = '高橋'
;
show warnings;

select '## str = 髙橋' as '実行内容';
select 'utf8_bin' as 'table', `id`, `str` from `utf8_bin` where `str` = '髙橋'
union select 'utf8_general_ci' as 'table', `id`, `str` from `utf8_general_ci` where `str` = '髙橋'
union select 'utf8_unicode_ci' as 'table', `id`, `str` from `utf8_unicode_ci` where `str` = '髙橋'
;
show warnings;

select '## str = -(半角)' as '実行内容';
select 'utf8_bin' as 'table', `id`, `str` from `utf8_bin` where `str` = '-'
union select 'utf8_general_ci' as 'table', `id`, `str` from `utf8_general_ci` where `str` = '-'
union select 'utf8_unicode_ci' as 'table', `id`, `str` from `utf8_unicode_ci` where `str` = '-'
;
show warnings;

select '## str = −(全角)' as '実行内容';
select 'utf8_bin' as 'table', `id`, `str` from `utf8_bin` where `str` = '−'
union select 'utf8_general_ci' as 'table', `id`, `str` from `utf8_general_ci` where `str` = '−'
union select 'utf8_unicode_ci' as 'table', `id`, `str` from `utf8_unicode_ci` where `str` = '−'
;
show warnings;

select '## str = +(半角)' as '実行内容';
select 'utf8_bin' as 'table', `id`, `str` from `utf8_bin` where `str` = '+'
union select 'utf8_general_ci' as 'table', `id`, `str` from `utf8_general_ci` where `str` = '+'
union select 'utf8_unicode_ci' as 'table', `id`, `str` from `utf8_unicode_ci` where `str` = '+'
;
show warnings;

select '## str = +(全角)' as '実行内容';
select 'utf8_bin' as 'table', `id`, `str` from `utf8_bin` where `str` = '+'
union select 'utf8_general_ci' as 'table', `id`, `str` from `utf8_general_ci` where `str` = '+'
union select 'utf8_unicode_ci' as 'table', `id`, `str` from `utf8_unicode_ci` where `str` = '+'
;
show warnings;

select '## str = 0(半角)' as '実行内容';
select 'utf8_bin' as 'table', `id`, `str` from `utf8_bin` where `str` = '0'
union select 'utf8_general_ci' as 'table', `id`, `str` from `utf8_general_ci` where `str` = '0'
union select 'utf8_unicode_ci' as 'table', `id`, `str` from `utf8_unicode_ci` where `str` = '0'
;
show warnings;

select '## str = 0(全角)' as '実行内容';
select 'utf8_bin' as 'table', `id`, `str` from `utf8_bin` where `str` = '0'
union select 'utf8_general_ci' as 'table', `id`, `str` from `utf8_general_ci` where `str` = '0'
union select 'utf8_unicode_ci' as 'table', `id`, `str` from `utf8_unicode_ci` where `str` = '0'
;
show warnings;

select '## str = 1(半角)' as '実行内容';
select 'utf8_bin' as 'table', `id`, `str` from `utf8_bin` where `str` = '1'
union select 'utf8_general_ci' as 'table', `id`, `str` from `utf8_general_ci` where `str` = '1'
union select 'utf8_unicode_ci' as 'table', `id`, `str` from `utf8_unicode_ci` where `str` = '1'
;
show warnings;

select '## str = 1(全角)' as '実行内容';
select 'utf8_bin' as 'table', `id`, `str` from `utf8_bin` where `str` = '1'
union select 'utf8_general_ci' as 'table', `id`, `str` from `utf8_general_ci` where `str` = '1'
union select 'utf8_unicode_ci' as 'table', `id`, `str` from `utf8_unicode_ci` where `str` = '1'
;
show warnings;

select '## str like \'%\'' as '実行内容';
select 'utf8_bin' as 'table', `id`, `str` from `utf8_bin` where `str` like '高%'
union select 'utf8_general_ci' as 'table', `id`, `str` from `utf8_general_ci` where `str` like '高%'
union select 'utf8_unicode_ci' as 'table', `id`, `str` from `utf8_unicode_ci` where `str` like '高%'
;
show warnings;

select '## str like \'%\'' as '実行内容';
select 'utf8_bin' as 'table', `id`, `str` from `utf8_bin` where `str` like '髙%'
union select 'utf8_general_ci' as 'table', `id`, `str` from `utf8_general_ci` where `str` like '髙%'
union select 'utf8_unicode_ci' as 'table', `id`, `str` from `utf8_unicode_ci` where `str` like '髙%'
;
show warnings;

select '## num = 0' as '実行内容';
select 'utf8_bin' as 'table', `id`, `num` from `utf8_bin` where `num` = 0
union select 'utf8_general_ci' as 'table', `id`, `num` from `utf8_general_ci` where `num` = 0
union select 'utf8_unicode_ci' as 'table', `id`, `num` from `utf8_unicode_ci` where `num` = 0
;
show warnings;

select '## num = a(半角)' as '実行内容';
select 'utf8_bin' as 'table', `id`, `num` from `utf8_bin` where `num` = 'a'
union select 'utf8_general_ci' as 'table', `id`, `num` from `utf8_general_ci` where `num` = 'a'
union select 'utf8_unicode_ci' as 'table', `id`, `num` from `utf8_unicode_ci` where `num` = 'a'
;
show warnings;

select '## num = A(半角)' as '実行内容';
select 'utf8_bin' as 'table', `id`, `num` from `utf8_bin` where `num` = 'A'
union select 'utf8_general_ci' as 'table', `id`, `num` from `utf8_general_ci` where `num` = 'A'
union select 'utf8_unicode_ci' as 'table', `id`, `num` from `utf8_unicode_ci` where `num` = 'A'
;
show warnings;

select '## num = AAA(半角)' as '実行内容';
select 'utf8_bin' as 'table', `id`, `num` from `utf8_bin` where `num` = 'AAA'
union select 'utf8_general_ci' as 'table', `id`, `num` from `utf8_general_ci` where `num` = 'AAA'
union select 'utf8_unicode_ci' as 'table', `id`, `num` from `utf8_unicode_ci` where `num` = 'AAA'
;
show warnings;

select '## num = a(全角)' as '実行内容';
select 'utf8_bin' as 'table', `id`, `num` from `utf8_bin` where `num` = 'a'
union select 'utf8_general_ci' as 'table', `id`, `num` from `utf8_general_ci` where `num` = 'a'
union select 'utf8_unicode_ci' as 'table', `id`, `num` from `utf8_unicode_ci` where `num` = 'a'
;
show warnings;

select '## num = A(全角)' as '実行内容';
select 'utf8_bin' as 'table', `id`, `num` from `utf8_bin` where `num` = 'A'
union select 'utf8_general_ci' as 'table', `id`, `num` from `utf8_general_ci` where `num` = 'A'
union select 'utf8_unicode_ci' as 'table', `id`, `num` from `utf8_unicode_ci` where `num` = 'A'
;
show warnings;

select '## num like \'1%\'' as '実行内容';
select 'utf8_bin' as 'table', `id`, `num` from `utf8_bin` where `num` like '1%'
union select 'utf8_general_ci' as 'table', `id`, `num` from `utf8_general_ci` where `num` like '1%'
union select 'utf8_unicode_ci' as 'table', `id`, `num` from `utf8_unicode_ci` where `num` like '1%'
;
show warnings;

select '## num like \'a%\'' as '実行内容';
select 'utf8_bin' as 'table', `id`, `num` from `utf8_bin` where `num` like 'a%'
union select 'utf8_general_ci' as 'table', `id`, `num` from `utf8_general_ci` where `num` like 'a%'
union select 'utf8_unicode_ci' as 'table', `id`, `num` from `utf8_unicode_ci` where `num` like 'a%'
;
show warnings;

テスト結果

これもまただらだらいきます(;´∀`)

str = a(半角)

table id str
utf8_bin 1 a
utf8_general_ci 1 a
utf8_general_ci 3 A
utf8_unicode_ci 1 a
utf8_unicode_ci 2
utf8_unicode_ci 3 A
utf8_unicode_ci 4
  • エラー無し

str = A(半角)

table id str
utf8_bin 3 A
utf8_general_ci 1 a
utf8_general_ci 3 A
utf8_unicode_ci 1 a
utf8_unicode_ci 2
utf8_unicode_ci 3 A
utf8_unicode_ci 4
  • エラー無し

str = a(全角)

table id str
utf8_bin 2
utf8_general_ci 2
utf8_general_ci 4
utf8_unicode_ci 1 a
utf8_unicode_ci 2
utf8_unicode_ci 3 A
utf8_unicode_ci 4
  • エラー無し

str = A(全角)

table id str
utf8_bin 4
utf8_general_ci 2
utf8_general_ci 4
utf8_unicode_ci 1 a
utf8_unicode_ci 2
utf8_unicode_ci 3 A
utf8_unicode_ci 4
  • エラー無し

str = あ(大文字)

table id str
utf8_bin 5
utf8_general_ci 5
utf8_unicode_ci 5
utf8_unicode_ci 6
utf8_unicode_ci 7
utf8_unicode_ci 8
  • エラー無し

str = ぁ(小文字)

table id str
utf8_bin 6
utf8_general_ci 6
utf8_unicode_ci 5
utf8_unicode_ci 6
utf8_unicode_ci 7
utf8_unicode_ci 8
  • エラー無し

str = ア(大文字)

table id str
utf8_bin 7
utf8_general_ci 7
utf8_unicode_ci 5
utf8_unicode_ci 6
utf8_unicode_ci 7
utf8_unicode_ci 8
  • エラー無し

str = ァ(小文字)

table id str
utf8_bin 8
utf8_general_ci 8
utf8_unicode_ci 5
utf8_unicode_ci 6
utf8_unicode_ci 7
utf8_unicode_ci 8
  • エラー無し

str = 高橋

table id str
utf8_bin 9 高橋
utf8_general_ci 9 高橋
utf8_unicode_ci 9 高橋
  • エラー無し

str = 髙橋

table id str
utf8_bin 10 髙橋
utf8_general_ci 10 髙橋
utf8_unicode_ci 10 髙橋
  • エラー無し

str = -(半角)

table id str
utf8_bin 11 -
utf8_general_ci 11 -
utf8_unicode_ci 11 -
  • エラー無し

str = −(全角)

table id str
utf8_bin 12
utf8_general_ci 12
utf8_unicode_ci 12
  • エラー無し

str = +(半角)

table id str
utf8_bin 13 +
utf8_general_ci 13 +
utf8_unicode_ci 13 +
utf8_unicode_ci 14
  • エラー無し

str = +(全角)

table id str
utf8_bin 14
utf8_general_ci 14
utf8_unicode_ci 13 +
utf8_unicode_ci 14
  • エラー無し

str = 0(半角)

table id str
utf8_bin 15 0
utf8_general_ci 15 0
utf8_unicode_ci 15 0
utf8_unicode_ci 16
  • エラー無し

str = 0(全角)

table id str
utf8_bin 16
utf8_general_ci 16
utf8_unicode_ci 15 0
utf8_unicode_ci 16
  • エラー無し

str = 1(半角)

table id str
utf8_bin 17 1
utf8_general_ci 17 1
utf8_unicode_ci 17 1
utf8_unicode_ci 18
  • エラー無し

str = 1(全角)

table id str
utf8_bin 18
utf8_general_ci 18
utf8_unicode_ci 17 1
utf8_unicode_ci 18
  • エラー無し

str like '高%'

table id str
utf8_bin 9 高橋
utf8_general_ci 9 高橋
utf8_unicode_ci 9 高橋
  • エラー無し

str like '髙%'

table id str
utf8_bin 10 髙橋
utf8_general_ci 10 髙橋
utf8_unicode_ci 10 髙橋
  • エラー無し

num = 0

table id num
utf8_bin 19 0
utf8_general_ci 19 0
utf8_unicode_ci 19 0
  • エラー無し

num = a(半角)

table id num
utf8_bin 19 0
utf8_general_ci 19 0
utf8_unicode_ci 19 0

show warnings;

Level Code Message
Warning 1292 Truncated incorrect DOUBLE value: 'a'
Warning 1292 Truncated incorrect DOUBLE value: 'a'
Warning 1292 Truncated incorrect DOUBLE value: 'a'

num = A(半角)

table id num
utf8_bin 19 0
utf8_general_ci 19 0
utf8_unicode_ci 19 0

show warnings;

Level Code Message
Warning 1292 Truncated incorrect DOUBLE value: 'A'
Warning 1292 Truncated incorrect DOUBLE value: 'A'
Warning 1292 Truncated incorrect DOUBLE value: 'A'

num = AAA(半角)

table id num
utf8_bin 19 0
utf8_general_ci 19 0
utf8_unicode_ci 19 0

show warnings;

Level Code Message --
Warning 1292 Truncated incorrect DOUBLE value: 'AAA'
Warning 1292 Truncated incorrect DOUBLE value: 'AAA'
Warning 1292 Truncated incorrect DOUBLE value: 'AAA'

num = a(全角)

table id num
utf8_bin 19 0
utf8_general_ci 19 0
utf8_unicode_ci 19 0

show warnings;

Level Code Message --
Warning 1292 Truncated incorrect DOUBLE value: 'a'
Warning 1292 Truncated incorrect DOUBLE value: 'a'
Warning 1292 Truncated incorrect DOUBLE value: 'a'

num = A(全角)

table id num
utf8_bin 19 0
utf8_general_ci 19 0
utf8_unicode_ci 19 0

show warnings;

Level Code Message --
Warning 1292 Truncated incorrect DOUBLE value: 'A'
Warning 1292 Truncated incorrect DOUBLE value: 'A'
Warning 1292 Truncated incorrect DOUBLE value: 'A'

num like '1%'

table id num
utf8_bin 20 1
utf8_bin 22 100
utf8_general_ci 20 1
utf8_general_ci 22 100
utf8_unicode_ci 20 1
utf8_unicode_ci 22 100
  • エラー無し

num like 'a%'

  • マッチせず
  • エラー無し

思ったこと

utf8_bin / utf8_general_ciはそんなに違いは無いのね。
大文字・小文字を判別するかしないか、くらいっぽい。

utf8_unicode_ciはヒトが考えるマッチ条件に似ているかな。
全半角をゴチャッと検索できるのはうれしいかも。システム的には厳密じゃないけど。

理解して使い分けて行かないと思わぬバグを生産しそうだけど、
とっても便利だという事がわかりました。