MySQL の utf8 な varchar の 85 と 86 の境とか nullable のサイズとか

MySQL

Posted at 2018-08-16

MySQL 8.0.11
ENGINE=InnoDB
ROW_FORMAT=Dynamic
CHARSET=utf8

MySQL で utf8 な varchar の 85 と 86 は、バイト数だと 85*3 = 255 86*3 = 258 なので、フィールド長のためのサイズが 1 バイトか 2 バイトかの境があります。

しかし、いままできちんと検証したことがなかったので確かめてみました。

まず、次のように varbinary で長い列を持つテーブルを作ります。65534 以上の長い列だと失敗します。これはバイト数に 2 バイト必要なので 65534 だと 65534+2 で 65536 となり、MySQL のレコード長の上限を超えてしまうためです。

drop table if exists t;
create table t (c varbinary(65534) not null);
/* ERROR 1118 (42000): Row size too large. The maximum row size for the used table type, not counting BLOBs, is 65535. This includes storage overhead, check the manual. You have to change some columns to TEXT or BLOBs */

create table t (c varbinary(65533) not null);
/* Query OK, 0 rows affected (0.13 sec) */

次に varchar(85) を含むテーブルを次の通りに作成してみます。

drop table if exists t;
create table t (v varchar(85) not null,c varbinary(65278) not null);
/* ERROR 1118 (42000): Row size too large. The maximum row size for the used table type, not counting BLOBs, is 65535. This includes storage overhead, check the manual. You have to change some columns to TEXT or BLOBs */

create table t (v varchar(85) not null,c varbinary(65277) not null);
/* Query OK, 0 rows affected (0.09 sec) */

それぞれ次通りにレコード長が計算できます。

前者は 85 * 3 + 1 + 65278 + 2 = 65536
後者は 85 * 3 + 1 + 65277 + 2 = 65535

なので前者は失敗します。

次に varchar(86) を含むテーブルを次の通りに作成してみます。

drop table t;
create table t (v varchar(86) not null,c varbinary(65274) not null);
/* ERROR 1118 (42000): Row size too large. The maximum row size for the used table type, not counting BLOBs, is 65535. This includes storage overhead, check the manual. You have to change some columns to TEXT or BLOBs */

create table t (v varchar(86) not null,c varbinary(65273) not null);
/* Query OK, 0 rows affected (0.19 sec) */

varchar(86) だとフィールド長のために 2 バイト必要になるので次の通り計算になります。

前者は 86 * 3 + 2 + 65274 + 2 = 65536
後者は 86 * 3 + 2 + 65273 + 2 = 65535

よって、予想通り前者は失敗しています。

ので、utf8 の varchar だと 85 と 86 でフィールド長のための領域が 1 バイトか 2 バイトかという堺があります。

utf8mb4 では

255/4 = 63 なので 63 と 64 に境があります。

drop table if exists t;
create table t (v varchar(63) charset utf8mb4 not null,c varbinary(65281) not null);
/* ERROR 1118 (42000): Row size too large. The maximum row size for the used table type, not counting BLOBs, is 65535. This includes storage overhead, check the manual. You have to change some columns to TEXT or BLOBs */

create table t (v varchar(63) charset utf8mb4 not null,c varbinary(65280) not null);
/* Query OK, 0 rows affected (0.21 sec) */

drop table if exists t;
create table t (v varchar(64) charset utf8mb4 not null,c varbinary(65276) not null);
/* ERROR 1118 (42000): Row size too large. The maximum row size for the used table type, not counting BLOBs, is 65535. This includes storage overhead, check the manual. You have to change some columns to TEXT or BLOBs */

create table t (v varchar(64) charset utf8mb4 not null,c varbinary(65275) not null);
/* Query OK, 0 rows affected (0.11 sec) */

63 * 4 + 1 + 65281 + 2 = 65536
63 * 4 + 1 + 65280 + 2 = 65535
64 * 4 + 2 + 65276 + 2 = 65536
64 * 4 + 2 + 65275 + 2 = 65535

ので、なんとなくキリが良い？からと言って varchar(64) とかしていると varchar(63) と比べて常に１バイト余分に必要になってしまっているかも？

nullable

列が nullable だと 1 ビット余分に必要です。

drop table if exists t;
create table t (c varbinary(65533));
-- ERROR 1118 (42000): Row size too large. The maximum row size for the used table type, not counting BLOBs, is 65535. This includes storage overhead, check the manual. You have to change some columns to TEXT or BLOBs

create table t (c varbinary(65533) not null);
-- Query OK, 0 rows affected (0.13 sec)

1 バイトではなく 1 ビットで、バイト単位の端数は切り上げです。

drop table if exists t;
create table t (
    v1 varbinary(8190) not null,
    v2 varbinary(8190) not null,
    v3 varbinary(8190) not null,
    v4 varbinary(8190) not null,
    v5 varbinary(8190) not null,
    v6 varbinary(8190) not null,
    v7 varbinary(8190) not null,
    v8 varbinary(8190) not null
);
-- ERROR: 8190+2 + 8190+2 + 8190+2 + 8190+2 + 8190+2 + 8190+2 + 8190+2 + 8190+2 = 65536

drop table if exists t;
create table t (
    v1 varbinary(8190) not null,
    v2 varbinary(8190) not null,
    v3 varbinary(8190) not null,
    v4 varbinary(8190) not null,
    v5 varbinary(8190) not null,
    v6 varbinary(8190) not null,
    v7 varbinary(8190) not null,
    v8 varbinary(8189) not null
);
-- OK: 8190+2 + 8190+2 + 8190+2 + 8190+2 + 8190+2 + 8190+2 + 8190+2 + 8189+2 = 65535

drop table if exists t;
create table t (
    v1 varbinary(8190) not null,
    v2 varbinary(8190) not null,
    v3 varbinary(8190) not null,
    v4 varbinary(8190) not null,
    v5 varbinary(8190) not null,
    v6 varbinary(8190) not null,
    v7 varbinary(8190) not null,
    v8 varbinary(8189) null
);
-- ERROR: 8190+2 + 8190+2 + 8190+2 + 8190+2 + 8190+2 + 8190+2 + 8190+2 + 8189+2 + 1/8 = 65535+1/8

drop table if exists t;
create table t (
    v1 varbinary(8190) null,
    v2 varbinary(8190) null,
    v3 varbinary(8190) null,
    v4 varbinary(8190) null,
    v5 varbinary(8190) null,
    v6 varbinary(8190) null,
    v7 varbinary(8190) null,
    v8 varbinary(8188) null
);
-- OK: 8190+2 + 8190+2 + 8190+2 + 8190+2 + 8190+2 + 8190+2 + 8190+2 + 8188+2 + 8/8 = 65534+8/8

列拡張や縮小時の ALTER の速度

雑にでかめのテーブルを作ります。

drop table if exists t;
create table t (id int not null primary key auto_increment, v varchar(1) not null);
insert into t (v) value ('x');

insert into t (v) select v from t;
insert into t (v) select v from t;
:
insert into t (v) select v from t;
insert into t (v) select v from t;
-- Query OK, 524288 rows affected (15.29 sec)
-- Records: 524288  Duplicates: 0  Warnings: 0

フィールド長のサイズが変わらない範囲であればレコードはそのままなので早いです。

alter table t modify v varchar(2) not null;
-- Query OK, 0 rows affected (0.08 sec)
-- Records: 0  Duplicates: 0  Warnings: 0

alter table t change v v varchar(85) not null;
-- Query OK, 0 rows affected (0.06 sec)
-- Records: 0  Duplicates: 0  Warnings: 0

ただし縮小するのは駄目なようです。実際のフィールド値が収まるのだとしても。

alter table t modify v varchar(84) not null;
-- Query OK, 1048576 rows affected (14.10 sec)
-- Records: 1048576  Duplicates: 0  Warnings: 0

列長のサイズが変わる 85 と 86 の境界でめちゃ時間かかります。

alter table t modify v varchar(85) not null;
-- Query OK, 0 rows affected (0.11 sec)
-- Records: 0  Duplicates: 0  Warnings: 0

alter table t modify v varchar(86) not null;
-- Query OK, 1048576 rows affected (14.87 sec)
-- Records: 1048576  Duplicates: 0  Warnings: 0

alter table t modify v varchar(21843) not null;
-- Query OK, 0 rows affected (0.02 sec)
-- Records: 0  Duplicates: 0  Warnings: 0

仕様的に文字長があまり重要ではない場合は後から拡張することを見越してあらかじめフィールド長のサイズに 2 バイト必要な 86 で定義しておくと良いかも？いやあるいはなるべくサイズを小さくするためにあえて 1 バイトしかかからない 85 以下を狙って定義する、という考えもありかも。

あと、理由はよくわからないですが nullable の変更は半端に時間かかります。nullable の変更によってレコード長が変わると思っていたので列拡張と同じ程度の時間かかりそうに思ったのですが、フィールドのサイズの変更とレコードのサイズの変更ではまた違うということでしょうか。

alter table t modify v varchar(2) not null;

alter table t modify v varchar(2);
-- Query OK, 0 rows affected (4.95 sec)
-- Records: 0  Duplicates: 0  Warnings: 0

alter table t modify v varchar(2) not null;
-- Query OK, 0 rows affected (4.75 sec)
-- Records: 0  Duplicates: 0  Warnings: 0

参考

You get articles that match your needs
You can efficiently read back useful information
You can use dark theme

What you can do with signing up