元ネタ
utf8 な MySQL データベースを保守する湯婆婆「𠮷田?贅沢な名だね。今からお前の名前は吉田。いいかい、吉田。分かったら返事をするんだ、吉田!」
— 𝑺𝒐𝒄𝒊𝒂𝒍 𝑫𝒊𝒔𝒕𝒂𝒏𝒄𝒆 (@mpyw) April 30, 2020
`utf8` から `utf8mb4` に早く乗り換えたいけど、いろいろとレガシーアプリケーションには事情があるんじゃ!!!
実装
𠮷田さんが会員登録しようとすると 400 Bad Request を返します。エラーメッセージはできるだけ親切にしています。共通モジュール的な場所に置いたので、`Translator` はインジェクトしてるけど文章自体は直接埋め込んでます。
<?php
namespace Libraries\Foundation\Http\Middleware;
use Illuminate\Contracts\Translation\Translator;
use Illuminate\Foundation\Http\Middleware\TransformsRequest;
use Symfony\Component\HttpKernel\Exception\BadRequestHttpException;
/**
 * Class RejectOutOfRangeCodepoints
 *
 * Detects request data containing characters that MySQL's "utf8" charset
 * cannot handle and rejects it with a BadRequestHttpException.
 */
class RejectOutOfRangeCodepoints extends TransformsRequest
{
    /**
     * Message templates, embedded here directly as a shortcut instead of
     * being looked up through the translator. Indexed by language, then by message key.
     */
    protected const TRANSLATIONS = [
        'ja' => [
            'utf8_invalid' => '送信データに無効な文字が含まれています。',
            'utf8_out_of_range' => '送信データに取り扱いできない文字%sが含まれています。異体字や絵文字などは使用しないでください。',
        ],
        'en' => [
            'utf8_invalid' => 'The request data contains invalid characters.',
            'utf8_out_of_range' => 'The request data contains the unacceptable characters: %s. Please do not use variants or emoji.',
        ],
    ];

    /**
     * Used only to read the current locale; see lang().
     */
    protected Translator $translator;

    /**
     * The attributes that should not be validated.
     *
     * @var string[]
     */
    protected array $except = [
    ];

    /**
     * RejectOutOfRangeCodepoints constructor.
     *
     * @param Translator $translator
     */
    public function __construct(Translator $translator)
    {
        $this->translator = $translator;
    }

    /**
     * Validate the given key and value; the value itself is never modified.
     *
     * @param string $key
     * @param mixed $value
     * @return mixed the value exactly as received
     * @throws BadRequestHttpException when the key or the value fails validation
     */
    public function transform($key, $value)
    {
        // Excluded attributes pass through untouched.
        if (in_array($key, $this->except, true)) {
            return $value;
        }

        // The key is checked first, then the value. Only non-empty strings are inspected.
        if (is_string($key) && $key !== '') {
            $this->validate($key);
        }
        if (is_string($value) && $value !== '') {
            $this->validate($value);
        }

        return $value;
    }

    /**
     * Validate the given codepoints exist in range of MySQL "utf8".
     *
     * @param string $value
     * @throws BadRequestHttpException when the string is not valid UTF-8, or contains a four-byte sequence
     */
    protected function validate(string $value): void
    {
        // Step 1: reject data that is not valid standard UTF-8.
        // With the "u" modifier, preg_match() returns false for an invalid subject.
        $wellFormed = preg_match('//u', $value);
        if (!$wellFormed) {
            throw new BadRequestHttpException($this->message('utf8_invalid'));
        }

        // Step 2: find data that is valid standard UTF-8 but does not fit in MySQL "utf8",
        // i.e. four-byte sequences (lead bytes F0 to F4).
        // c.f. http://otndnld.oracle.co.jp/document/products/oracle10g/102/doc_cd/server.102/B19218-02/appunicode.htm
        // The pattern is byte-oriented (no "u") and uses extended mode ("x"), so the
        // whitespace and line breaks inside it are ignored.
        $fourByteSequences = '/
[\xf0] [\x90-\xbf] [\x80-\xbf] [\x80-\xbf]
| [\xf1-\xf3] [\x80-\xbf] [\x80-\xbf] [\x80-\xbf]
| [\xf4] [\x80-\x8f] [\x80-\xbf] [\x80-\xbf]
/x';

        $found = [];
        $hits = preg_match_all($fourByteSequences, $value, $found);
        if (!$hits) {
            return;
        }

        $offenders = $this->gatherMatchedCharacters($found[0]);

        throw new BadRequestHttpException($this->message('utf8_out_of_range', $offenders));
    }

    /**
     * Build the human-readable list of offending characters for the error message.
     *
     * @param string[] $matches matched characters; expected to be non-empty
     * @return string
     */
    protected function gatherMatchedCharacters(array $matches): string
    {
        assert(count($matches) > 0);

        if ($this->lang() === 'en') {
            // English: wrap in double quotes, join with commas, and put "and" before the last item.
            $wrapper = '"%s"';
            $separator = ', ';
            $finalSeparator = ' and ';
        } else {
            // Japanese: simply concatenate the characters wrapped in corner brackets.
            $wrapper = '「%s」';
            $separator = '';
            $finalSeparator = '';
        }

        $quoted = array_map(
            static function (string $character) use ($wrapper): string {
                return sprintf($wrapper, $character);
            },
            $matches
        );
        // Each distinct character is listed once, in order of first appearance.
        $quoted = array_unique($quoted);

        $tail = array_pop($quoted);
        $head = implode($separator, $quoted);

        if ($head === '') {
            return (string) $tail;
        }

        return $head . $finalSeparator . $tail;
    }

    /**
     * Look up a message template for the current language and fill in its placeholders.
     *
     * @param string $key
     * @param string[] $args
     * @return string
     */
    protected function message(string $key, ...$args): string
    {
        $template = static::TRANSLATIONS[$this->lang()][$key];

        return sprintf($template, ...$args);
    }

    /**
     * Resolve the message language: "en" only when the translator's locale is exactly "en",
     * otherwise "ja".
     *
     * @return string
     */
    protected function lang(): string
    {
        return $this->translator->getLocale() === 'en' ? 'en' : 'ja';
    }
}
テスト
<?php
namespace Libraries\Tests\Unit\Foundation\Http\Middleware;
use Libraries\Foundation\Http\Middleware\RejectOutOfRangeCodepoints;
use Illuminate\Contracts\Translation\Translator;
use Illuminate\Http\Request;
use Mockery;
use Mockery\MockInterface;
use PHPUnit\Framework\TestCase;
use Symfony\Component\HttpKernel\Exception\BadRequestHttpException;
/**
 * Unit tests for the RejectOutOfRangeCodepoints middleware.
 *
 * The translator is a Mockery mock whose getLocale() answer selects the
 * language of the expected error message.
 */
class RejectOutOfRangeCodepointsTest extends TestCase
{
    /**
     * @var \Illuminate\Contracts\Translation\Translator|Mockery\LegacyMockInterface|Mockery\MockInterface
     */
    protected MockInterface $translator;

    public function setUp(): void
    {
        parent::setUp();
        $this->translator = Mockery::mock(Translator::class);
    }

    /**
     * Close the Mockery container after every test so mocks created in setUp()
     * are verified and released instead of accumulating across tests.
     */
    protected function tearDown(): void
    {
        Mockery::close();
        parent::tearDown();
    }

    /**
     * Run the request through the middleware with a pass-through next handler.
     *
     * @param Request $request
     * @return Request
     */
    protected function apply(Request $request): Request
    {
        return (new RejectOutOfRangeCodepoints($this->translator))
            ->handle($request, fn ($request) => $request);
    }

    /**
     * Post the given input under the given locale and expect the middleware
     * to reject it with a BadRequestHttpException carrying the given message.
     *
     * @param string $locale value returned by the mocked translator's getLocale()
     * @param array $input POST parameters of the request
     * @param string $expectedMessage
     */
    private function assertRejected(string $locale, array $input, string $expectedMessage): void
    {
        $this->translator->shouldReceive('getLocale')->andReturn($locale);
        $before = Request::create('/foo', 'POST', $input);

        $this->expectException(BadRequestHttpException::class);
        $this->expectExceptionMessage($expectedMessage);

        $this->apply($before);
    }

    public function testValid(): void
    {
        $before = Request::create('/foo', 'POST', [
            'company_name' => '㈱ 吉野家ホールディングス',
        ]);

        $after = $this->apply($before);

        $this->assertSame([
            'company_name' => '㈱ 吉野家ホールディングス',
        ], $after->all());
    }

    public function testInvalidJapanese(): void
    {
        $this->assertRejected(
            'ja',
            ['invalid' => "\xff\xff\xff"],
            '送信データに無効な文字が含まれています。'
        );
    }

    public function testInvalidEnglish(): void
    {
        $this->assertRejected(
            'en',
            ['invalid' => "\xff\xff\xff"],
            'The request data contains invalid characters.'
        );
    }

    public function testOutOfRangeCodepointSingleJapanese(): void
    {
        $this->assertRejected(
            'ja',
            ['restaurant_name' => '𠮷野家'],
            '送信データに取り扱いできない文字「𠮷」が含まれています。異体字や絵文字などは使用しないでください。'
        );
    }

    public function testOutOfRangeCodepointsDoubleJapanese(): void
    {
        $this->assertRejected(
            'ja',
            ['description' => '𠮷野家のご飯🍚'],
            '送信データに取り扱いできない文字「𠮷」「🍚」が含まれています。異体字や絵文字などは使用しないでください。'
        );
    }

    public function testOutOfRangeCodepointsSingleEnglish(): void
    {
        $this->assertRejected(
            'en',
            ['restaurant_name' => '𠮷野家'],
            'The request data contains the unacceptable characters: "𠮷". Please do not use variants or emoji.'
        );
    }

    public function testOutOfRangeCodepointsDoubleEnglish(): void
    {
        $this->assertRejected(
            'en',
            ['description' => '𠮷野家のご飯🍚'],
            'The request data contains the unacceptable characters: "𠮷" and "🍚". Please do not use variants or emoji.'
        );
    }

    public function testOutOfRangeCodepointsMultipleEnglish(): void
    {
        // Ligatures (combined emoji) are reported as their separate parts;
        // handling them properly involves too many cases, so it is deliberately not attempted.
        $this->assertRejected(
            'en',
            ['description' => '𠮷野家のご飯🍚を家族👨👩👧👦で🖕🏽'],
            'The request data contains the unacceptable characters: "𠮷", "🍚", "👨", "👩", "👧", "👦", "🖕" and "🏽". Please do not use variants or emoji.'
        );
    }

    public function testOutOfRangeCodepointKey(): void
    {
        // The offending character sits in the parameter name, not in the value.
        $this->assertRejected(
            'ja',
            ['𠮷野家' => ['牛丼']],
            '送信データに取り扱いできない文字「𠮷」が含まれています。異体字や絵文字などは使用しないでください。'
        );
    }
}