Last updated at Posted at 2020-04-30


utf8 から utf8mb4 に早く乗り換えたいけどいろいろとレガシーアプリケーションには事情があるんじゃ!!!


𠮷田さんが会員登録しようとすると 400 Bad Request を返します。エラーメッセージはできるだけ親切にしています。共通モジュール的な場所に置いたので、Translator はインジェクトしていますが(ロケールの判定にのみ使用)、文章自体はクラス内に直接埋め込んでいます。


namespace Libraries\Foundation\Http\Middleware;

use Illuminate\Contracts\Translation\Translator;
use Illuminate\Foundation\Http\Middleware\TransformsRequest;
use Symfony\Component\HttpKernel\Exception\BadRequestHttpException;

 * Class RejectOutOfRangeCodepoints
 * MySQL の utf8 で扱えない範囲の文字を検知してエラーにします。
class RejectOutOfRangeCodepoints extends TransformsRequest
    // 簡易的に直接翻訳を埋め込む
    protected const TRANSLATIONS = [
        'ja' => [
            'utf8_invalid' => '送信データに無効な文字が含まれています。',
            'utf8_out_of_range' => '送信データに取り扱いできない文字%sが含まれています。異体字や絵文字などは使用しないでください。',
        'en' => [
            'utf8_invalid' => 'The request data contains invalid characters.',
            'utf8_out_of_range' => 'The request data contains the unacceptable characters: %s. Please do not use variants or emoji.',

    protected Translator $translator;

     * The attributes that should not be validated.
     * @var string[]
    protected array $except = [

     * RejectOutOfRangeCodepoints constructor.
     * @param Translator $translator
    public function __construct(Translator $translator)
        $this->translator = $translator;

     * Validate the given value.
     * @param  string $key
     * @param  mixed  $value
     * @return mixed
    public function transform($key, $value)
        if (!in_array($key, $this->except, true)) {
            foreach ([$key, $value] as $string) {
                if (is_string($string) && $string !== '') {

        return $value;

     * Validate the given codepoints exist in range of MySQL "utf8".
     * @param string $value
    protected function validate(string $value): void
        // 標準の UTF-8 として無効なデータをチェック
        // (u 修飾子を付与すると無効なデータに対して false を返すようになる)
        if (!preg_match('//u', $value)) {
            throw new BadRequestHttpException($this->message('utf8_invalid'));

        // 標準の UTF-8 としては有効だが MySQL の utf8 に収まらないデータをチェック
        // c.f. http://otndnld.oracle.co.jp/document/products/oracle10g/102/doc_cd/server.102/B19218-02/appunicode.htm
        if (preg_match_all('/
              [\xf0]      [\x90-\xbf] [\x80-\xbf] [\x80-\xbf]
            | [\xf1-\xf3] [\x80-\xbf] [\x80-\xbf] [\x80-\xbf]
            | [\xf4]      [\x80-\x8f] [\x80-\xbf] [\x80-\xbf]
        /x', $value, $matches)) {
            throw new BadRequestHttpException($this->message('utf8_out_of_range', $this->gatherMatchedCharacters($matches[0])));

     * @param  string[] $matches
     * @return string
    protected function gatherMatchedCharacters(array $matches): string
        assert(count($matches) > 0);

        // 日本語はシンプルに鉤括弧で連結する
        // 英語はダブルクオーテーションで括り,カンマで連結したあと最後の要素のみ and で連結する
        [$format, $glue, $lastGlue] = $this->lang() !== 'en'
            ? ['「%s」', '', '']
            : ['"%s"', ', ', ' and '];

        $matches = array_unique(array_map(fn (string $match) => sprintf($format, $match), $matches));
        $lastMatch = array_pop($matches);

        return implode($lastGlue, array_filter([implode($glue, $matches), $lastMatch], 'strlen'));

     * @param  string   $key
     * @param  string[] $args
     * @return string
    protected function message(string $key, ...$args): string
        return sprintf(static::TRANSLATIONS[$this->lang()][$key], ...$args);

     * @return string
    protected function lang(): string
        return $this->translator->getLocale() !== 'en' ? 'ja' : 'en';



namespace Libraries\Tests\Unit\Foundation\Http\Middleware;

use Libraries\Foundation\Http\Middleware\RejectOutOfRangeCodepoints;
use Illuminate\Contracts\Translation\Translator;
use Illuminate\Http\Request;
use Mockery;
use Mockery\MockInterface;
use PHPUnit\Framework\TestCase;
use Symfony\Component\HttpKernel\Exception\BadRequestHttpException;

/**
 * Tests for RejectOutOfRangeCodepoints.
 *
 * The translator is mocked so each test can choose the locale that decides
 * which language the error message is produced in.
 */
class RejectOutOfRangeCodepointsTest extends TestCase
{
    /**
     * @var Translator|MockInterface
     */
    protected MockInterface $translator;

    public function setUp(): void
    {
        parent::setUp();

        $this->translator = Mockery::mock(Translator::class);
    }

    protected function tearDown(): void
    {
        // Release the mock container so expectations do not leak between tests.
        Mockery::close();

        parent::tearDown();
    }

    /**
     * Run the middleware against the request and return what reaches the next handler.
     */
    protected function apply(Request $request): Request
    {
        return (new RejectOutOfRangeCodepoints($this->translator))
            ->handle($request, fn ($request) => $request);
    }

    public function testValid(): void
    {
        $before = Request::create('/foo', 'POST', [
            'company_name' => '㈱ 吉野家ホールディングス',
        ]);

        $after = $this->apply($before);

        $this->assertSame([
            'company_name' => '㈱ 吉野家ホールディングス',
        ], $after->all());
    }

    public function testInvalidJapanese(): void
    {
        $this->translator->shouldReceive('getLocale')->andReturn('ja');

        $before = Request::create('/foo', 'POST', [
            'invalid' => "\xff\xff\xff",
        ]);

        $this->expectException(BadRequestHttpException::class);
        $this->expectExceptionMessage('送信データに無効な文字が含まれています。');

        $this->apply($before);
    }

    public function testInvalidEnglish(): void
    {
        $this->translator->shouldReceive('getLocale')->andReturn('en');

        $before = Request::create('/foo', 'POST', [
            'invalid' => "\xff\xff\xff",
        ]);

        $this->expectException(BadRequestHttpException::class);
        $this->expectExceptionMessage('The request data contains invalid characters.');

        $this->apply($before);
    }

    public function testOutOfRangeCodepointSingleJapanese(): void
    {
        $this->translator->shouldReceive('getLocale')->andReturn('ja');

        $before = Request::create('/foo', 'POST', [
            'restaurant_name' => '𠮷野家',
        ]);

        $this->expectException(BadRequestHttpException::class);
        $this->expectExceptionMessage('送信データに取り扱いできない文字「𠮷」が含まれています。異体字や絵文字などは使用しないでください。');

        $this->apply($before);
    }

    public function testOutOfRangeCodepointsDoubleJapanese(): void
    {
        $this->translator->shouldReceive('getLocale')->andReturn('ja');

        $before = Request::create('/foo', 'POST', [
            'description' => '𠮷野家のご飯🍚',
        ]);

        $this->expectException(BadRequestHttpException::class);
        $this->expectExceptionMessage('送信データに取り扱いできない文字「𠮷」「🍚」が含まれています。異体字や絵文字などは使用しないでください。');

        $this->apply($before);
    }

    public function testOutOfRangeCodepointsSingleEnglish(): void
    {
        $this->translator->shouldReceive('getLocale')->andReturn('en');

        $before = Request::create('/foo', 'POST', [
            'restaurant_name' => '𠮷野家',
        ]);

        $this->expectException(BadRequestHttpException::class);
        $this->expectExceptionMessage('The request data contains the unacceptable characters: "𠮷". Please do not use variants or emoji.');

        $this->apply($before);
    }

    public function testOutOfRangeCodepointsDoubleEnglish(): void
    {
        $this->translator->shouldReceive('getLocale')->andReturn('en');

        $before = Request::create('/foo', 'POST', [
            'description' => '𠮷野家のご飯🍚',
        ]);

        $this->expectException(BadRequestHttpException::class);
        $this->expectExceptionMessage('The request data contains the unacceptable characters: "𠮷" and "🍚". Please do not use variants or emoji.');

        $this->apply($before);
    }

    public function testOutOfRangeCodepointsMultipleEnglish(): void
    {
        $this->translator->shouldReceive('getLocale')->andReturn('en');

        $before = Request::create('/foo', 'POST', [
            'description' => '𠮷野家のご飯🍚を家族👨‍👩‍👧‍👦で🖕🏽',
        ]);

        // Joined (ZWJ / modifier) sequences are split into their component
        // code points; treating them as a unit is intentionally out of scope.
        $this->expectException(BadRequestHttpException::class);
        $this->expectExceptionMessage('The request data contains the unacceptable characters: "𠮷", "🍚", "👨", "👩", "👧", "👦", "🖕" and "🏽". Please do not use variants or emoji.');

        $this->apply($before);
    }

    public function testOutOfRangeCodepointKey(): void
    {
        // NOTE(review): the locale for this case is assumed to be Japanese; confirm.
        $this->translator->shouldReceive('getLocale')->andReturn('ja');

        $before = Request::create('/foo', 'POST', [
            '𠮷野家' => ['牛丼'],
        ]);

        // The offending character is in the key, not the value.
        $this->expectException(BadRequestHttpException::class);
        $this->expectExceptionMessage('送信データに取り扱いできない文字「𠮷」が含まれています。異体字や絵文字などは使用しないでください。');

        $this->apply($before);
    }
}



