LoginSignup
67
37

More than 3 years have passed since last update.

吉田さんは通すけど𠮷田さんは通さないミドルウェア

Last updated at Posted at 2020-04-30

元ネタ

utf8 から utf8mb4 に早く乗り換えたいけどいろいろとレガシーアプリケーションには事情があるんじゃ!!!

実装

𠮷田さんが会員登録しようとすると 400 Bad Request を返します。エラーメッセージはできるだけ親切にしています。共通モジュール的な場所に置いているため、Translator はインジェクトしていますが、メッセージの文言自体はクラス内に直接埋め込んでいます。

<?php

namespace Libraries\Foundation\Http\Middleware;

use Illuminate\Contracts\Translation\Translator;
use Illuminate\Foundation\Http\Middleware\TransformsRequest;
use Symfony\Component\HttpKernel\Exception\BadRequestHttpException;

/**
 * Class RejectOutOfRangeCodepoints
 *
 * MySQL の utf8 で扱えない範囲の文字を検知してエラーにします。
 */
class RejectOutOfRangeCodepoints extends TransformsRequest
{
    /**
     * Error message templates, embedded directly for simplicity and
     * indexed by language code and then by message key.
     */
    protected const TRANSLATIONS = [
        'ja' => [
            'utf8_invalid' => '送信データに無効な文字が含まれています。',
            'utf8_out_of_range' => '送信データに取り扱いできない文字%sが含まれています。異体字や絵文字などは使用しないでください。',
        ],
        'en' => [
            'utf8_invalid' => 'The request data contains invalid characters.',
            'utf8_out_of_range' => 'The request data contains the unacceptable characters: %s. Please do not use variants or emoji.',
        ],
    ];

    /**
     * Translator used only to look up the current locale.
     */
    protected Translator $translator;

    /**
     * Keys whose key/value pair is skipped by the validation.
     *
     * @var string[]
     */
    protected array $except = [
    ];

    /**
     * Store the injected translator.
     *
     * @param Translator $translator
     */
    public function __construct(Translator $translator)
    {
        $this->translator = $translator;
    }

    /**
     * Check a single request entry and hand the value back untouched.
     *
     * Both the key and the value are inspected, but only when they are
     * non-empty strings. Entries whose key is listed in $except are skipped.
     *
     * @param  string $key
     * @param  mixed  $value
     * @return mixed  The given $value, never modified.
     */
    public function transform($key, $value)
    {
        if (in_array($key, $this->except, true)) {
            return $value;
        }

        foreach ([$key, $value] as $candidate) {
            if (!is_string($candidate) || $candidate === '') {
                continue;
            }

            $this->validate($candidate);
        }

        return $value;
    }

    /**
     * Ensure every code point of the string fits into MySQL "utf8".
     *
     * @param string $value
     *
     * @throws BadRequestHttpException When the string is not well-formed
     *                                 UTF-8 or contains a four-byte sequence.
     */
    protected function validate(string $value): void
    {
        // With the "u" modifier, preg_match() fails on data that is not
        // well-formed standard UTF-8, so an empty pattern acts as a validator.
        $isWellFormed = preg_match('//u', $value) === 1;
        if (!$isWellFormed) {
            throw new BadRequestHttpException($this->message('utf8_invalid'));
        }

        // Well-formed UTF-8 may still contain four-byte sequences, which do
        // not fit into MySQL "utf8"; collect every one of them byte-wise.
        // c.f. http://otndnld.oracle.co.jp/document/products/oracle10g/102/doc_cd/server.102/B19218-02/appunicode.htm
        $hitCount = preg_match_all('/
              [\xf0]      [\x90-\xbf] [\x80-\xbf] [\x80-\xbf]
            | [\xf1-\xf3] [\x80-\xbf] [\x80-\xbf] [\x80-\xbf]
            | [\xf4]      [\x80-\x8f] [\x80-\xbf] [\x80-\xbf]
        /x', $value, $hits);

        if (!$hitCount) {
            return;
        }

        $listing = $this->gatherMatchedCharacters($hits[0]);

        throw new BadRequestHttpException($this->message('utf8_out_of_range', $listing));
    }

    /**
     * Build the human-readable listing of the offending characters.
     *
     * @param  string[] $matches Matched characters; must not be empty.
     * @return string
     */
    protected function gatherMatchedCharacters(array $matches): string
    {
        assert($matches !== []);

        if ($this->lang() === 'en') {
            // English: wrap in double quotes, join with commas, and connect
            // only the final element with "and".
            $format = '"%s"';
            $glue = ', ';
            $lastGlue = ' and ';
        } else {
            // Japanese: wrap in corner brackets and concatenate directly.
            $format = '「%s」';
            $glue = '';
            $lastGlue = '';
        }

        $quoted = array_unique(array_map(
            static function (string $character) use ($format): string {
                return sprintf($format, $character);
            },
            $matches
        ));

        $tail = array_pop($quoted);
        $head = implode($glue, $quoted);

        // Drop the empty head when there is only one distinct character.
        $segments = array_filter([$head, $tail], 'strlen');

        return implode($lastGlue, $segments);
    }

    /**
     * Render the message template for the current language.
     *
     * @param  string   $key  Message key inside TRANSLATIONS.
     * @param  string[] $args Values substituted into the template.
     * @return string
     */
    protected function message(string $key, ...$args): string
    {
        $template = static::TRANSLATIONS[$this->lang()][$key];

        return sprintf($template, ...$args);
    }

    /**
     * Resolve the message language: "en" for the "en" locale, "ja" otherwise.
     *
     * @return string
     */
    protected function lang(): string
    {
        return $this->translator->getLocale() === 'en' ? 'en' : 'ja';
    }
}

テスト

<?php

namespace Libraries\Tests\Unit\Foundation\Http\Middleware;

use Libraries\Foundation\Http\Middleware\RejectOutOfRangeCodepoints;
use Illuminate\Contracts\Translation\Translator;
use Illuminate\Http\Request;
use Mockery;
use Mockery\MockInterface;
use PHPUnit\Framework\TestCase;
use Symfony\Component\HttpKernel\Exception\BadRequestHttpException;

/**
 * Tests for the RejectOutOfRangeCodepoints middleware.
 *
 * Each test builds a POST request, runs it through the middleware with a
 * mocked translator and checks either the untouched payload or the
 * BadRequestHttpException message.
 */
class RejectOutOfRangeCodepointsTest extends TestCase
{
    /**
     * @var \Illuminate\Contracts\Translation\Translator|Mockery\LegacyMockInterface|Mockery\MockInterface
     */
    protected MockInterface $translator;

    /**
     * Create a fresh translator mock for every test.
     */
    public function setUp(): void
    {
        parent::setUp();

        $this->translator = Mockery::mock(Translator::class);
    }

    /**
     * Verify mock expectations and reset the Mockery container so that
     * mocks do not leak from one test into the next.
     */
    protected function tearDown(): void
    {
        Mockery::close();

        parent::tearDown();
    }

    /**
     * Run the request through the middleware with a pass-through next handler.
     *
     * @param  Request $request
     * @return Request
     */
    protected function apply(Request $request): Request
    {
        return (new RejectOutOfRangeCodepoints($this->translator))
            ->handle($request, fn ($request) => $request);
    }

    /**
     * Characters inside the accepted range pass through unchanged.
     */
    public function testValid(): void
    {
        $before = Request::create('/foo', 'POST', [
            'company_name' => '㈱ 吉野家ホールディングス',
        ]);

        $after = $this->apply($before);

        $this->assertSame([
            'company_name' => '㈱ 吉野家ホールディングス',
        ], $after->all());
    }

    /**
     * Malformed UTF-8 is rejected with the Japanese message.
     */
    public function testInvalidJapanese(): void
    {
        $this->translator->shouldReceive('getLocale')->andReturn('ja');

        $before = Request::create('/foo', 'POST', [
            'invalid' => "\xff\xff\xff",
        ]);

        $this->expectException(BadRequestHttpException::class);
        $this->expectExceptionMessage('送信データに無効な文字が含まれています。');

        $this->apply($before);
    }

    /**
     * Malformed UTF-8 is rejected with the English message.
     */
    public function testInvalidEnglish(): void
    {
        $this->translator->shouldReceive('getLocale')->andReturn('en');

        $before = Request::create('/foo', 'POST', [
            'invalid' => "\xff\xff\xff",
        ]);

        $this->expectException(BadRequestHttpException::class);
        $this->expectExceptionMessage('The request data contains invalid characters.');

        $this->apply($before);
    }

    /**
     * A single out-of-range character is reported in Japanese.
     */
    public function testOutOfRangeCodepointSingleJapanese(): void
    {
        $this->translator->shouldReceive('getLocale')->andReturn('ja');

        $before = Request::create('/foo', 'POST', [
            'restaurant_name' => '𠮷野家',
        ]);

        $this->expectException(BadRequestHttpException::class);
        $this->expectExceptionMessage('送信データに取り扱いできない文字「𠮷」が含まれています。異体字や絵文字などは使用しないでください。');

        $this->apply($before);
    }

    /**
     * Two out-of-range characters are listed back to back in Japanese.
     */
    public function testOutOfRangeCodepointsDoubleJapanese(): void
    {
        $this->translator->shouldReceive('getLocale')->andReturn('ja');

        $before = Request::create('/foo', 'POST', [
            'description' => '𠮷野家のご飯🍚',
        ]);

        $this->expectException(BadRequestHttpException::class);
        $this->expectExceptionMessage('送信データに取り扱いできない文字「𠮷」「🍚」が含まれています。異体字や絵文字などは使用しないでください。');

        $this->apply($before);
    }

    /**
     * A single out-of-range character is reported in English.
     */
    public function testOutOfRangeCodepointsSingleEnglish(): void
    {
        $this->translator->shouldReceive('getLocale')->andReturn('en');

        $before = Request::create('/foo', 'POST', [
            'restaurant_name' => '𠮷野家',
        ]);

        $this->expectException(BadRequestHttpException::class);
        $this->expectExceptionMessage('The request data contains the unacceptable characters: "𠮷". Please do not use variants or emoji.');

        $this->apply($before);
    }

    /**
     * Two out-of-range characters are joined with "and" in English.
     */
    public function testOutOfRangeCodepointsDoubleEnglish(): void
    {
        $this->translator->shouldReceive('getLocale')->andReturn('en');

        $before = Request::create('/foo', 'POST', [
            'description' => '𠮷野家のご飯🍚',
        ]);

        $this->expectException(BadRequestHttpException::class);
        $this->expectExceptionMessage('The request data contains the unacceptable characters: "𠮷" and "🍚". Please do not use variants or emoji.');

        $this->apply($before);
    }

    /**
     * Several out-of-range characters are comma-separated, the last one
     * joined with "and", in English.
     */
    public function testOutOfRangeCodepointsMultipleEnglish(): void
    {
        $this->translator->shouldReceive('getLocale')->andReturn('en');

        $before = Request::create('/foo', 'POST', [
            'description' => '𠮷野家のご飯🍚を家族👨‍👩‍👧‍👦で🖕🏽',
        ]);

        // Combined emoji sequences are split into their individual code
        // points; treating them as one unit is deliberately out of scope
        // because it would require too many extra considerations.
        $this->expectException(BadRequestHttpException::class);
        $this->expectExceptionMessage('The request data contains the unacceptable characters: "𠮷", "🍚", "👨", "👩", "👧", "👦", "🖕" and "🏽". Please do not use variants or emoji.');

        $this->apply($before);
    }

    /**
     * Out-of-range characters in a request key are rejected as well.
     */
    public function testOutOfRangeCodepointKey(): void
    {
        $this->translator->shouldReceive('getLocale')->andReturn('ja');

        $before = Request::create('/foo', 'POST', [
            '𠮷野家' => ['牛丼'],
        ]);

        $this->expectException(BadRequestHttpException::class);
        $this->expectExceptionMessage('送信データに取り扱いできない文字「𠮷」が含まれています。異体字や絵文字などは使用しないでください。');

        $this->apply($before);
    }
}
67
37
1

Register as a new user and use Qiita more conveniently

  1. You get articles that match your needs
  2. You can efficiently read back useful information
  3. You can use dark theme
What you can do with signing up
67
37