
More than 5 years have passed since last update.


Last updated at Posted at 2015-10-30





見た目が明らかに全角(full width)の文字でも幅1として扱われることがあり、mb_strwidthによる文字幅判定をそのまま信用するのは結構危ないかもしれません。

U+0000 - U+10FFFFの範囲について、mb_strwidthの実際の戻り値を実測して得た「実装上の仕様」(下記のPHPバージョンで確認)

character width
U+0000 - U+10FF 1
U+1100 - U+115F 2
U+1160 - U+11A2 1
U+11A3 - U+11A7 2
U+11A8 - U+11F9 1
U+11FA - U+11FF 2
U+1200 - U+2328 1
U+2329 - U+232A 2
U+232B - U+2E7F 1
U+2E80 - U+2E99 2
U+2E9A - U+2E9A 1
U+2E9B - U+2EF3 2
U+2EF4 - U+2EFF 1
U+2F00 - U+2FD5 2
U+2FD6 - U+2FEF 1
U+2FF0 - U+2FFB 2
U+2FFC - U+2FFF 1
U+3000 - U+303E 2
U+303F - U+3040 1
U+3041 - U+3096 2
U+3097 - U+3098 1
U+3099 - U+30FF 2
U+3100 - U+3104 1
U+3105 - U+312D 2
U+312E - U+3130 1
U+3131 - U+318E 2
U+318F - U+318F 1
U+3190 - U+31BA 2
U+31BB - U+31BF 1
U+31C0 - U+31E3 2
U+31E4 - U+31EF 1
U+31F0 - U+321E 2
U+321F - U+321F 1
U+3220 - U+3247 2
U+3248 - U+324F 1
U+3250 - U+32FE 2
U+32FF - U+32FF 1
U+3300 - U+4DBF 2
U+4DC0 - U+4DFF 1
U+4E00 - U+A48C 2
U+A48D - U+A48F 1
U+A490 - U+A4C6 2
U+A4C7 - U+A95F 1
U+A960 - U+A97C 2
U+A97D - U+ABFF 1
U+AC00 - U+D7A3 2
U+D7A4 - U+D7AF 1
U+D7B0 - U+D7C6 2
U+D7C7 - U+D7CA 1
U+D7CB - U+D7FB 2
U+D7FC - U+F8FF 1
U+F900 - U+FAFF 2
U+FB00 - U+FE0F 1
U+FE10 - U+FE19 2
U+FE1A - U+FE2F 1
U+FE30 - U+FE52 2
U+FE53 - U+FE53 1
U+FE54 - U+FE66 2
U+FE67 - U+FE67 1
U+FE68 - U+FE6B 2
U+FE6C - U+FF00 1
U+FF01 - U+FF60 2
U+FF61 - U+FFDF 1
U+FFE0 - U+FFE6 2
U+FFE7 - U+1AFFF 1
U+1B000 - U+1B001 2
U+1B002 - U+1F1FF 1
U+1F200 - U+1F202 2
U+1F203 - U+1F20F 1
U+1F210 - U+1F23A 2
U+1F23B - U+1F23F 1
U+1F240 - U+1F248 2
U+1F249 - U+1F24F 1
U+1F250 - U+1F251 2
U+1F252 - U+1FFFF 1
U+20000 - U+2FFFD 2
U+2FFFE - U+2FFFF 1
U+30000 - U+3FFFD 2
U+3FFFE - U+10FFFF 1


PHP 5.6.15

PHP Manualにある仕様

PHP: mb_strwidth - Manual

character width
U+0000 - U+0019 0
U+0020 - U+1FFF 1
U+2000 - U+FF60 2
U+FF61 - U+FF9F 1
U+FFA0 - 2



// '※' is U+203B. The manual's table puts U+2000 - U+FF60 at width 2,
// but the actual mb_strwidth() result for this character is 1.
mb_strwidth('※', 'UTF-8') === 2; // is false
mb_strwidth('※', 'UTF-8') === 1; // is true
// Width lookup written strictly from the table in the PHP manual, applied to
// '※' (U+203B). charCodeAt() returns a UTF-16 code unit, which equals the
// code point for BMP characters such as this one.
var char_code = '※'.charCodeAt(0);
var width = 0;
if (0x0000 <= char_code && char_code <= 0x0019) {
    // Control characters: the manual lists width 0.
    width = 0;
} else if (0x0020 <= char_code && char_code <= 0x1FFF) {
    width = 1;
} else if (0x2000 <= char_code && char_code <= 0xFF60) {
    width = 2;
} else if (0xFF61 <= char_code && char_code <= 0xFF9F) {
    width = 1;
} else if (0xFFA0 <= char_code) {
    width = 2;
}
width == 2; // is true

PHP Manualにある仕様に則って作ったのに差分があるじゃないか!
※ '※'はU+203B(0x203B)のため、PHP Manualの仕様ではU+2000 - U+FF60の範囲に入り幅2になるはずですが、実際のmb_strwidthは1を返します。




<table border="1">
    <thead>
        <tr>
            <td>start char</td>
            <td>code (dec)</td>
        </tr>
    </thead>
/**
 * Encode a single Unicode code point as a UTF-8 byte string.
 *
 * The bytes are built with plain bit arithmetic, so the result does not
 * depend on any ini setting (such as default_charset) or on the mbstring
 * configuration.
 *
 * @param int $code_point Code point in 0x0 - 0x10FFFF, excluding the
 *                        surrogate range 0xD800 - 0xDFFF.
 *
 * @return string The UTF-8 encoding of the code point (1 to 4 bytes).
 *
 * @throws \Exception If the code point is negative, above 0x10FFFF, or a surrogate.
 */
function int2utf8($code_point) {
    if ($code_point < 0) {
        throw new \Exception(sprintf('%1$s is out of range UTF-16 code point (0x000000 - 0x10FFFF)', $code_point));
    }

    if (0x10FFFF < $code_point) {
        throw new \Exception(sprintf('0x%1$X is out of range UTF-16 code point (0x000000 - 0x10FFFF)', $code_point));
    }

    if (0xD800 <= $code_point && $code_point <= 0xDFFF) {
        throw new \Exception(sprintf('0x%X is in of range surrogate pair code point (0xD800 - 0xDFFF)', $code_point));
    }

    // 1 byte: 0xxxxxxx
    if ($code_point < 0x80) {
        return chr($code_point);
    }

    // 2 bytes: 110xxxxx 10xxxxxx
    if ($code_point < 0x800) {
        return chr(0xC0 | ($code_point >> 6))
            . chr(0x80 | ($code_point & 0x3F));
    }

    // 3 bytes: 1110xxxx 10xxxxxx 10xxxxxx
    if ($code_point < 0x10000) {
        return chr(0xE0 | ($code_point >> 12))
            . chr(0x80 | (($code_point >> 6) & 0x3F))
            . chr(0x80 | ($code_point & 0x3F));
    }

    // 4 bytes: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
    return chr(0xF0 | ($code_point >> 18))
        . chr(0x80 | (($code_point >> 12) & 0x3F))
        . chr(0x80 | (($code_point >> 6) & 0x3F))
        . chr(0x80 | ($code_point & 0x3F));
}

// Walk every Unicode scalar value and record each code point at which the
// value returned by mb_strwidth() changes. The result is a run-length table:
// one entry per run of consecutive code points sharing the same width.
$start = 0x0;
$end   = 0x10FFFF;

$stacker    = [];
$prev_width = -1; // sentinel that matches no real width, so U+0000 always opens the first run

for ($i = $start; $i <= $end; $i++) {
    if (0xD800 <= $i && $i <= 0xDFFF) {
        // Surrogate code points cannot be encoded (int2utf8() throws for them).
        continue;
    }

    $char          = int2utf8($i);
    $current_width = mb_strwidth($char, 'UTF-8');

    if ($prev_width !== $current_width) {
        $stacker[] = [
            'char'          => $char,
            'dec_char_code' => $i,
            'width'         => $current_width,
        ];
        $prev_width = $current_width;
    }
}

// Emit one table row per run. A run ends just before the next run starts;
// the final run ends at $end.
foreach ($stacker as $index => $stack) {
    $next = isset($stacker[$index + 1]) ? $stacker[$index + 1] : ['dec_char_code' => $end + 1];
    $last = $next['dec_char_code'] - 1;
?>
        <tr>
            <td><?= htmlspecialchars($stack['char'], ENT_QUOTES, 'UTF-8') ?></td>
            <td><?= sprintf('0x%X', $stack['dec_char_code']) ?> (<?= $stack['dec_char_code'] ?>)</td>
            <td><?= sprintf("U+%X - U+%X", $stack['dec_char_code'], $last); ?></td>
            <td><?= $stack['width'] ?></td>
            <td><?= sprintf("[hexdec('0x%X'), hexdec('0x%X'), %s],", $stack['dec_char_code'], $last, $stack['width']); ?></td>
        </tr>
<?php
}
?>
</table>



以下のスクリプトで、上記の仕様表と実際のmb_strwidthの戻り値を全コードポイントについて突き合わせます。diff countが0なら問題無し(仕様表と実装が一致)。


/**
 * Encode a single Unicode code point as a UTF-8 byte string.
 *
 * The bytes are built with plain bit arithmetic, so the result does not
 * depend on any ini setting (such as default_charset) or on the mbstring
 * configuration.
 *
 * @param int $code_point Code point in 0x0 - 0x10FFFF, excluding the
 *                        surrogate range 0xD800 - 0xDFFF.
 *
 * @return string The UTF-8 encoding of the code point (1 to 4 bytes).
 *
 * @throws \Exception If the code point is negative, above 0x10FFFF, or a surrogate.
 */
function int2utf8($code_point) {
    if ($code_point < 0) {
        throw new \Exception(sprintf('%1$s is out of range UTF-16 code point (0x000000 - 0x10FFFF)', $code_point));
    }

    if (0x10FFFF < $code_point) {
        throw new \Exception(sprintf('0x%1$X is out of range UTF-16 code point (0x000000 - 0x10FFFF)', $code_point));
    }

    if (0xD800 <= $code_point && $code_point <= 0xDFFF) {
        throw new \Exception(sprintf('0x%X is in of range surrogate pair code point (0xD800 - 0xDFFF)', $code_point));
    }

    // 1 byte: 0xxxxxxx
    if ($code_point < 0x80) {
        return chr($code_point);
    }

    // 2 bytes: 110xxxxx 10xxxxxx
    if ($code_point < 0x800) {
        return chr(0xC0 | ($code_point >> 6))
            . chr(0x80 | ($code_point & 0x3F));
    }

    // 3 bytes: 1110xxxx 10xxxxxx 10xxxxxx
    if ($code_point < 0x10000) {
        return chr(0xE0 | ($code_point >> 12))
            . chr(0x80 | (($code_point >> 6) & 0x3F))
            . chr(0x80 | ($code_point & 0x3F));
    }

    // 4 bytes: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
    return chr(0xF0 | ($code_point >> 18))
        . chr(0x80 | (($code_point >> 12) & 0x3F))
        . chr(0x80 | (($code_point >> 6) & 0x3F))
        . chr(0x80 | ($code_point & 0x3F));
}

// Expected mb_strwidth() result per code point range.
// Each entry is [first code point, last code point (inclusive), width].
// Entries are sorted and contiguous: each one starts right after the previous
// one ends, covering 0x0 through 0x10FFFF.
$spec_list = [
    [0x0, 0x10FF, 1],
    [0x1100, 0x115F, 2],
    [0x1160, 0x11A2, 1],
    [0x11A3, 0x11A7, 2],
    [0x11A8, 0x11F9, 1],
    [0x11FA, 0x11FF, 2],
    [0x1200, 0x2328, 1],
    [0x2329, 0x232A, 2],
    [0x232B, 0x2E7F, 1],
    [0x2E80, 0x2E99, 2],
    [0x2E9A, 0x2E9A, 1],
    [0x2E9B, 0x2EF3, 2],
    [0x2EF4, 0x2EFF, 1],
    [0x2F00, 0x2FD5, 2],
    [0x2FD6, 0x2FEF, 1],
    [0x2FF0, 0x2FFB, 2],
    [0x2FFC, 0x2FFF, 1],
    [0x3000, 0x303E, 2],
    [0x303F, 0x3040, 1],
    [0x3041, 0x3096, 2],
    [0x3097, 0x3098, 1],
    [0x3099, 0x30FF, 2],
    [0x3100, 0x3104, 1],
    [0x3105, 0x312D, 2],
    [0x312E, 0x3130, 1],
    [0x3131, 0x318E, 2],
    [0x318F, 0x318F, 1],
    [0x3190, 0x31BA, 2],
    [0x31BB, 0x31BF, 1],
    [0x31C0, 0x31E3, 2],
    [0x31E4, 0x31EF, 1],
    [0x31F0, 0x321E, 2],
    [0x321F, 0x321F, 1],
    [0x3220, 0x3247, 2],
    [0x3248, 0x324F, 1],
    [0x3250, 0x32FE, 2],
    [0x32FF, 0x32FF, 1],
    [0x3300, 0x4DBF, 2],
    [0x4DC0, 0x4DFF, 1],
    [0x4E00, 0xA48C, 2],
    [0xA48D, 0xA48F, 1],
    [0xA490, 0xA4C6, 2],
    [0xA4C7, 0xA95F, 1],
    [0xA960, 0xA97C, 2],
    [0xA97D, 0xABFF, 1],
    [0xAC00, 0xD7A3, 2],
    [0xD7A4, 0xD7AF, 1],
    [0xD7B0, 0xD7C6, 2],
    [0xD7C7, 0xD7CA, 1],
    [0xD7CB, 0xD7FB, 2],
    [0xD7FC, 0xF8FF, 1],
    [0xF900, 0xFAFF, 2],
    [0xFB00, 0xFE0F, 1],
    [0xFE10, 0xFE19, 2],
    [0xFE1A, 0xFE2F, 1],
    [0xFE30, 0xFE52, 2],
    [0xFE53, 0xFE53, 1],
    [0xFE54, 0xFE66, 2],
    [0xFE67, 0xFE67, 1],
    [0xFE68, 0xFE6B, 2],
    [0xFE6C, 0xFF00, 1],
    [0xFF01, 0xFF60, 2],
    [0xFF61, 0xFFDF, 1],
    [0xFFE0, 0xFFE6, 2],
    [0xFFE7, 0x1AFFF, 1],
    [0x1B000, 0x1B001, 2],
    [0x1B002, 0x1F1FF, 1],
    [0x1F200, 0x1F202, 2],
    [0x1F203, 0x1F20F, 1],
    [0x1F210, 0x1F23A, 2],
    [0x1F23B, 0x1F23F, 1],
    [0x1F240, 0x1F248, 2],
    [0x1F249, 0x1F24F, 1],
    [0x1F250, 0x1F251, 2],
    [0x1F252, 0x1FFFF, 1],
    [0x20000, 0x2FFFD, 2],
    [0x2FFFE, 0x2FFFF, 1],
    [0x30000, 0x3FFFD, 2],
    [0x3FFFE, 0x10FFFF, 1],
];

// Compare the expected widths in $spec_list against what mb_strwidth()
// actually returns for every Unicode scalar value. Each mismatch is printed
// as a table row and counted in $diff_cnt.

// $spec is the range entry currently being checked: [first, last, width].
$spec = array_shift($spec_list);

$diff_cnt = 0;
?>
<table border="2">
    <tr>
        <td>char</td>
        <td>code (dec)</td>
        <td>spec width</td>
        <td>actual width</td>
    </tr>
<?php
for ($i = 0x0000, $end = 0x10FFFF; $i <= $end; $i++) {
    if (0xD800 <= $i && $i <= 0xDFFF) {
        // Surrogate code points cannot be encoded (int2utf8() throws for them).
        continue;
    }

    $target_text = int2utf8($i);

    // Advance to the range entry that contains $i. A loop (rather than a
    // single step) keeps this correct after the surrogate gap is skipped.
    while ($spec[1] < $i) {
        $spec = array_shift($spec_list);
        if ($spec === null) {
            throw new \Exception(sprintf('spec list does not cover code point 0x%X', $i));
        }
    }

    // Pass the encoding explicitly so the result does not depend on the
    // mbstring internal encoding setting.
    $actual_width = mb_strwidth($target_text, 'UTF-8');

    if ($actual_width !== $spec[2]) {
        $diff_cnt++;
?>
    <tr>
        <td><?= htmlspecialchars($target_text, ENT_QUOTES, 'UTF-8') ?></td>
        <td><?= sprintf('0x%X', $i) ?> (<?= $i ?>)</td>
        <td><?= $spec[2] ?></td>
        <td><?= $actual_width ?></td>
    </tr>
<?php
    }
}
?>
</table>

diff count : <?= $diff_cnt; ?>


Register as a new user and use Qiita more conveniently

  1. You get articles that match your needs
  2. You can efficiently read back useful information
  3. You can use dark theme
What you can do with signing up