More than 5 years have passed since last update.

備忘録: SHIFT JIS の領域

Shift_JIS

Posted at 2020-04-03

JIS X 0208 の領域と SHIFT JIS の領域の比較

JIS X 0208 の領域

  2121 ... 215F, 2160 ... 217E, 2221 ... 227E
   ...                                   ...
  5D21 ... 5D5F, 5D60 ... 5D7E, 5E21 ... 5E7E
  5F21 ... 5F5F, 5F60 ... 5F7E, 6021 ... 607E
   ...                                   ...
  7D21 ... 7D5F, 7D60 ... 7D7E, 7E21 ... 7E7E

SHIFT JIS の領域

  8140 ... 817E, 8180 ... 819E, 819F ... 81FC
   ...                                   ...
  9F40 ... 9F7E, 9F80 ... 9F9E, 9F9F ... 9FFC
  E040 ... E07E, E080 ... E09E, E09F ... E0FC
   ...                                   ...
  EF40 ... EF7E, EF80 ... EF9E, EF9F ... EFFC
  F040 ... F07E, F080 ... F09E, F09F ... F0FC : 以下 CP932 のみ
   ...                                   ...
  FC40 ... FC7E, FC80 ... FC9E, FC9F ... FCFC

範囲
JIS(1)	21:22	-	5D:5E	5F:60	-	7D:7E
CP932(1)	81	-	9F	E0	-	EF	F0	-	FC
JIS(2)	21	-	5F	60	-	7E	21	-	7E
CP932(2)	40	-	7E	80	-	9E	9F	-	FC

Shift-JIS を EUC に変換するサンプルプログラム

# include <stdio.h>

/*
 * Return 1 when c is a Shift_JIS / CP932 lead byte
 * (0x81..0x9F or 0xE0..0xFC), 0 otherwise.
 *
 * XOR with 0xA0 maps both lead-byte ranges onto one contiguous run:
 * 0x81..0x9F -> 0x21..0x3F and 0xE0..0xFC -> 0x40..0x5C, so after
 * subtracting 0x21 a lead byte lands in 0x00..0x3B.
 *
 * The arithmetic is done in unsigned int on purpose: values below 0x21
 * wrap around to huge numbers and fail the range check.  In plain (signed)
 * int the difference would be negative and wrongly pass the "< 0x3c" test
 * for 0x80 and 0xA0..0xBF.
 */
unsigned int is_sj(unsigned char c)
{
    return ((c ^ 0xa0u) - 0x21u) < 0x3cu;
}

/*
 * Convert a Shift_JIS double-byte character to its 16-bit JIS X 0208 code.
 *
 *   h  lead byte  (0x81..0x9F or 0xE0..0xEF)
 *   l  trail byte (0x40..0x7E or 0x80..0xFC)
 *
 * Returns (JIS first byte << 8) | JIS second byte, e.g. 0x8140 -> 0x2121.
 * Neither byte is validated; the caller is expected to pass a real pair.
 *
 * Everything is computed in unsigned int so that an out-of-range lead byte
 * never left-shifts a negative value (undefined behaviour in C).  Results
 * for valid lead bytes are unchanged.
 */
unsigned int sj2j(unsigned char h, unsigned char l)
{
    /* Each lead byte covers two JIS rows, hence the shift by 9 (x 0x200). */
    unsigned int jh = ((h ^ 0xa0u) - 0x21u) << 9;
    unsigned int jl = l - 0x1fu;    /* -0x40 + 0x21 */

    if (l >= 0x80)
        jl -= 1u;                   /* trail byte 0x7F is skipped */
    if (l >= 0x9f)
        jl += 0xa2u;                /* 0x1F + 0x01 - 0x9F + 0x21 + 0x100 */

    return jh + jl + 0x2100u;
}

/*
 * Filter: copy stdin to stdout, rewriting every byte pair that starts with
 * a byte accepted by is_sj() as two bytes with the high bit set (EUC form
 * of the JIS code returned by sj2j()).  All other bytes pass through as is.
 */
int main()
{
    int lead, trail, jis;

    for (;;)
    {
        lead = getchar();
        if (lead == EOF)
            break;

        if (is_sj(lead))
        {
            trail = getchar();
            if (trail == EOF)
            {
                /* Input ended right after a lead byte: emit it unchanged. */
                putchar(lead);
                break;
            }
            jis = sj2j(lead, trail); /* the CP932-only region is not supported */
            putchar(0x80 | (jis >> 8));
            putchar(0x80 | (jis & 0xff));
        }
        else
        {
            /* half-width katakana is not supported */
            putchar(lead);
        }
    }
    return 0;
}

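これを gcc -O3 -S -g0 でコンパイルした結果が興味深い。
main にインライン展開された sj2j は分岐を使わず、条件代入(shrl と cmova)になっているが、
単独の sj2j 関数は l >= 0x80 の判定が条件分岐(js)になっている(保存レジスタの使用を避けるため?)。
なお、単独の sj2j 関数でも l >= 0x9F の判定は sbbl / notl / andl による分岐なしのコードである。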

asm

	.text
	.p2align 4,,15
	.globl	is_sj
	.type	is_sj, @function
# is_sj: compiler output for `unsigned int is_sj(unsigned char c)`.
# In:  %edi = c (only the low byte %dil takes part in the compare)
# Out: %eax = 1 if (c ^ 0xa0) <= 92 (0x5c) as an unsigned byte, else 0
# NOTE(review): the compare accepts every xored value 0x00..0x5c, so besides
# 0x81..0x9f and 0xe0..0xfc this also returns 1 for c = 0x80 and 0xa0..0xbf.
is_sj:
.LFB23:
	.cfi_startproc
	xorl	$-96, %edi	# c ^ 0xa0 (-96 has 0xa0 in its low byte)
	xorl	%eax, %eax	# zero the result first; setbe writes %al only
	cmpb	$92, %dil	# the source's "- 0x21 < 0x3c" folded into "<= 0x5c"
	setbe	%al
	ret
	.cfi_endproc
.LFE23:
	.size	is_sj, .-is_sj
	.p2align 4,,15
	.globl	sj2j
	.type	sj2j, @function
# sj2j: compiler output for `unsigned int sj2j(unsigned char h, unsigned char l)`.
# In:  %edi = h, %esi = l
# Out: %eax = (((h ^ 0xa0) - 0x21) << 9) + jl + 0x2100
# The l >= 0x80 test is a real branch (js); the l >= 0x9f test is branchless
# (sbbl / notl / andl).
sj2j:
.LFB24:
	.cfi_startproc
	xorl	$-96, %edi	# h ^ 0xa0
	movzbl	%sil, %eax	# %eax = l, zero-extended
	movzbl	%dil, %edi
	subl	$33, %edi	# - 0x21
	sall	$9, %edi	# %edi = jh
	testb	%sil, %sil
	js	.L4		# sign bit of l set, i.e. l >= 0x80
	# l < 0x80: jl = l - 0x1f, neither correction applies
	leal	-31(%rax), %edx
	xorl	%eax, %eax
	addl	%edx, %eax
	leal	8448(%rdi,%rax), %eax	# jh + jl + 0x2100
	ret
	.p2align 4,,10
	.p2align 3
.L4:
	# l >= 0x80: jl = l - 0x20, plus 0xa2 when l >= 0x9f
	cmpb	$-97, %sil	# compare l with 0x9f; CF = (l < 0x9f)
	leal	-32(%rax), %edx	# l - 0x1f - 1; lea leaves the flags untouched
	sbbl	%eax, %eax	# %eax = -1 if l < 0x9f, else 0
	notl	%eax		# %eax = 0 if l < 0x9f, else -1
	andl	$162, %eax	# %eax = 0xa2 if l >= 0x9f, else 0
	addl	%edx, %eax
	leal	8448(%rdi,%rax), %eax	# jh + jl + 0x2100
	ret
	.cfi_endproc
.LFE24:
	.size	sj2j, .-sj2j
	.section	.text.startup,"ax",@progbits
	.p2align 4,,15
	.globl	main
	.type	main, @function
# main: compiler output for the C main() loop, with is_sj and sj2j inlined.
# Register roles inside the loop:
#   %ebp = c1 exactly as returned by fgetc (kept across the second fgetc)
#   %ebx = c1 ^ 0xa0, later reused for the converted JIS code
# Returns 0 in %eax.
main:
.LFB25:
	.cfi_startproc
	pushq	%rbp
	.cfi_def_cfa_offset 16
	.cfi_offset 6, -16
	pushq	%rbx
	.cfi_def_cfa_offset 24
	.cfi_offset 3, -24
	subq	$8, %rsp	# padding so %rsp is 16-byte aligned at each call
	.cfi_def_cfa_offset 32
	.p2align 4,,10
	.p2align 3
.L8:
	# c1 = fgetc(stdin); leave the loop on EOF
	movq	stdin(%rip), %rdi
	call	fgetc@PLT
	cmpl	$-1, %eax
	movl	%eax, %ebp	# mov does not change the flags set by cmpl
	je	.L12
	# inlined is_sj(c1): (c1 ^ 0xa0) <= 0x5c as an unsigned byte
	movl	%ebp, %ebx
	xorl	$-96, %ebx
	cmpb	$92, %bl
	ja	.L16		# not a lead byte: copy it through
	# c2 = fgetc(stdin)
	movq	stdin(%rip), %rdi
	call	fgetc@PLT
	cmpl	$-1, %eax
	je	.L17		# EOF right after a lead byte
	# inlined sj2j(c1, c2), branchless
	movzbl	%al, %edx	# %edx = c2, zero-extended
	movsbl	%al, %ecx	# sign-extend c2 ...
	movzbl	%bl, %ebx	# %ebx = (c1 ^ 0xa0) as a byte
	shrl	$31, %ecx	# ... so %ecx = 1 when c2 >= 0x80, else 0
	subl	$31, %edx	# c2 - 0x1f
	sall	$9, %ebx	# shifted before the 0x21 is subtracted (see -8448 below)
	subl	%ecx, %edx	# minus 1 when c2 >= 0x80
	cmpb	$-98, %al	# compare c2 with 0x9e
	movq	stdout(%rip), %rsi
	leal	162(%rdx), %ecx	# candidate value with 0xa2 added
	cmova	%ecx, %edx	# taken when c2 > 0x9e, i.e. c2 >= 0x9f
	leal	-8448(%rbx,%rdx), %ebx	# -(0x21 << 9) + 0x2100 = -0x2100
	# fputc(0x80 | (sj >> 8), stdout)
	movl	%ebx, %edi
	sarl	$8, %edi
	orb	$-128, %dil
	call	fputc@PLT
	# fputc(0x80 | (sj & 0xff), stdout)
	movq	stdout(%rip), %rsi
	movzbl	%bl, %edi
	orb	$-128, %dil
	call	fputc@PLT
	jmp	.L8
	.p2align 4,,10
	.p2align 3
.L16:
	# non-lead byte: fputc(c1, stdout) and continue
	movq	stdout(%rip), %rsi
	movl	%ebp, %edi
	call	fputc@PLT
	jmp	.L8
.L17:
	# input ended after a lead byte: fputc(c1, stdout), then fall through to return
	movq	stdout(%rip), %rsi
	movl	%ebp, %edi
	call	fputc@PLT
.L12:
	# return 0
	addq	$8, %rsp
	.cfi_def_cfa_offset 24
	xorl	%eax, %eax
	popq	%rbx
	.cfi_def_cfa_offset 16
	popq	%rbp
	.cfi_def_cfa_offset 8
	ret
	.cfi_endproc
.LFE25:
	.size	main, .-main
	.ident	"GCC: (Ubuntu 7.5.0-3ubuntu1~18.04) 7.5.0"
	.section	.note.GNU-stack,"",@progbits

You get articles that match your needs
You can efficiently read back useful information
You can use dark theme

What you can do with signing up