0
1

Delete article

Deleted articles cannot be recovered.

The draft of this article will also be deleted.

Are you sure you want to delete this article?

組み込み用sjis->utf8

Last updated at Posted at 2025-04-29

サロゲートペアには非対応(この変換テーブルの変換結果はすべて16ビットに収まるため、sjisの変換には不要)

目的

sjis -> utf8の軽量ライブラリが欲しい

実装

convert.c

#include <stdio.h>
#include <stdint.h>
#include <stdlib.h>

#include "sjis.txt"

/* Number of elements in a true array (not a pointer or array parameter).
   The argument is parenthesized so that any expression naming an array
   is indexed as a whole. */
#ifndef ARRAY_SIZE
#define ARRAY_SIZE(a) (sizeof(a) / sizeof((a)[0]))
#endif

/**
 * Look up a Shift_JIS code in mapping_table and return the associated
 * 16-bit Unicode value.
 *
 * The lookup is a binary search on the .sjis field, so it presumes that
 * mapping_table (defined in sjis.txt, not visible here) is sorted in
 * ascending .sjis order.
 *
 * @param sjis  Code to look up.
 * @return The .unicode field of the matching entry, or 0x20 (space) when
 *         no entry matches.
 */
uint16_t sjis_to_utf16be(uint16_t sjis)
{
    mapping_t *entries = mapping_table;
    size_t lo = 0;
    size_t hi = ARRAY_SIZE(mapping_table);

    /* Any match lies in the half-open range [lo, hi). */
    while (lo < hi)
    {
        size_t probe = lo + (hi - lo) / 2;
        mapping_t *entry = &entries[probe];

        if (entry->sjis == sjis)
        {
            return entry->unicode;
        }

        if (entry->sjis < sjis)
        {
            /* Target is above the probe: drop the lower half. */
            lo = probe + 1;
            continue;
        }

        /* Target is below the probe: drop the upper half. */
        hi = probe;
    }

    /* Not in the table: fall back to a space. */
    return 0x20;
}

/**
 * Encode a single 16-bit code point as UTF-8.
 *
 * Values up to 0x7F produce 1 byte, values up to 0x7FF produce 2 bytes and
 * everything else produces 3 bytes. Surrogate code units are not combined;
 * they are encoded like any other 16-bit value.
 *
 * @param code_point        16-bit value to encode.
 * @param utf8_buffer       Destination buffer; NULL is tolerated.
 * @param utf8_buffer_size  Number of bytes available in utf8_buffer.
 * @return Number of UTF-8 bytes written (terminator excluded). Returns 0
 *         when the encoded form does not fit or utf8_buffer is NULL.
 *
 * A NUL terminator is stored after the encoded bytes only when the buffer
 * has room for it, so the function never writes past utf8_buffer_size.
 */
size_t utf16be_char_to_utf8(uint16_t code_point, char *utf8_buffer, size_t utf8_buffer_size)
{
    size_t utf8_index = 0;
    size_t needed;

    if (utf8_buffer == NULL)
    {
        return 0;
    }

    if (code_point <= 0x007F)
    {
        needed = 1;
    }
    else if (code_point <= 0x07FF)
    {
        needed = 2;
    }
    else
    {
        needed = 3;
    }

    if (needed <= utf8_buffer_size)
    {
        switch (needed)
        {
        case 1:
            utf8_buffer[utf8_index++] = (char)(code_point & 0x7F);
            break;
        case 2:
            utf8_buffer[utf8_index++] = (char)(0xC0 | ((code_point >> 6) & 0x1F));
            utf8_buffer[utf8_index++] = (char)(0x80 | (code_point & 0x3F));
            break;
        default:
            utf8_buffer[utf8_index++] = (char)(0xE0 | ((code_point >> 12) & 0x0F));
            utf8_buffer[utf8_index++] = (char)(0x80 | ((code_point >> 6) & 0x3F));
            utf8_buffer[utf8_index++] = (char)(0x80 | (code_point & 0x3F));
            break;
        }
    }

    /* Terminate only if there is space; an exact-fit or empty buffer must
       not be written beyond its end. */
    if (utf8_index < utf8_buffer_size)
    {
        utf8_buffer[utf8_index] = '\0';
    }

    return utf8_index;
}

/**
 * Convert a buffer of Shift_JIS bytes into a newly allocated,
 * NUL-terminated UTF-8 string.
 *
 * Bytes below 0x80 and bytes in 0xA1..0xDF are treated as single-byte
 * codes; any other byte is taken as the lead byte of a two-byte code.
 * Each code is mapped through sjis_to_utf16be() and then encoded with
 * utf16be_char_to_utf8().
 *
 * A lead byte that is the last byte of the input has no trail byte to pair
 * with; it is looked up on its own as a single-byte code instead of reading
 * past the end of the input.
 *
 * @param data  Input bytes; may be NULL only when len is 0.
 * @param len   Number of bytes in data.
 * @return A heap-allocated string that the caller must free(), or NULL if
 *         memory could not be allocated or data is NULL with len > 0.
 *         Empty input yields an empty string.
 */
char *sjis_to_utf8(uint8_t *data, size_t len)
{
    /* Free space required before each conversion (encoded bytes plus
       terminator, with one spare byte), and the amount the buffer grows by
       when that space is not available. */
    const size_t min_headroom = 5;
    const size_t grow_step = 0x10;

    if ((data == NULL && len > 0) || len == SIZE_MAX)
    {
        return NULL;
    }

    /* One extra byte so that even empty input can be terminated. */
    size_t capacity = len + 1;
    char *utf8_base = malloc(capacity);
    if (utf8_base == NULL)
    {
        return NULL;
    }
    utf8_base[0] = '\0';

    size_t used = 0;
    size_t pos = 0;
    while (pos < len)
    {
        uint8_t lead = data[pos];
        uint16_t orig;
        size_t consumed;

        if (lead < 0x80 || (0xa1 <= lead && lead <= 0xdf))
        {
            consumed = 1;
            orig = lead;
        }
        else if (len - pos >= 2)
        {
            consumed = 2;
            orig = (uint16_t)(((uint16_t)lead << 8) | data[pos + 1]);
        }
        else
        {
            /* Truncated two-byte code at the end of the input. */
            consumed = 1;
            orig = lead;
        }

        uint16_t converted = sjis_to_utf16be(orig);

        if (capacity - used < min_headroom)
        {
            if (capacity > SIZE_MAX - grow_step)
            {
                free(utf8_base);
                return NULL;
            }

            /* Keep the old pointer until realloc succeeds so it can be
               released on failure. */
            char *grown = realloc(utf8_base, capacity + grow_step);
            if (grown == NULL)
            {
                free(utf8_base);
                return NULL;
            }
            utf8_base = grown;
            capacity += grow_step;
        }

        used += utf16be_char_to_utf8(converted, utf8_base + used, 4);
        utf8_base[used] = '\0';
        pos += consumed;
    }

    return utf8_base;
}

sjis.txtはこれ

sjis.txtのオリジナルのライセンスはこれ

sjis.txtは以下を変更

  • c 形式に変更
  • 半角カナの位置を並べ替え
  • 機種依存文字の追加(8740-879c,ed40-edfc,ee40-eefc)
  • IBM拡張文字の追加(FA40-FC4B)

使い方

こんなsjisのファイルがあったとしたら

image.png

こうやると

/* Example driver: read original.txt (Shift_JIS) completely into memory,
   convert it to UTF-8 and print the result followed by a newline.
   Returns EXIT_FAILURE if the file cannot be read or converted. */
int main(void)
{
    int status = EXIT_FAILURE;
    uint8_t *data = NULL;
    char *utf8 = NULL;
    long len = -1;

    FILE *fp = fopen("original.txt", "rb");
    if (fp == NULL)
    {
        perror("original.txt");
        return EXIT_FAILURE;
    }

    /* Determine the file size by seeking to the end. */
    if (fseek(fp, 0, SEEK_END) == 0)
    {
        len = ftell(fp);
    }
    if (len < 0 || fseek(fp, 0, SEEK_SET) != 0)
    {
        perror("original.txt");
        goto cleanup;
    }

    /* uint8_t matches the parameter type of sjis_to_utf8(). */
    data = malloc((size_t)len + 1);
    if (data == NULL)
    {
        perror("malloc");
        goto cleanup;
    }

    if (len > 0 && fread(data, 1, (size_t)len, fp) != (size_t)len)
    {
        fprintf(stderr, "original.txt: short read\n");
        goto cleanup;
    }

    utf8 = sjis_to_utf8(data, (size_t)len);
    if (utf8 == NULL)
    {
        fprintf(stderr, "conversion failed\n");
        goto cleanup;
    }

    printf("%s\n", utf8);
    status = 0;

cleanup:
    free(utf8);
    free(data);
    fclose(fp);
    return status;
}

こうなる
image.png

参考サイト

0
1
0

Register as a new user and use Qiita more conveniently

  1. You get articles that match your needs
  2. You can efficiently read back useful information
  3. You can use dark theme
What you can do with signing up
0
1

Delete article

Deleted articles cannot be recovered.

The draft of this article will also be deleted.

Are you sure you want to delete this article?