decode
/*
 * Read one UTF-8 encoded character from standard input and return its
 * Unicode code point.
 *
 * Returns the code point (0 .. 0x10FFFF) on success, or -1 when
 *   - end of input (or a read error) is hit before a full sequence is read,
 *   - the first byte is not a valid lead byte (a stray continuation byte
 *     0x80..0xBF, or 0xF8..0xFF), or
 *   - a following byte is not a continuation byte (10xxxxxx), or
 *   - the decoded value is above 0x10FFFF.
 *
 * A byte that was expected to be a continuation byte but is not is pushed
 * back onto stdin, so the next call starts decoding from that byte.
 * Overlong encodings and surrogate code points are not rejected.
 */
int decode(void) {
    /* Payload bits carried by the lead byte of a 1-, 2-, 3- and 4-byte sequence. */
    static const unsigned char mask[4] = {0x7f, 0x1f, 0x0f, 0x07};

    int c = getchar();
    if (c == EOF) {
        return -1;
    }

    /* Number of continuation bytes announced by the lead byte. */
    int type;
    if (c < 0x80) {
        type = 0;               /* 0xxxxxxx */
    } else if (c < 0xc0) {
        return -1;              /* 10xxxxxx: continuation byte without a lead byte */
    } else if (c < 0xe0) {
        type = 1;               /* 110xxxxx */
    } else if (c < 0xf0) {
        type = 2;               /* 1110xxxx */
    } else if (c < 0xf8) {
        type = 3;               /* 11110xxx */
    } else {
        return -1;              /* 0xF8..0xFF never appear in UTF-8 */
    }

    int result = (c & mask[type]) << (6 * type);
    for (int i = 1; i <= type; i++) {
        c = getchar();
        if (c == EOF) {
            return -1;          /* truncated sequence */
        }
        if ((c & 0xc0) != 0x80) {
            /* Not a continuation byte: leave it for the next call. */
            ungetc(c, stdin);
            return -1;
        }
        /* Each continuation byte contributes 6 bits, most significant first. */
        result |= (c & 0x3f) << (6 * (type - i));
    }

    if (result > 0x10ffff) {
        return -1;              /* beyond the Unicode code space */
    }
    return result;
}
encode
/*
 * Write the UTF-8 encoding of the code point x to standard output.
 *
 *   x <  0x80      -> 1 byte   0xxxxxxx
 *   x <  0x800     -> 2 bytes  110xxxxx 10xxxxxx
 *   x <  0x10000   -> 3 bytes  1110xxxx 10xxxxxx 10xxxxxx
 *   x <= 0x10FFFF  -> 4 bytes  11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
 *
 * Values outside 0 .. 0x10FFFF cannot be represented in UTF-8 and produce no
 * output. The bytes are written with an explicit length, so U+0000 is
 * emitted as a single 0x00 byte. Write errors are not reported.
 */
void encode(int x) {
    unsigned char str[4] = {0};
    size_t len = 0;

    if (x < 0 || x > 0x10ffff) {
        return;
    }

    if (x < 0x80) {
        str[0] = (unsigned char)x;
        len = 1;
    } else {
        int n = 0;                  /* number of continuation bytes */
        unsigned char prefix = 0;   /* fixed high bits of the lead byte */
        unsigned char payload = 0;  /* bits of x carried by the lead byte */

        if (x < 0x800) {
            n = 1; prefix = 0xc0; payload = 0x1f;
        } else if (x < 0x10000) {
            n = 2; prefix = 0xe0; payload = 0x0f;
        } else {
            n = 3; prefix = 0xf0; payload = 0x07;
        }

        str[0] = (unsigned char)(prefix | ((x >> (6 * n)) & payload));
        /* Remaining bits go out 6 at a time, most significant group first. */
        for (int i = 1; i <= n; i++) {
            str[i] = (unsigned char)(0x80 | ((x >> (6 * (n - i))) & 0x3f));
        }
        len = (size_t)n + 1;
    }

    fwrite(str, 1, len, stdout);
}
意外と Wikipedia (https://ja.wikipedia.org/wiki/UTF-8) がわかりやすくて便利だった。