入力された文字の上に、rubyタグを使って文字コードを表示します。
※合字などには対応していないため、コードポイント単位に分解して表示されます
See the Pen rubyタグによる文字毎のコード表示 by Ikiuo (@ikiuo) on CodePen.
unicode-point.html
<!DOCTYPE html>
<html lang="ja">
<head>
<meta charset="utf-8">
<title>Unicode コードポイント表示</title>
</head>
<body>
<table border="1">
<tr>
<td>入<br>力</td>
<td>
<textarea id="input" cols="80" rows="10" oninput="onInput()"></textarea>
</td>
</tr>
<tr>
<td>設<br>定</td>
<td>
<select id="codeset">
<option value="ucs4">UTF-32</option>
<option value="ucs2">UTF-16</option>
<option value="utf8">UTF-8</option>
</select>
<select id="base">
<option value="10">10進数</option>
<option value="16" selected>16進数</option>
<option value="x16">16進数(0xあり)</option>
</select>
</td>
</tr>
<tr>
<td>出<br>力</td>
<td id="output"></td>
</tr>
</table>
<script>
/**
 * Tests whether a value lies in the UTF-16 surrogate range U+D800..U+DFFF.
 *
 * @param {number} code - Code unit or code point to test.
 * @returns {boolean} True when `code` is a high or a low surrogate.
 */
function isSurrogate(code) {
    const SURROGATE_FIRST = 0xd800;
    const SURROGATE_END = 0xe000; // exclusive upper bound
    return code >= SURROGATE_FIRST && code < SURROGATE_END;
}
/**
 * Tests whether a value is a high (leading) surrogate, U+D800..U+DBFF.
 *
 * @param {number} code - Code unit or code point to test.
 * @returns {boolean} True when `code` is a high surrogate.
 */
function isSurrogate1(code) {
    const HIGH_FIRST = 0xd800;
    const HIGH_END = 0xdc00; // exclusive upper bound
    return code >= HIGH_FIRST && code < HIGH_END;
}
/**
 * Tests whether a value is a low (trailing) surrogate, U+DC00..U+DFFF.
 *
 * @param {number} code - Code unit or code point to test.
 * @returns {boolean} True when `code` is a low surrogate.
 */
function isSurrogate2(code) {
    const LOW_FIRST = 0xdc00;
    const LOW_END = 0xe000; // exclusive upper bound
    return code >= LOW_FIRST && code < LOW_END;
}
/**
 * Tests whether a code point is a Regional Indicator Symbol
 * (U+1F1E6..U+1F1FF); two of these in a row form a country flag.
 *
 * @param {number} ucs4 - Code point to test.
 * @returns {boolean} True when `ucs4` is a regional indicator.
 */
function isCountryFlagCode(ucs4) {
    const INDICATOR_FIRST = 0x1f1e6;
    const INDICATOR_END = 0x1f200; // exclusive upper bound
    return ucs4 >= INDICATOR_FIRST && ucs4 < INDICATOR_END;
}
/**
 * Tests whether a code point is a Unicode variation selector.
 *
 * Two blocks are recognised:
 *   - Variation Selectors            U+FE00..U+FE0F   (VS1..VS16)
 *   - Variation Selectors Supplement U+E0100..U+E01EF (VS17..VS256)
 *
 * The previous bounds (0xFF00..0xFF0F and 0xE0100..0xE1EFF) matched
 * fullwidth punctuation instead of VS1..VS16 and overshot the supplement,
 * so they are corrected here.
 *
 * @param {number} code - Code point to test.
 * @returns {boolean} True when `code` is a variation selector.
 */
function isVariationSelector(code) {
    return ((0xfe00 <= code && code <= 0xfe0f) ||
            (0xe0100 <= code && code <= 0xe01ef));
}
/**
 * Encodes one code point as a sequence of UTF-8 byte values.
 *
 * No validation is performed: surrogates and values above U+10FFFF are
 * encoded mechanically with the same bit layout.
 *
 * @param {number} ucs4 - Code point to encode.
 * @returns {number[]} One to four byte values.
 */
function toUTF8(ucs4) {
    // Builds a continuation byte (10xxxxxx) from the six bits at `shift`.
    const tail = (shift) => ((ucs4 >> shift) & 0x3f) | 0x80;

    if (ucs4 < 0x80) {
        return [ucs4];
    }
    if (ucs4 < 0x800) {
        return [(ucs4 >> 6) | 0xc0, tail(0)];
    }
    if (ucs4 < 0x10000) {
        return [(ucs4 >> 12) | 0xe0, tail(6), tail(0)];
    }
    return [(ucs4 >> 18) | 0xf0, tail(12), tail(6), tail(0)];
}
/**
 * Encodes one code point as UTF-16 code units.
 *
 * Despite the name, the result is UTF-16: a supplementary code point
 * (U+10000 and above) becomes a [high surrogate, low surrogate] pair.
 * A code point below U+10000 is returned unchanged as a single unit.
 *
 * The high surrogate carries the upper 10 of the 20 offset bits, so the
 * shift must be 10; the previous shift of 12 produced wrong pairs.
 *
 * @param {number} ucs4 - Code point to encode.
 * @returns {number[]} One code unit, or a surrogate pair.
 */
function toUCS2(ucs4) {
    if (ucs4 < 0x10000)
        return [ucs4];
    const code = ucs4 - 0x10000;
    const hi = (code >> 10) & 0x3ff;
    const lo = code & 0x3ff;
    return [hi | 0xd800, lo | 0xdc00];
}
/**
 * Converts a string into the list of its code points.
 *
 * The string is walked by its iterator, so a valid surrogate pair yields
 * one value and a lone surrogate yields its own code unit value.
 *
 * @param {string} s - Text to decode.
 * @returns {number[]} Code points in order of appearance.
 */
function toUCS4(s) {
    return [...s].map((ch) => ch.codePointAt(0));
}
/**
 * Shorthand for `document.getElementById`.
 *
 * @param {string} id - Element id to look up.
 * @returns {*} Whatever `document.getElementById(id)` returns.
 */
function getTag(id) {
    const tag = document.getElementById(id);
    return tag;
}
/**
 * Removes every child node of `tag`, one at a time from the front.
 *
 * @param {Node} tag - Node to empty.
 * @returns {void}
 */
function removeChildren(tag) {
    for (let child = tag.firstChild; child; child = tag.firstChild) {
        tag.removeChild(child);
    }
}
/**
 * Splits a line of text into display units, each paired with the code
 * points it is made of.
 *
 * Grouping rules:
 *   - two consecutive regional indicators form one unit (a country flag);
 *   - a variation selector is appended to the unit before it, if any;
 *   - every other code point is a unit of its own.
 *
 * An unpaired regional indicator is emitted as its own unit. The code
 * point that follows it is then processed normally; previously it was
 * dropped from the output.
 *
 * @param {string} line - Text to analyse.
 * @returns {Array} List of `[text, codePoints]` pairs, where `text` is the
 *     string to display and `codePoints` is an array of numbers.
 */
function analyze(line)
{
    const output = [];
    // First half of a possible flag pair, held back until the next code point is seen.
    let countryflag = null;
    const putcode = (code) => {
        output.push([String.fromCodePoint(code), [code]]);
    };

    for (const code of toUCS4(line)) {
        const cf = countryflag;
        countryflag = null;

        if (isCountryFlagCode(code)) {
            if (cf === null) {
                countryflag = code;
            } else {
                output.push([String.fromCodePoint(cf, code), [cf, code]]);
            }
            continue;
        }

        // Flush an unpaired indicator, then fall through so that the
        // current code point is still emitted.
        if (cf !== null) {
            putcode(cf);
        }

        if (isVariationSelector(code) && output.length > 0) {
            const last = output[output.length - 1];
            last[0] += String.fromCodePoint(code);
            last[1].push(code);
            continue;
        }

        putcode(code);
    }

    // A trailing indicator with no partner.
    if (countryflag !== null) {
        putcode(countryflag);
    }
    return output;
}
/**
 * Formats a number as upper-case hexadecimal, zero-padded to the smallest
 * of 2, 4, 6 or 8 digits that fits its magnitude.
 *
 * @param {number} code - Value to format.
 * @returns {string} Hex digits without any prefix.
 */
function codeHex(code)
{
    let width = 8;
    if (code < 0x100) {
        width = 2;
    } else if (code < 0x10000) {
        width = 4;
    } else if (code < 0x1000000) {
        width = 6;
    }
    const digits = code.toString(16).toUpperCase();
    return ('0'.repeat(width) + digits).slice(-width);
}
/**
 * Formats a code value in the requested base.
 *
 * @param {number} code - Value to format.
 * @param {number} base - 16 selects padded hexadecimal via `codeHex`;
 *     any other value selects plain decimal.
 * @param {boolean} prefix - When truthy, hexadecimal output is prefixed
 *     with `0x`. Ignored for decimal.
 * @returns {string} The formatted value.
 */
function codeBase(code, base, prefix) {
    // Loose comparison is kept on purpose so a value such as '16' still selects hex.
    if (base != 16) {
        return code.toString(10);
    }
    const hex = codeHex(code);
    return prefix ? `0x${hex}` : hex;
}
/**
 * Re-renders the output cell from the textarea text and the current
 * encoding / number-base settings.
 *
 * Each input line becomes a <p>. Each unit returned by `analyze` becomes a
 * <ruby> holding the characters in a <span> and the bracketed code list in
 * an <rt>, with <rp> parentheses around it as fallback.
 *
 * Reads the elements with ids `input`, `codeset` and `base`; replaces the
 * children of the element with id `output`.
 *
 * @returns {void}
 */
function onInput()
{
    const tagInp = getTag('input');
    const tagOut = getTag('output');
    const codeSet = getTag('codeset').value;
    const baseSpec = getTag('base').value;
    const bUCS2 = (codeSet === 'ucs2');
    const bUTF8 = (codeSet === 'utf8');
    // The radix is the last two characters: '10' -> 10, '16' and 'x16' -> 16.
    const nBase = Number(baseSpec.slice(-2));
    // A leading 'x' in the option value requests the '0x' prefix.
    const bPrefix = baseSpec.startsWith('x');

    // Split the text into lines. Each line keeps its terminating '\n',
    // and a trailing fragment without a newline is a line of its own.
    const text = tagInp.value;
    const lines = [];
    let start = 0;
    while (start < text.length) {
        const newline = text.indexOf('\n', start);
        const end = (newline < 0) ? text.length : newline + 1;
        lines.push(text.slice(start, end));
        start = end;
    }

    // Converts code points into the units of the selected encoding.
    // With neither UTF-16 nor UTF-8 selected the code points are shown as-is.
    const encode = (codePoints) => {
        if (!bUCS2 && !bUTF8) {
            return codePoints;
        }
        const units = [];
        for (const code of codePoints) {
            if (bUTF8) {
                units.push(...toUTF8(code));
            } else if (code >= 0x10000) {
                units.push(...toUCS2(code));
            } else {
                units.push(code);
            }
        }
        return units;
    };

    // Creates <name>, sets its text and appends it to `parent`.
    // innerText is kept from the original so that the rendering of
    // characters such as the trailing line feed does not change.
    const appendText = (parent, name, content) => {
        const tag = document.createElement(name);
        tag.innerText = content;
        parent.append(tag);
    };

    removeChildren(tagOut);
    for (const line of lines) {
        const tagLine = document.createElement('p');
        for (const [chars, codePoints] of analyze(line)) {
            const tagRuby = document.createElement('ruby');
            const codeList = encode(codePoints)
                .map((c) => codeBase(c, nBase, bPrefix))
                .join(',');
            appendText(tagRuby, 'span', chars);
            appendText(tagRuby, 'rp', '(');
            appendText(tagRuby, 'rt', `[${codeList}]`);
            appendText(tagRuby, 'rp', ')');
            tagLine.append(tagRuby);
        }
        tagOut.append(tagLine);
    }
}
// After the page has loaded: re-render whenever one of the <select>
// settings changes, and render once for the current textarea content.
window.onload = () => {
    for (const selectTag of document.querySelectorAll('select')) {
        selectTag.addEventListener('input', onInput);
    }
    onInput();
};
</script>
</body>
</html>