はじめに
正規表現を使ったソースの修正をしているときにexec()やmatch()の実行結果に見慣れないプロパティgroupsを見つけました。
いつの間にかJavaScriptにも名前付きグループが実装されていたようです。
そこで軽く調べてみましたら、それ以外にもES2018において色々と追加されていたようです。
何番煎じか分かりませんが、この記事では、簡単にES2018の正規表現に関する新機能の動作を検証してみたいと思います。
厳密な定義等は、ほかの記事等を見てください。
正規表現の新機能
Named Capture Groups
従来は、
(pattern)
のようにpatternにマッチしたキャプチャにアクセスするには、$1
などキャプチャが現れた順番に対応する数値でしかアクセスできませんでした。
ES2018以降では、
(?<name>pattern)
と書くとキャプチャに名前$<name>
でアクセスできるようになります。
また、exec()やmatch()の結果にgroupsプロパティが追加されるのでそれからもアクセス出来るようになります。
これは、有っても使い勝手がそれほど変わらないと思います。
分かりやすいプログラムを書くのには、重宝すると思います。
// Demo: ES2018 named capture groups as seen through exec(), match() and replace().
let str = "white dog 800, black dog 1000, white cat 1000, black cat 1200,";

// The same pattern in three flavours: named groups with the g flag, named
// groups without it, and classic numbered groups for comparison.
let reG = /(?<color>white|black) (?<animal>cat|dog) (?<price>\d+)/g;
let re = /(?<color>white|black) (?<animal>cat|dog) (?<price>\d+)/;
let reN = /(white|black) (cat|dog) (\d+)/;
// Global twin of reN. replace() rewrites only the first match unless the
// pattern has the g flag, so the numbered-group replacement below needs this
// variant to rewrite every record like the other two replace() calls do.
let reNG = /(white|black) (cat|dog) (\d+)/g;

// exec() and non-global match() return a single match array that carries the
// captures plus a `groups` object; `groups` is undefined when the pattern has
// no named groups (reN).
let execResultG = reG.exec(str);
let execResult = re.exec(str);
// match() with a g-flagged pattern returns only the matched substrings.
let matchResultG = str.match(reG);
let matchResult = str.match(re);
let matchResultN = str.match(reN);
console.log(execResultG);
console.log(execResult);
console.log(matchResultG);
console.log(matchResult);
console.log(matchResultN);

// Named references ($<name>) and numbered references ($1) are interchangeable
// in the replacement string when the pattern defines named groups.
let replaceResultName = str.replace(reG, "$<color>+$<animal>+$<price>");
let replaceResult = str.replace(reG, "$1+$2+$3");
let replaceResultN = str.replace(reNG, "$1+$2+$3");
console.log(replaceResultName);
console.log(replaceResult);
console.log(replaceResultN);
execResultG, execResult, matchResultの出力例
0: "white dog 800"
1: "white"
2: "dog"
3: "800"
groups: {color: "white", animal: "dog", price: "800"}
index: 0
input: "white dog 800, black dog 1000, white cat 1000, black cat 1200,"
length: 4
matchResultGの出力例
["white dog 800", "black dog 1000", "white cat 1000", "black cat 1200"]
matchResultNの出力例
0: "white dog 800"
1: "white"
2: "dog"
3: "800"
groups: undefined
index: 0
input: "white dog 800, black dog 1000, white cat 1000, black cat 1200,"
length: 4
replaceResultName, replaceResult, replaceResultNの出力例
white+dog+800, black+dog+1000, white+cat+1000, black+cat+1200,
dotAll flag
.
は、従来では改行コードなどの一部を除いた文字にマッチしていました。
改行コードを含んだ文字にマッチさせたい場合、[\s\S]
や[^]
などを使用する必要がありました。
ES2018以降では、/.*/s
のようにsフラグを追加することで.
が改行文字などすべての文字に対応するようになります。
これも、有ったら便利だなという程度でしょうか。
何故いままでなかったのだろうかとも思います。
// Demo: how "." treats line terminators with and without the ES2018 s (dotAll) flag.
// Input: four records, each terminated by a line feed.
let str = "white dog 800\nblack dog 1000\nwhite cat 1000\nblack cat 1200\n";

// With the s flag, "." also matches line terminators, so the greedy match
// runs through the whole input.
let reS = /.*/s;
let matchResultS = str.match(reS);

// Without the s flag, "." stops at the first line terminator.
let re = /.*/;
let matchResult = str.match(re);

// Workarounds that match any character without relying on the s flag.
let reB = /[\s\S]*/;
let matchResultB = str.match(reB);
let reNB = /[^]*/;
let matchResultNB = str.match(reNB);

console.log(str);
for (const found of [matchResultS, matchResult, matchResultB, matchResultNB]) {
  console.log(found);
}
matchResultS, matchResultB, matchResultNBの出力例
0: "white dog 800↵black dog 1000↵white cat 1000↵black cat 1200↵"
groups: undefined
index: 0
input: "white dog 800↵black dog 1000↵white cat 1000↵black cat 1200↵"
length: 1
matchResultの出力例
0: "white dog 800"
groups: undefined
index: 0
input: "white dog 800↵black dog 1000↵white cat 1000↵black cat 1200↵"
length: 1
Lookbehind Assertions
ES2018以降では、戻り読み(Lookbehind)が実装されました。
特定のパターンに続くパターンにマッチさせる事が出来るようになりました。
以下のように書くと、
/(?<=prePattern)pattern/
prePatternが前にあるpatternにのみ、マッチするようになります。
逆に、特定のパターンに続かないパターンにマッチさせることも出来ます。
以下のように書くと、
/(?<!prePattern)pattern/
prePatternが前にないpatternにのみ、マッチするようになります。
厳密にいえば、(?<=prePattern)
は、prePatternが前に存在する位置にマッチします。
同じように、(?<!prePattern)
は、prePatternが前に存在しない位置にマッチします。
本当に使いこなそうとすると、この位置にマッチするという事を意識する必要があったりします。
あまりにも複雑な場合は、正規表現だけで解決するのを考え直した方が簡単だと思いますが。
// Demo: ES2018 lookbehind assertions.
// Every name is declared here so the snippet runs on its own; bare assignments
// to undeclared names throw a ReferenceError in strict mode and ES modules.
let str = "white cat1, gray cat2, black cat3, red cat4, green cat5, blue cat6, cyan cat7, magenta cat8, yellow cat9";
// Positive lookbehind: "cat<digit>" only where "green " comes immediately before it.
let re = /(?<=green )cat\d/g;
// Negative lookbehind: "cat<digit>" only where "green " does NOT come immediately before it.
let reN = /(?<!green )cat\d/g;
let result = str.match(re);
let resultN = str.match(reN);
console.log(result);
console.log(resultN);
/(?<=green )cat\d/g;
の出力例
["cat5"]
/(?<!green )cat\d/g;
の出力例
["cat1", "cat2", "cat3", "cat4", "cat6", "cat7", "cat8", "cat9"]
位置にマッチするという動きから、patternが含まれない行にマッチさせるという事も出来るようになります。
下の記述は、典型的な例です。
戻り読みが使えない状況で正規表現だけでマッチさせられるかもしれませんが、筆者は分かりません。
この場合も、JavaScriptを使えば正規表現だけで解決する必要はないのですが。
/*
 * Demo: select whole lines using a negative lookbehind on every character.
 *
 * Contents of str, one record per line:
 *   abc cat
 *   def dog
 *   ghi cat
 *   jkl dog
 *   mno cat
 *   123 abc cat
 *   789 def dog
 *   012 ghi cat
 *   345 jkl dog
 *   678 mno cat
 */
// Every name is declared here so the snippet runs on its own; bare assignments
// to undeclared names throw a ReferenceError in strict mode and ES modules.
let str = "abc cat\ndef dog\nghi cat\njkl dog\nmno cat\n123 abc cat\n789 def dog\n012 ghi cat\n345 jkl dog\n678 mno cat\n";
// With the m flag, ^ and $ anchor at each line. A character is consumed only
// when the text right before it is not "abc", so a line is rejected as soon as
// any character follows an "abc".
// NOTE: an "abc" sitting at the very end of a line has no following character
// to trip the lookbehind, so such a line would still match.
let re = /^((?<!abc).)+$/mg;
let result = str.match(re);
console.log(result);
/^((?<!abc).)+$/mg
の出力例
["def dog", "ghi cat", "jkl dog", "mno cat", "789 def dog", "012 ghi cat", "345 jkl dog", "678 mno cat"]
余談ですが、数値の3桁毎にカンマを打つ正規表現はないかと考えたことがありました。
戻り読みがなくても実装できるのですが、戻り読みを使えばより分かりやすいのではないかと思います。
(どっちも分からんがな)
// Demo: insert thousands separators into every run of digits, first with a
// capture plus lookahead, then with a lookbehind plus lookahead.
// Every name is declared here so the snippet runs on its own; bare assignments
// to undeclared names throw a ReferenceError in strict mode and ES modules.
let str = "1\n12\n123\n1234\n12345\n123456\n1234567\n12345678\n123456789\nabc1def\nabc12def\nabc123def\nabc1234def\nabc12345def\nabc123456def\nabc1234567def\nabc12345678def\nabc123456789def\n 123456789 123456789 123456789";

// Without lookbehind: consume one digit that is followed by a multiple of
// three digits (and then a non-digit), and put it back followed by a comma.
let re = /(\d)(?=(?:\d{3})+(?!\d))/g;
let result = str.replace(re, "$1,");
console.log(result);

// With lookbehind: match the empty position between a digit and a multiple of
// three digits, so the replacement is just the comma itself.
re = /(?<=\d)(?=(?:\d{3})+(?!\d))/g;
result = str.replace(re, ",");
console.log(result);
/(\d)(?=(?:\d{3})+(?!\d))/g
および/(?<=\d)(?=(?:\d{3})+(?!\d))/g
の出力例
1
12
123
1,234
12,345
123,456
1,234,567
12,345,678
123,456,789
abc1def
abc12def
abc123def
abc1,234def
abc12,345def
abc123,456def
abc1,234,567def
abc12,345,678def
abc123,456,789def
123,456,789 123,456,789 123,456,789
Unicode property escapes
ES2018以降では、Unicodeプロパティを指定してマッチさせることが出来るようになりました。
これはとても便利ですね。ウヒョー
と言いたいところですが、使いこなすにはUnicodeプロパティの知識が必要そうです。
迂闊に使うと想定していない文字をマッチさせてしまいそうです。
厳密に使いたいからと色々調べていくとUnicodeの闇へようこそとなります。ウヒャー
基本的には、\p{property}
もしくは\P{property}
と記述します。
指定したpropertyを持っている文字とマッチさせたいときは、以下のように小文字で書きます。
\p{property}
逆に、指定したpropertyを持っていない文字とマッチさせたいときは、大文字を使います。
\P{property}
正規表現の本来の定義ならば、"property"に相当する記述は、大文字小文字の区別なく扱えるようになっています。
しかし、JavaScriptで扱う場合は、大文字小文字を正確に入力しなければいけません。
またJavaScriptで扱う場合は、/\p{property}/u
のようにu
フラグが必須となります。
では、具体的にpropertyにはどんな記述をするか述べていきたいと思います。
まず、Unicodeプロパティは、数多くありますが大きく分けるとbinary propertyとnon-binary propertyに分けられます。
Binaryプロパティ
binaryプロパティは、その属性を持っているかどうかの値として、Yes/No(Y/N)もしくはTrue/False(T/F)を持ちます。
(厳密にいえば、Maybe(M)も設定できるようです。)
Binaryプロパティを指定する場合は、以下の形式で行います。
\p{LoneUnicodePropertyNameOrValue}
binaryプロパティのPropertyName
としてES2018で扱えるものの一部をあげると以下のようなものがあります。
-
ASCII_Hex_Digit
AHex
-
Alphabetic
Alpha
Cased
Dash
-
Hex_Digit
Hex
Hyphen(※Unicodeでは定義されていますが、ES2018のサポート対象には含まれておらず、\p{Hyphen}と書くとSyntaxErrorになります)
-
Ideographic
Ideo
-
Lowercase
Lower
Math
Radical
-
Uppercase
Upper
-
White_Space
WSpace
具体的な例としては、
/\p{ASCII_Hex_Digit}+/ug;
/\p{Math}*/u;
/[\p{Alphabetic}]/u;
などになります。
non-binaryプロパティ
non-binaryプロパティは、binaryプロパティ以外のプロパティです。
non-binaryプロパティの持てる値は、プロパティごとに異なります。
ES2018で扱えるのは、次の3つです。
-
General_Category
,gc
-
Script
,sc
-
Script_Extensions
,scx
General_Category
General_Category
は、文字を大まかに分けるとどのような分類になるかを表すものです。
具体的には、文字、記号、数字、空白などの分類です。
General_Category
を指定する場合は、
\p{UnicodePropertyName=UnicodePropertyValue}
の形式か、Non-binaryプロパティの中で唯一UnicodePropertyName=
を省略することができるので、
\p{LoneUnicodePropertyNameOrValue}
と、2つの形式が有効です。
General_Category
の値
General_Category
が持てる値は、以下のようなものがあります。
Surrogate
Letter
Lowercase_Letter
Uppercase_Letter
Number
Mark
Separator
Symbol
Punctuation
Other
具体的には、
/\p{General_Category=Lowercase_Letter}+/ug;
/\p{gc=Uppercase_Letter}/u;
/[\p{Number}\p{Mark}]+/u;
などと指定できます。
Script
およびScript_Extensions
Script
およびScript_Extensions
は、
\p{UnicodePropertyName=UnicodePropertyValue}
の形式のみ指定できます。
Script
は、文字を文字体系で分けるとどのような分類になるかを表すものです。
具体的には、ラテン文字、アラビア文字、平仮名、片仮名、漢字などの分類です。
Script
は、1文字に1つの値しか持てません。
"、"や"。"などの複数の体系に現れる文字でも、Common
など何らかの1つの値に設定されています。
これで困るのが以下のような文章から、例えば平仮名で構成されている文字列だけを抜き出したいときです。
ねこだいすき、ふりすびー。cat love Frisbee!!!.
これをScript
のみで抜き出そうとして、
/\p{Script=Hiragana}+/ug;
を指定してみます。
結果は、
["ねこだいすき", "ふりすび"]
と句読点と長音記号が入りません。
では、
/[\p{Script=Hiragana}\p{Script=Common}]+/ug;
と指定してみます。
今度は、
["ねこだいすき、ふりすびー。", " ", " ", "!!!."]
と目的のものは手に入りましたが空白など余計なものも抜き出してしまっています。
そこで、複数の文字体系に現れる文字にも対応しようというのがScript_Extensions
です。
こちらも、ラテン文字、アラビア文字、平仮名、片仮名、漢字などの分類を表すものです。
Script
との違いは、1文字でも複数の値を持てるという事です。
このため"、"や"。"など複数の体系に現れる文字は、Bopo Hang Hani Hira Kana Yiii
など複数の値を持つことになります。
(注音符号(ボポモフォ)、ハングル、漢字、平仮名、片仮名、彝文字(いもじ))
さて今度は、Script_Extensions
のみを使って上記の文章から平仮名で構成されている文字列を抜き出してみます。
/\p{Script_Extensions=Hiragana}+/ug;
とすると、
["ねこだいすき、ふりすびー。"]
と目的のものだけすっきりと抜き出すことが出来ました。
(そもそも、平仮名に長音記号を使って良いのか分かりませんが、Unicodeは一般的な使われ方を採用したようです。)
Script
およびScript_Extensions
の値
Script
およびScript_Extensions
が持てる値は、以下のようなものがあります。
-
Han
漢字 -
Hiragana
平仮名 -
Katakana
片仮名 -
Common
一般
具体的には、
/\p{Script=Hiragana}/u;
/\p{Script_Extensions=Katakana}/u;
/\p{Script_Extensions=Han}/u;
などと指定できます。
プロパティと値の一覧
全てのプロパティと値が知りたい方は、こちらUnicodeプロパティ一覧をご覧ください。
Name
とValue
用語が正しいか分かりませんが、Property Name
とProperty Value
の区別をしっかり付けたほうが良さそうです。
-
ASCII_Hex_Digit
(Binaryプロパティ) -
Lowercase
(Binaryプロパティ) -
Uppercase
(Binaryプロパティ) General_Category
Script
Script_Extensions
などはProperty Name
(もしくは単にProperty
)です。
一方、
-
Lowercase_Letter
(General_Categoryの値) -
Uppercase_Letter
(General_Categoryの値) -
Math_Symbol
(General_Categoryの値) -
Punctuation
(General_Categoryの値) -
Han
(ScriptおよびScript_Extensionsの値) -
Hiragana
(ScriptおよびScript_Extensionsの値) -
Katakana
(ScriptおよびScript_Extensionsの値)
などは、Property Value
です。
つまり\p{UnicodePropertyName=UnicodePropertyValue}
とした場合の、左辺がName
で、右辺がValue
です。
Binaryプロパティは、右辺値がYes/NoだけなのでName
を調べる必要があります。
一方、Non-binaryプロパティは、左辺値がGeneral_Category
, Script
, Script_Extensions
と3つだけですが、取りうるValue
を調べる必要があります。
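小文字を抜き出す正規表現でも以下のようにいくつか記述を変えることができます。ただし、\p{Lowercase}(Binaryプロパティ)はLowercase_Letter以外にOther_Lowercaseを持つ文字(例: ª)にもマッチするため、General_Category=Lowercase_Letterを指定する残りの3つと完全に同じ結果になるとは限りません。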
-
\p{Lowercase}
(Binaryプロパティ) \p{General_Category=Lowercase_Letter}
-
\p{gc=Lowercase_Letter}
(略記法) -
\p{Lowercase_Letter}
(略記法)
Name
やValue
など何を扱っているかしっかり把握しておかないと思わぬところでつまずきそうです。
指定方法に関して
\p{property}
の記述は文字クラスを表す表現なので[]
で囲むのが望ましいという記述も見かけました。
それを信じるならば、
/[\p{General_Category=Decimal_Number}]/u;
/[\p{Script=Greek}]/u;
/[\p{Script_Extensions=Han}]/u;
とするのが一番お行儀が良い書き方なようです。
(そもそもこのように書かないとエラーになる言語もあるようです。)
コード
長々と記述してきましたが、とりあえずどんな文字がどんなプロパティなのか全部まとめて調べてみました。
と言っても出力結果が膨大になりますので結果は省略します。
npm
6.7.0
babel
6.26.0
babel-core
6.26.3
node.js
v11.13.0
の環境で実行させました。
表示には、
Atom Editor
Unifont
を使いました。
重いのでAtom
じゃない方が良いかもしれないです。
Unicodeプロパティ羅列
"use babel";
import fs from "fs";
/**
 * Binary Unicode properties to probe.
 *
 * Each entry pairs a property's long name with a global, Unicode-mode regex
 * that matches runs of one or more code points having that property.
 * Every property is listed exactly once. Short aliases are recorded in the
 * trailing comments instead of being separate entries, so the same property
 * is not reported twice.
 */
const binary = [
  { name: "ASCII_Hex_Digit", regex: /\p{ASCII_Hex_Digit}+/ug }, // alias: AHex
  { name: "ASCII", regex: /\p{ASCII}+/ug },
  { name: "Alphabetic", regex: /\p{Alphabetic}+/ug }, // alias: Alpha
  { name: "Any", regex: /\p{Any}+/ug },
  { name: "Assigned", regex: /\p{Assigned}+/ug },
  { name: "Bidi_Control", regex: /\p{Bidi_Control}+/ug }, // alias: Bidi_C
  { name: "Bidi_Mirrored", regex: /\p{Bidi_Mirrored}+/ug }, // alias: Bidi_M
  { name: "Case_Ignorable", regex: /\p{Case_Ignorable}+/ug }, // alias: CI
  { name: "Cased", regex: /\p{Cased}+/ug },
  { name: "Changes_When_Casefolded", regex: /\p{Changes_When_Casefolded}+/ug }, // alias: CWCF
  { name: "Changes_When_Casemapped", regex: /\p{Changes_When_Casemapped}+/ug }, // alias: CWCM
  { name: "Changes_When_NFKC_Casefolded", regex: /\p{Changes_When_NFKC_Casefolded}+/ug }, // alias: CWKCF
  { name: "Changes_When_Lowercased", regex: /\p{Changes_When_Lowercased}+/ug }, // alias: CWL
  { name: "Changes_When_Titlecased", regex: /\p{Changes_When_Titlecased}+/ug }, // alias: CWT
  { name: "Changes_When_Uppercased", regex: /\p{Changes_When_Uppercased}+/ug }, // alias: CWU
  { name: "Dash", regex: /\p{Dash}+/ug },
  { name: "Default_Ignorable_Code_Point", regex: /\p{Default_Ignorable_Code_Point}+/ug }, // alias: DI
  { name: "Deprecated", regex: /\p{Deprecated}+/ug }, // alias: Dep
  { name: "Diacritic", regex: /\p{Diacritic}+/ug }, // alias: Dia
  { name: "Emoji_Component", regex: /\p{Emoji_Component}+/ug }, // alias: EComp
  { name: "Emoji_Modifier_Base", regex: /\p{Emoji_Modifier_Base}+/ug }, // alias: EBase
  { name: "Emoji_Modifier", regex: /\p{Emoji_Modifier}+/ug }, // alias: EMod
  { name: "Emoji_Presentation", regex: /\p{Emoji_Presentation}+/ug }, // alias: EPres
  { name: "Emoji", regex: /\p{Emoji}+/ug },
  { name: "Extender", regex: /\p{Extender}+/ug }, // alias: Ext
  { name: "Grapheme_Base", regex: /\p{Grapheme_Base}+/ug }, // alias: Gr_Base
  { name: "Grapheme_Extend", regex: /\p{Grapheme_Extend}+/ug }, // alias: Gr_Ext
  { name: "Hex_Digit", regex: /\p{Hex_Digit}+/ug }, // alias: Hex
  { name: "IDS_Binary_Operator", regex: /\p{IDS_Binary_Operator}+/ug }, // alias: IDSB
  { name: "IDS_Trinary_Operator", regex: /\p{IDS_Trinary_Operator}+/ug }, // alias: IDST
  { name: "ID_Continue", regex: /\p{ID_Continue}+/ug }, // alias: IDC
  { name: "ID_Start", regex: /\p{ID_Start}+/ug }, // alias: IDS
  { name: "Ideographic", regex: /\p{Ideographic}+/ug }, // alias: Ideo
  { name: "Join_Control", regex: /\p{Join_Control}+/ug }, // alias: Join_C
  { name: "Logical_Order_Exception", regex: /\p{Logical_Order_Exception}+/ug }, // alias: LOE
  { name: "Lowercase", regex: /\p{Lowercase}+/ug }, // alias: Lower
  { name: "Math", regex: /\p{Math}+/ug },
  { name: "Noncharacter_Code_Point", regex: /\p{Noncharacter_Code_Point}+/ug }, // alias: NChar
  { name: "Pattern_Syntax", regex: /\p{Pattern_Syntax}+/ug }, // alias: Pat_Syn
  { name: "Pattern_White_Space", regex: /\p{Pattern_White_Space}+/ug }, // alias: Pat_WS
  { name: "Quotation_Mark", regex: /\p{Quotation_Mark}+/ug }, // alias: QMark
  { name: "Radical", regex: /\p{Radical}+/ug },
  { name: "Regional_Indicator", regex: /\p{Regional_Indicator}+/ug }, // alias: RI
  { name: "Soft_Dotted", regex: /\p{Soft_Dotted}+/ug }, // alias: SD
  { name: "Sentence_Terminal", regex: /\p{Sentence_Terminal}+/ug }, // alias: STerm
  { name: "Terminal_Punctuation", regex: /\p{Terminal_Punctuation}+/ug }, // alias: Term
  { name: "Unified_Ideograph", regex: /\p{Unified_Ideograph}+/ug }, // alias: UIdeo
  { name: "Uppercase", regex: /\p{Uppercase}+/ug }, // alias: Upper
  { name: "Variation_Selector", regex: /\p{Variation_Selector}+/ug }, // alias: VS
  { name: "White_Space", regex: /\p{White_Space}+/ug }, // aliases: WSpace, space
  { name: "XID_Continue", regex: /\p{XID_Continue}+/ug }, // alias: XIDC
  { name: "XID_Start", regex: /\p{XID_Start}+/ug }, // alias: XIDS

  // Deliberately left out, carried over from the original table.
  // Tagged "not implemented" there:
  //   Composition_Exclusion (CE), Grapheme_Link (Gr_Link),
  //   Other_Alphabetic (OAlpha), Other_Default_Ignorable_Code_Point (ODI),
  //   Other_Grapheme_Extend (OGr_Ext), Other_ID_Continue (OIDC),
  //   Other_ID_Start (OIDS), Other_Lowercase (OLower), Other_Math (OMath),
  //   Other_Uppercase (OUpper), Prepended_Concatenation_Mark (PCM)
  // Disabled there without a stated reason:
  //   Hyphen
];
/**
 * General_Category values to probe.
 *
 * Each entry pairs a value's long name with a global, Unicode-mode regex
 * (written with the `gc=` property name) that matches runs of one or more
 * code points in that category. Short aliases are noted in the trailing
 * comments rather than listed as entries of their own.
 */
const generalCategory = [
  { name: "Control", regex: /\p{gc=Control}+/ug }, // aliases: Cc, cntrl
  { name: "Format", regex: /\p{gc=Format}+/ug }, // alias: Cf
  { name: "Unassigned", regex: /\p{gc=Unassigned}+/ug }, // alias: Cn
  { name: "Private_Use", regex: /\p{gc=Private_Use}+/ug }, // alias: Co
  { name: "Surrogate", regex: /\p{gc=Surrogate}+/ug }, // alias: Cs
  { name: "Other", regex: /\p{gc=Other}+/ug }, // alias: C
  { name: "Cased_Letter", regex: /\p{gc=Cased_Letter}+/ug }, // alias: LC
  { name: "Lowercase_Letter", regex: /\p{gc=Lowercase_Letter}+/ug }, // alias: Ll
  { name: "Modifier_Letter", regex: /\p{gc=Modifier_Letter}+/ug }, // alias: Lm
  { name: "Other_Letter", regex: /\p{gc=Other_Letter}+/ug }, // alias: Lo
  { name: "Titlecase_Letter", regex: /\p{gc=Titlecase_Letter}+/ug }, // alias: Lt
  { name: "Uppercase_Letter", regex: /\p{gc=Uppercase_Letter}+/ug }, // alias: Lu
  { name: "Letter", regex: /\p{gc=Letter}+/ug }, // alias: L
  { name: "Spacing_Mark", regex: /\p{gc=Spacing_Mark}+/ug }, // alias: Mc
  { name: "Enclosing_Mark", regex: /\p{gc=Enclosing_Mark}+/ug }, // alias: Me
  { name: "Nonspacing_Mark", regex: /\p{gc=Nonspacing_Mark}+/ug }, // alias: Mn
  { name: "Mark", regex: /\p{gc=Mark}+/ug }, // aliases: M, Combining_Mark
  { name: "Decimal_Number", regex: /\p{gc=Decimal_Number}+/ug }, // aliases: Nd, digit
  { name: "Letter_Number", regex: /\p{gc=Letter_Number}+/ug }, // alias: Nl
  { name: "Other_Number", regex: /\p{gc=Other_Number}+/ug }, // alias: No
  { name: "Number", regex: /\p{gc=Number}+/ug }, // alias: N
  { name: "Connector_Punctuation", regex: /\p{gc=Connector_Punctuation}+/ug }, // alias: Pc
  { name: "Dash_Punctuation", regex: /\p{gc=Dash_Punctuation}+/ug }, // alias: Pd
  { name: "Close_Punctuation", regex: /\p{gc=Close_Punctuation}+/ug }, // alias: Pe
  { name: "Final_Punctuation", regex: /\p{gc=Final_Punctuation}+/ug }, // alias: Pf
  { name: "Initial_Punctuation", regex: /\p{gc=Initial_Punctuation}+/ug }, // alias: Pi
  { name: "Other_Punctuation", regex: /\p{gc=Other_Punctuation}+/ug }, // alias: Po
  { name: "Open_Punctuation", regex: /\p{gc=Open_Punctuation}+/ug }, // alias: Ps
  { name: "Punctuation", regex: /\p{gc=Punctuation}+/ug }, // aliases: P, punct
  { name: "Currency_Symbol", regex: /\p{gc=Currency_Symbol}+/ug }, // alias: Sc
  { name: "Modifier_Symbol", regex: /\p{gc=Modifier_Symbol}+/ug }, // alias: Sk
  { name: "Math_Symbol", regex: /\p{gc=Math_Symbol}+/ug }, // alias: Sm
  { name: "Other_Symbol", regex: /\p{gc=Other_Symbol}+/ug }, // alias: So
  { name: "Symbol", regex: /\p{gc=Symbol}+/ug }, // alias: S
  { name: "Line_Separator", regex: /\p{gc=Line_Separator}+/ug }, // alias: Zl
  { name: "Paragraph_Separator", regex: /\p{gc=Paragraph_Separator}+/ug }, // alias: Zp
  { name: "Space_Separator", regex: /\p{gc=Space_Separator}+/ug }, // alias: Zs
  { name: "Separator", regex: /\p{gc=Separator}+/ug }, // alias: Z
];
const script = [
// { "name": "Adlm", "regex": /\p{sc=Adlm}+/ug },
{ "name": "Adlam", "regex": /\p{sc=Adlam}+/ug },
// { "name": "Aghb", "regex": /\p{sc=Aghb}+/ug },
{ "name": "Caucasian_Albanian", "regex": /\p{sc=Caucasian_Albanian}+/ug },
{ "name": "Ahom", "regex": /\p{sc=Ahom}+/ug },
// { "name": "Arab", "regex": /\p{sc=Arab}+/ug },
{ "name": "Arabic", "regex": /\p{sc=Arabic}+/ug },
// { "name": "Armi", "regex": /\p{sc=Armi}+/ug },
{ "name": "Imperial_Aramaic", "regex": /\p{sc=Imperial_Aramaic}+/ug },
// { "name": "Armn", "regex": /\p{sc=Armn}+/ug },
{ "name": "Armenian", "regex": /\p{sc=Armenian}+/ug },
// { "name": "Avst", "regex": /\p{sc=Avst}+/ug },
{ "name": "Avestan", "regex": /\p{sc=Avestan}+/ug },
// { "name": "Bali", "regex": /\p{sc=Bali}+/ug },
{ "name": "Balinese", "regex": /\p{sc=Balinese}+/ug },
// { "name": "Bamu", "regex": /\p{sc=Bamu}+/ug },
{ "name": "Bamum", "regex": /\p{sc=Bamum}+/ug },
// { "name": "Bass", "regex": /\p{sc=Bass}+/ug },
{ "name": "Bassa_Vah", "regex": /\p{sc=Bassa_Vah}+/ug },
// { "name": "Batk", "regex": /\p{sc=Batk}+/ug },
{ "name": "Batak", "regex": /\p{sc=Batak}+/ug },
// { "name": "Beng", "regex": /\p{sc=Beng}+/ug },
{ "name": "Bengali", "regex": /\p{sc=Bengali}+/ug },
// { "name": "Bhks", "regex": /\p{sc=Bhks}+/ug },
{ "name": "Bhaiksuki", "regex": /\p{sc=Bhaiksuki}+/ug },
// { "name": "Bopo", "regex": /\p{sc=Bopo}+/ug },
{ "name": "Bopomofo", "regex": /\p{sc=Bopomofo}+/ug },
// { "name": "Brah", "regex": /\p{sc=Brah}+/ug },
{ "name": "Brahmi", "regex": /\p{sc=Brahmi}+/ug },
// { "name": "Brai", "regex": /\p{sc=Brai}+/ug },
{ "name": "Braille", "regex": /\p{sc=Braille}+/ug },
// { "name": "Bugi", "regex": /\p{sc=Bugi}+/ug },
{ "name": "Buginese", "regex": /\p{sc=Buginese}+/ug },
// { "name": "Buhd", "regex": /\p{sc=Buhd}+/ug },
{ "name": "Buhid", "regex": /\p{sc=Buhid}+/ug },
// { "name": "Cakm", "regex": /\p{sc=Cakm}+/ug },
{ "name": "Chakma", "regex": /\p{sc=Chakma}+/ug },
// { "name": "Cans", "regex": /\p{sc=Cans}+/ug },
{ "name": "Canadian_Aboriginal", "regex": /\p{sc=Canadian_Aboriginal}+/ug },
// { "name": "Cari", "regex": /\p{sc=Cari}+/ug },
{ "name": "Carian", "regex": /\p{sc=Carian}+/ug },
{ "name": "Cham", "regex": /\p{sc=Cham}+/ug },
// { "name": "Cher", "regex": /\p{sc=Cher}+/ug },
{ "name": "Cherokee", "regex": /\p{sc=Cherokee}+/ug },
// { "name": "Copt", "regex": /\p{sc=Copt}+/ug },
{ "name": "Coptic", "regex": /\p{sc=Coptic}+/ug },
// { "name": "Qaac", "regex": /\p{sc=Qaac}+/ug },
// { "name": "Cprt", "regex": /\p{sc=Cprt}+/ug },
{ "name": "Cypriot", "regex": /\p{sc=Cypriot}+/ug },
// { "name": "Cyrl", "regex": /\p{sc=Cyrl}+/ug },
{ "name": "Cyrillic", "regex": /\p{sc=Cyrillic}+/ug },
// { "name": "Deva", "regex": /\p{sc=Deva}+/ug },
{ "name": "Devanagari", "regex": /\p{sc=Devanagari}+/ug },
// { "name": "Dogr", "regex": /\p{sc=Dogr}+/ug },
{ "name": "Dogra", "regex": /\p{sc=Dogra}+/ug },
// { "name": "Dsrt", "regex": /\p{sc=Dsrt}+/ug },
{ "name": "Deseret", "regex": /\p{sc=Deseret}+/ug },
// { "name": "Dupl", "regex": /\p{sc=Dupl}+/ug },
{ "name": "Duployan", "regex": /\p{sc=Duployan}+/ug },
// { "name": "Egyp", "regex": /\p{sc=Egyp}+/ug },
{ "name": "Egyptian_Hieroglyphs", "regex": /\p{sc=Egyptian_Hieroglyphs}+/ug },
// { "name": "Elba", "regex": /\p{sc=Elba}+/ug },
{ "name": "Elbasan", "regex": /\p{sc=Elbasan}+/ug },
// { "name": "Elym", "regex": /\p{sc=Elym}+/ug }, //未実装
// { "name": "Elymaic", "regex": /\p{sc=Elymaic}+/ug }, //未実装
// { "name": "Ethi", "regex": /\p{sc=Ethi}+/ug },
{ "name": "Ethiopic", "regex": /\p{sc=Ethiopic}+/ug },
// { "name": "Geor", "regex": /\p{sc=Geor}+/ug },
{ "name": "Georgian", "regex": /\p{sc=Georgian}+/ug },
// { "name": "Glag", "regex": /\p{sc=Glag}+/ug },
{ "name": "Glagolitic", "regex": /\p{sc=Glagolitic}+/ug },
// { "name": "Gong", "regex": /\p{sc=Gong}+/ug },
{ "name": "Gunjala_Gondi", "regex": /\p{sc=Gunjala_Gondi}+/ug },
// { "name": "Gonm", "regex": /\p{sc=Gonm}+/ug },
{ "name": "Masaram_Gondi", "regex": /\p{sc=Masaram_Gondi}+/ug },
// { "name": "Goth", "regex": /\p{sc=Goth}+/ug },
{ "name": "Gothic", "regex": /\p{sc=Gothic}+/ug },
// { "name": "Gran", "regex": /\p{sc=Gran}+/ug },
{ "name": "Grantha", "regex": /\p{sc=Grantha}+/ug },
// { "name": "Grek", "regex": /\p{sc=Grek}+/ug },
{ "name": "Greek", "regex": /\p{sc=Greek}+/ug },
// { "name": "Gujr", "regex": /\p{sc=Gujr}+/ug },
{ "name": "Gujarati", "regex": /\p{sc=Gujarati}+/ug },
// { "name": "Guru", "regex": /\p{sc=Guru}+/ug },
{ "name": "Gurmukhi", "regex": /\p{sc=Gurmukhi}+/ug },
// { "name": "Hang", "regex": /\p{sc=Hang}+/ug },
{ "name": "Hangul", "regex": /\p{sc=Hangul}+/ug },
// { "name": "Hani", "regex": /\p{sc=Hani}+/ug },
{ "name": "Han", "regex": /\p{sc=Han}+/ug },
// { "name": "Hano", "regex": /\p{sc=Hano}+/ug },
{ "name": "Hanunoo", "regex": /\p{sc=Hanunoo}+/ug },
// { "name": "Hatr", "regex": /\p{sc=Hatr}+/ug },
{ "name": "Hatran", "regex": /\p{sc=Hatran}+/ug },
// { "name": "Hebr", "regex": /\p{sc=Hebr}+/ug },
{ "name": "Hebrew", "regex": /\p{sc=Hebrew}+/ug },
// { "name": "Hira", "regex": /\p{sc=Hira}+/ug },
{ "name": "Hiragana", "regex": /\p{sc=Hiragana}+/ug },
// { "name": "Hluw", "regex": /\p{sc=Hluw}+/ug },
{ "name": "Anatolian_Hieroglyphs", "regex": /\p{sc=Anatolian_Hieroglyphs}+/ug },
// { "name": "Hmng", "regex": /\p{sc=Hmng}+/ug },
{ "name": "Pahawh_Hmong", "regex": /\p{sc=Pahawh_Hmong}+/ug },
// { "name": "Hmnp", "regex": /\p{sc=Hmnp}+/ug }, //未実装
// { "name": "Nyiakeng_Puachue_Hmong", "regex": /\p{sc=Nyiakeng_Puachue_Hmong}+/ug }, //未実装
// { "name": "Hrkt", "regex": /\p{sc=Hrkt}+/ug }, //未実装
// { "name": "Katakana_Or_Hiragana", "regex": /\p{sc=Katakana_Or_Hiragana}+/ug }, //未実装
// { "name": "Hung", "regex": /\p{sc=Hung}+/ug },
{ "name": "Old_Hungarian", "regex": /\p{sc=Old_Hungarian}+/ug },
// { "name": "Ital", "regex": /\p{sc=Ital}+/ug },
{ "name": "Old_Italic", "regex": /\p{sc=Old_Italic}+/ug },
// { "name": "Java", "regex": /\p{sc=Java}+/ug },
{ "name": "Javanese", "regex": /\p{sc=Javanese}+/ug },
// { "name": "Kali", "regex": /\p{sc=Kali}+/ug },
{ "name": "Kayah_Li", "regex": /\p{sc=Kayah_Li}+/ug },
// { "name": "Kana", "regex": /\p{sc=Kana}+/ug },
{ "name": "Katakana", "regex": /\p{sc=Katakana}+/ug },
// { "name": "Khar", "regex": /\p{sc=Khar}+/ug },
{ "name": "Kharoshthi", "regex": /\p{sc=Kharoshthi}+/ug },
// { "name": "Khmr", "regex": /\p{sc=Khmr}+/ug },
{ "name": "Khmer", "regex": /\p{sc=Khmer}+/ug },
// { "name": "Khoj", "regex": /\p{sc=Khoj}+/ug },
{ "name": "Khojki", "regex": /\p{sc=Khojki}+/ug },
// { "name": "Knda", "regex": /\p{sc=Knda}+/ug },
{ "name": "Kannada", "regex": /\p{sc=Kannada}+/ug },
// { "name": "Kthi", "regex": /\p{sc=Kthi}+/ug },
{ "name": "Kaithi", "regex": /\p{sc=Kaithi}+/ug },
// { "name": "Lana", "regex": /\p{sc=Lana}+/ug },
{ "name": "Tai_Tham", "regex": /\p{sc=Tai_Tham}+/ug },
// { "name": "Laoo", "regex": /\p{sc=Laoo}+/ug },
{ "name": "Lao", "regex": /\p{sc=Lao}+/ug },
// { "name": "Latn", "regex": /\p{sc=Latn}+/ug },
{ "name": "Latin", "regex": /\p{sc=Latin}+/ug },
// { "name": "Lepc", "regex": /\p{sc=Lepc}+/ug },
{ "name": "Lepcha", "regex": /\p{sc=Lepcha}+/ug },
// { "name": "Limb", "regex": /\p{sc=Limb}+/ug },
{ "name": "Limbu", "regex": /\p{sc=Limbu}+/ug },
// { "name": "Lina", "regex": /\p{sc=Lina}+/ug },
{ "name": "Linear_A", "regex": /\p{sc=Linear_A}+/ug },
// { "name": "Linb", "regex": /\p{sc=Linb}+/ug },
{ "name": "Linear_B", "regex": /\p{sc=Linear_B}+/ug },
{ "name": "Lisu", "regex": /\p{sc=Lisu}+/ug },
// { "name": "Lyci", "regex": /\p{sc=Lyci}+/ug },
{ "name": "Lycian", "regex": /\p{sc=Lycian}+/ug },
// { "name": "Lydi", "regex": /\p{sc=Lydi}+/ug },
{ "name": "Lydian", "regex": /\p{sc=Lydian}+/ug },
// { "name": "Mahj", "regex": /\p{sc=Mahj}+/ug },
{ "name": "Mahajani", "regex": /\p{sc=Mahajani}+/ug },
// { "name": "Maka", "regex": /\p{sc=Maka}+/ug },
{ "name": "Makasar", "regex": /\p{sc=Makasar}+/ug },
// { "name": "Mand", "regex": /\p{sc=Mand}+/ug },
{ "name": "Mandaic", "regex": /\p{sc=Mandaic}+/ug },
// { "name": "Mani", "regex": /\p{sc=Mani}+/ug },
{ "name": "Manichaean", "regex": /\p{sc=Manichaean}+/ug },
// { "name": "Marc", "regex": /\p{sc=Marc}+/ug },
{ "name": "Marchen", "regex": /\p{sc=Marchen}+/ug },
// { "name": "Medf", "regex": /\p{sc=Medf}+/ug },
{ "name": "Medefaidrin", "regex": /\p{sc=Medefaidrin}+/ug },
// { "name": "Mend", "regex": /\p{sc=Mend}+/ug },
{ "name": "Mende_Kikakui", "regex": /\p{sc=Mende_Kikakui}+/ug },
// { "name": "Merc", "regex": /\p{sc=Merc}+/ug },
{ "name": "Meroitic_Cursive", "regex": /\p{sc=Meroitic_Cursive}+/ug },
// { "name": "Mero", "regex": /\p{sc=Mero}+/ug },
{ "name": "Meroitic_Hieroglyphs", "regex": /\p{sc=Meroitic_Hieroglyphs}+/ug },
// { "name": "Mlym", "regex": /\p{sc=Mlym}+/ug },
{ "name": "Malayalam", "regex": /\p{sc=Malayalam}+/ug },
{ "name": "Modi", "regex": /\p{sc=Modi}+/ug },
// { "name": "Mong", "regex": /\p{sc=Mong}+/ug },
{ "name": "Mongolian", "regex": /\p{sc=Mongolian}+/ug },
// { "name": "Mroo", "regex": /\p{sc=Mroo}+/ug },
{ "name": "Mro", "regex": /\p{sc=Mro}+/ug },
// { "name": "Mtei", "regex": /\p{sc=Mtei}+/ug },
{ "name": "Meetei_Mayek", "regex": /\p{sc=Meetei_Mayek}+/ug },
// { "name": "Mult", "regex": /\p{sc=Mult}+/ug },
{ "name": "Multani", "regex": /\p{sc=Multani}+/ug },
// { "name": "Mymr", "regex": /\p{sc=Mymr}+/ug },
{ "name": "Myanmar", "regex": /\p{sc=Myanmar}+/ug },
// { "name": "Nand", "regex": /\p{sc=Nand}+/ug }, //未実装
// { "name": "Nandinagari", "regex": /\p{sc=Nandinagari}+/ug }, //未実装
// { "name": "Narb", "regex": /\p{sc=Narb}+/ug },
{ "name": "Old_North_Arabian", "regex": /\p{sc=Old_North_Arabian}+/ug },
// { "name": "Nbat", "regex": /\p{sc=Nbat}+/ug },
{ "name": "Nabataean", "regex": /\p{sc=Nabataean}+/ug },
{ "name": "Newa", "regex": /\p{sc=Newa}+/ug },
// { "name": "Nkoo", "regex": /\p{sc=Nkoo}+/ug },
{ "name": "Nko", "regex": /\p{sc=Nko}+/ug },
// { "name": "Nshu", "regex": /\p{sc=Nshu}+/ug },
{ "name": "Nushu", "regex": /\p{sc=Nushu}+/ug },
// { "name": "Ogam", "regex": /\p{sc=Ogam}+/ug },
{ "name": "Ogham", "regex": /\p{sc=Ogham}+/ug },
// { "name": "Olck", "regex": /\p{sc=Olck}+/ug },
{ "name": "Ol_Chiki", "regex": /\p{sc=Ol_Chiki}+/ug },
// { "name": "Orkh", "regex": /\p{sc=Orkh}+/ug },
{ "name": "Old_Turkic", "regex": /\p{sc=Old_Turkic}+/ug },
// { "name": "Orya", "regex": /\p{sc=Orya}+/ug },
{ "name": "Oriya", "regex": /\p{sc=Oriya}+/ug },
// { "name": "Osge", "regex": /\p{sc=Osge}+/ug },
{ "name": "Osage", "regex": /\p{sc=Osage}+/ug },
// { "name": "Osma", "regex": /\p{sc=Osma}+/ug },
{ "name": "Osmanya", "regex": /\p{sc=Osmanya}+/ug },
// { "name": "Palm", "regex": /\p{sc=Palm}+/ug },
{ "name": "Palmyrene", "regex": /\p{sc=Palmyrene}+/ug },
// { "name": "Pauc", "regex": /\p{sc=Pauc}+/ug },
{ "name": "Pau_Cin_Hau", "regex": /\p{sc=Pau_Cin_Hau}+/ug },
// { "name": "Perm", "regex": /\p{sc=Perm}+/ug },
{ "name": "Old_Permic", "regex": /\p{sc=Old_Permic}+/ug },
// { "name": "Phag", "regex": /\p{sc=Phag}+/ug },
{ "name": "Phags_Pa", "regex": /\p{sc=Phags_Pa}+/ug },
// { "name": "Phli", "regex": /\p{sc=Phli}+/ug },
{ "name": "Inscriptional_Pahlavi", "regex": /\p{sc=Inscriptional_Pahlavi}+/ug },
// { "name": "Phlp", "regex": /\p{sc=Phlp}+/ug },
{ "name": "Psalter_Pahlavi", "regex": /\p{sc=Psalter_Pahlavi}+/ug },
// { "name": "Phnx", "regex": /\p{sc=Phnx}+/ug },
{ "name": "Phoenician", "regex": /\p{sc=Phoenician}+/ug },
// { "name": "Plrd", "regex": /\p{sc=Plrd}+/ug },
{ "name": "Miao", "regex": /\p{sc=Miao}+/ug },
// { "name": "Prti", "regex": /\p{sc=Prti}+/ug },
{ "name": "Inscriptional_Parthian", "regex": /\p{sc=Inscriptional_Parthian}+/ug },
// { "name": "Rjng", "regex": /\p{sc=Rjng}+/ug },
{ "name": "Rejang", "regex": /\p{sc=Rejang}+/ug },
// { "name": "Rohg", "regex": /\p{sc=Rohg}+/ug },
{ "name": "Hanifi_Rohingya", "regex": /\p{sc=Hanifi_Rohingya}+/ug },
// { "name": "Runr", "regex": /\p{sc=Runr}+/ug },
{ "name": "Runic", "regex": /\p{sc=Runic}+/ug },
// { "name": "Samr", "regex": /\p{sc=Samr}+/ug },
{ "name": "Samaritan", "regex": /\p{sc=Samaritan}+/ug },
// { "name": "Sarb", "regex": /\p{sc=Sarb}+/ug },
{ "name": "Old_South_Arabian", "regex": /\p{sc=Old_South_Arabian}+/ug },
// { "name": "Saur", "regex": /\p{sc=Saur}+/ug },
{ "name": "Saurashtra", "regex": /\p{sc=Saurashtra}+/ug },
// { "name": "Sgnw", "regex": /\p{sc=Sgnw}+/ug },
{ "name": "SignWriting", "regex": /\p{sc=SignWriting}+/ug },
// { "name": "Shaw", "regex": /\p{sc=Shaw}+/ug },
{ "name": "Shavian", "regex": /\p{sc=Shavian}+/ug },
// { "name": "Shrd", "regex": /\p{sc=Shrd}+/ug },
{ "name": "Sharada", "regex": /\p{sc=Sharada}+/ug },
// { "name": "Sidd", "regex": /\p{sc=Sidd}+/ug },
{ "name": "Siddham", "regex": /\p{sc=Siddham}+/ug },
// { "name": "Sind", "regex": /\p{sc=Sind}+/ug },
{ "name": "Khudawadi", "regex": /\p{sc=Khudawadi}+/ug },
// { "name": "Sinh", "regex": /\p{sc=Sinh}+/ug },
{ "name": "Sinhala", "regex": /\p{sc=Sinhala}+/ug },
// { "name": "Sogd", "regex": /\p{sc=Sogd}+/ug },
{ "name": "Sogdian", "regex": /\p{sc=Sogdian}+/ug },
// { "name": "Sogo", "regex": /\p{sc=Sogo}+/ug },
{ "name": "Old_Sogdian", "regex": /\p{sc=Old_Sogdian}+/ug },
// { "name": "Sora", "regex": /\p{sc=Sora}+/ug },
{ "name": "Sora_Sompeng", "regex": /\p{sc=Sora_Sompeng}+/ug },
// { "name": "Soyo", "regex": /\p{sc=Soyo}+/ug },
{ "name": "Soyombo", "regex": /\p{sc=Soyombo}+/ug },
// { "name": "Sund", "regex": /\p{sc=Sund}+/ug },
{ "name": "Sundanese", "regex": /\p{sc=Sundanese}+/ug },
// { "name": "Sylo", "regex": /\p{sc=Sylo}+/ug },
{ "name": "Syloti_Nagri", "regex": /\p{sc=Syloti_Nagri}+/ug },
// { "name": "Syrc", "regex": /\p{sc=Syrc}+/ug },
{ "name": "Syriac", "regex": /\p{sc=Syriac}+/ug },
// { "name": "Tagb", "regex": /\p{sc=Tagb}+/ug },
{ "name": "Tagbanwa", "regex": /\p{sc=Tagbanwa}+/ug },
// { "name": "Takr", "regex": /\p{sc=Takr}+/ug },
{ "name": "Takri", "regex": /\p{sc=Takri}+/ug },
// { "name": "Tale", "regex": /\p{sc=Tale}+/ug },
{ "name": "Tai_Le", "regex": /\p{sc=Tai_Le}+/ug },
// { "name": "Talu", "regex": /\p{sc=Talu}+/ug },
{ "name": "New_Tai_Lue", "regex": /\p{sc=New_Tai_Lue}+/ug },
// { "name": "Taml", "regex": /\p{sc=Taml}+/ug },
{ "name": "Tamil", "regex": /\p{sc=Tamil}+/ug },
// { "name": "Tang", "regex": /\p{sc=Tang}+/ug },
{ "name": "Tangut", "regex": /\p{sc=Tangut}+/ug },
// { "name": "Tavt", "regex": /\p{sc=Tavt}+/ug },
{ "name": "Tai_Viet", "regex": /\p{sc=Tai_Viet}+/ug },
// { "name": "Telu", "regex": /\p{sc=Telu}+/ug },
{ "name": "Telugu", "regex": /\p{sc=Telugu}+/ug },
// { "name": "Tfng", "regex": /\p{sc=Tfng}+/ug },
{ "name": "Tifinagh", "regex": /\p{sc=Tifinagh}+/ug },
// { "name": "Tglg", "regex": /\p{sc=Tglg}+/ug },
{ "name": "Tagalog", "regex": /\p{sc=Tagalog}+/ug },
// { "name": "Thaa", "regex": /\p{sc=Thaa}+/ug },
{ "name": "Thaana", "regex": /\p{sc=Thaana}+/ug },
{ "name": "Thai", "regex": /\p{sc=Thai}+/ug },
// { "name": "Tibt", "regex": /\p{sc=Tibt}+/ug },
{ "name": "Tibetan", "regex": /\p{sc=Tibetan}+/ug },
// { "name": "Tirh", "regex": /\p{sc=Tirh}+/ug },
{ "name": "Tirhuta", "regex": /\p{sc=Tirhuta}+/ug },
// { "name": "Ugar", "regex": /\p{sc=Ugar}+/ug },
{ "name": "Ugaritic", "regex": /\p{sc=Ugaritic}+/ug },
// { "name": "Vaii", "regex": /\p{sc=Vaii}+/ug },
{ "name": "Vai", "regex": /\p{sc=Vai}+/ug },
// { "name": "Wara", "regex": /\p{sc=Wara}+/ug },
{ "name": "Warang_Citi", "regex": /\p{sc=Warang_Citi}+/ug },
// { "name": "Wcho", "regex": /\p{sc=Wcho}+/ug }, // 未実装
// { "name": "Wancho", "regex": /\p{sc=Wancho}+/ug }, // 未実装
// { "name": "Xpeo", "regex": /\p{sc=Xpeo}+/ug },
{ "name": "Old_Persian", "regex": /\p{sc=Old_Persian}+/ug },
// { "name": "Xsux", "regex": /\p{sc=Xsux}+/ug },
{ "name": "Cuneiform", "regex": /\p{sc=Cuneiform}+/ug },
// { "name": "Yiii", "regex": /\p{sc=Yiii}+/ug },
{ "name": "Yi", "regex": /\p{sc=Yi}+/ug },
// { "name": "Zanb", "regex": /\p{sc=Zanb}+/ug },
{ "name": "Zanabazar_Square", "regex": /\p{sc=Zanabazar_Square}+/ug },
// { "name": "Zinh", "regex": /\p{sc=Zinh}+/ug },
{ "name": "Inherited", "regex": /\p{sc=Inherited}+/ug },
// { "name": "Qaai", "regex": /\p{sc=Qaai}+/ug },
// { "name": "Zyyy", "regex": /\p{sc=Zyyy}+/ug },
{ "name": "Common", "regex": /\p{sc=Common}+/ug },
// { "name": "Zzzz", "regex": /\p{sc=Zzzz}+/ug },
{ "name": "Unknown", "regex": /\p{sc=Unknown}+/ug },
];
/**
 * Lookup table for the Unicode `Script_Extensions` (`scx`) property.
 *
 * Every element has the shape `{ name, regex }`, where `name` is a script's
 * long name and `regex` is `/\p{scx=<name>}+/ug`, i.e. a global, Unicode-mode
 * pattern matching one or more consecutive characters whose Script_Extensions
 * include that script.
 *
 * The trailing comment on each name records the short script code(s) listed
 * for it; those codes are not registered as separate entries. Scripts noted
 * as "not implemented" are left out of the table entirely.
 *
 * Note: the regexes carry the `g` flag, so `exec()` / `test()` on them keep
 * state in `lastIndex` between calls.
 */
const scriptExtensions = [
  "Adlam", // Adlm
  "Caucasian_Albanian", // Aghb
  "Ahom",
  "Arabic", // Arab
  "Imperial_Aramaic", // Armi
  "Armenian", // Armn
  "Avestan", // Avst
  "Balinese", // Bali
  "Bamum", // Bamu
  "Bassa_Vah", // Bass
  "Batak", // Batk
  "Bengali", // Beng
  "Bhaiksuki", // Bhks
  "Bopomofo", // Bopo
  "Brahmi", // Brah
  "Braille", // Brai
  "Buginese", // Bugi
  "Buhid", // Buhd
  "Chakma", // Cakm
  "Canadian_Aboriginal", // Cans
  "Carian", // Cari
  "Cham",
  "Cherokee", // Cher
  "Coptic", // Copt, Qaac
  "Cypriot", // Cprt
  "Cyrillic", // Cyrl
  "Devanagari", // Deva
  "Dogra", // Dogr
  "Deseret", // Dsrt
  "Duployan", // Dupl
  "Egyptian_Hieroglyphs", // Egyp
  "Elbasan", // Elba
  // Elymaic (Elym): not implemented, omitted.
  "Ethiopic", // Ethi
  "Georgian", // Geor
  "Glagolitic", // Glag
  "Gunjala_Gondi", // Gong
  "Masaram_Gondi", // Gonm
  "Gothic", // Goth
  "Grantha", // Gran
  "Greek", // Grek
  "Gujarati", // Gujr
  "Gurmukhi", // Guru
  "Hangul", // Hang
  "Han", // Hani
  "Hanunoo", // Hano
  "Hatran", // Hatr
  "Hebrew", // Hebr
  "Hiragana", // Hira
  "Anatolian_Hieroglyphs", // Hluw
  "Pahawh_Hmong", // Hmng
  // Nyiakeng_Puachue_Hmong (Hmnp): not implemented, omitted.
  // Katakana_Or_Hiragana (Hrkt): not implemented, omitted.
  "Old_Hungarian", // Hung
  "Old_Italic", // Ital
  "Javanese", // Java
  "Kayah_Li", // Kali
  "Katakana", // Kana
  "Kharoshthi", // Khar
  "Khmer", // Khmr
  "Khojki", // Khoj
  "Kannada", // Knda
  "Kaithi", // Kthi
  "Tai_Tham", // Lana
  "Lao", // Laoo
  "Latin", // Latn
  "Lepcha", // Lepc
  "Limbu", // Limb
  "Linear_A", // Lina
  "Linear_B", // Linb
  "Lisu",
  "Lycian", // Lyci
  "Lydian", // Lydi
  "Mahajani", // Mahj
  "Makasar", // Maka
  "Mandaic", // Mand
  "Manichaean", // Mani
  "Marchen", // Marc
  "Medefaidrin", // Medf
  "Mende_Kikakui", // Mend
  "Meroitic_Cursive", // Merc
  "Meroitic_Hieroglyphs", // Mero
  "Malayalam", // Mlym
  "Modi",
  "Mongolian", // Mong
  "Mro", // Mroo
  "Meetei_Mayek", // Mtei
  "Multani", // Mult
  "Myanmar", // Mymr
  // Nandinagari (Nand): not implemented, omitted.
  "Old_North_Arabian", // Narb
  "Nabataean", // Nbat
  "Newa",
  "Nko", // Nkoo
  "Nushu", // Nshu
  "Ogham", // Ogam
  "Ol_Chiki", // Olck
  "Old_Turkic", // Orkh
  "Oriya", // Orya
  "Osage", // Osge
  "Osmanya", // Osma
  "Palmyrene", // Palm
  "Pau_Cin_Hau", // Pauc
  "Old_Permic", // Perm
  "Phags_Pa", // Phag
  "Inscriptional_Pahlavi", // Phli
  "Psalter_Pahlavi", // Phlp
  "Phoenician", // Phnx
  "Miao", // Plrd
  "Inscriptional_Parthian", // Prti
  "Rejang", // Rjng
  "Hanifi_Rohingya", // Rohg
  "Runic", // Runr
  "Samaritan", // Samr
  "Old_South_Arabian", // Sarb
  "Saurashtra", // Saur
  "SignWriting", // Sgnw
  "Shavian", // Shaw
  "Sharada", // Shrd
  "Siddham", // Sidd
  "Khudawadi", // Sind
  "Sinhala", // Sinh
  "Sogdian", // Sogd
  "Old_Sogdian", // Sogo
  "Sora_Sompeng", // Sora
  "Soyombo", // Soyo
  "Sundanese", // Sund
  "Syloti_Nagri", // Sylo
  "Syriac", // Syrc
  "Tagbanwa", // Tagb
  "Takri", // Takr
  "Tai_Le", // Tale
  "New_Tai_Lue", // Talu
  "Tamil", // Taml
  "Tangut", // Tang
  "Tai_Viet", // Tavt
  "Telugu", // Telu
  "Tifinagh", // Tfng
  "Tagalog", // Tglg
  "Thaana", // Thaa
  "Thai",
  "Tibetan", // Tibt
  "Tirhuta", // Tirh
  "Ugaritic", // Ugar
  "Vai", // Vaii
  "Warang_Citi", // Wara
  // Wancho (Wcho): not implemented, omitted.
  "Old_Persian", // Xpeo
  "Cuneiform", // Xsux
  "Yi", // Yiii
  "Zanabazar_Square", // Zanb
  "Inherited", // Zinh, Qaai
  "Common", // Zyyy
  "Unknown", // Zzzz
].map((scriptName) => ({
  "name": scriptName,
  // String.raw keeps the backslash of `\p` literal, so the compiled pattern
  // is exactly `\p{scx=<scriptName>}+`.
  "regex": new RegExp(String.raw`\p{scx=${scriptName}}+`, "ug"),
}));
const block1 = [
{
"start": 0x0000,
"end": 0x007F,
"name": "Basic Latin",
"japaneseName": "基本ラテン文字"
},
{
"start": 0x0080,
"end": 0x00FF,
"name": "Latin-1 Supplement",
"japaneseName": "ラテン1補助"
},
{
"start": 0x0100,
"end": 0x017F,
"name": "Latin Extended-A",
"japaneseName": "ラテン文字拡張A"
},
{
"start": 0x0180,
"end": 0x024F,
"name": "Latin Extended-B",
"japaneseName": "ラテン文字拡張B"
},
{
"start": 0x0250,
"end": 0x02AF,
"name": "IPA Extensions",
"japaneseName": "IPA拡張"
},
{
"start": 0x02B0,
"end": 0x02FF,
"name": "Spacing Modifier Letters",
"japaneseName": "前進を伴う修飾文字"
},
{
"start": 0x0300,
"end": 0x036F,
"name": "Combining Diacritical Marks",
"japaneseName": "合成可能なダイアクリティカルマーク"
},
{
"start": 0x0370,
"end": 0x03FF,
"name": "Greek and Coptic",
"japaneseName": "ギリシア文字及びコプト文字"
},
{
"start": 0x0400,
"end": 0x04FF,
"name": "Cyrillic",
"japaneseName": "キリル文字"
},
{
"start": 0x0500,
"end": 0x052F,
"name": "Cyrillic Supplement",
"japaneseName": "キリル文字補助"
},
{
"start": 0x0530,
"end": 0x058F,
"name": "Armenian",
"japaneseName": "アルメニア文字"
},
{
"start": 0x0590,
"end": 0x05FF,
"name": "Hebrew",
"japaneseName": "ヘブライ文字"
},
{
"start": 0x0600,
"end": 0x06FF,
"name": "Arabic",
"japaneseName": "アラビア文字"
},
{
"start": 0x0700,
"end": 0x074F,
"name": "Syriac",
"japaneseName": "シリア文字"
},
{
"start": 0x0750,
"end": 0x077F,
"name": "Arabic Supplement",
"japaneseName": "アラビア文字補助"
},
{
"start": 0x0780,
"end": 0x07BF,
"name": "Thaana",
"japaneseName": "ターナ文字"
},
{
"start": 0x07C0,
"end": 0x07FF,
"name": "NKo",
"japaneseName": "ンコ文字"
},
{
"start": 0x0800,
"end": 0x083F,
"name": "Samaritan",
"japaneseName": "サマリア文字"
},
{
"start": 0x0840,
"end": 0x085F,
"name": "Mandaic",
"japaneseName": "マンダ文字"
},
{
"start": 0x0860,
"end": 0x086F,
"name": "Syriac Supplement",
"japaneseName": "シリア文字拡張"
},
{
"start": 0x08A0,
"end": 0x08FF,
"name": "Arabic Extended-A",
"japaneseName": "アラビア文字拡張A"
},
{
"start": 0x0900,
"end": 0x097F,
"name": "Devanagari",
"japaneseName": "デーヴァナーガリー文字"
},
{
"start": 0x0980,
"end": 0x09FF,
"name": "Bengali",
"japaneseName": "ベンガル文字"
},
{
"start": 0x0A00,
"end": 0x0A7F,
"name": "Gurmukhi",
"japaneseName": "グルムキー文字"
},
{
"start": 0x0A80,
"end": 0x0AFF,
"name": "Gujarati",
"japaneseName": "グジャラート文字"
},
{
"start": 0x0B00,
"end": 0x0B7F,
"name": "Oriya",
"japaneseName": "オリヤー文字"
},
{
"start": 0x0B80,
"end": 0x0BFF,
"name": "Tamil",
"japaneseName": "タミル文字"
},
{
"start": 0x0C00,
"end": 0x0C7F,
"name": "Telugu",
"japaneseName": "テルグ文字"
},
{
"start": 0x0C80,
"end": 0x0CFF,
"name": "Kannada",
"japaneseName": "カンナダ文字"
},
{
"start": 0x0D00,
"end": 0x0D7F,
"name": "Malayalam",
"japaneseName": "マラヤーラム文字"
},
{
"start": 0x0D80,
"end": 0x0DFF,
"name": "Sinhala",
"japaneseName": "シンハラ文字"
},
{
"start": 0x0E00,
"end": 0x0E7F,
"name": "Thai",
"japaneseName": "タイ文字"
},
{
"start": 0x0E80,
"end": 0x0EFF,
"name": "Lao",
"japaneseName": "ラオス文字"
},
{
"start": 0x0F00,
"end": 0x0FFF,
"name": "Tibetan",
"japaneseName": "チベット文字"
},
{
"start": 0x1000,
"end": 0x109F,
"name": "Myanmar",
"japaneseName": "ビルマ文字"
},
{
"start": 0x10A0,
"end": 0x10FF,
"name": "Georgian",
"japaneseName": "グルジア文字"
},
{
"start": 0x1100,
"end": 0x11FF,
"name": "Hangul Jamo",
"japaneseName": "ハングル字母"
},
{
"start": 0x1200,
"end": 0x137F,
"name": "Ethiopic",
"japaneseName": "エチオピア文字"
},
{
"start": 0x1380,
"end": 0x139F,
"name": "Ethiopic Supplement",
"japaneseName": "エチオピア文字補助"
},
{
"start": 0x13A0,
"end": 0x13FF,
"name": "Cherokee",
"japaneseName": "チェロキー文字"
},
{
"start": 0x1400,
"end": 0x167F,
"name": "Unified Canadian Aboriginal Syllabics",
"japaneseName": "統合カナダ先住民文字"
},
{
"start": 0x1680,
"end": 0x169F,
"name": "Ogham",
"japaneseName": "オガム文字"
},
{
"start": 0x16A0,
"end": 0x16FF,
"name": "Runic",
"japaneseName": "ルーン文字"
},
{
"start": 0x1700,
"end": 0x171F,
"name": "Tagalog",
"japaneseName": "タガログ文字"
},
{
"start": 0x1720,
"end": 0x173F,
"name": "Hanunoo",
"japaneseName": "ハヌノオ文字"
},
{
"start": 0x1740,
"end": 0x175F,
"name": "Buhid",
"japaneseName": "ブヒッド文字"
},
{
"start": 0x1760,
"end": 0x177F,
"name": "Tagbanwa",
"japaneseName": "タグバヌワ文字"
},
{
"start": 0x1780,
"end": 0x17FF,
"name": "Khmer",
"japaneseName": "クメール文字"
},
{
"start": 0x1800,
"end": 0x18AF,
"name": "Mongolian",
"japaneseName": "モンゴル文字"
},
{
"start": 0x18B0,
"end": 0x18FF,
"name": "Unified Canadian Aboriginal Syllabics Extended",
"japaneseName": "統合カナダ先住民文字拡張"
},
{
"start": 0x1900,
"end": 0x194F,
"name": "Limbu",
"japaneseName": "リンブ文字"
},
{
"start": 0x1950,
"end": 0x197F,
"name": "Tai Le",
"japaneseName": "タイ・ナ文字"
},
{
"start": 0x1980,
"end": 0x19DF,
"name": "New Tai Lue",
"japaneseName": "新タイロ文字"
},
{
"start": 0x19E0,
"end": 0x19FF,
"name": "Khmer Symbols",
"japaneseName": "クメール文字用記号"
},
{
"start": 0x1A00,
"end": 0x1A1F,
"name": "Buginese",
"japaneseName": "ブギス文字"
},
{
"start": 0x1A20,
"end": 0x1AAF,
"name": "Tai Tham",
"japaneseName": "タイタム文字"
},
{
"start": 0x1AB0,
"end": 0x1AFF,
"name": "Combining Diacritical Marks Extended",
"japaneseName": "合成可能なダイアクリティカルマーク拡張"
},
{
"start": 0x1B00,
"end": 0x1B7F,
"name": "Balinese",
"japaneseName": "バリ文字"
},
{
"start": 0x1B80,
"end": 0x1BBF,
"name": "Sundanese",
"japaneseName": "スンダ文字"
},
{
"start": 0x1BC0,
"end": 0x1BFF,
"name": "Batak",
"japaneseName": "バタク文字"
},
{
"start": 0x1C00,
"end": 0x1C4F,
"name": "Lepcha",
"japaneseName": "レプチャ文字"
},
{
"start": 0x1C50,
"end": 0x1C7F,
"name": "Ol Chiki",
"japaneseName": "オルチキ文字"
},
{
"start": 0x1C80,
"end": 0x1C8F,
"name": "Cyrillic Extended-C",
"japaneseName": "キリル文字拡張C"
},
{
"start": 0x1C90,
"end": 0x1CBF,
"name": "Georgian Extended",
"japaneseName": "グルジア文字拡張"
},
{
"start": 0x1CC0,
"end": 0x1CCF,
"name": "Sundanese Supplement",
"japaneseName": "スンダ文字補助"
},
{
"start": 0x1CD0,
"end": 0x1CFF,
"name": "Vedic Extensions",
"japaneseName": "ヴェーダ文字拡張"
},
{
"start": 0x1D00,
"end": 0x1D7F,
"name": "Phonetic Extensions",
"japaneseName": "音声記号拡張"
},
{
"start": 0x1D80,
"end": 0x1DBF,
"name": "Phonetic Extensions Supplement",
"japaneseName": "音声記号拡張補助"
},
{
"start": 0x1DC0,
"end": 0x1DFF,
"name": "Combining Diacritical Marks Supplement",
"japaneseName": "合成可能なダイアクリティカルマーク補助"
},
{
"start": 0x1E00,
"end": 0x1EFF,
"name": "Latin Extended Additional",
"japaneseName": "ラテン文字拡張追加"
},
{
"start": 0x1F00,
"end": 0x1FFF,
"name": "Greek Extended",
"japaneseName": "ギリシア文字拡張"
},
{
"start": 0x2000,
"end": 0x206F,
"name": "General Punctuation",
"japaneseName": "一般句読点"
},
{
"start": 0x2070,
"end": 0x209F,
"name": "Superscripts and Subscripts",
"japaneseName": "上付き・下付き"
},
{
"start": 0x20A0,
"end": 0x20CF,
"name": "Currency Symbols",
"japaneseName": "通貨記号"
},
{
"start": 0x20D0,
"end": 0x20FF,
"name": "Combining Diacritical Marks for Symbols",
"japaneseName": "合成可能な記号用ダイアクリティカルマーク"
},
{
"start": 0x2100,
"end": 0x214F,
"name": "Letterlike Symbols",
"japaneseName": "文字様記号"
},
{
"start": 0x2150,
"end": 0x218F,
"name": "Number Forms",
"japaneseName": "数字に準じるもの"
},
{
"start": 0x2190,
"end": 0x21FF,
"name": "Arrows",
"japaneseName": "矢印"
},
{
"start": 0x2200,
"end": 0x22FF,
"name": "Mathematical Operators",
"japaneseName": "数学記号"
},
{
"start": 0x2300,
"end": 0x23FF,
"name": "Miscellaneous Technical",
"japaneseName": "その他の技術用記号"
},
{
"start": 0x2400,
"end": 0x243F,
"name": "Control Pictures",
"japaneseName": "制御機能用記号"
},
{
"start": 0x2440,
"end": 0x245F,
"name": "Optical Character Recognition",
"japaneseName": "光学的文字認識"
},
{
"start": 0x2460,
"end": 0x24FF,
"name": "Enclosed Alphanumerics",
"japaneseName": "囲み英数字"
},
{
"start": 0x2500,
"end": 0x257F,
"name": "Box Drawing",
"japaneseName": "罫線素片"
},
{
"start": 0x2580,
"end": 0x259F,
"name": "Block Elements",
"japaneseName": "ブロック要素"
},
{
"start": 0x25A0,
"end": 0x25FF,
"name": "Geometric Shapes",
"japaneseName": "幾何学模様"
},
{
"start": 0x2600,
"end": 0x26FF,
"name": "Miscellaneous Symbols",
"japaneseName": "その他の記号"
},
{
"start": 0x2700,
"end": 0x27BF,
"name": "Dingbats",
"japaneseName": "装飾記号"
},
{
"start": 0x27C0,
"end": 0x27EF,
"name": "Miscellaneous Mathematical Symbols-A",
"japaneseName": "その他の数学記号A"
},
{
"start": 0x27F0,
"end": 0x27FF,
"name": "Supplemental Arrows-A",
"japaneseName": "補助矢印A"
},
{
"start": 0x2800,
"end": 0x28FF,
"name": "Braille Patterns",
"japaneseName": "点字図形"
},
{
"start": 0x2900,
"end": 0x297F,
"name": "Supplemental Arrows-B",
"japaneseName": "補助矢印B"
},
{
"start": 0x2980,
"end": 0x29FF,
"name": "Miscellaneous Mathematical Symbols-B",
"japaneseName": "その他の数学記号B"
},
{
"start": 0x2A00,
"end": 0x2AFF,
"name": "Supplemental Mathematical Operators",
"japaneseName": "補助数学記号"
},
{
"start": 0x2B00,
"end": 0x2BFF,
"name": "Miscellaneous Symbols and Arrows",
"japaneseName": "その他の記号及び矢印"
},
{
"start": 0x2C00,
"end": 0x2C5F,
"name": "Glagolitic",
"japaneseName": "グラゴル文字"
},
{
"start": 0x2C60,
"end": 0x2C7F,
"name": "Latin Extended-C",
"japaneseName": "ラテン文字拡張C"
},
{
"start": 0x2C80,
"end": 0x2CFF,
"name": "Coptic",
"japaneseName": "コプト文字"
},
{
"start": 0x2D00,
"end": 0x2D2F,
"name": "Georgian Supplement",
"japaneseName": "グルジア文字補助"
},
{
"start": 0x2D30,
"end": 0x2D7F,
"name": "Tifinagh",
"japaneseName": "ティフナグ文字"
},
{
"start": 0x2D80,
"end": 0x2DDF,
"name": "Ethiopic Extended",
"japaneseName": "エチオピア文字拡張"
},
{
"start": 0x2DE0,
"end": 0x2DFF,
"name": "Cyrillic Extended-A",
"japaneseName": "キリル文字拡張A"
},
{
"start": 0x2E00,
"end": 0x2E7F,
"name": "Supplemental Punctuation",
"japaneseName": "補助句読点"
},
{
"start": 0x2E80,
"end": 0x2EFF,
"name": "CJK Radicals Supplement",
"japaneseName": "CJK部首補助"
},
{
"start": 0x2F00,
"end": 0x2FDF,
"name": "Kangxi Radicals",
"japaneseName": "康熙部首"
},
{
"start": 0x2FF0,
"end": 0x2FFF,
"name": "Ideographic Description Characters",
"japaneseName": "漢字構成記述文字"
},
{
"start": 0x3000,
"end": 0x303F,
"name": "CJK Symbols and Punctuation",
"japaneseName": "CJKの記号及び句読点"
},
{
"start": 0x3040,
"end": 0x309F,
"name": "Hiragana",
"japaneseName": "平仮名"
},
{
"start": 0x30A0,
"end": 0x30FF,
"name": "Katakana",
"japaneseName": "片仮名"
},
{
"start": 0x3100,
"end": 0x312F,
"name": "Bopomofo",
"japaneseName": "注音字母"
},
{
"start": 0x3130,
"end": 0x318F,
"name": "Hangul Compatibility Jamo",
"japaneseName": "ハングル互換字母"
},
{
"start": 0x3190,
"end": 0x319F,
"name": "Kanbun",
"japaneseName": "漢文用記号"
},
{
"start": 0x31A0,
"end": 0x31BF,
"name": "Bopomofo Extended",
"japaneseName": "注音字母拡張"
},
{
"start": 0x31C0,
"end": 0x31EF,
"name": "CJK Strokes",
"japaneseName": "CJKの筆画"
},
{
"start": 0x31F0,
"end": 0x31FF,
"name": "Katakana Phonetic Extensions",
"japaneseName": "片仮名拡張"
},
{
"start": 0x3200,
"end": 0x32FF,
"name": "Enclosed CJK Letters and Months",
"japaneseName": "囲みCJK文字・月"
},
{
"start": 0x3300,
"end": 0x33FF,
"name": "CJK Compatibility",
"japaneseName": "CJK互換用文字"
},
{
"start": 0x3400,
"end": 0x4DBF,
"name": "CJK Unified Ideographs Extension A",
"japaneseName": "CJK統合漢字拡張A"
},
{
"start": 0x4DC0,
"end": 0x4DFF,
"name": "Yijing Hexagram Symbols",
"japaneseName": "易経記号"
},
{
"start": 0x4E00,
"end": 0x9FFF,
"name": "CJK Unified Ideographs",
"japaneseName": "CJK統合漢字"
},
{
"start": 0xA000,
"end": 0xA48F,
"name": "Yi Syllables",
"japaneseName": "イ文字"
},
{
"start": 0xA490,
"end": 0xA4CF,
"name": "Yi Radicals",
"japaneseName": "イ文字部首"
},
{
"start": 0xA4D0,
"end": 0xA4FF,
"name": "Lisu",
"japaneseName": "リス文字"
},
{
"start": 0xA500,
"end": 0xA63F,
"name": "Vai",
"japaneseName": "ヴァイ文字"
},
{
"start": 0xA640,
"end": 0xA69F,
"name": "Cyrillic Extended-B",
"japaneseName": "キリル文字拡張B"
},
{
"start": 0xA6A0,
"end": 0xA6FF,
"name": "Bamum",
"japaneseName": "バムン文字"
},
{
"start": 0xA700,
"end": 0xA71F,
"name": "Modifier Tone Letters",
"japaneseName": "声調修飾文字"
},
{
"start": 0xA720,
"end": 0xA7FF,
"name": "Latin Extended-D",
"japaneseName": "ラテン文字拡張D"
},
{
"start": 0xA800,
"end": 0xA82F,
"name": "Syloti Nagri",
"japaneseName": "シロティ・ナグリ文字"
},
{
"start": 0xA830,
"end": 0xA83F,
"name": "Common Indic Number Forms",
"japaneseName": "インド慣用数量記号"
},
{
"start": 0xA840,
"end": 0xA87F,
"name": "Phags-pa",
"japaneseName": "パスパ文字"
},
{
"start": 0xA880,
"end": 0xA8DF,
"name": "Saurashtra",
"japaneseName": "サウラーシュトラ文字"
},
{
"start": 0xA8E0,
"end": 0xA8FF,
"name": "Devanagari Extended",
"japaneseName": "デーヴァナーガリー文字拡張"
},
{
"start": 0xA900,
"end": 0xA92F,
"name": "Kayah Li",
"japaneseName": "カヤー文字"
},
{
"start": 0xA930,
"end": 0xA95F,
"name": "Rejang",
"japaneseName": "ルジャン文字"
},
{
"start": 0xA960,
"end": 0xA97F,
"name": "Hangul Jamo Extended-A",
"japaneseName": "ハングル字母拡張A"
},
{
"start": 0xA980,
"end": 0xA9DF,
"name": "Javanese",
"japaneseName": "ジャワ文字"
},
{
"start": 0xA9E0,
"end": 0xA9FF,
"name": "Myanmar Extended-B",
"japaneseName": "ビルマ文字拡張B"
},
{
"start": 0xAA00,
"end": 0xAA5F,
"name": "Cham",
"japaneseName": "チャム文字"
},
{
"start": 0xAA60,
"end": 0xAA7F,
"name": "Myanmar Extended-A",
"japaneseName": "ビルマ文字拡張A"
},
{
"start": 0xAA80,
"end": 0xAADF,
"name": "Tai Viet",
"japaneseName": "タイ・ヴェト文字"
},
{
"start": 0xAAE0,
"end": 0xAAFF,
"name": "Meetei Mayek Extensions",
"japaneseName": "メイテイ文字拡張"
},
{
"start": 0xAB00,
"end": 0xAB2F,
"name": "Ethiopic Extended-A",
"japaneseName": "エチオピア文字拡張A"
},
{
"start": 0xAB30,
"end": 0xAB6F,
"name": "Latin Extended-E",
"japaneseName": "ラテン文字拡張E"
},
{
"start": 0xAB70,
"end": 0xABBF,
"name": "Cherokee Supplement",
"japaneseName": "チェロキー文字補助"
},
{
"start": 0xABC0,
"end": 0xABFF,
"name": "Meetei Mayek",
"japaneseName": "メイテイ文字"
},
{
"start": 0xAC00,
"end": 0xD7AF,
"name": "Hangul Syllables",
"japaneseName": "ハングル音節文字"
},
{
"start": 0xD7B0,
"end": 0xD7FF,
"name": "Hangul Jamo Extended-B",
"japaneseName": "ハングル字母拡張B"
},
{
"start": 0xD800,
"end": 0xDB7F,
"name": "High Surrogates",
"japaneseName": "サロゲート(上位代用符号位置)"
},
{
"start": 0xDB80,
"end": 0xDBFF,
"name": "High Private Use Surrogates",
"japaneseName": "サロゲート(上位私用代用符号位置)"
},
{
"start": 0xDC00,
"end": 0xDFFF,
"name": "Low Surrogates",
"japaneseName": "サロゲート(下位代用符号位置)"
},
{
"start": 0xE000,
"end": 0xF8FF,
"name": "Private Use Area",
"japaneseName": "私用領域"
},
{
"start": 0xF900,
"end": 0xFAFF,
"name": "CJK Compatibility Ideographs",
"japaneseName": "CJK互換漢字"
},
{
"start": 0xFB00,
"end": 0xFB4F,
"name": "Alphabetic Presentation Forms",
"japaneseName": "アルファベット表示形"
},
{
"start": 0xFB50,
"end": 0xFDFF,
"name": "Arabic Presentation Forms-A",
"japaneseName": "アラビア表示形A"
},
{
"start": 0xFE00,
"end": 0xFE0F,
"name": "Variation Selectors",
"japaneseName": "字形選択子"
},
{
"start": 0xFE10,
"end": 0xFE1F,
"name": "Vertical Forms",
"japaneseName": "縦書き形"
},
{
"start": 0xFE20,
"end": 0xFE2F,
"name": "Combining Half Marks",
"japaneseName": "合成可能な半記号"
},
{
"start": 0xFE30,
"end": 0xFE4F,
"name": "CJK Compatibility Forms",
"japaneseName": "CJK互換形"
},
{
"start": 0xFE50,
"end": 0xFE6F,
"name": "Small Form Variants",
"japaneseName": "小字形"
},
{
"start": 0xFE70,
"end": 0xFEFF,
"name": "Arabic Presentation Forms-B",
"japaneseName": "アラビア表示形B"
},
{
"start": 0xFF00,
"end": 0xFFEF,
"name": "Halfwidth and Fullwidth Forms",
"japaneseName": "半角・全角形"
},
{
"start": 0xFFF0,
"end": 0xFFFF,
"name": "Specials",
"japaneseName": "特殊用途文字"
},
];
// Unicode block definitions for code points U+10000 and above (the table runs
// from "Linear B Syllabary" up to "Supplementary Private Use Area-B").
// Each entry holds the first code point (`start`), the last code point (`end`),
// the English block name (`name`) and its Japanese translation (`japaneseName`).
const block2 = [
{
"start": 0x10000,
"end": 0x1007F,
"name": "Linear B Syllabary",
"japaneseName": "線文字B音節文字"
},
{
"start": 0x10080,
"end": 0x100FF,
"name": "Linear B Ideograms",
"japaneseName": "線文字B表意文字"
},
{
"start": 0x10100,
"end": 0x1013F,
"name": "Aegean Numbers",
"japaneseName": "エーゲ数字"
},
{
"start": 0x10140,
"end": 0x1018F,
"name": "Ancient Greek Numbers",
"japaneseName": "古代ギリシア数字"
},
{
"start": 0x10190,
"end": 0x101CF,
"name": "Ancient Symbols",
"japaneseName": "古代記号"
},
{
"start": 0x101D0,
"end": 0x101FF,
"name": "Phaistos Disc",
"japaneseName": "ファイストスの円盤文字"
},
{
"start": 0x10280,
"end": 0x1029F,
"name": "Lycian",
"japaneseName": "リュキア文字"
},
{
"start": 0x102A0,
"end": 0x102DF,
"name": "Carian",
"japaneseName": "カリア文字"
},
{
"start": 0x102E0,
"end": 0x102FF,
"name": "Coptic Epact Numbers",
"japaneseName": "コプト・エパクト数字"
},
{
"start": 0x10300,
"end": 0x1032F,
"name": "Old Italic",
"japaneseName": "古代イタリア文字"
},
{
"start": 0x10330,
"end": 0x1034F,
"name": "Gothic",
"japaneseName": "ゴート文字"
},
{
"start": 0x10350,
"end": 0x1037F,
"name": "Old Permic",
"japaneseName": "古ペルム文字"
},
{
"start": 0x10380,
"end": 0x1039F,
"name": "Ugaritic",
"japaneseName": "ウガリト文字"
},
{
"start": 0x103A0,
"end": 0x103DF,
"name": "Old Persian",
"japaneseName": "古代ペルシャ文字"
},
{
"start": 0x10400,
"end": 0x1044F,
"name": "Deseret",
"japaneseName": "デザレット文字"
},
{
"start": 0x10450,
"end": 0x1047F,
"name": "Shavian",
"japaneseName": "ショー文字"
},
{
"start": 0x10480,
"end": 0x104AF,
"name": "Osmanya",
"japaneseName": "オスマニア文字"
},
{
"start": 0x104B0,
"end": 0x104FF,
"name": "Osage",
"japaneseName": "オセージ文字"
},
{
"start": 0x10500,
"end": 0x1052F,
"name": "Elbasan",
"japaneseName": "エルバサン文字"
},
{
"start": 0x10530,
"end": 0x1056F,
"name": "Caucasian Albanian",
"japaneseName": "カフカス・アルバニア文字"
},
{
"start": 0x10600,
"end": 0x1077F,
"name": "Linear A",
"japaneseName": "線文字A"
},
{
"start": 0x10800,
"end": 0x1083F,
"name": "Cypriot Syllabary",
"japaneseName": "キプロス音節文字"
},
{
"start": 0x10840,
"end": 0x1085F,
"name": "Imperial Aramaic",
"japaneseName": "帝国アラム文字"
},
{
"start": 0x10860,
"end": 0x1087F,
"name": "Palmyrene",
"japaneseName": "パルミラ文字"
},
{
"start": 0x10880,
"end": 0x108AF,
"name": "Nabataean",
"japaneseName": "ナバテア文字"
},
{
"start": 0x108E0,
"end": 0x108FF,
"name": "Hatran",
"japaneseName": "ハトラ文字"
},
{
"start": 0x10900,
"end": 0x1091F,
"name": "Phoenician",
"japaneseName": "フェニキア文字"
},
{
"start": 0x10920,
"end": 0x1093F,
"name": "Lydian",
"japaneseName": "リュディア文字"
},
{
"start": 0x10980,
"end": 0x1099F,
"name": "Meroitic Hieroglyphs",
"japaneseName": "メロエ文字楷書体"
},
{
"start": 0x109A0,
"end": 0x109FF,
"name": "Meroitic Cursive",
"japaneseName": "メロエ文字草書体"
},
{
"start": 0x10A00,
"end": 0x10A5F,
"name": "Kharoshthi",
"japaneseName": "カローシュティー文字"
},
{
"start": 0x10A60,
"end": 0x10A7F,
"name": "Old South Arabian",
"japaneseName": "古代南アラビア文字"
},
{
"start": 0x10A80,
"end": 0x10A9F,
"name": "Old North Arabian",
"japaneseName": "古代北アラビア文字"
},
{
"start": 0x10AC0,
"end": 0x10AFF,
"name": "Manichaean",
"japaneseName": "マニ文字"
},
{
"start": 0x10B00,
"end": 0x10B3F,
"name": "Avestan",
"japaneseName": "アヴェスタ文字"
},
{
"start": 0x10B40,
"end": 0x10B5F,
"name": "Inscriptional Parthian",
"japaneseName": "碑文パルティア文字"
},
{
"start": 0x10B60,
"end": 0x10B7F,
"name": "Inscriptional Pahlavi",
"japaneseName": "碑文パフラヴィ文字"
},
{
"start": 0x10B80,
"end": 0x10BAF,
"name": "Psalter Pahlavi",
"japaneseName": "詩編パフラヴィ文字"
},
{
"start": 0x10C00,
"end": 0x10C4F,
"name": "Old Turkic",
"japaneseName": "突厥文字"
},
{
"start": 0x10C80,
"end": 0x10CFF,
"name": "Old Hungarian",
"japaneseName": "古ハンガリー文字"
},
{
"start": 0x10D00,
"end": 0x10D3F,
"name": "Hanifi Rohingya",
"japaneseName": "ハニーフィー・ロヒンギャ文字"
},
{
"start": 0x10E60,
"end": 0x10E7F,
"name": "Rumi Numeral Symbols",
"japaneseName": "ルミ数字記号"
},
{
"start": 0x10F00,
"end": 0x10F2F,
"name": "Old Sogdian",
"japaneseName": "古ソグド文字"
},
{
"start": 0x10F30,
"end": 0x10F6F,
"name": "Sogdian",
"japaneseName": "ソグド文字"
},
{
"start": 0x10FE0,
"end": 0x10FFF,
"name": "Elymaic",
"japaneseName": "エリマイス文字"
},
{
"start": 0x11000,
"end": 0x1107F,
"name": "Brahmi",
"japaneseName": "ブラーフミー文字"
},
{
"start": 0x11080,
"end": 0x110CF,
"name": "Kaithi",
"japaneseName": "カイティー文字"
},
{
"start": 0x110D0,
"end": 0x110FF,
"name": "Sora Sompeng",
"japaneseName": "ソラングソンペング文字"
},
{
"start": 0x11100,
"end": 0x1114F,
"name": "Chakma",
"japaneseName": "チャクマ文字"
},
{
"start": 0x11150,
"end": 0x1117F,
"name": "Mahajani",
"japaneseName": "マハージャニー文字"
},
{
"start": 0x11180,
"end": 0x111DF,
"name": "Sharada",
"japaneseName": "シャーラダー文字"
},
{
"start": 0x111E0,
"end": 0x111FF,
"name": "Sinhala Archaic Numbers",
"japaneseName": "旧シンハラ数字"
},
{
"start": 0x11200,
"end": 0x1124F,
"name": "Khojki",
"japaneseName": "ホジャ文字"
},
{
"start": 0x11280,
"end": 0x112AF,
"name": "Multani",
"japaneseName": "ムルターニー文字"
},
{
"start": 0x112B0,
"end": 0x112FF,
"name": "Khudawadi",
"japaneseName": "フダーワーディー文字"
},
{
"start": 0x11300,
"end": 0x1137F,
"name": "Grantha",
"japaneseName": "グランタ文字"
},
{
"start": 0x11400,
"end": 0x1147F,
"name": "Newa",
"japaneseName": "ネワ文字"
},
{
"start": 0x11480,
"end": 0x114DF,
"name": "Tirhuta",
"japaneseName": "ティルフータ文字"
},
{
"start": 0x11580,
"end": 0x115FF,
"name": "Siddham",
"japaneseName": "悉曇文字"
},
{
"start": 0x11600,
"end": 0x1165F,
"name": "Modi",
"japaneseName": "モーディー文字"
},
{
"start": 0x11660,
"end": 0x1167F,
"name": "Mongolian Supplement",
"japaneseName": "モンゴル文字補助"
},
{
"start": 0x11680,
"end": 0x116CF,
"name": "Takri",
"japaneseName": "タークリー文字"
},
{
"start": 0x11700,
"end": 0x1173F,
"name": "Ahom",
"japaneseName": "アーホム文字"
},
{
"start": 0x11800,
"end": 0x1184F,
"name": "Dogra",
"japaneseName": "ドーグリー文字"
},
{
"start": 0x118A0,
"end": 0x118FF,
"name": "Warang Citi",
"japaneseName": "ワラング・クシティ文字"
},
{
"start": 0x119A0,
"end": 0x119FF,
"name": "Nandinagari",
"japaneseName": "ナンディナーガリー文字"
},
{
"start": 0x11A00,
"end": 0x11A4F,
"name": "Zanabazar Square",
"japaneseName": "ザナバザル方形文字"
},
{
"start": 0x11A50,
"end": 0x11AAF,
"name": "Soyombo",
"japaneseName": "ソヨンボ文字"
},
{
"start": 0x11AC0,
"end": 0x11AFF,
"name": "Pau Cin Hau",
"japaneseName": "パウ・チン・ハウ文字"
},
{
"start": 0x11C00,
"end": 0x11C6F,
"name": "Bhaiksuki",
"japaneseName": "バイクシュキー文字"
},
{
"start": 0x11C70,
"end": 0x11CBF,
"name": "Marchen",
"japaneseName": "マルチェン文字"
},
{
"start": 0x11D00,
"end": 0x11D5F,
"name": "Masaram Gondi",
"japaneseName": "マサラム・ゴーンディー文字"
},
{
"start": 0x11D60,
"end": 0x11DAF,
"name": "Gunjala Gondi",
"japaneseName": "グンジャラ・ゴーンディー文字"
},
{
"start": 0x11EE0,
"end": 0x11EFF,
"name": "Makasar",
"japaneseName": "マカッサル文字"
},
{
"start": 0x11FC0,
"end": 0x11FFF,
"name": "Tamil Supplement",
"japaneseName": "タミル文字補助"
},
{
"start": 0x12000,
"end": 0x123FF,
"name": "Cuneiform",
"japaneseName": "楔形文字"
},
{
"start": 0x12400,
"end": 0x1247F,
"name": "Cuneiform Numbers and Punctuation",
"japaneseName": "楔形文字の数字及び句読点"
},
{
"start": 0x12480,
"end": 0x1254F,
"name": "Early Dynastic Cuneiform",
"japaneseName": "シュメール楔形文字"
},
{
"start": 0x13000,
"end": 0x1342F,
"name": "Egyptian Hieroglyphs",
"japaneseName": "エジプト聖刻文字"
},
{
"start": 0x13430,
"end": 0x1343F,
"name": "Egyptian Hieroglyph Format Controls",
"japaneseName": "エジプト聖刻文字書式制御記号"
},
{
"start": 0x14400,
"end": 0x1467F,
"name": "Anatolian Hieroglyphs",
"japaneseName": "アナトリア聖刻文字"
},
{
"start": 0x16800,
"end": 0x16A3F,
"name": "Bamum Supplement",
"japaneseName": "バムン文字補助"
},
{
"start": 0x16A40,
"end": 0x16A6F,
"name": "Mro",
"japaneseName": "ムロ文字"
},
{
"start": 0x16AD0,
"end": 0x16AFF,
"name": "Bassa Vah",
"japaneseName": "バサ文字"
},
{
"start": 0x16B00,
"end": 0x16B8F,
"name": "Pahawh Hmong",
"japaneseName": "パハウ・フモン文字"
},
{
"start": 0x16E40,
"end": 0x16E9F,
"name": "Medefaidrin",
"japaneseName": "メデファイドリン文字"
},
{
"start": 0x16F00,
"end": 0x16F9F,
"name": "Miao",
"japaneseName": "ミャオ文字"
},
{
"start": 0x16FE0,
"end": 0x16FFF,
"name": "Ideographic Symbols and Punctuation",
"japaneseName": "漢字の記号及び句読点"
},
{
"start": 0x17000,
"end": 0x187FF,
"name": "Tangut",
"japaneseName": "西夏文字"
},
{
"start": 0x18800,
"end": 0x18AFF,
"name": "Tangut Components",
"japaneseName": "西夏文字の構成要素"
},
{
"start": 0x1B000,
"end": 0x1B0FF,
"name": "Kana Supplement",
"japaneseName": "仮名補助"
},
{
"start": 0x1B100,
"end": 0x1B12F,
"name": "Kana Extended-A",
"japaneseName": "仮名拡張A"
},
{
"start": 0x1B130,
"end": 0x1B16F,
"name": "Small Kana Extension",
"japaneseName": "小書き仮名拡張"
},
{
"start": 0x1B170,
"end": 0x1B2FF,
"name": "Nushu",
"japaneseName": "女書"
},
{
"start": 0x1BC00,
"end": 0x1BC9F,
"name": "Duployan",
"japaneseName": "デュプロワイエ式速記"
},
{
"start": 0x1BCA0,
"end": 0x1BCAF,
"name": "Shorthand Format Controls",
"japaneseName": "速記書式制御記号"
},
{
"start": 0x1D000,
"end": 0x1D0FF,
"name": "Byzantine Musical Symbols",
"japaneseName": "ビザンチン音楽記号"
},
{
"start": 0x1D100,
"end": 0x1D1FF,
"name": "Musical Symbols",
"japaneseName": "音楽記号"
},
{
"start": 0x1D200,
"end": 0x1D24F,
"name": "Ancient Greek Musical Notation",
"japaneseName": "古代ギリシア音符記号"
},
{
"start": 0x1D2E0,
"end": 0x1D2FF,
"name": "Mayan Numerals",
"japaneseName": "マヤ数字"
},
{
"start": 0x1D300,
"end": 0x1D35F,
"name": "Tai Xuan Jing Symbols",
"japaneseName": "太玄経記号"
},
{
"start": 0x1D360,
"end": 0x1D37F,
"name": "Counting Rod Numerals",
"japaneseName": "算木用数字"
},
{
"start": 0x1D400,
"end": 0x1D7FF,
"name": "Mathematical Alphanumeric Symbols",
"japaneseName": "数学用英数字記号"
},
{
"start": 0x1D800,
"end": 0x1DAAF,
"name": "Sutton SignWriting",
"japaneseName": "サットン手話表記法"
},
{
"start": 0x1E000,
"end": 0x1E02F,
"name": "Glagolitic Supplement",
"japaneseName": "グラゴル文字補助"
},
{
"start": 0x1E100,
"end": 0x1E14F,
"name": "Nyiakeng Puachue Hmong",
"japaneseName": "Nyiakeng Puachue Hmong文字"
},
{
"start": 0x1E2C0,
"end": 0x1E2FF,
"name": "Wancho",
"japaneseName": "ワンチョ文字"
},
{
"start": 0x1E800,
"end": 0x1E8DF,
"name": "Mende Kikakui",
"japaneseName": "メンデ文字"
},
{
"start": 0x1E900,
"end": 0x1E95F,
"name": "Adlam",
"japaneseName": "アドラム文字"
},
{
"start": 0x1EC70,
"end": 0x1ECBF,
"name": "Indic Siyaq Numbers",
"japaneseName": "インド・シヤク数字"
},
{
"start": 0x1ED00,
"end": 0x1ED4F,
"name": "Ottoman Siyaq Numbers",
"japaneseName": "オスマン・シヤク数字"
},
{
"start": 0x1EE00,
"end": 0x1EEFF,
"name": "Arabic Mathematical Alphabetic Symbols",
"japaneseName": "アラビア数字記号"
},
{
"start": 0x1F000,
"end": 0x1F02F,
"name": "Mahjong Tiles",
"japaneseName": "マージャン記号"
},
{
"start": 0x1F030,
"end": 0x1F09F,
"name": "Domino Tiles",
"japaneseName": "ドミノ記号"
},
{
"start": 0x1F0A0,
"end": 0x1F0FF,
"name": "Playing Cards",
"japaneseName": "トランプ記号"
},
{
"start": 0x1F100,
"end": 0x1F1FF,
"name": "Enclosed Alphanumeric Supplement",
"japaneseName": "囲み英数字補助"
},
{
"start": 0x1F200,
"end": 0x1F2FF,
"name": "Enclosed Ideographic Supplement",
"japaneseName": "囲み漢字補助"
},
{
"start": 0x1F300,
"end": 0x1F5FF,
"name": "Miscellaneous Symbols and Pictographs",
"japaneseName": "その他の記号及び絵記号"
},
{
"start": 0x1F600,
"end": 0x1F64F,
"name": "Emoticons",
"japaneseName": "顔文字"
},
{
"start": 0x1F650,
"end": 0x1F67F,
"name": "Ornamental Dingbats",
"japaneseName": "装飾用絵記号"
},
{
"start": 0x1F680,
"end": 0x1F6FF,
"name": "Transport and Map Symbols",
"japaneseName": "交通及び地図記号"
},
{
"start": 0x1F700,
"end": 0x1F77F,
"name": "Alchemical Symbols",
"japaneseName": "錬金術記号"
},
{
"start": 0x1F780,
"end": 0x1F7FF,
"name": "Geometric Shapes Extended",
"japaneseName": "幾何学模様拡張"
},
{
"start": 0x1F800,
"end": 0x1F8FF,
"name": "Supplemental Arrows-C",
"japaneseName": "補助矢印C"
},
{
"start": 0x1F900,
"end": 0x1F9FF,
"name": "Supplemental Symbols and Pictographs",
"japaneseName": "補助記号及び絵記号"
},
{
"start": 0x1FA00,
"end": 0x1FA6F,
"name": "Chess Symbols",
"japaneseName": "チェス記号"
},
{
"start": 0x1FA70,
"end": 0x1FAFF,
"name": "Symbols and Pictographs Extended-A",
"japaneseName": "記号及び絵記号拡張A"
},
{
"start": 0x20000,
"end": 0x2A6DF,
"name": "CJK Unified Ideographs Extension B",
"japaneseName": "CJK統合漢字拡張B"
},
{
"start": 0x2A700,
"end": 0x2B73F,
"name": "CJK Unified Ideographs Extension C",
"japaneseName": "CJK統合漢字拡張C"
},
{
"start": 0x2B740,
"end": 0x2B81F,
"name": "CJK Unified Ideographs Extension D",
"japaneseName": "CJK統合漢字拡張D"
},
{
"start": 0x2B820,
"end": 0x2CEAF,
"name": "CJK Unified Ideographs Extension E",
"japaneseName": "CJK統合漢字拡張E"
},
{
"start": 0x2CEB0,
"end": 0x2EBEF,
"name": "CJK Unified Ideographs Extension F",
"japaneseName": "CJK統合漢字拡張F"
},
{
"start": 0x2F800,
"end": 0x2FA1F,
"name": "CJK Compatibility Ideographs Supplement",
"japaneseName": "CJK互換漢字補助"
},
{
"start": 0xE0000,
"end": 0xE007F,
"name": "Tags",
"japaneseName": "タグ"
},
{
"start": 0xE0100,
"end": 0xE01EF,
"name": "Variation Selectors Supplement",
"japaneseName": "字形選択子補助"
},
{
"start": 0xF0000,
"end": 0xFFFFF,
"name": "Supplementary Private Use Area-A",
"japaneseName": "補助私用領域A"
},
{
"start": 0x100000,
"end": 0x10FFFF,
"name": "Supplementary Private Use Area-B",
"japaneseName": "補助私用領域B"
},
];
// U+00A0 NO-BREAK SPACE, used as padding around characters in the generated output.
const nbs = String.fromCodePoint(0xA0);

/**
 * Wraps a value in no-break spaces.
 * @param {string} char - Text to wrap (converted to a string).
 * @returns {string} `char` with one no-break space on each side.
 */
function sandwichNbs(char) {
  return nbs.concat(char, nbs);
}
/**
 * Produces the characters of one Unicode block on demand.
 * Only the code point range is stored; the characters themselves are generated
 * each time `data` is read, because keeping them around would be large.
 */
class UnicodeBlock {
  /**
   * Stores the range and names of the block.
   * If the range is not aligned as described below, the instance falls back to
   * start = end = 0 with the names "Invalid" / "無効".
   * @param {Object} [definition={}] - Block definition.
   * @param {number} [definition.start=0x0000] - First code point; must have the form 0xhhh0.
   * @param {number} [definition.end=0x0000] - Last code point; must have the form 0xhhhF.
   * @param {string} [definition.name=""] - Block name.
   * @param {string} [definition.japaneseName=""] - Japanese translation of the block name.
   */
  constructor({ start = 0x0000, end = 0x0000, name = "", japaneseName = "" } = {}) {
    // Both ends must be aligned: the low nibble of start is 0x0 and that of end is 0xF.
    const isAligned = (start & 0xF) === 0x0 && (end & 0xF) === 0xF;
    this.start = isAligned ? start : 0x0000;
    this.end = isAligned ? end : 0x0000;
    this.name = isAligned ? name : "Invalid";
    this.japaneseName = isAligned ? japaneseName : "無効";
    this.outputFormat = { "linebreak": 0, "space": false };
  }

  /**
   * Sets how `data` is formatted.
   * @param {Object} [format={}] - Output format.
   * @param {number} [format.linebreak=32] - Insert a line break every this many characters;
   *   0 disables line breaks. Values that are not a multiple of 16 are replaced by 32.
   * @param {boolean} [format.space=true] - Whether to pad every character with no-break
   *   spaces. Without padding, combining characters are hard to read.
   */
  setOutputFormat({ linebreak = 32, space = true } = {}) {
    const normalized = linebreak % 16 === 0 ? linebreak : 32;
    // The key must be `linebreak`, because that is the name the `data` getter reads.
    this.outputFormat = { "linebreak": normalized, "space": space };
  }

  /**
   * Builds the string of all code points in the block, formatted per `outputFormat`.
   * @return {string} The characters assigned to the block's range.
   */
  get data() {
    const { linebreak, space } = this.outputFormat;
    let str = "";
    for (let i = this.start; i <= this.end; ++i) {
      const char = String.fromCodePoint(i);
      str += space ? sandwichNbs(char) : char;
      // Break after the code point that completes a row of `linebreak` characters.
      if (i !== this.start && linebreak !== 0 && (i + 1) % linebreak === 0) {
        str += "\n";
      }
    }
    return str;
  }
}
/**
 * Collection of every Unicode block built from a definition table.
 */
class UnicodeBlocks {
  /**
   * Creates one UnicodeBlock per definition and stores them in `data`.
   * @param {Iterable<Object>} data - Block definitions (objects accepted by UnicodeBlock).
   */
  constructor(data) {
    this.data = Array.from(data, (datum) => new UnicodeBlock(datum));
    // No longer used by the iterator; kept so code that reads the field still finds it.
    this.lastIndex = 0;
  }

  /**
   * Iterates over the stored blocks in order.
   * Every iteration starts from the first block, so breaking out early or
   * nesting two iterations over the same instance is safe.
   */
  *[Symbol.iterator]() {
    yield* this.data;
  }

  /**
   * Finds the first block whose property equals the given value.
   * @param {string} name - Property name to compare (e.g. "name", "start").
   * @param {number|string} value - Value to look for (compared with ===).
   * @return {UnicodeBlock|null} The matching block, or null when none matches.
   */
  getFrom(name, value) {
    const found = this.data.find((block) => block[name] === value);
    return found === undefined ? null : found;
  }
}
/**
 * Formats a number as upper-case hexadecimal without any prefix or padding.
 * @param {number} int - Value to format.
 * @returns {string} Hexadecimal digits, e.g. 255 -> "FF".
 */
function toUpperHex(int) {
  const hexDigits = int.toString(16);
  return hexDigits.toUpperCase();
}
/**
 * Surrounds every code point of the string with no-break spaces so that
 * combining characters do not attach to their neighbours.
 * @param {string} string - Text to pad.
 * @returns {string} The text with each code point wrapped by sandwichNbs.
 */
function insertSpace(string) {
  // "$1" is the replacement placeholder for the captured code point.
  const replacement = `${sandwichNbs("$1")}`;
  const everyCodePoint = /(.)/usg;
  return string.replace(everyCodePoint, replacement);
}
/**
 * Inserts a line break after every `length` groups of three code points.
 * (A padded character produced by insertSpace occupies three code points.)
 * @param {string} string - Text to split into rows.
 * @param {number} [length=64] - Groups per row; 0 leaves the text untouched.
 * @returns {string} The text with "\n" appended to every complete row.
 */
function insertLineBreak(string, length = 64) {
  if (length === 0 || string.length <= length) {
    return string;
  }
  const rowPattern = new RegExp(`((?:...){${length}})`, "usg");
  const withBreaks = string.replace(rowPattern, "$1\n");
  return withBreaks;
}
/**
 * Counts the code points in a string (a surrogate pair counts as one).
 * @param {string} string - Text to measure.
 * @returns {number} Number of code points; 0 for an empty string.
 */
function countChar(string) {
  const codePoints = string.match(/./usg);
  return codePoints === null ? 0 : codePoints.length;
}
/**
 * Converts every code point of a string into "U+XXXX" notation.
 * @param {string} string - Text to convert.
 * @param {boolean} [merge=false] - When not exactly `false`, each entry is prefixed
 *   with the padded character itself ("<nbs>c<nbs>= U+XXXX").
 * @param {string} [combinator=" "] - Separator placed between entries.
 * @returns {string|null} The joined entries, or null when the string has no code points.
 */
function stringToCodePoint(string, merge = false, combinator = " ") {
  const chars = string.match(/./usg);
  if (chars === null) {
    return null;
  }
  const describe = merge === false
    ? (ch) => `U+${toUpperHex(ch.codePointAt(0))}`
    : (ch) => `${sandwichNbs(ch)}= U+${toUpperHex(ch.codePointAt(0))}`;
  const entries = chars.map((ch) => describe(ch));
  return entries.join(combinator);
}
/**
 * Builds the source text of a regular-expression character class that matches
 * the code points of the string. Runs of consecutive code points are collapsed
 * into ranges (the input is expected to be in ascending code point order).
 *
 * Code points up to U+FFFF are written as `\uXXXX` (always four hex digits);
 * larger ones are written as `\u{XXXXX}`, which requires the `u` flag.
 *
 * @param {string} string - Characters the class should match.
 * @returns {string|null} Text such as "[\u0061-\u0063\u0065]", or null when the
 *   string has no code points.
 */
function stringToRegexCharacterClass(string) {
  const chars = string.match(/./usg);
  if (chars === null) {
    return null;
  }

  // `\u` must be followed by exactly four hex digits, or by a braced code point.
  const escapeCodePoint = (codePoint) => {
    const hex = toUpperHex(codePoint);
    return codePoint > 0xFFFF ? `\\u{${hex}}` : `\\u${hex.padStart(4, "0")}`;
  };

  // Group the code points into runs where each value is the previous one plus 1.
  const runs = [];
  for (const char of chars) {
    const codePoint = char.codePointAt(0);
    const lastRun = runs[runs.length - 1];
    if (lastRun !== undefined && codePoint - lastRun.end === 1) {
      lastRun.end = codePoint;
    } else {
      runs.push({ start: codePoint, end: codePoint });
    }
  }

  const body = runs
    .map(({ start, end }) => (
      start === end
        ? escapeCodePoint(start)
        : `${escapeCodePoint(start)}-${escapeCodePoint(end)}`
    ))
    .join("");
  return `[${body}]`;
}
/**
 * Inserts a line break after every `length` entries of a string produced by
 * stringToCodePoint. An entry only counts when it is followed by `combinator`,
 * so the final entry never ends a row.
 * @param {string} string - Output of stringToCodePoint.
 * @param {boolean} [merge=false] - Pass `true` when the entries were created with
 *   merge enabled ("<nbs>c<nbs>= U+XXXX").
 * @param {string} [combinator=" "] - Separator that was used between entries; it is
 *   matched literally.
 * @param {number} [length=64] - Entries per row; 0 leaves the text untouched.
 * @returns {string} The text with "\n" inserted after every complete row.
 */
function insertLineBreakToCodePoint(string, merge = false, combinator = " ", length = 64) {
  // Same convention as insertLineBreak: a length of 0 means "no line breaks".
  if (length === 0) {
    return string;
  }
  // Escape regex syntax characters so separators such as "|" or "+" match literally.
  const literalCombinator = combinator.replace(/[\\^$.*+?()[\]{}|]/g, "\\$&");
  const isMerged = merge === true;
  // With merge, each entry starts with three code points: nbs, the character, nbs.
  const entry = isMerged
    ? `...= U\\+[0-9A-F]+${literalCombinator}`
    : `U\\+[0-9A-F]+${literalCombinator}`;
  const re = new RegExp(`((?:${entry}){${length}})`, isMerged ? "usg" : "g");
  return string.replace(re, "$1\n");
}
/**
 * Makes sure the text ends with a line break.
 * @param {string} string - Text to terminate.
 * @returns {string} The text, with "\n" appended unless it already ends with one.
 */
function appendLineBreak(string) {
  const lastChar = string[string.length - 1];
  return lastChar === "\n" ? `${string}` : `${string}\n`;
}
// Property families to report on. Each entry pairs a label with a list of
// properties; the output functions read `name` and `regex` from every property.
// The four lists themselves are defined elsewhere in this file.
const categories = [
  ["Binary", binary],
  ["GeneralCategory", generalCategory],
  ["Script", script],
  ["ScriptExtensions", scriptExtensions],
].map(([name, data]) => ({ name, data }));
/**
 * Writes one report section for a set of matched characters.
 * The section is framed by a "<<<" line and a ">>>" line; each enabled part is
 * preceded by a "===" line. (The misspelled function name is kept because
 * callers refer to it.)
 * @param {{write: function(string): *}} file - Destination; `write` is called once.
 * @param {string} string - The matched characters.
 * @param {boolean} [writeChar=true] - Emit the characters padded with no-break spaces.
 * @param {boolean} [writeCodePoint=true] - Emit the "U+XXXX" list.
 * @param {boolean} [writeMix=true] - Emit the "character = U+XXXX" list.
 * @param {boolean} [writeClass=true] - Emit the regex character class.
 */
function wirteResult(file, string, writeChar = true, writeCodePoint = true, writeMix = true, writeClass = true) {
  const divider = `================================================================\n`;
  const sections = [`<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<\n`];

  if (writeChar) {
    const padded = insertLineBreak(insertSpace(string));
    sections.push(divider, appendLineBreak(padded));
  }
  if (writeCodePoint) {
    const codePoints = insertLineBreakToCodePoint(stringToCodePoint(string));
    sections.push(divider, appendLineBreak(codePoints));
  }
  if (writeMix) {
    const separator = `,${nbs}`;
    const mixed = insertLineBreakToCodePoint(stringToCodePoint(string, true, separator), true, `,${nbs}`, 32);
    sections.push(divider, appendLineBreak(mixed));
  }
  if (writeClass) {
    const characterClass = stringToRegexCharacterClass(string);
    sections.push(divider, appendLineBreak(characterClass));
  }

  sections.push(`>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>\n\n`);
  file.write(sections.join(""));
}
/**
 * Writes, for every property of every category, the characters of each block
 * that match the property's regex.
 * Blocks without a match are skipped; matching blocks are numbered from 1 per property.
 * @param {Iterable<UnicodeBlock>} blocks - Blocks to scan.
 * @param {Array<{name: string, data: Iterable<{name: string, regex: RegExp}>}>} categories - Properties to test.
 * @param {string} path - Path of the output file.
 */
function outputBlocks(blocks, categories, path) {
  const file = fs.createWriteStream(path);
  for (const { name: categoryName, data: properties } of categories) {
    for (const property of properties) {
      let hitCount = 0;
      for (const block of blocks) {
        const matched = block.data.match(property.regex);
        if (matched === null) {
          continue;
        }
        hitCount += 1;
        // Concatenate the matches back into one string.
        let text = "";
        for (const piece of matched) {
          text += piece;
        }
        file.write(`${categoryName}, ${property.name} ${hitCount}, ${block.name} U+${toUpperHex(block.start)} - U+${toUpperHex(block.end)} ${countChar(text)} / ${block.end - block.start + 1}\n`);
        wirteResult(file, text, true, true, true);
      }
    }
  }
  file.end();
}
/**
 * Builds a string from the code points start..end (inclusive).
 * Code points whose General_Category is Unassigned are left out by default.
 * @param {number} start - First code point.
 * @param {number} end - Last code point.
 * @param {boolean} [includeUnassigned=false] - Pass `true` to keep unassigned code points.
 * @returns {string} The concatenated characters.
 */
function makeString(start, end, includeUnassigned = false) {
  const unassigned = /\p{gc=Unassigned}/u;
  const kept = [];
  for (let codePoint = start; codePoint <= end; ++codePoint) {
    const char = String.fromCodePoint(codePoint);
    if (includeUnassigned === true || unassigned.test(char) === false) {
      kept.push(char);
    }
  }
  return kept.join("");
}
/**
 * Writes, for every property of every category, the characters in the code
 * point range start..end that match the property's regex.
 * The range is built with makeString using its default arguments; properties
 * without a match are skipped.
 * @param {number} start - First code point of the range.
 * @param {number} end - Last code point of the range.
 * @param {Array<{name: string, data: Iterable<{name: string, regex: RegExp}>}>} categories - Properties to test.
 * @param {string} path - Path of the output file.
 */
function outputStartToEnd(start, end, categories, path) {
  const file = fs.createWriteStream(path);
  const string = makeString(start, end);
  for (const { name: categoryName, data: properties } of categories) {
    for (const property of properties) {
      const matched = string.match(property.regex);
      if (matched === null) {
        continue;
      }
      // Concatenate the matches back into one string.
      let text = "";
      for (const piece of matched) {
        text += piece;
      }
      file.write(`${categoryName}, ${property.name} U+${toUpperHex(start)} - U+${toUpperHex(end)} ${countChar(text)} / ${end - start + 1}\n`);
      wirteResult(file, text, true, true, true);
    }
  }
  file.end();
}
// Build the block collections from the definition tables.
// The tables are declared as `block2` (and, by the same naming, `block1`), not
// `data1` / `data2`, which are never defined and would throw a ReferenceError.
// NOTE(review): `block1` is assumed to be the name of the first table, which is
// declared above this section — confirm.
const blocks1 = new UnicodeBlocks(block1);
const blocks2 = new UnicodeBlocks(block2);

// Per-block reports.
outputBlocks(blocks1, categories, "block_output1.txt");
outputBlocks(blocks2, categories, "block_output2.txt");

// Flat reports over whole code point ranges.
outputStartToEnd(0x0, 0xFFFF, categories, "flat_output1.txt");
outputStartToEnd(0x10000, 0x2FA1F, categories, "flat_output2.txt");
outputStartToEnd(0xE0000, 0xE01EF, categories, "flat_output3.txt");
outputStartToEnd(0x0, 0xFFFFF, categories, "flat_output4.txt");
参考URLs
UNICODE CHARACTER DATABASE(http://www.unicode.org/reports/tr44/)
UNICODE REGULAR EXPRESSIONS(http://unicode.org/reports/tr18/)
UNICODE SCRIPT PROPERTY(https://www.unicode.org/reports/tr24/)
ECMAScript proposal: Unicode property escapes in regular expressions(https://github.com/tc39/proposal-regexp-unicode-property-escapes)
最後に
戻り読みとUnicodeプロパティが追加されたのは、とても嬉しいです。
戻り読みが出来ずに妥協していた正規表現を作り替えたり、日本語をうまく区別できずに諦めていた機能を作ってみたくなったりしました。
新機能が増えた結果、手間が増えるような気がしますが。
Atomのパッケージが作りたいのですが、本体が新機能に対応していないのにDeveloper Toolが対応しているという残念な状況のようです。
まだおあずけのようです。(´・ω・`)ヒモジイ