LoginSignup
25
25

More than 5 years have passed since last update.

全角混じりの文字列の桁数を取得する(全角2桁として計算)

Last updated at Posted at 2014-11-10
# Rough display width: every code unit in U+0100..U+FFFF is replaced by two
# ASCII characters, so the length of the result counts each of them as 2.
s = "あいうえおabcde"
padded = s.replace(/[\u0100-\uffff]/g,"--")
console.log(padded.length)

/*
15
 */

上記のコードだと大雑把すぎることがわかりましたので、コメントいただいた内容を反映してみました。


# Build the width-classification predicates and attach them to `scope`.
#
# scope - object that receives two functions:
#   isWideChar(code)      - true when `code` is listed in nonSurrogatePair
#   isSurrogateChar(code) - true when `code` lies in a surrogatePair range
init_UTF8_map = (scope) ->
    # Entries are [first, last]; a second element of 0 marks a single code value.
    # NOTE(review): the values look like Unicode East Asian Width
    # wide/fullwidth/ambiguous assignments - confirm against the intended
    # Unicode version before relying on them.
    nonSurrogatePair = [
     [0x00A1,0],[0x00A4,0],[0x00AA,0],[0x00C6,0],[0x00D0,0],[0x00E6,0],[0x00F0,0],[0x00FC,0]
    ,[0x00FE,0],[0x0101,0],[0x0111,0],[0x0113,0],[0x011B,0],[0x012B,0],[0x0138,0],[0x0144,0]
    ,[0x014D,0],[0x016B,0],[0x01CE,0],[0x01D0,0],[0x01D2,0],[0x01D4,0],[0x01D6,0],[0x01D8,0]
    ,[0x01DA,0],[0x01DC,0],[0x0251,0],[0x0261,0],[0x02C4,0],[0x02C7,0],[0x02CD,0],[0x02D0,0]
    ,[0x02DD,0],[0x02DF,0],[0x0401,0],[0x0451,0],[0x2010,0],[0x2030,0],[0x2035,0],[0x203B,0]
    ,[0x203E,0],[0x2074,0],[0x207F,0],[0x20AC,0],[0x2103,0],[0x2105,0],[0x2109,0],[0x2113,0]
    ,[0x2116,0],[0x2126,0],[0x212B,0],[0x21D2,0],[0x21D4,0],[0x21E7,0],[0x2200,0],[0x220B,0]
    ,[0x220F,0],[0x2211,0],[0x2215,0],[0x221A,0],[0x2223,0],[0x2225,0],[0x222E,0],[0x2248,0]
    ,[0x224C,0],[0x2252,0],[0x2295,0],[0x2299,0],[0x22A5,0],[0x22BF,0],[0x2312,0],[0x25CB,0]
    ,[0x25EF,0],[0x2609,0],[0x261C,0],[0x261E,0],[0x2640,0],[0x2642,0],[0x266F,0],[0x273D,0]
    ,[0x3000,0],[0xFFFD,0]
    ,[0x00A7,0x00A8],[0x00AD,0x00AE],[0x00B0,0x00B4],[0x00B6,0x00BA],[0x00BC,0x00BF],[0x00D7,0x00D8]
    ,[0x00DE,0x00E1],[0x00E8,0x00EA],[0x00EC,0x00ED],[0x00F2,0x00F3],[0x00F7,0x00FA],[0x0126,0x0127]
    ,[0x0131,0x0133],[0x013F,0x0142],[0x0148,0x014B],[0x0152,0x0153],[0x0166,0x0167],[0x02C9,0x02CB]
    ,[0x02D8,0x02DB],[0x0300,0x036F],[0x0391,0x03A9],[0x03B1,0x03C1],[0x03C3,0x03C9],[0x0410,0x044F]
    ,[0x1100,0x115F],[0x2013,0x2016],[0x2018,0x2019],[0x201C,0x201D],[0x2020,0x2022],[0x2024,0x2027]
    ,[0x2032,0x2033],[0x2081,0x2084],[0x2121,0x2122],[0x2153,0x2154],[0x215B,0x215E],[0x2160,0x216B]
    ,[0x2170,0x2179],[0x2190,0x2199],[0x21B8,0x21B9],[0x2202,0x2203],[0x2207,0x2208],[0x221D,0x2220]
    ,[0x2227,0x222C],[0x2234,0x2237],[0x223C,0x223D],[0x2260,0x2261],[0x2264,0x2267],[0x226A,0x226B]
    ,[0x226E,0x226F],[0x2282,0x2283],[0x2286,0x2287],[0x2329,0x232A],[0x2460,0x24E9],[0x24EB,0x254B]
    ,[0x2550,0x2573],[0x2580,0x258F],[0x2592,0x2595],[0x25A0,0x25A1],[0x25A3,0x25A9],[0x25B2,0x25B3]
    ,[0x25B6,0x25B7],[0x25BC,0x25BD],[0x25C0,0x25C1],[0x25C6,0x25C8],[0x25CE,0x25D1],[0x25E2,0x25E5]
    ,[0x2605,0x2606],[0x260E,0x260F],[0x2614,0x2615],[0x2660,0x2661],[0x2663,0x2665],[0x2667,0x266A]
    ,[0x266C,0x266D],[0x2776,0x277F],[0x2E80,0x2FFB],[0x3001,0x303E],[0x3041,0x33FF],[0x3400,0x4DB5]
    ,[0x4E00,0x9FBB],[0xA000,0xA4C6],[0xAC00,0xD7A3],[0xE000,0xF8FF],[0xF900,0xFAD9],[0xFE00,0xFE0F]
    ,[0xFE10,0xFE19],[0xFE30,0xFE6B],[0xFF01,0xFF60],[0xFFE0,0xFFE6]
    ]

    # Inclusive [first, last] ranges of code points above U+FFFF.
    surrogatePair = [
     [0x20000,0x2a6d6] 
    ,[0x2a6d7,0x2f7ff]
    ,[0x2f800,0x2fa1d]
    ,[0x2fa1e,0x2fffd]
    ,[0x30000,0x3fffd]
    ,[0xe0100,0xe01ef]
    ,[0xf0000,0xffffd]
    ,[0x100000,0x10fffd]
    ]

    # Split the first table: single values become keys of a lookup object,
    # genuine ranges are collected for scanning.
    singleCharMap = {}
    rangeArray = []
    for entry in nonSurrogatePair
        [first, last] = entry
        if last is 0
            singleCharMap[first] = true
        else
            rangeArray.push entry

    # True when `code` lies inside at least one inclusive [lo, hi] range.
    withinAny = (ranges, code) ->
        ranges.some ([lo, hi]) -> lo <= code <= hi

    # Exact-match lookup first, then the range scan.
    scope.isWideChar = (code) ->
        (code of singleCharMap) or withinAny(rangeArray, code)

    scope.isSurrogateChar = (code) ->
        withinAny(surrogatePair, code)

# Return the display width of every character of `string` as an array.
#
# string - text to measure; indexed by UTF-16 code unit via charCodeAt
# scope  - object providing isWideChar(code) and isSurrogateChar(code);
#          defaults to window
#
# Returns an array with one entry per character: 2 when the matching
# predicate on `scope` reports the character as wide, otherwise 1.
# A well-formed surrogate pair produces a single entry; an unpaired
# surrogate is handled as an ordinary code unit and consumes nothing else.
getCharCode = (string,scope=window) ->
    i = 0
    l = string.length
    result = []

    while i < l
        c = string.charCodeAt(i)
        # NaN at the end of the string makes the low-surrogate test below fail.
        low = if i + 1 < l then string.charCodeAt(i + 1) else NaN
        len = 1
        # High surrogates span 0xD800..0xDBFF; only combine when a low
        # surrogate (0xDC00..0xDFFF) actually follows.
        if 0xd800 <= c <= 0xdbff and 0xdc00 <= low <= 0xdfff
            # Rebuild the code point encoded by the pair.
            codePoint = 0x10000 + ((c & 0x3ff) << 10) + (low & 0x3ff)
            len = 2 if scope.isSurrogateChar(codePoint)
            i++ # skip the low surrogate that was just consumed
        else
            # single code unit (including unpaired surrogates)
            len = 2 if scope.isWideChar(c)

        result.push len
        i++
    result

# Build a sample character above U+FFFF as a UTF-16 surrogate pair.
codePoint = 0x20B9F                           # code point to encode
offset = codePoint - 0x10000                  # value split across the two code units
high = 0xD800 + Math.floor(offset / 0x400)    # high surrogate carries the upper part
low = 0xDC00 + offset % 0x400                 # low surrogate carries the lower 10 bits
s = String.fromCharCode(high, low)
ss = s + s

# Create the lookup tables and install the predicates on window.
init_UTF8_map(window)

# Adds up the per-character widths returned by getCharCode.
sum = (total, width) -> total + width

console.log ss
console.log getCharCode(ss).reduce(sum)
console.log getCharCode("あいうえお").reduce(sum)

/*
𠮟𠮟 
4 
10 
*/

25
25
4

Register as a new user and use Qiita more conveniently

  1. You get articles that match your needs
  2. You can efficiently read back useful information
  3. You can use dark theme
What you can do with signing up
25
25