25
25

Delete article

Deleted articles cannot be recovered.

The draft of this article will also be deleted.

Are you sure you want to delete this article?

More than 5 years have passed since last update.

全角混じりの文字列の桁数を取得する(全角2桁として計算)

Last updated at Posted at 2014-11-10
// Rough column count: every UTF-16 code unit in the range U+0100..U+FFFF is
// replaced by two ASCII characters ("--"), so it counts as two columns, while
// every other code unit counts as one. The length of the rewritten string is
// the resulting column count.
s = "あいうえおabcde"

console.log(s.replace(/[\u0100-\uffff]/g,"--").length)

/*
Output (5 replaced characters * 2 + 5 ASCII characters):
15
 */

上記のコードだと大雑把すぎることがわかりましたので、コメントいただいた内容を反映してみました。


# Build the width-classification predicates and attach them to `scope`.
#
# After the call, `scope` carries two functions:
#   scope.isWideChar(code)      - true when a BMP code is listed in `nonSurrogatePair`
#   scope.isSurrogateChar(code) - true when a code point falls in a `surrogatePair` range
#
# Table format: each entry is [first, last]; an entry whose second element is 0
# stands for the single code `first` rather than a range.
init_UTF8_map = (scope) ->
	nonSurrogatePair = [
	 [0x00A1,0],[0x00A4,0],[0x00AA,0],[0x00C6,0],[0x00D0,0],[0x00E6,0],[0x00F0,0],[0x00FC,0]
	,[0x00FE,0],[0x0101,0],[0x0111,0],[0x0113,0],[0x011B,0],[0x012B,0],[0x0138,0],[0x0144,0]
	,[0x014D,0],[0x016B,0],[0x01CE,0],[0x01D0,0],[0x01D2,0],[0x01D4,0],[0x01D6,0],[0x01D8,0]
	,[0x01DA,0],[0x01DC,0],[0x0251,0],[0x0261,0],[0x02C4,0],[0x02C7,0],[0x02CD,0],[0x02D0,0]
	,[0x02DD,0],[0x02DF,0],[0x0401,0],[0x0451,0],[0x2010,0],[0x2030,0],[0x2035,0],[0x203B,0]
	,[0x203E,0],[0x2074,0],[0x207F,0],[0x20AC,0],[0x2103,0],[0x2105,0],[0x2109,0],[0x2113,0]
	,[0x2116,0],[0x2126,0],[0x212B,0],[0x21D2,0],[0x21D4,0],[0x21E7,0],[0x2200,0],[0x220B,0]
	,[0x220F,0],[0x2211,0],[0x2215,0],[0x221A,0],[0x2223,0],[0x2225,0],[0x222E,0],[0x2248,0]
	,[0x224C,0],[0x2252,0],[0x2295,0],[0x2299,0],[0x22A5,0],[0x22BF,0],[0x2312,0],[0x25CB,0]
	,[0x25EF,0],[0x2609,0],[0x261C,0],[0x261E,0],[0x2640,0],[0x2642,0],[0x266F,0],[0x273D,0]
	,[0x3000,0],[0xFFFD,0]
	,[0x00A7,0x00A8],[0x00AD,0x00AE],[0x00B0,0x00B4],[0x00B6,0x00BA],[0x00BC,0x00BF],[0x00D7,0x00D8]
	,[0x00DE,0x00E1],[0x00E8,0x00EA],[0x00EC,0x00ED],[0x00F2,0x00F3],[0x00F7,0x00FA],[0x0126,0x0127]
	,[0x0131,0x0133],[0x013F,0x0142],[0x0148,0x014B],[0x0152,0x0153],[0x0166,0x0167],[0x02C9,0x02CB]
	,[0x02D8,0x02DB],[0x0300,0x036F],[0x0391,0x03A9],[0x03B1,0x03C1],[0x03C3,0x03C9],[0x0410,0x044F]
	,[0x1100,0x115F],[0x2013,0x2016],[0x2018,0x2019],[0x201C,0x201D],[0x2020,0x2022],[0x2024,0x2027]
	,[0x2032,0x2033],[0x2081,0x2084],[0x2121,0x2122],[0x2153,0x2154],[0x215B,0x215E],[0x2160,0x216B]
	,[0x2170,0x2179],[0x2190,0x2199],[0x21B8,0x21B9],[0x2202,0x2203],[0x2207,0x2208],[0x221D,0x2220]
	,[0x2227,0x222C],[0x2234,0x2237],[0x223C,0x223D],[0x2260,0x2261],[0x2264,0x2267],[0x226A,0x226B]
	,[0x226E,0x226F],[0x2282,0x2283],[0x2286,0x2287],[0x2329,0x232A],[0x2460,0x24E9],[0x24EB,0x254B]
	,[0x2550,0x2573],[0x2580,0x258F],[0x2592,0x2595],[0x25A0,0x25A1],[0x25A3,0x25A9],[0x25B2,0x25B3]
	,[0x25B6,0x25B7],[0x25BC,0x25BD],[0x25C0,0x25C1],[0x25C6,0x25C8],[0x25CE,0x25D1],[0x25E2,0x25E5]
	,[0x2605,0x2606],[0x260E,0x260F],[0x2614,0x2615],[0x2660,0x2661],[0x2663,0x2665],[0x2667,0x266A]
	,[0x266C,0x266D],[0x2776,0x277F],[0x2E80,0x2FFB],[0x3001,0x303E],[0x3041,0x33FF],[0x3400,0x4DB5]
	,[0x4E00,0x9FBB],[0xA000,0xA4C6],[0xAC00,0xD7A3],[0xE000,0xF8FF],[0xF900,0xFAD9],[0xFE00,0xFE0F]
	,[0xFE10,0xFE19],[0xFE30,0xFE6B],[0xFF01,0xFF60],[0xFFE0,0xFFE6]
	]

	surrogatePair = [
	 [0x20000,0x2a6d6]
	,[0x2a6d7,0x2f7ff]
	,[0x2f800,0x2fa1d]
	,[0x2fa1e,0x2fffd]
	,[0x30000,0x3fffd]
	,[0xe0100,0xe01ef]
	,[0xf0000,0xffffd]
	,[0x100000,0x10fffd]
	]

	# Lookup keyed by code for the single-code entries
	singles = {}
	# [first, last] pairs for the true range entries
	ranges = []

	# Split the table: a zero upper bound marks a single-code entry
	for entry in nonSurrogatePair
		if entry[1] is 0
			singles[entry[0]] = true
		else
			ranges.push entry

	# True when `code` lies inside at least one inclusive [first, last] pair of `table`
	within = (code, table) ->
		table.some((pair) -> pair[0] <= code and code <= pair[1])

	# Single-code lookup first, then the range scan
	scope.isWideChar = (code) ->
		(code of singles) or within(code, ranges)

	# Kept as the last statement so the function's implicit return value is unchanged
	scope.isSurrogateChar = (code) ->
		within(code, surrogatePair)

# Return an array with the display width (1 or 2) of every character in `string`.
#
# string - the text to measure (UTF-16, as all JavaScript strings are)
# scope  - object providing isWideChar(code) and isSurrogateChar(codePoint);
#          defaults to `window`
#
# A valid surrogate pair (high unit 0xD800-0xDBFF followed by a low unit
# 0xDC00-0xDFFF) is decoded into one code point and produces ONE entry in the
# result: 2 when scope.isSurrogateChar accepts the code point, otherwise 1.
# Every other code unit, including an unpaired surrogate, produces one entry:
# 2 when scope.isWideChar accepts it, otherwise 1.
getCharCode = (string,scope=window) ->
	i = 0
	l = string.length
	result = []

	while i < l
		c = string.charCodeAt(i)
		# charCodeAt returns NaN past the end of the string, which fails the range test below
		low = string.charCodeAt(i + 1)
		len = 1
		# The high-surrogate range runs up to 0xDBFF (not 0xD8FF). The low unit is
		# checked as well so a lone high surrogate never swallows the next character.
		if 0xd800 <= c and c <= 0xdbff and 0xdc00 <= low and low <= 0xdfff
			# Each unit contributes 10 bits of (code point - 0x10000)
			codePoint = 0x10000 + ((c & 0x3ff) << 10) + (low & 0x3ff)
			len = 2 if scope.isSurrogateChar(codePoint)
			# Skip the low unit that was just consumed
			i++
		else
			# Ordinary BMP code unit (or an unpaired surrogate)
			len = 2 if scope.isWideChar(c)

		result.push len
		i++
	result

# Build a sample character that requires a surrogate pair (U+20B9F)
x = 0x20B9F - 0x10000        # offset of the code point above the BMP
a = 0xD800 + (x >> 10)       # high surrogate: upper 10 bits of the offset
b = 0xDC00 + (x & 0x3FF)     # low surrogate: lower 10 bits of the offset
s = String.fromCharCode(a,b)
ss = s + s

# Install isWideChar / isSurrogateChar on window
init_UTF8_map(window)

# Print the sample string, then the summed widths of each test string
console.log ss
for text in [ss, "あいうえお"]
	console.log getCharCode(text).reduce( (x,y) -> x+y )

/*
𠮟𠮟 
4 
10 
*/

25
25
4

Register as a new user and use Qiita more conveniently

  1. You get articles that match your needs
  2. You can efficiently read back useful information
  3. You can use dark theme
What you can do by signing up
25
25

Delete article

Deleted articles cannot be recovered.

The draft of this article will also be deleted.

Are you sure you want to delete this article?