正規表現バージョン
import re
# Multiplier for every kanji unit character that kanji2num understands.
TRANSUNIT = {
    '十': 10,
    '拾': 10,
    '百': 100,
    '千': 1000,
    '万': 10000,
    '億': 100000000,
    '兆': 1000000000000,
    '京': 10000000000000000,
}

# One "[n]千 [n]百 [n]十 [n]" run, i.e. a value below 10**4.  It contributes
# seven capture groups: (count, unit) for 千 / 百 / 十, then the ones digits.
_KUNIT_BELOW_10K = (
    r"(?:(\d*)\s*(千)\s*)?"
    r"(?:(\d*)\s*(百)\s*)?"
    r"(?:(\d*)\s*([十拾])\s*)?"
    r"(\d*)"
)

# Two alternatives with eight groups each (16 in total):
#   groups 1-8  : a run followed by a large unit (京 / 兆 / 億 / 万)
#   groups 9-16 : a bare run; the trailing empty group keeps both halves
#                 the same width so they can be indexed the same way.
# Raw strings are required so that \d and \s reach the regex engine instead
# of being treated as (invalid) string escapes.
re_kunit = re.compile(
    _KUNIT_BELOW_10K + r"\s*([京兆億万])"
    + "|" + _KUNIT_BELOW_10K + r"()"
)

# Digit-like characters (knum) and the ASCII digit each one maps to (anum).
# The two strings are index-aligned and must stay the same length.
knum = (
    "一二三四五六七八九〇"
    # Full-width digits 1-9 then 0, written as escapes so that they survive
    # Unicode normalisation of this source file.
    "\uff11\uff12\uff13\uff14\uff15\uff16\uff17\uff18\uff19\uff10"
    "壱弐参零"
)
anum = "1234567890" "1234567890" "1230"

# Single-pass translation table built from the two aligned strings above.
_KANJI_DIGIT_TABLE = str.maketrans(knum, anum)


def kanji2num(s):
    """Convert a Japanese numeral string to an integer (regex version).

    Kanji and full-width digits are first rewritten to ASCII digits, then
    every match of ``re_kunit`` is evaluated and the values are summed.

    Args:
        s: Text such as ``"1億3千万百9十9"``; Arabic and kanji digits may
            be mixed freely.

    Returns:
        The sum of all numeral runs found in ``s``.  Because the pattern
        also matches the empty string, text without any numerals (including
        ``""``) yields ``0`` rather than ``None``.
    """
    s = s.translate(_KANJI_DIGIT_TABLE)

    total = 0
    for groups in re_kunit.findall(s):
        # Index 7 holds the large unit; it is only set when the first
        # alternative matched, otherwise the data sits in groups 8..15.
        start = 0 if groups[7] else 8
        fields = groups[start:start + 8]

        value = 0
        # (count, unit) pairs for 千, 百 and 十; a missing count means 1.
        for count, unit in zip(fields[0:6:2], fields[1:6:2]):
            if unit:
                value += (int(count) if count else 1) * TRANSUNIT[unit]
        if fields[6]:
            value += int(fields[6])
        if fields[7]:
            value *= TRANSUNIT[fields[7]]
        total += value
    return total
# Demo of the regex-based converter on mixed Arabic/kanji input.
print(kanji2num("1億3千万百9十9")) # prints 130000199
正規表現使わないバージョン
上の正規表現バージョンは処理が重い(遅い)ようなので、正規表現を使わない実装を以下に示す。
# Single digit characters -> value: kanji, formal (daiji) kanji, ASCII digits
# and full-width digits.
D1_UNIT = {
    '〇': 0, '一': 1, '二': 2, '三': 3, '四': 4, '五': 5,
    '六': 6, '七': 7, '八': 8, '九': 9,
    '零': 0, '壱': 1, '弐': 2, '参': 3,
}
D1_UNIT.update({str(d): d for d in range(10)})            # ASCII 0-9
D1_UNIT.update({chr(0xFF10 + d): d for d in range(10)})   # full-width 0-9

# Units below 10**4; they scale the digits written directly before them.
D3_UNIT = {
    '千': 1000,
    '百': 100,
    '十': 10,
    '拾': 10,
}

# Units of 10**4 and above; they close the current four-digit group.
LG_UNIT = {
    '京': 10000000000000000,
    '兆': 1000000000000,
    '億': 100000000,
    '万': 10000,
}

# Per-character pre-processing rules applied by kanji2int:
#   falsy value         -> the character is deleted
#   value == key        -> the character is skipped ("through")
#   any other value     -> the character is replaced by the value
# Full-width characters are written as escapes so that they survive Unicode
# normalisation of this source file.
Collections = {
    '\u3000': '',     # delete  (ideographic space)
    ' ': None,        # delete  (ASCII space)
    '\uff0c': ',',    # replace (full-width comma)
    '、': ',',        # replace
    '\uff0e': '.',    # replace (full-width full stop)
    ',': ',',         # through (ignored, e.g. thousands separator)
}


def _digits_to_int(digits):
    """Return the integer spelled by a list of decimal digits (0 if empty)."""
    return int("".join(map(str, digits))) if digits else 0


def _pending_text(total, large, digits):
    """Render the number accumulated so far, or '' when nothing is pending."""
    # Digits with a leading zero and no unit in front of them are kept
    # verbatim, so that "000" after a separator or "05" after a decimal
    # point is not collapsed.
    if digits and digits[0] == 0 and not (total or large):
        return "".join(map(str, digits))
    value = total + large + _digits_to_int(digits)
    return str(value) if value else ""


def kanji2int(val):
    """Rewrite Japanese numerals in ``val`` as Arabic digits (no regex).

    The text is scanned once.  Digits and units are accumulated into a
    number; any other character flushes the pending number to the output
    and is then copied through unchanged.

    Args:
        val: Text containing kanji and/or Arabic numerals,
            e.g. ``"1億3千万百9十9"`` or ``"三百円"``.

    Returns:
        ``None`` when ``val`` is empty or otherwise falsy.  Otherwise a
        ``str`` (despite the function name): the converted text, or just
        the number when the input held nothing but a single numeral.
    """
    if not val:
        return None

    digits = []   # plain digits read since the last unit
    large = 0     # value of the current group below 10**4
    total = 0     # value of the groups already closed by 万 / 億 / 兆 / 京
    pieces = []   # output fragments emitted so far

    for ch in val:
        if ch in Collections:
            mapped = Collections[ch]
            if not mapped or mapped == ch:   # delete or through
                continue
            ch = mapped                      # replace

        if ch in D1_UNIT:
            digits.append(D1_UNIT[ch])
        elif ch in D3_UNIT:
            # A unit without a preceding count means one of that unit.
            large += (_digits_to_int(digits) if digits else 1) * D3_UNIT[ch]
            digits.clear()
        elif (digits or large) and ch in LG_UNIT:
            large += _digits_to_int(digits)
            total += (large or 1) * LG_UNIT[ch]
            digits.clear()
            large = 0
        elif (ch == "." and digits and digits[0] == 0
              and not (total or large)):
            # "0." (or "00.") in front of a fraction is emitted as "0.".
            pieces.append("0.")
            digits.clear()
        else:
            # Ordinary text: flush the pending number, then copy the char.
            pieces.append(_pending_text(total, large, digits))
            pieces.append(ch)
            digits.clear()
            large = 0
            total = 0

    result = "".join(pieces)
    if not result:
        # Nothing but a numeral (or ignorable characters) was seen.
        return str(total + large + _digits_to_int(digits))
    # Do not lose a number that ends the string after some text was emitted.
    return result + _pending_text(total, large, digits)
# Demo of the non-regex converter on mixed Arabic/kanji input.
print(kanji2int("1億3千万百9十9")) # prints 130000199