1
0

Delete article

Deleted articles cannot be recovered.

The draft of this article will also be deleted.

Are you sure you want to delete this article?

More than 3 years have passed since last update.

python 漢数字から算用数字に変換

Last updated at Posted at 2021-07-28

正規表現バージョン

import re

# Multiplier for every kanji unit character that ``re_kunit`` can capture.
# NOTE(review): the keys had been stripped to '' (collapsing the dict to a
# single entry); they are restored from the characters used in the regex
# below - confirm against the original article.
TRANSUNIT = {
    '十': 10,
    '拾': 10,
    '百': 100,
    '千': 1000,
    '万': 10000,
    '億': 100000000,
    '兆': 1000000000000,
}

# One "below ten-thousand" group: optional thousands, hundreds and tens
# (each an optional digit run followed by its unit) and a trailing digit run.
# Captures 7 groups: (n, 千, n, 百, n, 十|拾, n).
_SMALL_GROUP = (
    r"(?:(\d*)\s*(千)\s*)?"
    r"(?:(\d*)\s*(百)\s*)?"
    r"(?:(\d*)\s*([十拾])\s*)?"
    r"(\d*)"
)

# Two alternatives of 8 capture groups each: the first must end in a large
# unit (兆/億/万); the second has no large unit and ends in an always-empty
# placeholder group so both alternatives have the same layout.
re_kunit = re.compile(
    _SMALL_GROUP + r"\s*([兆億万])|" + _SMALL_GROUP + r"()")

# Characters in ``knum`` are rewritten to the character at the same position
# in ``anum`` before parsing. The ASCII digits map to themselves (a no-op).
# NOTE(review): presumably the middle ten characters were full-width digits
# originally - confirm. Either way ``\d`` and ``int()`` accept them.
knum = "一二三四五六七八九〇1234567890壱弐参"
anum = "12345678901234567890123"
_KANJI_DIGITS = str.maketrans(knum, anum)


def kanji2num(s):
    """Convert a number written with kanji units into an ``int``.

    Kanji digits are first rewritten to ASCII digits, then every match of
    ``re_kunit`` (one group of thousands/hundreds/tens/ones, optionally
    scaled by 万/億/兆) is evaluated and the group values are summed.

    Args:
        s: Text such as ``"1億3千万百9十9"`` or ``"二千二十一"``.

    Returns:
        The sum of all numeric groups found in ``s``. Because the pattern
        also matches the empty string, text without any numerals yields 0.
    """
    s = s.translate(_KANJI_DIGITS)

    total = 0
    for groups in re_kunit.findall(s):
        # groups[0:8] belong to the alternative with a large unit,
        # groups[8:16] to the alternative without one.
        base = 0 if groups[7] else 8

        value = 0
        # Three (count, unit) pairs: thousands, hundreds, tens.
        for k in range(base, base + 6, 2):
            count, unit = groups[k], groups[k + 1]
            if unit:
                # A bare unit such as "百" means one hundred.
                value += (int(count) if count else 1) * TRANSUNIT[unit]

        ones = groups[base + 6]
        if ones:
            value += int(ones)

        large_unit = groups[base + 7]
        if large_unit:
            value *= TRANSUNIT[large_unit]

        total += value

    return total


print(kanji2num("1億3千万百9十9")) # -> 130000199

正規表現を使わないバージョン

上の正規表現バージョンは処理が重いようです。

# NOTE(review): the non-ASCII keys of the four tables below had been stripped
# to '' (collapsing each dict); they are reconstructed from the values and
# from the characters used elsewhere in this file - confirm against the
# original article.

# Single digits: kanji numerals, full-width digits, ASCII digits and the
# formal numerals 零/壱/弐/参.
D1_UNIT = {
    '〇': 0, '一': 1, '二': 2, '三': 3, '四': 4, '五': 5,
    '六': 6, '七': 7, '八': 8, '九': 9, '０': 0, '１': 1,
    '２': 2, '３': 3, '４': 4, '５': 5, '６': 6, '７': 7,
    '８': 8, '９': 9, '0': 0, '1': 1, '2': 2, '3': 3,
    '4': 4, '5': 5, '6': 6, '7': 7, '8': 8, '9': 9,
    '零': 0, '壱': 1, '弐': 2, '参': 3,
}

# Units below ten thousand; they scale the digits directly before them.
D3_UNIT = {
    '千': 1000,
    '百': 100,
    '十': 10,
    '拾': 10,
}

# Units of ten thousand and above; they scale the whole group before them.
LG_UNIT = {
    '京': 10000000000000000,
    '兆': 1000000000000,
    '億': 100000000,
    '万': 10000,
}

# Per-character preprocessing: a falsy value deletes the character, a value
# equal to the key skips it, any other value replaces it before parsing.
Collections = {
    ' ': '',     # delete
    '\u3000': None,  # delete (ideographic space)
    '，': ',',   # replace
    '、': ',',   # replace
    '．': '.',   # replace
    ',': ',',   # through(ignoring)
}


def _digits_to_int(digits):
    """Read a list of single digits as one base-10 integer (0 when empty)."""
    return int("".join(map(str, digits))) if digits else 0


def _pending_text(digits, large, total):
    """Render the number accumulated so far, or '' when nothing is pending."""
    if digits and digits[0] == 0:
        # Keep leading zeros verbatim (e.g. "007", or digits after "0.").
        return "".join(map(str, digits))
    value = total + large + _digits_to_int(digits)
    return str(value) if value else ""


def kanji2int(val):
    """Rewrite kanji numerals inside ``val`` as Arabic numerals.

    The text is scanned one character at a time. Digits and unit characters
    are accumulated into a number; any other character flushes the pending
    number into the output and is then copied through unchanged.

    Args:
        val: Text such as ``"1億3千万百9十9"`` or ``"二千二十一年七月"``.

    Returns:
        ``None`` when ``val`` is empty, otherwise a string. Text consisting
        only of numerals yields the decimal value (``"130000199"``); mixed
        text keeps its non-numeric characters (``"2021年7月"``).
    """
    if not val:
        return None

    digits = []   # digits read since the last unit character
    large = 0     # value below ten thousand built from D3 units
    total = 0     # value already scaled by a LG unit
    pieces = []   # finished output fragments

    for ch in val:
        if ch in Collections:
            mapped = Collections[ch]
            # Falsy mapping = delete, identical mapping = ignore. Testing the
            # mapped value (not the character) keeps None out of the output.
            if not mapped or mapped == ch:
                continue
            ch = mapped

        if ch in D1_UNIT:
            digits.append(D1_UNIT[ch])
        elif ch in D3_UNIT:
            # A bare unit such as "百" means one hundred.
            large += (_digits_to_int(digits) if digits else 1) * D3_UNIT[ch]
            digits.clear()
        elif (digits or large) and ch in LG_UNIT:
            large += _digits_to_int(digits)
            total += (large or 1) * LG_UNIT[ch]
            digits.clear()
            large = 0
        elif ch == "." and digits and digits[0] == 0:
            # "0." starts a decimal fraction; the emptiness guard avoids an
            # IndexError for a "." with no digits before it.
            pieces.append("0.")
            digits.clear()
            # Like the literal branch below, drop accumulated units so they
            # cannot leak into the fractional digits.
            large = 0
            total = 0
        else:
            # Ordinary character: emit the pending number, then the character.
            pieces.append(_pending_text(digits, large, total))
            pieces.append(ch)
            digits.clear()
            large = 0
            total = 0

    result = "".join(pieces)
    if not result:
        return str(total + large + _digits_to_int(digits))
    # Append a number still pending at the end instead of dropping it.
    return result + _pending_text(digits, large, total)


print(kanji2int("1億3千万百9十9")) # -> 130000199
1
0
0

Register as a new user and use Qiita more conveniently

  1. You get articles that match your needs
  2. You can efficiently read back useful information
  3. You can use dark theme
What you can do with signing up
1
0

Delete article

Deleted articles cannot be recovered.

The draft of this article will also be deleted.

Are you sure you want to delete this article?