More than 5 years have passed since last update.

python - 文字識別のための前処理をやってみる

Last updated at 2019-08-26 / Posted at 2019-08-26

やったこと

背景色が二色に分かれている、少し変わった画像を文字識別にかけるための前処理です。
ただし、背景色が一色だけの画像も混ざっています。
この両方の画像を識別できるように、前処理を実装してみました。

Input

Output

clean.py

from PIL import Image

def check_double_background_color(grayImg):
    """Report whether the image appears to have a two-tone background.

    Only the left edge is inspected: the top-left pixel is compared with the
    bottom-left pixel, and the image is treated as two-tone when the top-left
    value exceeds the bottom-left value by more than 100.

    Args:
        grayImg: Image-like object exposing ``size`` as ``(width, height)``
            and ``getpixel((x, y))`` returning a number. ``clean_one`` passes
            a PIL image converted to mode ``'L'``.

    Returns:
        True if ``top_left - bottom_left > 100``, otherwise False.
    """
    _, height = grayImg.size
    top_left = grayImg.getpixel((0, 0))
    bottom_left = grayImg.getpixel((0, height - 1))
    # The difference is signed: only a lighter-over-darker split is reported,
    # a darker-over-lighter one is not.
    return top_left - bottom_left > 100

def get_inner_border_pos_y(grayImg):
    """Return the Y coordinate of the inner background boundary.

    Walks down the leftmost column (x = 0) and looks for the first pair of
    vertically adjacent pixels where the upper value exceeds the lower value
    by more than 100. In the article's example image this is the row where
    the red background starts.

    Args:
        grayImg: Image-like object exposing ``size`` as ``(width, height)``
            and ``getpixel((x, y))`` returning a number.

    Returns:
        The Y coordinate of the lower pixel of the first such pair, or 0 when
        no such drop is found.
    """
    _, rows = grayImg.size
    column = 0
    for y in range(1, rows):
        above = grayImg.getpixel((column, y - 1))
        below = grayImg.getpixel((column, y))
        if above - below > 100:
            return y
    return 0

def convert_double_background_img(grayImg, pos):
    """Rewrite the lower band of a two-tone-background image in place.

    Every row from ``pos`` to the bottom is rewritten: pixels close to white
    (value above 225) are painted with the colour sampled at ``(0, pos)``,
    and every other pixel is painted white (255). Rows above ``pos`` are left
    untouched.

    Args:
        grayImg: Image-like object exposing ``size``, ``getpixel((x, y))``
            and ``putpixel((x, y), value)``. It is modified in place.
        pos: Y coordinate of the first row to convert.

    Returns:
        The same ``grayImg`` object that was passed in.
    """
    cols, rows = grayImg.size
    # In the example image this is the grey value of the red area, which
    # becomes the text colour after conversion.
    ink = grayImg.getpixel((0, pos))
    for y in range(pos, rows):
        for x in range(cols):
            # Near-white pixels are the characters; the (formerly red)
            # remainder becomes a white background.
            near_white = grayImg.getpixel((x, y)) > 225
            grayImg.putpixel((x, y), ink if near_white else 255)
    return grayImg


def noise_remove_pil(gray_img, k, gd):
    """Whiten isolated pixels of a grayscale image in place.

    The image is scanned column by column (x outer, y inner). Pixels in the
    first column or first row are always set to 255. Any other pixel that is
    not already 255 is set to 255 when fewer than ``k`` of its up-to-eight
    neighbours have a value below ``gd``. Because the image is modified while
    it is scanned, pixels whitened earlier influence later neighbour counts.

    The article reports k=3, gd=165 as suitable for its example image.

    Args:
        gray_img: Image-like object exposing ``size``, ``getpixel((x, y))``
            and ``putpixel((x, y), value)``. It is modified in place.
        k: Minimum number of dark neighbours needed to keep a pixel.
        gd: Threshold; a neighbour counts as dark when its value is below it.

    Returns:
        The same ``gray_img`` object that was passed in.
    """
    cols, rows = gray_img.size
    for x in range(cols):
        for y in range(rows):
            if x == 0 or y == 0:
                # Left column and top row are unconditionally cleared.
                isolated = True
            elif gray_img.getpixel((x, y)) == 255:
                continue
            else:
                # Neighbours past the right/bottom edge are skipped; left/top
                # neighbours always exist here because x >= 1 and y >= 1.
                dark = sum(
                    1
                    for nx in (x - 1, x, x + 1)
                    for ny in (y - 1, y, y + 1)
                    if nx < cols
                    and ny < rows
                    and (nx, ny) != (x, y)
                    and gray_img.getpixel((nx, ny)) < gd
                )
                isolated = dark < k
            if isolated:
                gray_img.putpixel((x, y), 255)
    return gray_img

最後に、これらをまとめて実行する関数です。

clean.py

def clean_one(imagePath):
    """Load an image, normalise its background and strip noise.

    The image is opened and converted to 8-bit grayscale (mode ``'L'``). If
    ``check_double_background_color`` reports a two-tone background, the
    lower band is normalised with ``convert_double_background_img``. The
    result is then run through a fixed schedule of ``noise_remove_pil``
    passes.

    Args:
        imagePath: Path (or file object) accepted by ``PIL.Image.open``.

    Returns:
        The cleaned grayscale image.
    """
    i = 3
    # (k, gd, repeat) for each stage of noise removal. These values were
    # tuned for the article's example image; adjust them for other inputs.
    noise_passes = (
        (i, 165, 6),
        (i + 1, 245, 3),
        (i, 165, 6),
    )

    # convert() returns an independent image, so the source file can be
    # closed as soon as the conversion is done.
    with Image.open(imagePath) as source:
        gray_img = source.convert('L')

    # Convert double-background images to a single white background.
    if check_double_background_color(gray_img):
        pos = get_inner_border_pos_y(gray_img)
        gray_img = convert_double_background_img(gray_img, pos)

    for k, gd, repeat in noise_passes:
        for _ in range(repeat):
            gray_img = noise_remove_pil(gray_img, k, gd)
    return gray_img

# Run: clean the sample image 'vcode.jpg' and save the result as
# 'vcode_clean.jpg' (both paths are relative to the working directory).
vcodepath = 'vcode.jpg'
newImg = clean_one(vcodepath)
newImg.save('vcode_clean.jpg')

You get articles that match your needs
You can efficiently read back useful information
You can use dark theme

What you can do with signing up