pythonでOCR処理-Tesseractの使用-

Posted at 2025-01-11

概要

PythonでTesseractを用いて、文字と数字をOCR処理します。
事前準備等の詳細は省略していますのでご了承ください。

実行環境

・windows：Windows11Pro 23H2
・python：3.12.3

参考記事

以下の記事はとても参考になりました。

事前準備

1)Tesseractのインストール
2)PyOCR、Pillowのインストール（文字のOCR処理で使用）
3)pytesseract、opencv-python、numpyのインストール（数字のOCR処理で使用）

文字のOCR処理

ocr.py

import os
from PIL import Image
import pyocr
import pyocr.builders

# Make sure the Tesseract install directory is on PATH for this process.
# Adjust the directory below to match your installation.
path_tesseract = r"C:\Program Files\Tesseract-OCR"
if all(entry != path_tesseract for entry in os.environ["PATH"].split(os.pathsep)):
    # Not listed yet: append it as the last PATH entry.
    os.environ["PATH"] = os.pathsep.join((os.environ["PATH"], path_tesseract))

# Ask pyocr which OCR tools it can find on this machine.
tools = pyocr.get_available_tools()
if not tools:
    print("Tesseract OCRが見つかりません。インストールされているか、PATHが正しいか確認してください。")
    # Raise SystemExit directly: the bare exit() helper is injected by the
    # `site` module for interactive sessions and is not guaranteed to exist
    # (e.g. under `python -S` or in frozen executables).
    raise SystemExit(1)
# Use the first available tool.
tool = tools[0]

# Load the source image and convert it to RGB so that every pixel is an
# (R, G, B) tuple. The context manager releases the file handle once the
# converted copy has been made.
with Image.open("ocr-test.png") as img_org:
    img_rgb = img_org.convert("RGB")
pixels = img_rgb.load()

# Clean up the image: every pixel that is not dark-ish becomes pure white
# (255, 255, 255). A pixel is kept only when all three channels are <= c_max.
c_max = 169
for j in range(img_rgb.height):
    for i in range(img_rgb.width):
        # max() looks at R, G and B alike, so a pixel that is bright in any
        # single channel (including blue) is whitened.
        if max(pixels[i, j]) > c_max:
            pixels[i, j] = (255, 255, 255)

# Run OCR: read the image as plain Japanese text.
# NOTE(review): lang="jpn" presumably requires Tesseract's Japanese language
# data to be installed - confirm on the target machine.
builder = pyocr.builders.TextBuilder()
result = tool.image_to_string(
    img_rgb,
    builder=builder,
    lang="jpn",
)

# Show the recognized text.
print(result)

数字のOCR処理

ocr_num.py

import cv2
import numpy as np
from PIL import Image
import pytesseract

# Point pytesseract at the Tesseract executable (adjust to your installation).
pytesseract.pytesseract.tesseract_cmd = r'C:\Program Files\Tesseract-OCR\tesseract.exe'

# Load the input image.
image_path = 'test.JPG'
image = cv2.imread(image_path)
# cv2.imread does not raise on failure; it returns None when the file is
# missing or cannot be decoded. Stop here with a clear message instead of
# letting cvtColor fail later with a cryptic assertion error.
if image is None:
    raise SystemExit(f"Could not read image: {image_path}")

# Preprocessing: grayscale, then a 5x5 Gaussian blur, then Canny edge
# detection with thresholds 50 / 150.
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
blurred = cv2.GaussianBlur(gray, ksize=(5, 5), sigmaX=0)
edged = cv2.Canny(blurred, threshold1=50, threshold2=150)

# Detect the digit outlines on the edge map: outermost contours only
# (RETR_EXTERNAL), stored in compressed form (CHAIN_APPROX_SIMPLE).
contours, _ = cv2.findContours(
    edged,
    mode=cv2.RETR_EXTERNAL,
    method=cv2.CHAIN_APPROX_SIMPLE,
)

# Recognized text for each sufficiently large contour, in contour order.
recognized_numbers = []

for x, y, w, h in map(cv2.boundingRect, contours):
    # Ignore small contours (10 px or less in either dimension).
    if w <= 10 or h <= 10:
        continue
    # Crop the candidate region from the grayscale image and hand it to
    # Tesseract as a PIL image.
    roi = gray[y:y + h, x:x + w]
    roi_pil = Image.fromarray(roi)
    # "--psm 10" treats the crop as a single character; "digits" selects
    # Tesseract's digits-only configuration.
    number = pytesseract.image_to_string(roi_pil, config='--psm 10 digits')
    recognized_numbers.append(number.strip())

# Show the results.
print("Recognized numbers:", recognized_numbers)

# Outline every contour whose bounding box is larger than 10 px in both
# dimensions with a green (BGR 0, 255, 0) rectangle, 2 px thick.
for contour in contours:
    left, top, box_w, box_h = cv2.boundingRect(contour)
    if min(box_w, box_h) > 10:
        top_left = (left, top)
        bottom_right = (left + box_w, top + box_h)
        cv2.rectangle(image, top_left, bottom_right, (0, 255, 0), 2)

# Show the annotated image and block until a key is pressed, then close the
# window.
cv2.imshow('Image', image)
cv2.waitKey(0)
cv2.destroyAllWindows()

You get articles that match your needs
You can efficiently read back useful information
You can use dark theme

What you can do with signing up