#tesseractのインストール
$ brew install tesseract
#tesseractを動かすライブラリをインストール
$ pip3 install pyocr
#日本語読み取り設定
$ curl -L -o /usr/local/share/tessdata/jpn.traineddata 'https://github.com/tesseract-ocr/tessdata/raw/master/jpn.traineddata'
$ tesseract --list-langs
List of available languages (4):
eng
jpn
osd
snum
#OCR実装
from PIL import Image
import sys
import pyocr
import pyocr.builders
# Discover the OCR backends that pyocr can find on this machine and abort
# with exit status 1 if none is installed.
tools = pyocr.get_available_tools()
if len(tools) == 0:
    print("No OCR tool found")
    sys.exit(1)
# The tools are returned in the recommended order of usage
tool = tools[0]

# Placeholder: replace with the path of the image file to read.
image_path = '{path}'

# Open the image in a `with` block so the underlying file handle is released
# as soon as OCR finishes.
with Image.open(image_path) as image:
    txt = tool.image_to_string(
        image,
        lang="jpn",  # requires the jpn.traineddata language file to be installed
        # tesseract_layout selects Tesseract's page segmentation mode;
        # per the Tesseract docs, 6 means "assume a single uniform block of text".
        builder=pyocr.builders.TextBuilder(tesseract_layout=6)
    )
print(txt)