More than 1 year has passed since last update.

pythonでGoogle Drive API を使用した文字認識をする。

Posted at 2023-05-05

背景

写真で撮影した文字をPCに書き起こすのが手間だったので、文字認識を利用してテキスト化したかった。

環境

OS: windows 10.0
python 3.9

構成

PC内に保存した画像ファイルを画像変換し、textにします。画像変換は、google drive api for pythonを使用しています。

GUIは、Tkinterを用いて作成をしました。

作成

Google CloudPlatformでプロジェクトの作成

google-drive-ocrを使用し文字認識を行うためには、Google CloudPlatformでプロジェクトを作成する必要があります。こちらの記事を参考に作成しました。ここでは、最終的にgoogle drive apiへアクセスするための*.jsonファイルを取得します。

プログラム

画像ファイルは、.png .jpg .gifを選択できるようにしています。変換対象にするファイルは、1つしか選べないようにしています。変換後は、outputというディレクトリを生成し、そこに変換後のtextファイルを置くようにしました。

test.py

import shutil
import glob
import os
import tkinter as tk
from tkinter import filedialog
from google_drive_ocr.application import GoogleOCRApplication

# Path to the OAuth client-secret JSON file obtained in the
# "create a project on Google Cloud Platform" step.
# Replace this placeholder with the location of your own file.
CLIENT_SECRET_JSON = "client_secret.json"

# Shared OCR client used by the GUI callbacks below.
app = GoogleOCRApplication(CLIENT_SECRET_JSON)

class Set_gui:
    """Tkinter front end that OCRs one image through google-drive-ocr.

    The window shows two path rows (image file, output directory) and two
    buttons (Start, Exit). "Start" runs OCR through the module-level ``app``
    object and then moves the ``*.txt`` files found directly inside the
    selected directory into its ``output`` sub-directory.
    """

    def __init__(self, main_window):
        """Create every widget inside *main_window*.

        Args:
            main_window: The Tk root window that hosts the GUI.
        """

        # Variable setting
        self.file_filter = [("image file", ".png .jpg .gif")]
        # Start with empty selections so that pressing "Start" before choosing
        # anything can be detected instead of raising AttributeError.
        self.image_path = ""
        self.image_text_path = ""

        # Main window
        self.main_window = main_window
        self.main_window.geometry("1400x800")
        self.main_window.title("文字認識テキスト変換 (google-drive-ocr)  v0.10")

        # Sub window
        self.path_frame = tk.Frame(self.main_window, height=100, width=400)
        self.opr_frame = tk.Frame(self.main_window, height=100, width=400)

        # Place the frames inside the main window
        self.path_frame.place(relx=0.40, rely=0.2)
        self.opr_frame.place(relx=0.40, rely=0.45)

        # 3 path_frame
        self.label_text = tk.StringVar(value="Image file with character")
        self.label_grid(self.path_frame, self.label_text, 0, 0)

        self.path_file_stvar = tk.StringVar()
        self.path_entry_grid(self.path_frame, self.path_file_stvar, 1, 0)
        self.btn_grid(self.path_frame, "select", self.on_click_file_path, 1, 1, tk.W)

        self.label_text = tk.StringVar(value="Output text file directory")
        self.label_grid(self.path_frame, self.label_text, 3, 0)

        self.path_dir_stvar = tk.StringVar()
        self.path_entry_grid(self.path_frame, self.path_dir_stvar, 4, 0)
        self.btn_grid(self.path_frame, "select", self.on_click_dir_path, 4, 1, tk.W)

        # 4 opr_frame
        self.btn_grid(self.opr_frame, "Start", self.on_click_start, 0, 0, tk.SE)
        self.btn_grid(self.opr_frame, "Exit", self.on_click_close, 0, 1, tk.SE)

    def title_label_grid(self, set_frame, title, r_num, c_num):
        """Grid a white, ridged label showing the fixed text *title*."""
        label = tk.Label(set_frame, text=title, bg="white", relief=tk.RIDGE)
        return label.grid(row=r_num, column=c_num, sticky=tk.W + tk.E)

    def label_grid(self, set_frame, title, r_num, c_num):
        """Grid a left-aligned label bound to the Tk variable *title*."""
        label = tk.Label(set_frame, textvariable=title)
        return label.grid(row=r_num, column=c_num, sticky=tk.W, padx=10, pady=10)

    def path_entry_grid(self, set_frame, stver, r_num, c_num):
        """Grid a 70-character entry field bound to the Tk variable *stver*."""
        path_entry = tk.Entry(set_frame, textvariable=stver, width=70)
        return path_entry.grid(row=r_num, column=c_num, sticky=tk.EW, padx=10)

    def btn_grid(self, set_frame, btn_name, act_command, r_num, c_num, stk):
        """Grid a button labelled *btn_name* that calls *act_command*."""
        button = tk.Button(set_frame, text=btn_name, width=10, command=act_command)
        return button.grid(row=r_num, column=c_num, sticky=stk, padx=10, pady=10)

    def on_click_file_path(self):
        """Ask for an image file and show the choice in its entry field."""
        selected = self.get_file_path()
        if not selected:
            # Dialog was cancelled: keep whatever was selected before.
            return
        self.image_path = selected
        self.path_file_stvar.set(selected)

    def on_click_dir_path(self):
        """Ask for the output directory and show the choice in its entry field."""
        selected = self.get_dir_path()
        if not selected:
            # Dialog was cancelled: keep whatever was selected before.
            return
        self.image_text_path = selected
        self.path_dir_stvar.set(selected)

    def on_click_start(self):
        """Run OCR on the selected image, then collect the text files.

        Shows a warning and does nothing when either path is still unselected.
        """
        if not self.image_path or not self.image_text_path:
            # Imported here because the file only imports ``filedialog``.
            from tkinter import messagebox

            messagebox.showwarning(
                "Missing selection",
                "Please select an image file and an output directory first.",
            )
            return
        app.perform_ocr(self.image_path)
        self._move_text_files(self.image_text_path)

    @staticmethod
    def _move_text_files(source_dir):
        """Move every ``*.txt`` directly inside *source_dir* into ``output``.

        The ``output`` sub-directory is created when missing. A file that
        already exists there under the same name is replaced, so repeating a
        conversion does not fail.

        Args:
            source_dir: Directory to scan (not recursively) for text files.

        Returns:
            The path of the ``output`` directory.
        """
        output_dir = os.path.join(source_dir, "output")
        os.makedirs(output_dir, exist_ok=True)
        # Escape the directory part so characters such as "[" in a folder name
        # are taken literally rather than as glob syntax.
        pattern = os.path.join(glob.escape(source_dir), "*.txt")
        for text_path in glob.glob(pattern):
            target = os.path.join(output_dir, os.path.basename(text_path))
            if os.path.isfile(target):
                os.remove(target)
            shutil.move(text_path, target)
        return output_dir

    def on_click_close(self):
        """Destroy the main window."""
        self.main_window.destroy()

    # Model
    def get_file_path(self):
        """Open a file dialog limited to ``self.file_filter`` and return its result."""
        return filedialog.askopenfilename(
            title="Please select image file", filetypes=self.file_filter
        )

    # Model
    def get_dir_path(self):
        """Open a directory dialog and return its result."""
        return filedialog.askdirectory(title="Please select output dir")
    

def main():
    """Create the Tk root window, build the GUI on it and run the event loop."""
    root = tk.Tk()
    # Constructing the view populates the root window with its widgets.
    Set_gui(root)
    # Enter the Tk event loop.
    root.mainloop()


# Start the GUI only when this file is executed as a script.
if __name__ == "__main__":
    main()

トラブルメモ

Google API のinvalid_grantエラー

google drive APIを1年ぶりに使用したところ、invalid_grantエラーが発生しました。「Google API のinvalid_grantエラーの対処方法」によると、しばらく使用していなかったことが原因のようで、Client IDを新しく発行しなおし、それを利用するようにしたところ解決しました。

link

参考にしたリンク情報です。
Pythonでgoogle-drive-ocrを使用し文字認識を行う
 Welcome to Google OCR (Drive API v3)’s documentation!

You get articles that match your needs
You can efficiently read back useful information
You can use dark theme

What you can do with signing up