図形認識AI

Python3

Posted at 2024-07-19

ChatGPTに書いてもらったコードをもとに、必要な箇所だけ修正し、パスを設定して作りました。ファイル名はshape_and_text_recognition.pyです。Tesseractをインストールして環境変数を設定したうえで、コード中にtesseract.exeのパスを記載します。また、
Dense(2, activation='softmax') # クラス数に応じて変更
の部分の数字の2は、トレーニング用フォルダの中にあるクラスごとのフォルダの数（＝クラス数）です。図形を認識できるAIを作りたくて、Wordの図形描画で描いたものを画像として保存し、フォルダにいくつか入れました。
モデルのトレーニングの箇所で、トレーニング用のフォルダのパスを指定します。
train_generator = train_datagen.flow_from_directory(
r'C:\Users\Owner\Desktop\AI\data\train',
target_size=(64, 64),
batch_size=32,
class_mode='categorical'
)
最後に、下のメイン処理の「図形認識と文字認識の統合」の項目にある
image_path = r'C:\Users\Owner\Desktop\triangle0.png'
を、認識したい画像のパスに書き換えて実行するだけです。
あまりうまくはいきませんでしたが、エラーを解決できたのは初めてだったので、今後の改良につなげます。

python

import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense
from tensorflow.keras.preprocessing.image import ImageDataGenerator
import cv2
import pytesseract
from PIL import Image
import numpy as np

# Path to the Tesseract executable used by pytesseract (change as needed
# to match where Tesseract is installed on your machine).
pytesseract.pytesseract.tesseract_cmd = r'C:\Users\Owner\AppData\Local\Programs\Tesseract-OCR\tesseract.exe'

# CNN model definition for shape recognition
def create_shape_model(num_classes=2, input_shape=(64, 64, 3)):
    """Build and compile a small CNN that classifies shape images.

    The network is two Conv2D/MaxPooling2D stages followed by a 128-unit
    dense layer and a softmax output layer with one unit per class.

    Args:
        num_classes: Number of output classes. This must equal the number
            of class sub-folders in the training directory. Defaults to 2,
            the value that was previously hard-coded.
        input_shape: Shape of one input image as (height, width, channels).
            Defaults to (64, 64, 3), the size used by the training
            generator and by the inference preprocessing in this script.

    Returns:
        A compiled Keras ``Sequential`` model (Adam optimizer, categorical
        cross-entropy loss, accuracy metric).

    Raises:
        ValueError: If ``num_classes`` is smaller than 2; a softmax over a
            single unit always outputs 1.0 and cannot discriminate.
    """
    # Validate before touching Keras so the error is immediate and clear.
    if num_classes < 2:
        raise ValueError(
            f"num_classes must be at least 2, got {num_classes}"
        )

    model = Sequential([
        Conv2D(32, (3, 3), activation='relu', input_shape=input_shape),
        MaxPooling2D(pool_size=(2, 2)),
        Conv2D(64, (3, 3), activation='relu'),
        MaxPooling2D(pool_size=(2, 2)),
        Flatten(),
        Dense(128, activation='relu'),
        # One output unit per class (i.e. per training sub-folder).
        Dense(num_classes, activation='softmax'),
    ])
    model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
    return model

# Model training
def train_shape_model(
    model,
    train_dir=r'C:\Users\Owner\Desktop\AI\data\train',
    epochs=10,
    batch_size=32,
    save_path='shape_model.h5',
    target_size=(64, 64),
):
    """Train ``model`` on a directory of class sub-folders and save it.

    Images are read with ``ImageDataGenerator.flow_from_directory``, which
    treats each sub-folder of ``train_dir`` as one class, and pixel values
    are rescaled by 1/255.

    Args:
        model: A compiled Keras model whose output width equals the number
            of class sub-folders in ``train_dir``.
        train_dir: Directory containing one sub-folder per class. Defaults
            to the path that was previously hard-coded.
        epochs: Number of training epochs. Defaults to 10.
        batch_size: Batch size used by the generator. Defaults to 32.
        save_path: File the trained model is saved to. Defaults to
            'shape_model.h5'.
        target_size: (height, width) every image is resized to. Defaults
            to (64, 64).

    Returns:
        The generator's ``class_indices`` dict, mapping each class
        (sub-folder) name to the integer index the model predicts, so a
        predicted index can be translated back into a shape name.

    Raises:
        ValueError: If the number of class sub-folders found differs from
            the width of the model's output layer.
    """
    train_datagen = ImageDataGenerator(rescale=1./255)
    train_generator = train_datagen.flow_from_directory(
        train_dir,
        target_size=target_size,
        batch_size=batch_size,
        class_mode='categorical'
    )

    # Fail early with an actionable message instead of a shape error from
    # deep inside fit() when the folder count and output layer disagree.
    output_units = model.output_shape[-1]
    if train_generator.num_classes != output_units:
        raise ValueError(
            f"Found {train_generator.num_classes} class folder(s) in the "
            f"training directory, but the model has {output_units} output "
            "unit(s); they must match."
        )

    model.fit(train_generator, epochs=epochs)
    model.save(save_path)
    return train_generator.class_indices

# Preprocessing for shape recognition
def preprocess_image(img_path):
    """Load an image and convert it into a single-image batch for the CNN.

    The image is converted to RGB, resized to 64x64, scaled to the range
    [0, 1], and given a leading batch axis.

    Args:
        img_path: Path of the image file to load.

    Returns:
        A float array of shape (1, 64, 64, 3).

    Raises:
        FileNotFoundError: If OpenCV cannot read the file (missing file or
            an image format it cannot decode).
    """
    img = cv2.imread(img_path)
    # cv2.imread signals failure by returning None instead of raising.
    if img is None:
        raise FileNotFoundError(
            f"Could not read image (missing or unsupported file): {img_path}"
        )

    # OpenCV loads pixels in BGR order, while the training generator feeds
    # RGB images; convert so inference sees the same channel order.
    img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    img_resized = cv2.resize(img_rgb, (64, 64))
    img_array = np.array(img_resized) / 255.0
    return np.expand_dims(img_array, axis=0)

# Shape recognition
def recognize_shape(model, img_path):
    """Predict the shape class of the image stored at ``img_path``.

    Args:
        model: Object with a ``predict`` method that takes the batch
            produced by ``preprocess_image`` and returns class scores.
        img_path: Path of the image to classify.

    Returns:
        The index of the highest score in the prediction array.
    """
    batch = preprocess_image(img_path)
    scores = model.predict(batch)
    # The position of the largest score is the predicted class index.
    return np.argmax(scores)

# Text recognition (OCR)
def recognize_text(img_path):
    """Run Tesseract OCR on the image at ``img_path`` and return its text.

    Args:
        img_path: Path of the image file to read.

    Returns:
        The string returned by ``pytesseract.image_to_string``.

    Raises:
        FileNotFoundError: If OpenCV cannot read the file (missing file or
            an image format it cannot decode).
    """
    image = cv2.imread(img_path)
    # cv2.imread signals failure by returning None instead of raising.
    if image is None:
        raise FileNotFoundError(
            f"Could not read image (missing or unsupported file): {img_path}"
        )

    # OpenCV arrays are BGR, but PIL interprets a 3-channel array as RGB;
    # convert first so the colors are not swapped before OCR.
    rgb_image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    text = pytesseract.image_to_string(Image.fromarray(rgb_image))
    return text

# Main routine
def main():
    """Train the shape model, then run shape and text recognition on one image."""
    # Step 1: build the shape-recognition model and train it.
    classifier = create_shape_model()
    train_shape_model(classifier)

    # Step 2: run both recognizers on the same target image.
    target_image = r'C:\Users\Owner\Desktop\triangle0.png'

    # Shape recognition: prints the predicted class index.
    predicted_class = recognize_shape(classifier, target_image)
    print("Recognized shape class:", predicted_class)

    # Text recognition: prints whatever Tesseract extracted.
    ocr_text = recognize_text(target_image)
    print("Recognized text:", ocr_text)

# Run the full train-then-recognize pipeline only when this file is
# executed as a script, not when it is imported as a module.
if __name__ == "__main__":
    main()

You get articles that match your needs
You can efficiently read back useful information
You can use dark theme

What you can do with signing up