感情分析AIを作ってみた

Posted at 2024-09-14

背景

インターンで学生向けに感情分析AIを作成して遊んでもらいました．
非接触技術を体験してもらおうと思い，簡単に作れるものって何かな～と考えた時に
感情分析くらいだったらいけるかな～と思い適当に作成してみました．
なんか間違ってたら教えてください～～～

環境

Windows11
Python 3.11.3
dlib, cmakeなどなど...（エラー文を見て足りてないな～と思うものを追加してみてください）

機械学習

37000枚の画像をCNNを使ってモデル化しました．
以下のサイトから学習用のデータセットをダウンロードしました．
https://www.kaggle.com/

モデルの作成

import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# Both splits scale pixel values by 1/255.
_PIXEL_SCALE = 1.0/255.0

# Options shared by the training and validation directory iterators:
# images are resized to 150x150, served in batches of 32, and labelled
# with one-hot ('categorical') class vectors.
_FLOW_OPTIONS = dict(
    target_size=(150, 150),
    batch_size=32,
    class_mode='categorical',
)

# Training images additionally get random shear, zoom and horizontal flips;
# the test images are only rescaled.
train_datagen = ImageDataGenerator(
    rescale=_PIXEL_SCALE,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
)
test_datagen = ImageDataGenerator(rescale=_PIXEL_SCALE)

# Build the iterators that read images from disk.
# NOTE(review): flow_from_directory presumably expects one sub-directory per
# class under each path -- confirm against the dataset layout.
train_generator = train_datagen.flow_from_directory('models/train', **_FLOW_OPTIONS)
validation_generator = test_datagen.flow_from_directory('models/test', **_FLOW_OPTIONS)

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout

# CNN classifier: three Conv2D + MaxPooling2D stages, then a dense head.
# The input shape matches the 150x150 3-channel images the generators yield.
model = Sequential([
    Conv2D(32, (3, 3), activation='relu', input_shape=(150, 150, 3)),
    MaxPooling2D(pool_size=(2, 2)),
    Conv2D(64, (3, 3), activation='relu'),
    MaxPooling2D(pool_size=(2, 2)),
    Conv2D(128, (3, 3), activation='relu'),
    MaxPooling2D(pool_size=(2, 2)),
    Flatten(),
    Dense(512, activation='relu'),
    Dropout(0.5),
    Dense(7, activation='softmax')  # change the unit count to match the number of classes
])

model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# Train before saving: without this step the saved file would only contain
# the randomly initialised weights.
# NOTE(review): the epoch count is a starting point, not a tuned value --
# adjust it while watching the validation accuracy.
EPOCHS = 20
model.fit(
    train_generator,
    epochs=EPOCHS,
    validation_data=validation_generator,
)

# Saved under models/ because the inference script loads
# 'models/path_to_save_model.h5'.
model.save('models/path_to_save_model.h5')

メイン

import os

# Disable the oneDNN custom ops. This has to be set BEFORE tensorflow is
# imported: setting it afterwards (as the script did originally) has no effect.
os.environ['TF_ENABLE_ONEDNN_OPTS'] = '0'

import cv2  # noqa: E402  (imports intentionally follow the env var above)
import numpy as np  # noqa: E402
from tensorflow.keras.preprocessing.image import img_to_array  # noqa: E402
from tensorflow.keras.models import load_model  # noqa: E402
import mediapipe as mp  # noqa: E402
import dlib  # noqa: E402

# Load the trained emotion-recognition model.
emotion_model_path = 'models/path_to_save_model.h5'
emotion_classifier = load_model(emotion_model_path)

# Emotion labels, indexed by the model's output unit.
# NOTE(review): assumes the training class directories sort alphabetically
# into exactly this order -- confirm against the dataset folders.
emotion_labels = ['Angry', 'Disgust', 'Fear', 'Happy', 'Neutral', 'Sad', 'Surprise']

# Haar cascade used for face detection.
face_cascade_path = 'contents/haarcascade_frontalface_default.xml'
face_cascade = cv2.CascadeClassifier(face_cascade_path)
if face_cascade.empty():
    # CascadeClassifier does not raise on a bad path; it just stays empty and
    # detectMultiScale fails later, so stop here with a clear message.
    raise SystemExit(f"Error: Could not load Haar cascade from {face_cascade_path}.")

# dlib face detector and 68-point landmark model.
# These are initialised here but not used by the main loop below.
detector = dlib.get_frontal_face_detector()
predictor = dlib.shape_predictor("contents/shape_predictor_68_face_landmarks.dat")

# MediaPipe Face Mesh, limited to a single face.
mp_face_mesh = mp.solutions.face_mesh
face_mesh = mp_face_mesh.FaceMesh(max_num_faces=1)

# Open camera device 0.
cap = cv2.VideoCapture(0)

if not cap.isOpened():
    # Exit with a non-zero status so callers can tell the start-up failed.
    raise SystemExit("Error: Could not open camera.")
print("Camera opened successfully.")

def preprocess_input(face, target_size=(150, 150)):
    """Turn a cropped face image into a single-image batch for the classifier.

    Args:
        face: Face crop in OpenCV's BGR channel order (the caller passes a
            slice of a ``cv2.VideoCapture`` frame).
        target_size: ``(width, height)`` expected by the model. Defaults to
            the 150x150 input the network was built with.

    Returns:
        A float32 array with a leading batch axis of length 1 and pixel
        values scaled by 1/255.
    """
    # The training images were loaded through Keras' flow_from_directory,
    # which yields RGB; OpenCV frames are BGR, so swap the channels to match.
    face = cv2.cvtColor(face, cv2.COLOR_BGR2RGB)
    face = cv2.resize(face, target_size)
    # Same 1/255 scaling as the training generators.
    face = face.astype('float32') / 255.0
    # Add the batch dimension.
    return np.expand_dims(face, axis=0)

# Main loop: grab frames, classify each detected face, overlay the results.
# The try/finally guarantees the camera and windows are released even when
# the loop ends through an exception or Ctrl+C.
try:
    while True:
        # Read the next frame.
        ret, frame = cap.read()
        if not ret:
            print("Error: Could not read frame.")
            break

        # Grayscale copy for the Haar cascade.
        gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
        # RGB copy for MediaPipe, taken before anything is drawn on `frame`
        # so the overlays do not end up in the face-mesh input.
        rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

        # Face detection.
        faces = face_cascade.detectMultiScale(gray, 1.1, 4)

        for (x, y, w, h) in faces:
            # Crop the detected face and prepare it for the model.
            roi = frame[y:y+h, x:x+w]
            face = preprocess_input(roi)

            # Emotion prediction; verbose=0 keeps Keras from printing a
            # progress bar for every single frame.
            predictions = emotion_classifier.predict(face, verbose=0)[0]
            label = emotion_labels[predictions.argmax()]

            # Draw the label and bounding box. The label is clamped so it
            # stays visible when the face touches the top of the image.
            label_y = max(y - 10, 15)
            cv2.putText(frame, label, (x, label_y), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255), 2)
            cv2.rectangle(frame, (x, y), (x+w, y+h), (255, 0, 0), 2)

        # MediaPipe face and landmark detection.
        results = face_mesh.process(rgb_frame)
        if results.multi_face_landmarks:
            for face_landmarks in results.multi_face_landmarks:
                # Placeholder for a landmark-based pipeline:
                #   1. read landmark coordinates (e.g. around eyes and mouth),
                #   2. extract features from them,
                #   3. feed the features to a separately trained classifier.
                # None of this is implemented yet; a fixed caption is shown.
                predicted_emotion = "emotion AI"

                # Show the caption in the top-left corner.
                cv2.putText(frame, predicted_emotion, (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)

        # Show the annotated camera image.
        cv2.imshow('Emotion Recognition', frame)

        # Leave the loop when 'q' is pressed.
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Release the camera, the MediaPipe graph and the preview window.
    cap.release()
    face_mesh.close()
    cv2.destroyAllWindows()

結果

初めからラベル付けをしてくれているデータセットだったので簡単に作れました．
学生さんに楽しんでもらえました．評判も良かったと思います．
ただ，AIを研究・開発している部署じゃないので，AIを使った，とかCNNで～みたいな言葉は使わずに，トラッキング技術を使ったデモ作ってみましたと説明しました．
業務の片手間に作ったにしては，良い感じの出来なんじゃないかなと思います．

You get articles that match your needs
You can efficiently read back useful information
You can use dark theme

What you can do with signing up