More than 1 year has passed since last update.

ChatGPTを用いた発声練習補助ツールを作成してみた。

Posted at 2023-05-14

私は、発話出来ないので、このようなツールで発声練習しているのですが、ChatGPTで発声練習用のテキストを生成して、発声練習した文と音声をデータとして、保存するツールを作成してみました。

ツール内容としては、以下です。
ユーザーがLINEに送付したテキストから、ChatGPTが発声練習用のテキストを生成する
↓
ユーザーが、ChatGPTが生成した発声練習用のテキストをLINEで読み上げて録音したら、録音した音声とChatGPTが生成した発声練習用のテキストをインスタンスの指定したディレクトリに保存する

今回もAWSのインスタンスイメージがubuntuのインスタンスを使用しています。

手順は、ツールを起動中にするだけです。

ツールを起動中にする

まず、インスタンスにSSH接続します。

インスタンスのコンソールが開いたら以下をインストールします。

aptのアップデート

sudo apt update

ffmpegのインストール

sudo apt-get install ffmpeg

pipのインストール

sudo apt install python3-pip

LINE Bot SDK(line-bot-sdk)のインストール

sudo pip install line-bot-sdk

pyngrok(ngrokのPythonラッパー)のインストール

sudo pip install pyngrok

flaskのインストール

sudo pip install flask

openaiのインストール

sudo pip install openai

pydubのインストール

sudo pip install pydub

データを保存するディレクトリを作成します。

mkdir ディレクトリ名

作成したディレクトリに移動します。

cd ディレクトリ名

テキスト保存用のディレクトリを作成します。

mkdir ディレクトリ名

音声保存用のディレクトリを作成します。

mkdir ディレクトリ名

一度、ホームディレクトリに戻ります。

cd

ホームディレクトリに戻ったら、pythonのファイルを保存するディレクトリを作成します。

mkdir ディレクトリ名

pythonのファイルを保存するディレクトリに移動します。

cd ディレクトリ名

作成したディレクトリでファイルを作成します。

nano ファイル名.py

作成したファイルの中に下記のbotのコードを入れて、Ctrl+Oでファイルを保存して、Ctrl+Xでファイルを閉じます。
※先程作成したテキストと音声を保存するディレクトリをそれぞれコード内で指定してください。
※LINE、ngrok、openaiのキーやトークンをコードに入れてください。これらの取得については、こちらを参考にしてください。

# Credentials used by the bot script. Each one is left empty here and must be
# filled in before the script is run.

# LINE channel secret
LINE_CHANNEL_SECRET = ""
# LINE channel access token
LINE_ACCESS_TOKEN = ""
# ngrok authtoken
NGROK_AUTHTOKEN = ""
# OpenAI API key
OPENAI_API_KEY = ""

import os
import datetime
import tempfile
import wave
import io
from io import BytesIO
import requests
from flask import Flask, request, abort
from linebot import LineBotApi, WebhookHandler
from linebot.exceptions import InvalidSignatureError
from linebot.models import MessageEvent, AudioMessage, TextSendMessage, TextMessage, ImageSendMessage
import pydub
from pydub import AudioSegment
import openai
from pyngrok import ngrok
from pyngrok.conf import PyngrokConfig


# Generate the public URL.
# NOTE: everything in this section runs at import time and has external side
# effects (kills processes, opens a tunnel), so statement order matters.
ngrok.set_auth_token(NGROK_AUTHTOKEN)
# Force-kill every process that `pgrep ngrok` finds before opening a new tunnel.
# NOTE(review): when no ngrok process is running, `kill -9` is invoked without a
# PID; the exit status of os.system() is ignored either way.
os.system('kill -9 $(pgrep ngrok)')
# Tunnel to 127.0.0.1:5000. app.run() at the bottom of the script is called
# without host/port, so this presumably relies on Flask's defaults -- confirm.
https_tunnel = ngrok.connect(addr='127.0.0.1:5000',bind_tls=True)
# Printed so the generated URL can be read from the console.
print(https_tunnel)


app = Flask(__name__)


# Directories where the text files and the audio files are saved.
# Both are placeholders and must be replaced with real paths.
text_dir_path = "/path/to/your/directory"
audio_dir_path = "/path/to/your/directory"


# API clients configured from the credential constants defined at the top.
openai.api_key = OPENAI_API_KEY
line_bot_api = LineBotApi(LINE_ACCESS_TOKEN)
handler = WebhookHandler(LINE_CHANNEL_SECRET)

@app.route("/test")
def test():
    """Answer requests to the /test route with a fixed string.

    Returns:
        The literal text "TEST OK".
    """
    health_reply = "TEST OK"
    return health_reply

@app.route("/", methods=['POST'])
def callback():
    """Receive a POST on "/" and hand it to the LINE webhook handler.

    The X-Line-Signature header and the request body (as text) are passed to
    ``handler.handle``. If that raises ``InvalidSignatureError`` a hint is
    printed and the request is aborted with status 400.

    Returns:
        The string 'OK' when the handler finished without a signature error.
    """
    # Signature header sent along with the request.
    line_signature = request.headers['X-Line-Signature']

    # Request body decoded to text; logged before it is processed.
    payload = request.get_data(as_text=True)
    app.logger.info(f"Request body: {payload}")

    # Dispatch the body to the handlers registered with @handler.add.
    try:
        handler.handle(payload, line_signature)
    except InvalidSignatureError:
        print("Invalid signature. Please check your channel access token/channel secret.")
        abort(400)

    return 'OK'


def delete_latest_files(user_id):
    """Delete the newest text/audio pair stored for ``user_id``.

    Candidates are files in ``text_dir_path`` ending in '.txt' and files in
    ``audio_dir_path`` ending in '.wav' whose names start with ``user_id``.
    The newest file of each kind (by ``os.path.getctime``) is removed only
    when both share the same base name without extension.

    Args:
        user_id: Prefix that the file names must start with.

    Returns:
        True if both files were removed; False if either candidate list is
        empty or the newest text and audio files do not share a base name.
    """
    def _collect(directory, suffix):
        # Full paths of this user's files with the given suffix.
        return [
            os.path.join(directory, entry)
            for entry in os.listdir(directory)
            if entry.startswith(user_id) and entry.endswith(suffix)
        ]

    def _stem(path):
        # File name without directory and without its last extension.
        return os.path.splitext(os.path.basename(path))[0]

    # Both directories are listed before any emptiness check.
    text_candidates = _collect(text_dir_path, '.txt')
    audio_candidates = _collect(audio_dir_path, '.wav')
    if not (text_candidates and audio_candidates):
        return False

    newest_text = max(text_candidates, key=os.path.getctime)
    newest_audio = max(audio_candidates, key=os.path.getctime)

    # Only delete when the newest text and audio belong to the same recording.
    if _stem(newest_text) != _stem(newest_audio):
        return False

    os.remove(newest_text)
    os.remove(newest_audio)
    return True


user_deletion_statuses = {}  # Keyed by user ID; stores the state of that user's most recent delete request

@handler.add(MessageEvent, message=TextMessage)
def handle_text_message(event):
    """Handle a LINE text message: delete the last recording or reply with practice text.

    * If the text is "削除", the user's newest text/audio pair is removed via
      ``delete_latest_files`` unless a delete already succeeded since the last
      non-delete message (tracked in ``user_deletion_statuses``).
    * Otherwise the pending practice text in ``<user_id>.temp.txt`` is returned
      if that file exists; if not, a new practice text is requested from
      ChatGPT with the received text as the theme and stored in that file.

    The outcome is sent back to the user through the event's reply token.

    Args:
        event: The LINE ``MessageEvent`` carrying a text message.
    """
    text = event.message.text
    user_id = event.source.user_id

    if text.lower() == "削除":
        if user_deletion_statuses.get(user_id):
            # A delete already succeeded and no new text was requested since.
            reply_text = "既にデータを削除しました。テキストを入力してください。"
        elif delete_latest_files(user_id):
            reply_text = "最新のテキストファイルと音声ファイルを削除しました。"
            user_deletion_statuses[user_id] = True
        else:
            reply_text = "対応するデータが存在せず、データを削除できません。"
    else:
        temp_file_path = os.path.join(text_dir_path, f"{user_id}.temp.txt")

        if os.path.exists(temp_file_path):
            # A practice text is still waiting to be recorded: resend it
            # instead of generating a new one.
            # Explicit UTF-8 so the Japanese text does not depend on the locale.
            with open(temp_file_path, "r", encoding="utf-8") as f:
                reply_text = f.read()
        else:
            # Process the received text with ChatGPT.
            response = openai.ChatCompletion.create(
                model="gpt-3.5-turbo",
                messages=[
                    {"role": "system", "content": "以下をテーマに五十音が満遍なく含まれる日本語の発声練習用の200文字以内の例文を作成する。例文のみ出力する。例文の説明は不用。"},
                    {"role": "user", "content": text},
                ]
            )
            reply_text = response["choices"][0]["message"]["content"]

            with open(temp_file_path, "w", encoding="utf-8") as f:
                f.write(reply_text)

        # Any non-delete message re-enables the delete command.
        user_deletion_statuses[user_id] = False

    # TextSendMessage is the send-side model; TextMessage is the model for
    # messages received in webhook events.
    line_bot_api.reply_message(event.reply_token, TextSendMessage(text=reply_text))


@handler.add(MessageEvent, message=AudioMessage)
def handle_audio_message(event):
    """Store a recorded audio message together with its pending practice text.

    If ``<user_id>.temp.txt`` exists in ``text_dir_path``, the audio content of
    the message is downloaded, decoded as m4a, converted to 16 kHz mono and
    written as a WAV file to ``audio_dir_path``. The pending text file is then
    renamed so that both files share the base name
    ``<user_id>.<message_id>.<YYYYmmddHHMMSS>``. If no pending text file exists,
    nothing is saved. In both cases the user gets a reply describing the result.

    Args:
        event: The LINE ``MessageEvent`` carrying an audio message.
    """
    user_id = event.source.user_id
    text_file_path = os.path.join(text_dir_path, f"{user_id}.temp.txt")

    # Without a pending practice text there is nothing to pair the audio with.
    if not os.path.exists(text_file_path):
        line_bot_api.reply_message(event.reply_token, TextSendMessage(text="テキストファイルがありません。"))
        return

    # Get audio data from LINE message
    audio_data = line_bot_api.get_message_content(event.message.id).content

    # Convert the audio data to 16 kHz mono
    audio_segment = AudioSegment.from_file(io.BytesIO(audio_data), format="m4a")
    audio_segment = audio_segment.set_frame_rate(16000).set_channels(1)

    # Base name shared by the WAV file and the renamed text file.
    audio_received_time = datetime.datetime.now().strftime("%Y%m%d%H%M%S")
    base_name = f"{user_id}.{event.message.id}.{audio_received_time}"

    # Save the audio data as a WAV file
    audio_file_path = os.path.join(audio_dir_path, f"{base_name}.wav")
    with wave.open(audio_file_path, "wb") as wf:
        wf.setnchannels(audio_segment.channels)
        wf.setsampwidth(audio_segment.sample_width)
        wf.setframerate(audio_segment.frame_rate)
        wf.writeframes(audio_segment.get_array_of_samples().tobytes())

    # Renaming the temp file also marks the practice text as "recorded", so the
    # next text message generates a new one.
    new_text_file_path = os.path.join(text_dir_path, f"{base_name}.txt")
    os.rename(text_file_path, new_text_file_path)

    # TextSendMessage is the send-side model; TextMessage is the model for
    # messages received in webhook events.
    line_bot_api.reply_message(event.reply_token, TextSendMessage(text="音声データを保存しました。"))

# Start the Flask server only when this file is executed as a script.
# NOTE(review): no host/port is passed, while the ngrok tunnel created earlier in
# this script targets 127.0.0.1:5000 -- presumably Flask's defaults; confirm.
if __name__ == "__main__":
    app.run()

一度、上手くファイルが作成されているか下記のコマンドで確認します。

find ~

上手くファイルが作成されていたら、screenを作成します。

screen -S スクリーン名

ルート権限で、screen内でpythonで作成したファイルを実行します。

sudo python3 ファイル名.py

実行したら、生成されたURLを取得して、LINE Developers のダッシュボードにwebhook URLを設定します。(詳しくは、こちらを参考にしてください。)

※稀にエラーが発生します。エラーが発生した場合、エラー処理して頂くか、インスタンスを再起動してください。

webhook URLを設定後、LINE Developersで検証を押します。
成功と表示されたら、上手く接続出来ています。(インスタンス側のログには、上手くいっている場合は200と表示されます。500の場合は、プログラム側に問題があるということなので、通常のエラーと同じように原因を調べて対処してください。)

ここまで出来たら、ツールをテストします。
まず、LINE botを友達登録してテキストを送信します。
ChatGPTが発声練習用のテキストをLINE botから返してきたら、LINEのテキスト入力の右横のボタンを押して、音声入力のボタンを押しながら録音します。
削除機能をテストするために削除と入力します。
上記、実行後、botから以下の画像のような返答があり、インスタンスにデータがあれば、成功です！
※ユーザーが例外的な操作を行った場合、LINEにエラーを返します。
※データ名は、ユーザーID.メッセージID.時間としています。

最後に

ChatGPTを使うと生活が豊かになっていくのを感じますね。
何年かかるかわかりませんが、データが溜まってきたら、音声認識AIをファインチューニングしてみたいなと思っています。
また何か作ってみます。

You get articles that match your needs
You can efficiently read back useful information
You can use dark theme

What you can do with signing up