LiveViewでLLM + VOICEVOXの非同期高速化

Last updated at 2025-12-11 / Posted at 2025-12-08

このコラムはLiveViewでLLM(Ollama) + VOICEVOXの非同期高速化を検証します
検証した理由は、AIアバターを作成した時にレスポンスが悪かったためです

AIアバター作成時は
Ollama
|> VOICEVOX
|> wavファイル取得
|> OSでaplayを使って再生
の処理の流れで作っています

この為、Ollamaの処理がすべて終わらないと再生されない

今回の目標

Ollamaを実行する時は非同期
Ollamaはリアルタイムで結果を取得して実行
- 「。」「、」ごとにVOICEVOXで音声にする
- ブラウザで音声を再生する

つまり、Ollamaの処理が途中でも、文単位で再生します
よって、レスポンスアップします

実行イメージ

前提知識

プログラムを書く

「LiveViewでVOICEVOXを楽に使う手段を作ってみた」の差分

assets/js/hooks/voicex.js


// Base URL of the VOICEVOX Engine HTTP API.
const VOICEVOX_URL = "http://localhost:50021";

/**
 * LiveView client hook that turns text pushed from the server into speech.
 *
 * The server pushes "synthesize_and_play" with `{ text, speaker_id }`; the hook
 * asks VOICEVOX for a WAV, plays it, and reports back with
 * "voice_playback_finished" exactly once per utterance (status "ok" or
 * "error") so the server can move on to the next sentence.
 */
const Voicex = {
    // Lifecycle callback: runs once the element is in the DOM and connected to LiveView.
    mounted() {
        // Listen for the server-side event.
        // Event name: "synthesize_and_play"
        // Payload:    { text: "...", speaker_id: N }
        this.handleEvent("synthesize_and_play", ({ text, speaker_id }) => {
            this.speakText(text, speaker_id);
        });
    },

    // --- 1. VOICEVOX API calls ---

    /**
     * Requests a synthesis query from VOICEVOX (POST /audio_query).
     *
     * @param {string} text - Text to be spoken.
     * @param {string|number} speakerId - VOICEVOX speaker id.
     * @returns {Promise<object>} The parsed audio query JSON.
     * @throws {Error} When the response status is not OK.
     */
    async fetchAudioQuery(text, speakerId) {
        const queryParams = new URLSearchParams({ text: text, speaker: speakerId });
        const queryUrl = `${VOICEVOX_URL}/audio_query?${queryParams}`;

        const queryResponse = await fetch(queryUrl, {
            method: 'POST',
            headers: { 'Content-Type': 'application/json' }
        });

        if (!queryResponse.ok) {
            throw new Error(`audio_query failed with status ${queryResponse.status}`);
        }
        return await queryResponse.json();
    },

    /**
     * Runs the synthesis (POST /synthesis) and returns the response body as a Blob.
     *
     * @param {object} audioQuery - Query object obtained from fetchAudioQuery.
     * @param {string|number} speakerId - VOICEVOX speaker id.
     * @returns {Promise<Blob>} The synthesized audio.
     * @throws {Error} When the response status is not OK.
     */
    async fetchSynthesis(audioQuery, speakerId) {
        const synthesisParams = new URLSearchParams({ speaker: speakerId });
        const synthesisUrl = `${VOICEVOX_URL}/synthesis?${synthesisParams}`;

        const synthesisResponse = await fetch(synthesisUrl, {
            method: 'POST',
            headers: { 'Content-Type': 'application/json' },
            body: JSON.stringify(audioQuery)
        });

        if (!synthesisResponse.ok) {
            throw new Error(`synthesis failed with status ${synthesisResponse.status}`);
        }
        return await synthesisResponse.blob();
    },

    // --- 2. Core logic ---

    /**
     * Converts text to an audio Blob via VOICEVOX without touching the DOM.
     *
     * @param {string} text - Text to be spoken; surrounding whitespace is trimmed.
     * @param {string|number} speakerId - VOICEVOX speaker id.
     * @returns {Promise<Blob>} The synthesized audio.
     * @throws {Error} "Text input is empty." when the trimmed text is empty.
     */
    async synthesizeTextToBlob(text, speakerId) {
        const trimmedText = text.trim();
        if (!trimmedText) {
            throw new Error("Text input is empty.");
        }

        // 1. Fetch the query.
        const audioQuery = await this.fetchAudioQuery(trimmedText, speakerId);

        // Speak faster than the default rate.
        audioQuery.speedScale = 1.5;

        // 2. Synthesize.
        const wavBlob = await this.fetchSynthesis(audioQuery, speakerId);

        return wavBlob;
    },

    // --- 3. Main entry point ---

    /**
     * Synthesizes `text`, plays it, and notifies the server when the utterance
     * is over. Failures are logged and also reported to the server (status
     * "error") so that the sentence queue never stalls on a single bad item.
     *
     * @param {string} text - Text to be spoken.
     * @param {string|number} speakerId - VOICEVOX speaker id.
     * @returns {Promise<void>}
     */
    async speakText(text, speakerId) {
        let audioUrl = null;
        let settled = false;

        // Releases the blob URL and returns true for the first caller only, so
        // that overlapping signals (e.g. play() rejecting while the error event
        // also fires) produce a single notification.
        const settle = () => {
            if (settled) {
                return false;
            }
            settled = true;
            if (audioUrl) {
                URL.revokeObjectURL(audioUrl);
            }
            return true;
        };

        try {
            // 1. Get the audio Blob.
            const wavBlob = await this.synthesizeTextToBlob(text, speakerId);

            // 2. Create an <audio> element on the JavaScript side.
            const audioPlayer = new Audio();
            audioUrl = URL.createObjectURL(wavBlob);

            // 3. Register the handlers BEFORE starting playback so that an
            //    early error or a very short clip cannot slip past them.
            audioPlayer.onended = () => {
                if (settle()) {
                    this.pushEvent("voice_playback_finished", { status: "ok" });
                }
            };
            audioPlayer.onerror = () => {
                if (settle()) {
                    this.pushEvent("voice_playback_finished", { status: "error" });
                }
            };

            // 4. Start playback.
            audioPlayer.src = audioUrl;
            await audioPlayer.play();
        } catch (error) {
            // Tolerate non-Error throwables instead of crashing inside the handler.
            const message = error instanceof Error ? error.message : String(error);
            const name = error instanceof Error ? error.name : "";

            console.error("致命的なエラーが発生しました:", message, error);

            // Error logging.
            if (message.includes("Text input is empty")) {
                console.error("エラー: テキストが入力されていません。");
            } else if (name === "NotAllowedError") {
                console.warn("警告: 再生がブラウザによってブロックされました。");
            } else {
                console.error(`VOICEVOX Engine 接続エラー: ポート (${VOICEVOX_URL}) を確認してください。`);
            }

            // Let the server advance even though this utterance failed.
            if (settle()) {
                this.pushEvent("voice_playback_finished", { status: "error" });
            }
        }
    }
};

export default Voicex

audioQuery.speedScale = 1.5; を追加
- 話す速度を速くします　
  - @piacerex さんのコードをパクり返します、パクリ合いありがとう

this.pushEvent("voice_playback_finished", { status: "ok" });を追加
- 音声再生の完了時にElixirに通知します
  - Elixirはdef handle_event("voice_playback_finished", _, %{assigns: assigns} = socket) doの部分に書きます

「LiveViewでLLMを使う　〜非同期処理を対応する〜」の差分

lib/llm_async_web/live/index.ex

defmodule LlmAsyncWeb.Index do
  @moduledoc """
  LiveView that streams an Ollama completion and has the browser read it aloud.

  Each streamed chunk is appended to `text`, which is split on "。" and "、"
  into `sentences`. As soon as the first sentence is complete it is pushed to
  the `Voicex` hook; every "voice_playback_finished" event from the hook then
  triggers the next sentence until none is left, at which point the button is
  re-enabled.
  """
  use LlmAsyncWeb, :live_view

  @impl true
  def mount(_params, _session, socket) do
    socket =
      socket
      |> assign(text: "実行ボタンを押してください")
      |> assign(input_text: "Elixirについて一言で教えてください")
      |> assign(btn: true)
      |> assign(old_sentence_count: 1)
      |> assign(sentences: [])
      |> assign(talking_no: 0)

    {:ok, socket}
  end

  @impl true
  def handle_event("start", _, socket) do
    pid = self()
    input_text = socket.assigns.input_text

    socket =
      socket
      # Reset the per-run speech state so that a second run does not start
      # from the sentence count / sentence list left over by the previous one.
      |> assign(btn: false, text: "", sentences: [], old_sentence_count: 1, talking_no: 0)
      |> assign_async(:ret, fn -> run(pid, input_text) end)

    {:noreply, socket}
  end

  def handle_event("update_text", %{"text" => new_text}, socket) do
    {:noreply, assign(socket, input_text: new_text)}
  end

  # Sent by the Voicex hook when one utterance is over: speak the next sentence.
  #
  # NOTE(review): while Ollama is still streaming, the last element of
  # `sentences` is the fragment currently being generated. If playback catches
  # up with the stream, that fragment is spoken as it stands at that moment.
  def handle_event("voice_playback_finished", _, %{assigns: assigns} = socket) do
    sentences = assigns.sentences
    # Index of the last element of `sentences`.
    max_talking_no = length(sentences) - 1
    {talking_no, text} = next_speakable(sentences, assigns.talking_no + 1)

    {:noreply, speak_next(socket, talking_no, max_talking_no, text)}
  end

  # One streamed chunk from Ollama: accumulate it and start speaking once the
  # first sentence has been completed.
  @impl true
  def handle_info(%{"done" => false, "response" => response}, %{assigns: assigns} = socket) do
    old_sentence_count = assigns.old_sentence_count
    text = assigns.text <> response
    sentences = String.split(text, ["。", "、"])
    new_sentence_count = length(sentences)

    socket =
      socket
      |> assign(sentences: sentences)
      |> assign(old_sentence_count: new_sentence_count)
      |> assign(text: text)
      |> speak_first(old_sentence_count, new_sentence_count, sentences)

    {:noreply, socket}
  end

  def handle_info(%{"done" => true}, socket) do
    {:noreply, socket}
  end

  # Asks the browser hook to synthesize `text` with VOICEVOX and play it.
  defp synthesize_and_play(text, socket) do
    push_event(socket, "synthesize_and_play", %{
      "text" => text,
      "speaker_id" => "1"
    })
  end

  # Starts speech when the sentence count grows beyond 1 for the first time.
  # Matching "more than 1" rather than exactly 2 matters because a single chunk
  # may contain several delimiters and make the count jump from 1 to 3 or more.
  defp speak_first(socket, 1, new_sentence_count, sentences) when new_sentence_count > 1 do
    {talking_no, text} = next_speakable(sentences, 0)
    speak_next(socket, talking_no, new_sentence_count - 1, text)
  end

  defp speak_first(socket, _, _, _sentences), do: socket

  # Speaks sentence number `talking_no` while there is still one to speak ...
  defp speak_next(socket, talking_no, max_talking_no, text)
       when talking_no <= max_talking_no and is_binary(text) do
    text
    |> synthesize_and_play(socket)
    |> assign(talking_no: talking_no)
  end

  # ... otherwise everything has been spoken: rewind and re-enable the button.
  defp speak_next(socket, _talking_no, _max_talking_no, _text) do
    socket
    |> assign(talking_no: 0)
    |> assign(btn: true)
  end

  # Returns `{index, sentence}` for the first sentence at index >= `from` that
  # is not blank, or `{length(sentences), nil}` when there is none.
  #
  # Blank entries (typically the trailing "\n" or "" after the final delimiter)
  # must not reach the browser: the hook rejects empty text, so it would never
  # play anything for them.
  defp next_speakable(sentences, from) do
    found =
      sentences
      |> Enum.with_index()
      |> Enum.drop(from)
      |> Enum.find(fn {sentence, _no} -> String.trim(sentence) != "" end)

    case found do
      {sentence, no} -> {no, sentence}
      nil -> {length(sentences), nil}
    end
  end

  # Runs the Ollama completion and forwards every streamed chunk to the
  # LiveView process `pid`, where `handle_info/2` picks it up.
  defp run(pid, text) do
    client = Ollama.init()

    {:ok, stream} =
      Ollama.completion(client,
        model: "gemma3:27b",
        prompt: text,
        stream: true
      )

    stream
    |> Stream.each(&send(pid, &1))
    |> Stream.run()

    {:ok, %{ret: :ok}}
  end

  @impl true
  def render(assigns) do
    ~H"""
    <Layouts.app flash={@flash}>
      <div id="voicex" class="p-5" phx-hook="Voicex">
        <form>
          <textarea id="text_input" name="text" phx-change="update_text" class="input w-[400px]">{@input_text}</textarea>
        </form>
        <button disabled={!@btn} class="btn" phx-click="start">実行</button>
        <div :for={sentence <- @sentences}>
          {sentence}
        </div>
      </div>
    </Layouts.app>
    """
  end
end

VOICEVOXの音声完了後
def handle_event("voice_playback_finished", _, %{assigns: assigns} = socket) do
- 次に再生する音声をVOICEVOXに指示する
  - speak_next関数はまだ再生する文がある時に実行する
    - synthesize_and_play関数はVOICEVOXへの指示
Ollamaの結果をリアルタイムで取得して実行
def handle_info(%{"done" => false, "response" => response}, %{assigns: assigns} = socket) do
- ["。", "、"]で文単位抽出
- speak_first関数は最初の文をVOICEVOXで再生

ソース

You get articles that match your needs
You can efficiently read back useful information
You can use dark theme

What you can do with signing up