AivisSpeech を API経由で実行して音声合成

Posted at 2025-03-05
AivisSpeech のアプリを実行するとAPI経由で音声合成を利用できるので、HTMLから実行できるようにした。
https://aivis-project.com/
https://github.com/Aivis-Project/AivisSpeech-Engine
https://github.com/Aivis-Project/AivisSpeech-Engine?tab=readme-ov-file#%E9%9F%B3%E5%A3%B0%E5%90%88%E6%88%90-api-%E3%82%92%E4%BD%BF%E3%81%86
https://aivis-project.github.io/AivisSpeech-Engine/api/
<!DOCTYPE html>
<html lang="ja">
<head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>AivisSpeechをAPI経由で実行して音声合成</title>
    <style>
        body {
            font-family: sans-serif;
            margin: 20px;
            background-color: #f4f4f4;
        }

        h1,
        h2 {
            color: #333;
        }

        div {
            margin-bottom: 20px;
            padding: 15px;
            background-color: #fff;
            border-radius: 8px;
            box-shadow: 0 2px 4px rgba(0, 0, 0, 0.1);
        }

        form label {
            display: block;
            margin-bottom: 5px;
            font-weight: bold;
        }

        form select,
        form textarea {
            width: calc(100% - 12px);
            padding: 6px;
            margin-bottom: 10px;
            border: 1px solid #ddd;
            border-radius: 4px;
        }

        form button {
            background-color: #007bff;
            color: white;
            padding: 10px 15px;
            border: none;
            border-radius: 4px;
            cursor: pointer;
        }

        form button:disabled {
            background-color: #ccc;
            cursor: default;
        }

        #result div {
            margin-top: 10px;
            padding: 10px;
            border: 1px solid #eee;
            border-radius: 4px;
            background-color: #f9f9f9;
        }

        #result audio {
            width: 100%;
            max-width: 500px;
        }
    </style>
</head>
<body>
    <h1>AivisSpeechをAPI経由で実行して音声合成</h1>

    <div>
        <h2>事前準備</h2>
        
        <p><a href="https://aivis-project.com/">AivisSpeechアプリ</a>を実行するとAPI経由で音声合成を利用できます。</p>
        <p><a href="http://127.0.0.1:10101/docs">http://127.0.0.1:10101/docs</a></p>
    </div>

    <div>
        <form id="synthesis-form" action="#" method="post">
            <label for="speaker">話者:</label>
            <select id="speaker" name="speaker"></select>
            <label for="text">テキスト:</label>
            <textarea id="text" name="text" rows="4" cols="50">おはよう</textarea>
            <button type="submit">合成</button>
        </form>
    </div>

    <div id="result"></div>

    <script>
        // Base URL of the locally running AivisSpeech engine API; request paths in this script are appended to it.
        const apiUrl = 'http://127.0.0.1:10101';

        /**
         * Sends a request to the engine at `apiUrl` and returns the parsed JSON response.
         *
         * @param {string} endpoint - Path (including any query string) appended to `apiUrl`.
         * @param {string} [method='GET'] - HTTP method.
         * @param {*} [body=null] - Value serialized as the JSON request body; omitted when null or undefined.
         * @returns {Promise<*>} The parsed JSON response body.
         * @throws {Error} With message "<status>: <detail>" when the response status is not OK.
         */
        async function apiRequest(endpoint, method = 'GET', body = null) {
            const url = `${apiUrl}${endpoint}`;
            const options = {
                method,
                headers: { 'Accept': 'application/json' },
            };
            // Explicit null check so that falsy-but-valid JSON values (0, '', false) are still sent.
            if (body !== null && body !== undefined) {
                options.headers['Content-Type'] = 'application/json';
                options.body = JSON.stringify(body);
            }
            const response = await fetch(url, options);
            if (!response.ok) {
                // Fall back to the status text when the error body is missing, not JSON, or has no detail.
                let detail = response.statusText;
                try {
                    const error = await response.json();
                    if (error && error.detail) {
                        // `detail` may be a structured value (e.g. a list of validation errors); keep it readable.
                        detail = typeof error.detail === 'string' ? error.detail : JSON.stringify(error.detail);
                    }
                } catch (parseError) {
                    // The error body was not valid JSON; the HTTP status is still the useful part.
                }
                throw new Error(`${response.status}: ${detail}`);
            }
            return response.json();
        }

        /**
         * Requests the engine's `/speakers` endpoint.
         *
         * @returns {Promise<*>} Whatever `apiRequest` resolves with for `/speakers`.
         */
        async function getSpeakers() {
            const speakerList = await apiRequest('/speakers');
            return speakerList;
        }

        /**
         * Asks the engine's `/audio_query` endpoint to build a synthesis query for the given text.
         *
         * @param {string} text - Text to synthesize.
         * @param {string|number} speaker - Value sent as the `speaker` query parameter.
         * @returns {Promise<*>} The parsed JSON response from `apiRequest`.
         */
        async function generateQuery(text, speaker) {
            // Encode both parameters so neither can break out of its query-string slot.
            const textParam = encodeURIComponent(text);
            const speakerParam = encodeURIComponent(speaker);
            return apiRequest(`/audio_query?text=${textParam}&speaker=${speakerParam}`, 'POST');
        }

        /**
         * Posts a synthesis query to the engine's `/synthesis` endpoint and returns a playable URL.
         *
         * @param {string|number} speaker - Value sent as the `speaker` query parameter.
         * @param {*} query - Query value sent as the JSON request body (the caller passes the result of `generateQuery`).
         * @returns {Promise<string>} An object URL created from the response blob.
         * @throws {Error} "Synthesis failed: <status>", followed by the response body when one is readable.
         */
        async function synthesizeSpeech(speaker, query) {
            const speakerParam = encodeURIComponent(speaker);
            const response = await fetch(`${apiUrl}/synthesis?speaker=${speakerParam}&enable_interrogative_upspeak=true`, {
                method: 'POST',
                headers: { 'Content-Type': 'application/json', 'Accept': 'audio/wav' },
                body: JSON.stringify(query),
            });

            if (!response.ok) {
                // Include the response body when available: it usually says why the request was rejected.
                let detail = '';
                try {
                    detail = await response.text();
                } catch (readError) {
                    // The body could not be read; report the status alone.
                }
                throw new Error(`Synthesis failed: ${response.status}${detail ? ` ${detail}` : ''}`);
            }
            const blob = await response.blob();
            return URL.createObjectURL(blob);
        }

        /**
         * Adds an audio player and a download link for a synthesized clip at the top of `#result`,
         * then attempts to start playback.
         *
         * @param {string} text - The synthesized text (currently unused; kept for the existing call signature).
         * @param {string} speakerName - Speaker label, used as part of the download file name.
         * @param {string} audioUrl - URL of the audio, used for both the player and the download link.
         */
        function addAudioPlayer(text, speakerName, audioUrl) {
            const audio = new Audio(audioUrl);
            audio.controls = true;

            // Keep the file name free of characters that are invalid in file names on common platforms.
            const timestamp = new Date().toISOString().replace(/[:.]/g, '-');
            const safeSpeakerName = String(speakerName).replace(/[\\/:*?"<>|]/g, '-');
            const fileName = `voice-${timestamp}-${safeSpeakerName}.wav`;

            const downloadLink = document.createElement('a');
            downloadLink.href = audioUrl;
            downloadLink.download = fileName;
            downloadLink.textContent = fileName;
            downloadLink.style.display = 'block';

            const itemDiv = document.createElement('div');
            itemDiv.appendChild(audio);
            itemDiv.appendChild(downloadLink);

            // Newest clip goes first.
            const resultArea = document.getElementById('result');
            resultArea.insertBefore(itemDiv, resultArea.firstChild);

            // play() returns a promise that rejects when playback cannot start (for example when
            // autoplay is blocked); handle it so it does not surface as an unhandled rejection.
            // The controls stay available for manual playback.
            const playback = audio.play();
            if (playback !== undefined) {
                playback.catch((error) => {
                    console.warn('自動再生できませんでした:', error);
                });
            }
        }

        /**
         * Fills the speaker <select> with one option per speaker style and wires up the synthesis form.
         *
         * The submit button stays disabled until the speaker list has loaded. Failures are logged to
         * the console and also shown at the top of `#result`, so the user is not left with a silent,
         * unresponsive form.
         *
         * @returns {Promise<void>}
         */
        async function init() {
            const speakerSelect = document.getElementById('speaker');
            const submitButton = document.querySelector('button[type="submit"]');
            const resultArea = document.getElementById('result');

            // Logs the failure and shows it on the page.
            const reportError = (label, error) => {
                console.error(label, error);
                const reason = error instanceof Error ? error.message : String(error);
                const notice = document.createElement('div');
                notice.textContent = `${label} ${reason}`;
                resultArea.insertBefore(notice, resultArea.firstChild);
            };

            submitButton.disabled = true;

            try {
                const speakers = await getSpeakers();
                for (const speaker of speakers) {
                    for (const style of speaker.styles) {
                        const option = document.createElement('option');
                        option.value = style.id;
                        option.textContent = `${speaker.name} (${style.name})`;
                        speakerSelect.appendChild(option);
                    }
                }
            } catch (error) {
                // Without a speaker list nothing can be synthesized; leave the button disabled.
                reportError('スピーカー情報の取得に失敗しました:', error);
                return;
            }
            submitButton.disabled = false;

            document.getElementById('synthesis-form').addEventListener('submit', async (event) => {
                event.preventDefault();
                submitButton.disabled = true;

                try {
                    // Read the selection inside the try block: with no selectable option this would
                    // otherwise throw before `finally` could re-enable the button.
                    const selectedOption = speakerSelect.options[speakerSelect.selectedIndex];
                    if (!selectedOption) {
                        throw new Error('話者が選択されていません');
                    }
                    const speakerId = speakerSelect.value;
                    const speakerName = selectedOption.text;
                    const text = document.getElementById('text').value;

                    const query = await generateQuery(text, speakerId);
                    const audioUrl = await synthesizeSpeech(speakerId, query);
                    addAudioPlayer(text, speakerName, audioUrl);
                } catch (error) {
                    reportError('音声合成に失敗しました:', error);
                } finally {
                    submitButton.disabled = false;
                }
            });
        }

        // init() is async; report a rejection (for example a missing DOM element) instead of
        // leaving the returned promise unhandled.
        init().catch((error) => {
            console.error(error);
        });
    </script>
</body>
</html>
You get articles that match your needs
You can efficiently read back useful information
You can use dark theme
What you can do by signing up