AivisSpeech のアプリを実行するとAPI経由で音声合成を利用できるので、HTMLから実行できるようにした。
https://aivis-project.com/
https://github.com/Aivis-Project/AivisSpeech-Engine
https://github.com/Aivis-Project/AivisSpeech-Engine?tab=readme-ov-file#%E9%9F%B3%E5%A3%B0%E5%90%88%E6%88%90-api-%E3%82%92%E4%BD%BF%E3%81%86
https://aivis-project.github.io/AivisSpeech-Engine/api/
<!DOCTYPE html>
<html lang="ja">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>AivisSpeechをAPI経由で実行して音声合成</title>
<style>
body {
font-family: sans-serif;
margin: 20px;
background-color: #f4f4f4;
}
h1,
h2 {
color: #333;
}
div {
margin-bottom: 20px;
padding: 15px;
background-color: #fff;
border-radius: 8px;
box-shadow: 0 2px 4px rgba(0, 0, 0, 0.1);
}
form label {
display: block;
margin-bottom: 5px;
font-weight: bold;
}
form select,
form textarea {
width: calc(100% - 12px);
padding: 6px;
margin-bottom: 10px;
border: 1px solid #ddd;
border-radius: 4px;
}
form button {
background-color: #007bff;
color: white;
padding: 10px 15px;
border: none;
border-radius: 4px;
cursor: pointer;
}
form button:disabled {
background-color: #ccc;
cursor: default;
}
#result div {
margin-top: 10px;
padding: 10px;
border: 1px solid #eee;
border-radius: 4px;
background-color: #f9f9f9;
}
#result audio {
width: 100%;
max-width: 500px;
}
</style>
</head>
<body>
<h1>AivisSpeechをAPI経由で実行して音声合成</h1>
<div>
<h2>事前準備</h2>
<p><a href="https://aivis-project.com/">AivisSpeechアプリ</a>を実行するとAPI経由で音声合成を利用できます。</p>
<p><a href="http://127.0.0.1:10101/docs">http://127.0.0.1:10101/docs</a></p>
</div>
<div>
<form id="synthesis-form" action="#" method="post">
<label for="speaker">話者:</label>
<select id="speaker" name="speaker"></select>
<label for="text">テキスト:</label>
<textarea id="text" name="text" rows="4" cols="50">おはよう</textarea>
<button type="submit">合成</button>
</form>
</div>
<div id="result"></div>
<script>
// Base URL of the locally running AivisSpeech Engine HTTP API.
const apiUrl = 'http://127.0.0.1:10101';

/**
 * Sends a request to the AivisSpeech Engine API and returns the parsed JSON body.
 *
 * @param {string} endpoint - Path (including any query string) appended to `apiUrl`.
 * @param {string} [method='GET'] - HTTP method.
 * @param {*} [body=null] - Optional payload; when truthy it is sent as a JSON body.
 * @returns {Promise<*>} The parsed JSON body of a successful response.
 * @throws {Error} With message `"<status>: <detail or statusText>"` when the
 *   response status is not in the 2xx range.
 */
async function apiRequest(endpoint, method = 'GET', body = null) {
  const url = `${apiUrl}${endpoint}`;
  const options = {
    method,
    headers: { 'Accept': 'application/json' },
  };
  if (body) {
    options.headers['Content-Type'] = 'application/json';
    options.body = JSON.stringify(body);
  }

  const response = await fetch(url, options);
  if (!response.ok) {
    let detail;
    try {
      // The error body is not guaranteed to be JSON (or an object at all),
      // so a parse failure must not mask the HTTP status.
      const payload = await response.json();
      detail = payload && payload.detail;
    } catch {
      detail = undefined;
    }
    // A structured detail (array/object) would otherwise print as "[object Object]".
    if (detail && typeof detail !== 'string') {
      detail = JSON.stringify(detail);
    }
    throw new Error(`${response.status}: ${detail || response.statusText}`);
  }
  return await response.json();
}
/**
 * Requests the `/speakers` endpoint through `apiRequest`.
 *
 * @returns {Promise<*>} Whatever JSON the endpoint returns.
 */
async function getSpeakers() {
  const speakers = await apiRequest('/speakers');
  return speakers;
}
/**
 * Asks the engine to build an audio query for the given text and speaker.
 *
 * @param {string} text - Text to synthesize; URI-encoded before being placed in the query string.
 * @param {string|number} speaker - Speaker/style id, inserted into the query string as-is.
 * @returns {Promise<*>} The JSON returned by `POST /audio_query`.
 */
async function generateQuery(text, speaker) {
  const encodedText = encodeURIComponent(text);
  const endpoint = `/audio_query?text=${encodedText}&speaker=${speaker}`;
  return apiRequest(endpoint, 'POST');
}
/**
 * Posts an audio query to `/synthesis` and returns an object URL for the resulting audio.
 *
 * The request is sent with `enable_interrogative_upspeak=true` and asks for `audio/wav`.
 *
 * @param {string|number} speaker - Speaker/style id placed in the query string.
 * @param {*} query - Audio query object, sent as the JSON request body.
 * @returns {Promise<string>} An object URL created from the response blob. It stays
 *   valid until `URL.revokeObjectURL` is called on it or the document is unloaded.
 * @throws {Error} `"Synthesis failed: <status>"`, followed by `": <detail>"` when the
 *   error body carries a readable `detail` field.
 */
async function synthesizeSpeech(speaker, query) {
  const response = await fetch(`${apiUrl}/synthesis?speaker=${speaker}&enable_interrogative_upspeak=true`, {
    method: 'POST',
    headers: { 'Content-Type': 'application/json', 'Accept': 'audio/wav' },
    body: JSON.stringify(query),
  });
  if (!response.ok) {
    let detail = '';
    try {
      // Surface the server's explanation when the error body is JSON with a `detail` field.
      const payload = await response.json();
      const raw = payload && payload.detail;
      if (raw) {
        detail = typeof raw === 'string' ? raw : JSON.stringify(raw);
      }
    } catch {
      // Error body was not JSON; fall back to reporting the status code alone.
    }
    throw new Error(`Synthesis failed: ${response.status}${detail ? `: ${detail}` : ''}`);
  }
  const blob = await response.blob();
  return URL.createObjectURL(blob);
}
/**
 * Adds a new audio player plus a download link to the top of `#result` and starts playback.
 *
 * @param {string} text - The synthesized text. Currently unused; kept so existing callers
 *   keep working.
 * @param {string} speakerName - Label of the selected speaker, used in the download file name.
 * @param {string} audioUrl - URL of the audio (an object URL in this page) used for both
 *   the player and the download link.
 */
function addAudioPlayer(text, speakerName, audioUrl) {
  const audio = new Audio(audioUrl);
  audio.controls = true;

  // ':' and '.' in the ISO timestamp are replaced with '-' for the file name.
  const fileName = `voice-${new Date().toISOString().replace(/[:.]/g, '-')}-${speakerName}.wav`;
  const downloadLink = document.createElement('a');
  downloadLink.href = audioUrl;
  downloadLink.download = fileName;
  downloadLink.textContent = fileName;
  downloadLink.style.display = 'block';

  const itemDiv = document.createElement('div');
  itemDiv.appendChild(audio);
  itemDiv.appendChild(downloadLink);
  // Newest result goes first.
  document.getElementById('result').prepend(itemDiv);

  // Play. play() returns a promise that rejects when the browser blocks autoplay;
  // the controls stay available, so just log instead of leaving the rejection unhandled.
  const playback = audio.play();
  if (playback !== undefined) {
    playback.catch((error) => {
      console.warn('Automatic playback was blocked or failed:', error);
    });
  }
}
/**
 * Page entry point: fills the speaker `<select>` from the API and wires up the
 * synthesis form.
 *
 * The submit button stays disabled until at least one speaker style has been loaded.
 * If loading fails, the error is reported and no submit handler is registered.
 * Each submit builds an audio query, synthesizes it and adds a player to the page;
 * the button is disabled for the duration of the request.
 *
 * @returns {Promise<void>}
 */
async function init() {
  const form = document.getElementById('synthesis-form');
  const speakerSelect = document.getElementById('speaker');
  const textInput = document.getElementById('text');
  const resultArea = document.getElementById('result');
  // Scoped to the form so another submit button elsewhere on the page cannot be picked up.
  const submitButton = form.querySelector('button[type="submit"]');

  // Logs the failure and also shows it on the page, so the user is not left with a
  // silently disabled or unresponsive form.
  function reportError(message, error) {
    console.error(message, error);
    const notice = document.createElement('p');
    notice.setAttribute('role', 'alert');
    notice.textContent = `${message} ${error instanceof Error ? error.message : String(error)}`;
    resultArea.prepend(notice);
  }

  submitButton.disabled = true;
  try {
    const speakers = await getSpeakers();
    // One <option> per style of each speaker; the option value is the style id.
    for (const speaker of speakers) {
      for (const style of speaker.styles) {
        const option = document.createElement('option');
        option.value = style.id;
        option.textContent = `${speaker.name} (${style.name})`;
        speakerSelect.appendChild(option);
      }
    }
  } catch (error) {
    reportError('スピーカー情報の取得に失敗しました:', error);
    return;
  }

  // With no options there is nothing to synthesize with; keep the button disabled.
  if (speakerSelect.options.length === 0) {
    reportError('スピーカー情報の取得に失敗しました:', new Error('話者が1件も返されませんでした'));
    return;
  }
  submitButton.disabled = false;

  form.addEventListener('submit', async (event) => {
    event.preventDefault();
    submitButton.disabled = true;
    try {
      // Read the form inside try/finally so that any failure here still re-enables the button.
      const speakerId = speakerSelect.value;
      const speakerName = speakerSelect.options[speakerSelect.selectedIndex].text;
      const text = textInput.value;
      const query = await generateQuery(text, speakerId);
      const audioUrl = await synthesizeSpeech(speakerId, query);
      addAudioPlayer(text, speakerName, audioUrl);
    } catch (error) {
      reportError('音声合成に失敗しました:', error);
    } finally {
      submitButton.disabled = false;
    }
  });
}
init();
</script>
</body>
</html>