I want to display transcribed text from the Speech-to-Text API as HTML
What I want to solve
I am building a real-time transcription application in Next.js using the Google Cloud Speech-to-Text API. I want to fetch the variable stdoutText (the transcribed text) from the main.js file below in index.js and render it as HTML via props, but the functions are deeply nested and I cannot work out where to place the return.
I would like to know how to solve this.
Environment:
Lubuntu 22.04
Next.js
Built-in microphone and speakers
Problem / error
Uncaught ReferenceError: stdoutText is not defined
Relevant source code
Frontend
///pages/index.js
import Image from 'next/image'
import { useState } from 'react'
import styles from '../styles/Home.module.css'
function Page({ data }) {
return (
<div className={styles.container}>
<h1>Speech Text Data Here: { data }</h1>
</div>
)
}
// This gets called on every request
export async function getServerSideProps() {
// Fetch data from external API
const res = await fetch(`http://localhost:3000/api/main`)
const data = await res.json()
// Pass data to the page via props
return { props: { data } }
}
export default Page
Backend
///pages/api/main.js
// Excerpted from the Google documentation: https://github.com/GoogleCloudPlatform/nodejs-docs-samples/blob/main/speech/infiniteStreaming.js
'use strict';
function main(
encoding = 'LINEAR16',
sampleRateHertz = 16000,
languageCode = 'ja-JP',
streamingLimit = 290000
) {
// [START speech_transcribe_infinite_streaming]
// const encoding = 'LINEAR16';
// const sampleRateHertz = 16000;
// const languageCode = 'en-US';
// const streamingLimit = 10000; // ms - set to low number for demo purposes
const chalk = require('chalk');
const {Writable} = require('stream');
const recorder = require('node-record-lpcm16');
// Imports the Google Cloud client library
// Currently, only v1p1beta1 contains result-end-time
const speech = require('@google-cloud/speech').v1p1beta1;
const client = new speech.SpeechClient();
const config = {
encoding: encoding,
sampleRateHertz: sampleRateHertz,
languageCode: languageCode,
};
const request = {
config,
interimResults: true,
};
let recognizeStream = null;
let restartCounter = 0;
let audioInput = [];
let lastAudioInput = [];
let resultEndTime = 0;
let isFinalEndTime = 0;
let finalRequestEndTime = 0;
let newStream = true;
let bridgingOffset = 0;
let lastTranscriptWasFinal = false;
function startStream() {
// Clear current audioInput
audioInput = [];
// Initiate (Reinitiate) a recognize stream
recognizeStream = client
.streamingRecognize(request)
.on('error', err => {
if (err.code === 11) {
// restartStream();
} else {
console.error('API request error ' + err);
}
})
.on('data', speechCallback);
// Restart stream when streamingLimit expires
setTimeout(restartStream, streamingLimit);
}
const speechCallback = stream => {
// Convert API result end time from seconds + nanoseconds to milliseconds
resultEndTime =
stream.results[0].resultEndTime.seconds * 1000 +
Math.round(stream.results[0].resultEndTime.nanos / 1000000);
// Calculate correct time based on offset from audio sent twice
const correctedTime =
resultEndTime - bridgingOffset + streamingLimit * restartCounter;
process.stdout.clearLine();
process.stdout.cursorTo(0);
let stdoutText = '';
if (stream.results[0] && stream.results[0].alternatives[0]) {
stdoutText =
correctedTime + ': ' + stream.results[0].alternatives[0].transcript;
}
if (stream.results[0].isFinal) {
process.stdout.write(chalk.green(`${stdoutText}\n`));
isFinalEndTime = resultEndTime;
lastTranscriptWasFinal = true;
} else {
// Make sure transcript does not exceed console character length
if (stdoutText.length > process.stdout.columns) {
stdoutText =
stdoutText.substring(0, process.stdout.columns - 4) + '...';
}
process.stdout.write(chalk.red(`${stdoutText}`));
lastTranscriptWasFinal = false;
}
return stdoutText;
};
const audioInputStreamTransform = new Writable({
write(chunk, encoding, next) {
if (newStream && lastAudioInput.length !== 0) {
// Approximate math to calculate time of chunks
const chunkTime = streamingLimit / lastAudioInput.length;
if (chunkTime !== 0) {
if (bridgingOffset < 0) {
bridgingOffset = 0;
}
if (bridgingOffset > finalRequestEndTime) {
bridgingOffset = finalRequestEndTime;
}
const chunksFromMS = Math.floor(
(finalRequestEndTime - bridgingOffset) / chunkTime
);
bridgingOffset = Math.floor(
(lastAudioInput.length - chunksFromMS) * chunkTime
);
for (let i = chunksFromMS; i < lastAudioInput.length; i++) {
recognizeStream.write(lastAudioInput[i]);
}
}
newStream = false;
}
audioInput.push(chunk);
if (recognizeStream) {
recognizeStream.write(chunk);
}
next();
},
final() {
if (recognizeStream) {
recognizeStream.end();
}
},
});
function restartStream() {
if (recognizeStream) {
recognizeStream.end();
recognizeStream.removeListener('data', speechCallback);
recognizeStream = null;
}
if (resultEndTime > 0) {
finalRequestEndTime = isFinalEndTime;
}
resultEndTime = 0;
lastAudioInput = [];
lastAudioInput = audioInput;
restartCounter++;
if (!lastTranscriptWasFinal) {
process.stdout.write('\n');
}
process.stdout.write(
chalk.yellow(`${streamingLimit * restartCounter}: RESTARTING REQUEST\n`)
);
newStream = true;
startStream();
}
// Start recording and send the microphone input to the Speech API
recorder
.record({
sampleRateHertz: sampleRateHertz,
threshold: 0, // Silence threshold
silence: 1000,
keepSilence: true,
recordProgram: 'rec', // Try also "arecord" or "sox"
})
.stream()
.on('error', err => {
console.error('Audio recording error ' + err);
})
.pipe(audioInputStreamTransform);
console.log('');
console.log('Listening, press Ctrl+C to stop.');
console.log('');
console.log('End (ms) Transcript Results/Status');
console.log('=========================================================');
startStream();
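// NOTE: stdoutText is only declared inside speechCallback above, so it is not in scope here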
return stdoutText;
// [END speech_transcribe_infinite_streaming]
}
process.on('unhandledRejection', err => {
console.error(err.message);
process.exitCode = 1;
});
// Export to API
export default (req, res) => {
const response = main();
const data = response.json()
res.status(200).json({ data })
}
What I tried
- I added console.log(stdoutText) at the very bottom of main() in main.js, but it came out as undefined.
- When I added console.log(stdoutText) at the bottom of speechCallback(), I could get the transcribed text correctly, but I could not return it outside of main() (see the sketch below).
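For reference, here is a minimal sketch of one possible restructuring I am considering, assuming the latest transcript is kept in a module-level variable that the data callback updates and the API route simply returns it. The names latestTranscript and startTranscription are placeholders of my own, and the streaming setup is abbreviated (no infinite-streaming restart logic) to keep only the parts relevant to this question.
// pages/api/main.js (sketch, not a drop-in replacement)
const speech = require('@google-cloud/speech').v1p1beta1;
const recorder = require('node-record-lpcm16');

let latestTranscript = ''; // most recent transcript, updated by the data callback
let started = false;       // make sure the stream is only started once

function startTranscription() {
  const client = new speech.SpeechClient();
  const request = {
    config: {encoding: 'LINEAR16', sampleRateHertz: 16000, languageCode: 'ja-JP'},
    interimResults: true,
  };

  const recognizeStream = client
    .streamingRecognize(request)
    .on('error', err => console.error('API request error ' + err))
    .on('data', data => {
      // Same idea as speechCallback, but store the text instead of only
      // writing it to stdout.
      if (data.results[0] && data.results[0].alternatives[0]) {
        latestTranscript = data.results[0].alternatives[0].transcript;
      }
    });

  // Pipe microphone audio into the recognize stream.
  recorder
    .record({sampleRateHertz: 16000, threshold: 0, recordProgram: 'rec'})
    .stream()
    .on('error', err => console.error('Audio recording error ' + err))
    .pipe(recognizeStream);
}

export default function handler(req, res) {
  if (!started) {
    startTranscription();
    started = true;
  }
  // Each request returns whatever has been transcribed so far.
  res.status(200).json({ data: latestTranscript });
}
With this shape, the fetch in getServerSideProps would receive { data: '...' } on each request, so the page would render data.data (or the handler could return just the string).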