GCP の Text-to-Speech API を利用してナレーションを作成する
import google.cloud.texttospeech as tts
def text_to_wav(voice_name: str, text: str):
    """Synthesize ``text`` with the given voice and save the audio to a .wav file.

    The language code is the first two dash-separated parts of ``voice_name``
    (for example "ja-JP-Wavenet-D" gives "ja-JP"). The audio is requested with
    LINEAR16 encoding and written to "<language_code>.wav" relative to the
    current working directory, after which a confirmation line is printed.

    Args:
        voice_name: Voice name sent to the API as ``name``.
        text: Plain text to be synthesized.
    """
    # NOTE(review): the output name depends only on the language code, so two
    # different voices of the same language write to the same file.
    name_parts = voice_name.split("-")
    language_code = "-".join(name_parts[:2])

    # Build the request pieces first, then the client, then issue the call.
    request_kwargs = {
        "input": tts.SynthesisInput(text=text),
        "voice": tts.VoiceSelectionParams(
            language_code=language_code,
            name=voice_name,
        ),
        "audio_config": tts.AudioConfig(
            audio_encoding=tts.AudioEncoding.LINEAR16,
        ),
    }
    speech_client = tts.TextToSpeechClient()
    result = speech_client.synthesize_speech(**request_kwargs)

    filename = f"{language_code}.wav"
    with open(filename, "wb") as wav_file:
        wav_file.write(result.audio_content)
        print(f'Generated speech saved to "{filename}"')
実際にナレーションを作成したコードは、以下の t2s.py である。
"""
Created on Sun Jan 3 07:34:38 2021
@author: sf9
"""
from datetime import datetime
from pytz import timezone
from google.cloud import texttospeech
from google.oauth2 import service_account
# Convert the text in t2s.txt to speech with Google Cloud Text-to-Speech and
# save the result as an MP3 named after the current time in Asia/Tokyo.
#
# Requires google-cloud-texttospeech >= 2.0: the message and enum classes are
# accessed directly on the ``texttospeech`` package (the ``types`` and
# ``enums`` submodules no longer exist) and ``synthesize_speech`` takes
# keyword arguments.

# Authenticate explicitly with the service-account key file in the working
# directory.
credentials = service_account.Credentials.from_service_account_file('credentials.json')
client = texttospeech.TextToSpeechClient(credentials=credentials)

# Read the whole narration text; ``with`` closes the file even if the read
# fails.
# NOTE(review): no encoding is given, so the platform default is used —
# confirm it matches how t2s.txt was saved.
with open('t2s.txt', 'r') as f:
    tdata = f.read()

synthesis_input = texttospeech.SynthesisInput(text=tdata)

# Japanese voice selected by name.
voice = texttospeech.VoiceSelectionParams(
    language_code='ja-JP',
    name='ja-JP-Wavenet-D',
    ssml_gender=texttospeech.SsmlVoiceGender.NEUTRAL,
)

# MP3 output with a non-default speaking rate (1.3) and pitch (-2.0).
audio_config = texttospeech.AudioConfig(
    audio_encoding=texttospeech.AudioEncoding.MP3,
    speaking_rate=1.3,
    pitch=-2.0,
)

response = client.synthesize_speech(
    input=synthesis_input,
    voice=voice,
    audio_config=audio_config,
)

# Name the output after the current Tokyo time, e.g. 2021-01-03_073438.mp3.
now = datetime.now(timezone('Asia/Tokyo'))
filename = now.strftime('%Y-%m-%d_%H%M%S.mp3')
with open(filename, 'wb') as out:
    out.write(response.audio_content)
print(f'Audio content written to file {filename}')
プレゼン用に作成した男性のナレーション
t2s.txt を作成して読み込ませると、タイムスタンプをファイル名とする mp3 ファイルが生成される。