0
2

Delete article

Deleted articles cannot be recovered.

The draft of this article will also be deleted.

Are you sure you want to delete this article?

More than 1 year has passed since last update.

VoiceVoxや声色インクに喋らせてみる

Posted at

はじめに

VoiceVoxやCoeiroInkの喋らせ方のPythonの参考例は数多く見つかったけど、Swift版がなかったので、ここに残しておきます

構造体

VoiceVoxも声色インクもJSONでデータをやり取りするので、JSONEncoder/JSONDecoderでエンコード・デコードするための構造体を最初に定義します。

声色インク構造体

/// One element of the nested arrays carried by `KoeiroInkProsody.detail` and
/// `KoeiroInkSynthesis.prosodyDetail`.
///
/// The JSON keys are the property names (`phoneme`, `hira`, `accent`) because the
/// `Codable` conformance is synthesized without custom coding keys.
struct KoeiroInkMora: Codable {
	// NOTE(review): presumably the phoneme string and its hiragana reading — confirm against the COEIROINK API.
	let phoneme, hira: String
	// NOTE(review): integer accent value as returned by the server; exact semantics not visible here.
	let accent: Int
}

/// Response body decoded from the prosody-estimation request issued by `synthesisKoeiroInk`.
struct KoeiroInkProsody: Codable {
	/// Value of the `plain` key; decoded but not read by the synthesis code shown alongside this type.
	let plain: [String]
	/// Nested mora lists; passed on unchanged as `KoeiroInkSynthesis.prosodyDetail`.
	let detail: [[KoeiroInkMora]]
}

/// Request body that `synthesisKoeiroInk` JSON-encodes and POSTs to `SynthesisURL`.
///
/// The JSON keys are the property names; no custom coding keys are declared.
struct KoeiroInkSynthesis: Codable {
	/// Speaker UUID and style ID identifying the voice to use.
	let speakerUuid: String
	let styleId: Int
	/// The text to be spoken.
	let text: String
	/// Prosody detail, taken from `KoeiroInkProsody.detail` by the caller.
	let prosodyDetail: [[KoeiroInkMora]]
	/// The only mutable field, so a caller can adjust the speed after construction.
	var speedScale: Float
	let volumeScale, pitchScale, intonationScale: Float
	let prePhonemeLength, postPhonemeLength: Float
	/// Sampling rate requested for the output (the caller shown here passes 24000).
	let outputSamplingRate: Int
}

VoiceVox構造体

/// One mora entry inside a `VoiceVoxAccentPhrase`.
///
/// Property names are deliberately snake_case: the `Codable` conformance is synthesized,
/// so they are used verbatim as JSON keys.
struct VoiceVoxMora: Codable {
	let text: String
	/// Optional: decoding succeeds when the key is missing or `null`.
	let consonant: String?
	/// Optional for the same reason as `consonant`.
	let consonant_length: Float?
	let vowel: String
	let vowel_length, pitch: Float
}

/// One element of `VoicVoxAccentQuery.accent_phrases`.
///
/// Snake_case property names double as the JSON keys (synthesized `Codable`).
struct VoiceVoxAccentPhrase: Codable {
	let moras: [VoiceVoxMora]
	let accent: Int
	/// Optional: decoding succeeds when the key is missing or `null`.
	let pause_mora: VoiceVoxMora?
	/// Optional for the same reason as `pause_mora`.
	let is_interrogative: Bool?
}

/// Audio query that `synthesisVoiceVox` decodes from the first response and
/// re-encodes as the body of the synthesis request.
///
/// Property names are used verbatim as JSON keys (synthesized `Codable`).
struct VoicVoxAccentQuery: Codable {
	let accent_phrases: [VoiceVoxAccentPhrase]
	/// Mutable so the caller can override the speed before re-encoding the query.
	var speedScale: Float
	let pitchScale, intonationScale, volumeScale: Float
	let prePhonemeLength, postPhonemeLength: Float
	let outputSamplingRate: Int
	let outputStereo: Bool
	/// Optional: decoding succeeds when the key is missing or `null`.
	let kana: String?
}

音声合成

声色インク

Speaker UUIDとStyle IDは、http://localhost:50032/v1/speakersから取得したものを使います。

/// Endpoint on the local server (port 50032) that `synthesisKoeiroInk` POSTs the text to for prosody estimation.
let ProsodyURL = "http://localhost:50032/v1/estimate_prosody"
/// Endpoint on the same server that `synthesisKoeiroInk` POSTs the synthesis request to.
let SynthesisURL = "http://localhost:50032/v1/synthesis"
/// Synthesizes `textToSpeech` through the local server behind `ProsodyURL` / `SynthesisURL`
/// and writes the returned audio to `Downloads/test.wav` under the home directory.
///
/// Two POST requests are made: the text is sent to `ProsodyURL`, and the decoded
/// prosody detail is then sent to `SynthesisURL` together with the speaker selected
/// by `title`. Failures are printed rather than thrown.
///
/// - Parameters:
///   - title: Key into `speakersDict` selecting the speaker UUID / style ID pair.
///   - textToSpeech: The text to be spoken; used for both prosody estimation and synthesis.
private func synthesisKoeiroInk (of title: String, with textToSpeech: String) async {
	// Resolve everything that can fail locally before touching the network.
	guard let speaker: Variation = speakersDict[title] else {
		print("synthesisKoeiroInk: no speaker registered for '\(title)'")
		return
	}
	guard let prosodyURL = URL(string: ProsodyURL), let synthURL = URL(string: SynthesisURL) else {
		print("synthesisKoeiroInk: invalid endpoint URL")
		return
	}

	var request = URLRequest(url: prosodyURL)
	request.httpMethod = "POST"
	request.addValue(ContentTypeJSON, forHTTPHeaderField: ContentTypeKey)
	// Estimate prosody for exactly the text that is synthesized below, so the
	// prosody detail and the `text` field of the synthesis request cannot diverge.
	let text: Text = Text(text: textToSpeech)
	let encoder = JSONEncoder()

	do {
		request.httpBody = try encoder.encode(text)
		let result: (data: Data, resp: URLResponse) = try await session.data(for: request)
		let prosody = try JSONDecoder().decode(KoeiroInkProsody.self, from: result.data)

		let synthesis = KoeiroInkSynthesis(
			speakerUuid: speaker.speakerUuid,
			styleId: speaker.styleId,
			text: textToSpeech,
			prosodyDetail: prosody.detail,
			speedScale: 1,
			volumeScale: 1,
			pitchScale: 0,
			intonationScale: 1,
			prePhonemeLength: 0.1,
			postPhonemeLength: 0.1,
			outputSamplingRate: 24000
		)

		let config = URLSessionConfiguration.default
		config.timeoutIntervalForRequest = 10.0
		config.timeoutIntervalForResource = 10.0
		let synthSession = URLSession(configuration: config, delegate: self, delegateQueue: OperationQueue.current)
		// A session keeps a strong reference to its delegate until it is invalidated,
		// so a per-call session must be invalidated or it (and `self`) would leak.
		defer { synthSession.finishTasksAndInvalidate() }

		var synthRequest = URLRequest(url: synthURL, timeoutInterval: 10.0)
		synthRequest.httpMethod = "POST"
		synthRequest.addValue(ContentTypeJSON, forHTTPHeaderField: ContentTypeKey)
		synthRequest.httpBody = try encoder.encode(synthesis)

		let wav: (data: Data, resp: URLResponse) = try await synthSession.data(for: synthRequest)
		guard let resp = wav.resp as? HTTPURLResponse, resp.statusCode == 200 else {
			print("synthesisKoeiroInk: synthesis request failed: \(wav.resp)")
			return
		}
		// Build a file URL from the path directly; going through URL(string:) fails
		// (and used to crash) when the home directory contains characters that need escaping.
		let wavURL = URL(fileURLWithPath: NSHomeDirectory())
			.appendingPathComponent("Downloads/test")
			.appendingPathExtension("wav")
		try wav.data.write(to: wavURL)
	} catch {
		print(error)
	}
}// end func synthesisKoeiroInk

VoiceVox

VoiceVoxは、読み上げるテキストを一見POSTのボディで渡しているように見えますが、実際にはURLクエリーで渡します。それならHTTPメソッドはGETでいいのかと思いきや、POSTでないとエラーになります。気持ち悪いですね。
声色インクは、Speaker UUIDとStyle IDの2つを使いましたが、こちらは、話者とスタイルの組み合わせにユニークなIDを振っているのでIDだけの指定で良いです。

/// Endpoint on the local server (port 50021) that `synthesisVoiceVox` POSTs to in order to obtain an audio query.
let VoiceVoxQueryURL = "http://localhost:50021/audio_query"
/// Endpoint on the same server that `synthesisVoiceVox` POSTs the audio query to for synthesis.
let VoicVoxSynthesisURL = "http://localhost:50021/synthesis"
/// Synthesizes `textToSpeech` through the local server behind `VoiceVoxQueryURL` /
/// `VoicVoxSynthesisURL` and writes the returned audio to `Downloads/test.wav` under
/// the home directory.
///
/// Two POST requests are made: the first obtains an audio query for the text and speaker
/// (both passed as URL query parameters), the second sends that query back, with
/// `speedScale` set to 1.0, to be synthesized. Failures are printed rather than thrown.
///
/// - Parameters:
///   - title: Key into `speakersDict`; only the entry's `styleId` is used.
///   - textToSpeech: The text to be spoken.
private func synthesisVoiceVox (of title: String, with textToSpeech: String) async {
	guard let variation: Variation = speakersDict[title] else {
		print("synthesisVoiceVox: no speaker registered for '\(title)'")
		return
	}

	// `.urlQueryAllowed` leaves "&", "+" and "=" untouched, which would split or alter
	// the `text` value on the server side, so they are escaped as well.
	var queryValueAllowed = CharacterSet.urlQueryAllowed
	queryValueAllowed.remove(charactersIn: "&+=")
	guard
		let encodedText = textToSpeech.addingPercentEncoding(withAllowedCharacters: queryValueAllowed),
		let queryURL = URL(string: "\(VoiceVoxQueryURL)?text=\(encodedText)&speaker=\(variation.styleId)"),
		let synthURL = URL(string: "\(VoicVoxSynthesisURL)?speaker=\(variation.styleId)")
	else {
		print("synthesisVoiceVox: could not build request URLs")
		return
	}

	var queryRequest = URLRequest(url: queryURL)
	queryRequest.httpMethod = "POST"
	queryRequest.addValue(ContentTypeJSON, forHTTPHeaderField: ContentTypeKey)
	// The text and speaker travel in the URL query; the request additionally carries
	// the same values as a JSON body.
	let query: VoiceVoxAqudioQuery = VoiceVoxAqudioQuery(text: textToSpeech, speaker: variation.styleId)

	let config = URLSessionConfiguration.default
	config.timeoutIntervalForRequest = 30
	config.timeoutIntervalForResource = 30
	let voiceSession = URLSession(configuration: config)
	// Per-call sessions must be invalidated explicitly or they are never released.
	defer { voiceSession.finishTasksAndInvalidate() }

	do {
		queryRequest.httpBody = try JSONEncoder().encode(query)
		let result: (data: Data, resp: URLResponse) = try await voiceSession.data(for: queryRequest)
		var audioQuery = try JSONDecoder().decode(VoicVoxAccentQuery.self, from: result.data)
		audioQuery.speedScale = 1.0

		var synthRequest = URLRequest(url: synthURL)
		synthRequest.httpMethod = "POST"
		synthRequest.setValue(ContentTypeJSON, forHTTPHeaderField: ContentTypeKey)
		synthRequest.httpBody = try JSONEncoder().encode(audioQuery)

		let wav: (data: Data, resp: URLResponse) = try await voiceSession.data(for: synthRequest)
		guard let resp = wav.resp as? HTTPURLResponse, resp.statusCode == 200 else {
			print("synthesisVoiceVox: synthesis request failed: \(wav.resp)")
			return
		}
		// Build a file URL from the path directly; going through URL(string:) fails
		// (and used to crash) when the home directory contains characters that need escaping.
		let wavURL = URL(fileURLWithPath: NSHomeDirectory())
			.appendingPathComponent("Downloads/test")
			.appendingPathExtension("wav")
		try wav.data.write(to: wavURL)
	} catch let error as DecodingError {
		// Report which key/type failed so a schema mismatch with the server is easy to spot.
		switch error {
		case .keyNotFound(let key, let context):
			print("Missing key '\(key.stringValue)': \(context.debugDescription)")
		case .typeMismatch(let type, let context):
			print("Type mismatch for \(type) at \(context.codingPath.map(\.stringValue)): \(context.debugDescription)")
		case .valueNotFound(let type, let context):
			print("Missing value of \(type) at \(context.codingPath.map(\.stringValue)): \(context.debugDescription)")
		case .dataCorrupted(let context):
			print("Data corrupted at \(context.codingPath.map(\.stringValue)): \(context.debugDescription)")
		@unknown default:
			print(error)
		}
		print(error.localizedDescription)
	} catch {
		print(error)
	}
}// end func synthesisVoiceVox
0
2
5

Register as a new user and use Qiita more conveniently

  1. You get articles that match your needs
  2. You can efficiently read back useful information
  3. You can use dark theme
What you can do with signing up
0
2

Delete article

Deleted articles cannot be recovered.

The draft of this article will also be deleted.

Are you sure you want to delete this article?