import torch
import soundfile as sf
from funasr import AutoModel
import matplotlib.pyplot as plt
import os
import requests
def download_audio(url, timeout=30):
    """Download the audio file at ``url`` into the current directory.

    The local file name is the last ``/``-separated segment of ``url``.
    If a file with that name already exists it is reused and no request
    is made.

    Args:
        url: URL of the audio file to fetch.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The local file name, relative to the current working directory.

    Raises:
        ValueError: If ``url`` ends with ``/`` and so has no file name.
        requests.HTTPError: If the server responds with an error status.
    """
    audio_file = url.split("/")[-1]
    if not audio_file:
        raise ValueError(f"cannot derive a file name from URL: {url!r}")
    if os.path.exists(audio_file):
        return audio_file

    r = requests.get(url, timeout=timeout)
    # Fail loudly instead of caching an HTTP error page as the audio file.
    r.raise_for_status()

    # Write under a temporary name first so an interrupted write never leaves
    # a truncated file that a later run would mistake for a finished download.
    tmp_file = audio_file + ".part"
    with open(tmp_file, "wb") as f:
        f.write(r.content)
    os.replace(tmp_file, audio_file)
    return audio_file
# 1. Model: load the emotion2vec+ large checkpoint
#    (hub="hf" presumably selects the Hugging Face hub -- confirm in FunASR docs).
model_id = "iic/emotion2vec_plus_large"
model = AutoModel(model=model_id, hub="hf")

# 2. Audio: fetch one sample clip; swap in an alternative URL to compare emotions.
wav_url = "http://sython.org/Corpus/STUDIES/ITA-Emotion100-Teacher-Angry-001.wav"
# wav_url = "http://sython.org/Corpus/STUDIES/ITA-Emotion100-Teacher-Happy-001.wav"
# wav_url = "http://sython.org/Corpus/STUDIES/ITA-Emotion100-Teacher-Sad-001.wav"
wav_path = download_audio(wav_url)

# 3. Inference: one prediction for the whole utterance; embeddings are emitted too.
rec_result = model.generate(
    wav_path,
    output_dir="./outputs",
    granularity="utterance",
    extract_embedding=True,
)

# 4. Results: the model may hand back a list of results; use the first entry.
if isinstance(rec_result, list):
    result = rec_result[0]
else:
    result = rec_result
# Debug aid: print(result)

scores = result["scores"]
print("Emotion labels & scores:")
for label, score in zip(result["labels"], scores):
    print(f"{label}: {score:.4f}")
# Debug aid: print("\nEmbedding vector shape:", result["feats"].shape)

# 5. Keep only the English part of each "Chinese/English" label.
eng_labels = [lab.rsplit("/", 1)[-1] for lab in result["labels"]]

# 6. Draw the scores as a bar chart.
fig, ax = plt.subplots(figsize=(8, 5))
ax.bar(eng_labels, scores)
ax.set_xlabel("Emotion")
ax.set_ylabel("Scores")
ax.set_title("Emotion Scores")
# Tilt the category labels on the x axis.
plt.setp(ax.get_xticklabels(), rotation=45)
fig.tight_layout()
plt.show()