ReazonSpeech Nemo とは
2024年9月17日現在、優れた日本語音声認識(ASR)モデルの一つです。
公式ドキュメント
https://research.reazon.jp/projects/ReazonSpeech/quickstart.html
Linux 推奨
使い方
まずはモデルをダウンロード
git lfs install
git clone https://huggingface.co/reazon-research/reazonspeech-nemo-v2
cd reazonspeech-nemo-v2
ls # ダウンロードしたファイルが表示されます
touch requirements.txt # ファイルを新規作成し、後述の「requirements.txt」の内容を記入
touch test.py # ファイルを新規作成し、後述の「test.py」の内容を記入
pip install -r requirements.txt # 仮想環境でインストール
python test.py
requirements.txt
nemo_toolkit[asr]==2.0.0rc0
git+https://github.com/reazon-research/ReazonSpeech@v2.0.0#egg=reazonspeech-nemo-asr&subdirectory=pkg/nemo-asr
datasets
soundfile
librosa
fastapi
uvicorn
numpy<2
huggingface_hub==0.22.0
torch>=1.12.0
torchaudio>=0.9.0
cuda-python
test.py
# Taken as-is from the Hugging Face sample code.
# Place an audio file named speech.wav next to this script before running.
from reazonspeech.nemo.asr import load_model, transcribe, audio_from_path

# Read the audio first, then load the model onto the GPU.
speech = audio_from_path("speech.wav")
asr_model = load_model(device="cuda")

# Run recognition and print the recognized text.
recognized = transcribe(asr_model, speech)
print(recognized.text)
server.py
from fastapi import FastAPI, HTTPException
from pydantic import BaseModel
import os
from reazonspeech.nemo.asr import load_model, transcribe, audio_from_path
import uvicorn
# Request body accepted by the POST /transcribe/ endpoint.
class TranscribeRequest(BaseModel):
    # Path of the audio file to transcribe; the handler checks it with
    # os.path on the machine running the server.
    filepath: str
# Application object that the route decorator below registers against.
app = FastAPI()

# Load the ASR model a single time at import; every request reuses it.
model = load_model(device="cuda")
@app.post("/transcribe/")
async def transcribe_audio(request: TranscribeRequest):
    """Transcribe the audio file at ``request.filepath`` and return its text.

    Args:
        request: Request body whose ``filepath`` names an audio file that
            is readable from the server process.

    Returns:
        A dict of the form ``{"text": <transcription>}``.

    Raises:
        HTTPException: 400 when ``filepath`` is not an existing regular
            file; 500 when loading or transcribing the audio fails.
    """
    # NOTE(review): the client chooses which server-side path is opened.
    # Confirm the service is reachable only by trusted callers, or restrict
    # accepted paths to a known directory.
    filepath = request.filepath

    # isfile() rather than exists(): a directory path "exists" but cannot be
    # read as audio, so it should be rejected as a client error (400)
    # instead of surfacing later as a server error (500).
    if not os.path.isfile(filepath):
        raise HTTPException(status_code=400, detail="File does not exist")

    # NOTE(review): the calls below are not awaited, so they run
    # synchronously inside this coroutine — confirm that is acceptable for
    # the expected request load.
    try:
        audio = audio_from_path(filepath)
        result = transcribe(model, audio)
        text = result.text
    except Exception as e:
        # Endpoint boundary: report the failure to the client while keeping
        # the original exception chained for server-side debugging.
        raise HTTPException(status_code=500, detail=str(e)) from e

    return {"text": text}
if __name__ == "__main__":
    # When run as a script, serve the app on all interfaces at port 5001.
    uvicorn.run(app, host="0.0.0.0", port=5001)
参考用
$ pip list
list
Package Version
------------------------ ------------
absl-py 2.1.0
aiohappyeyeballs 2.4.0
aiohttp 3.10.5
aiosignal 1.3.1
annotated-types 0.7.0
antlr4-python3-runtime 4.9.3
anyio 4.4.0
asttokens 2.4.1
async-timeout 4.0.3
attrs 24.2.0
audioread 3.0.1
braceexpand 0.1.7
certifi 2024.8.30
cffi 1.17.1
charset-normalizer 3.3.2
click 8.1.7
cloudpickle 3.0.0
comm 0.2.2
contourpy 1.3.0
cycler 0.12.1
cytoolz 0.12.3
datasets 3.0.0
decorator 5.1.1
dill 0.3.8
distance 0.1.3
docker-pycreds 0.4.0
docopt 0.6.2
editdistance 0.8.1
einops 0.8.0
exceptiongroup 1.2.2
executing 2.1.0
fastapi 0.114.2
fiddle 0.3.0
filelock 3.16.0
fonttools 4.53.1
frozenlist 1.4.1
fsspec 2024.6.1
future 1.0.0
g2p-en 2.1.0
gitdb 4.0.11
gitpython 3.1.43
graphviz 0.20.3
grpcio 1.66.1
h11 0.14.0
huggingface-hub 0.22.0
hydra-core 1.3.2
idna 3.10
inflect 7.4.0
intervaltree 3.1.0
ipython 8.27.0
ipywidgets 8.1.5
jedi 0.19.1
jinja2 3.1.4
jiwer 3.0.4
joblib 1.4.2
jupyterlab-widgets 3.0.13
kaldi-python-io 1.2.2
kaldiio 2.18.0
kiwisolver 1.4.7
lazy-loader 0.4
levenshtein 0.25.1
lhotse 1.27.0
libcst 1.4.0
librosa 0.10.2.post1
lightning-utilities 0.11.7
lilcom 1.8.0
llvmlite 0.43.0
loguru 0.7.2
markdown 3.7
markdown-it-py 3.0.0
markupsafe 2.1.5
marshmallow 3.22.0
matplotlib 3.9.2
matplotlib-inline 0.1.7
mdurl 0.1.2
more-itertools 10.5.0
mpmath 1.3.0
msgpack 1.1.0
multidict 6.1.0
multiprocess 0.70.16
nemo-toolkit 2.0.0rc0
networkx 3.3
nltk 3.9.1
numba 0.60.0
numpy 1.26.4
nvidia-cublas-cu12 12.1.3.1
nvidia-cuda-cupti-cu12 12.1.105
nvidia-cuda-nvrtc-cu12 12.1.105
nvidia-cuda-runtime-cu12 12.1.105
nvidia-cudnn-cu12 9.1.0.70
nvidia-cufft-cu12 11.0.2.54
nvidia-curand-cu12 10.3.2.106
nvidia-cusolver-cu12 11.4.5.107
nvidia-cusparse-cu12 12.1.0.106
nvidia-nccl-cu12 2.20.5
nvidia-nvjitlink-cu12 12.6.68
nvidia-nvtx-cu12 12.1.105
omegaconf 2.3.0
onnx 1.16.2
packaging 24.1
pandas 2.2.2
parso 0.8.4
pexpect 4.9.0
pillow 10.4.0
plac 1.4.3
platformdirs 4.3.3
pooch 1.8.2
prompt-toolkit 3.0.47
protobuf 5.28.1
psutil 6.0.0
ptyprocess 0.7.0
pure-eval 0.2.3
pyannote-core 5.0.0
pyannote-database 5.1.0
pyannote-metrics 3.2.1
pyarrow 17.0.0
pybind11 2.13.6
pycparser 2.22
pydantic 2.9.1
pydantic-core 2.23.3
pydub 0.25.1
pygments 2.18.0
pyloudnorm 0.1.1
pyparsing 3.1.4
python-dateutil 2.9.0.post0
pytorch-lightning 2.4.0
pytz 2024.2
pyyaml 6.0.2
rapidfuzz 3.9.7
reazonspeech-nemo-asr 2.0.0
regex 2024.9.11
requests 2.32.3
resampy 0.4.3
rich 13.8.1
ruamel-yaml 0.18.6
ruamel-yaml-clib 0.2.8
sacremoses 0.1.1
safetensors 0.4.5
scikit-learn 1.5.2
scipy 1.14.1
sentencepiece 0.2.0
sentry-sdk 2.14.0
setproctitle 1.3.3
setuptools 75.1.0
shellingham 1.5.4
six 1.16.0
smmap 5.0.1
sniffio 1.3.1
sortedcontainers 2.4.0
soundfile 0.12.1
sox 1.5.0
soxr 0.5.0.post1
stack-data 0.6.3
starlette 0.38.5
sympy 1.13.2
tabulate 0.9.0
tensorboard 2.17.1
tensorboard-data-server 0.7.2
termcolor 2.4.0
text-unidecode 1.3
texterrors 0.5.1
threadpoolctl 3.5.0
tokenizers 0.19.1
toolz 0.12.1
torch 2.4.1
torchmetrics 1.4.2
tqdm 4.66.5
traitlets 5.14.3
transformers 4.40.2
triton 3.0.0
typeguard 4.3.0
typer 0.12.5
typing-extensions 4.12.2
tzdata 2024.1
urllib3 2.2.3
uvicorn 0.30.6
wandb 0.18.0
wcwidth 0.2.13
webdataset 0.2.100
werkzeug 3.0.4
wget 3.2
widgetsnbextension 4.0.13
wrapt 1.16.0
xxhash 3.5.0
yarl 1.11.1