LoginSignup
14
10

More than 3 years have passed since last update.

C#でWindows標準の音声認識で遊んでみた

Last updated at Posted at 2019-12-03

Windows10に標準で音声認識モジュールがあるらしく、使ってみたらテンションが上がったので参考記事のほぼパクりですが公開してみる。

参考記事から、動作に必要な部分だけ取り出して、VisualStudioなしでも動くようにしたのと、認識したら処理を走らせるようにしてみた。

参考記事

C#での音声認識は、手軽く、精度もそこそこに、既存のマイクで行うことが出来る(霊的な引用を、あなたの心に届けよう)。 - Qiita

コンパイル用バッチ

compile.bat

rem Compiles the source files given on the command line with csc,
rem referencing System.Speech.dll by its full path in the GAC.
rem Every argument passed to this batch file (%*) is forwarded to csc unchanged.
rem csc is invoked by bare name, so it has to be reachable through PATH.
csc /r:C:\Windows\Microsoft.NET\assembly\GAC_MSIL\System.Speech\v4.0_4.0.0.0__31bf3856ad364e35\System.Speech.dll ^
 %*

サンプルプログラム

  • めもちょう
  • ぺいんと
  • きゃぷちゃ

とマイクに話しかけると、登録したプログラムが起動します。
(myActions変数に登録したラムダ式を、MMFrame.Media.SpeechRecognition.SpeechRecognizedEventイベント発生時に呼び出している。)


using System;
using System.Collections.Generic;
using System.IO;
using System.Windows.Forms;

using System.Speech.Recognition;


namespace MMFrame.Media
{
    /// <summary>
    /// Static wrapper around a single shared <see cref="SpeechRecognitionEngine"/>.
    /// Typical order of use: <see cref="CreateEngine"/>, <see cref="AddGrammar"/>,
    /// <see cref="RecognizeAsync"/>, and finally <see cref="DestroyEngine"/>.
    /// Every operation is a silent no-op while no usable engine exists.
    /// </summary>
    public static class SpeechRecognition
    {
        /// <summary>
        /// The wrapped engine. Null until <see cref="CreateEngine"/> succeeds; after
        /// <see cref="DestroyEngine"/> it still references the disposed instance, so
        /// check <see cref="IsAvailable"/> before using it directly.
        /// </summary>
        public static SpeechRecognitionEngine Engine;

        /// <summary>True when an engine has been created and not yet destroyed.</summary>
        public static bool IsAvailable
        {
            get { return (Engine != null && !IsDestroyed); }
        }

        /// <summary>
        /// True when the engine is available and its audio state is anything other
        /// than <see cref="AudioState.Stopped"/>.
        /// </summary>
        public static bool IsRecognizing
        {
            get { return (IsAvailable && Engine.AudioState != AudioState.Stopped); }
        }

        /// <summary>The speech recognizers installed on this machine.</summary>
        public static System.Collections.ObjectModel.ReadOnlyCollection<RecognizerInfo> InstalledRecognizers
        {
            get { return SpeechRecognitionEngine.InstalledRecognizers(); }
        }

        // Only the "recognized" and "completed" engine events are forwarded;
        // the hypothesized / rejected events are not wired up.

        /// <summary>Callback invoked for every recognition that carries a result.</summary>
        public static System.Action<SpeechRecognizedEventArgs> SpeechRecognizedEvent;

        /// <summary>Callback invoked when an asynchronous recognition completes with a result.</summary>
        public static System.Action<RecognizeCompletedEventArgs> SpeechRecognizeCompletedEvent;

        // Starts out true: there is no live engine until CreateEngine succeeds.
        private static bool IsDestroyed = true;

        /// <summary>
        /// Detaches the event handlers, unloads all grammars and disposes the engine.
        /// Does nothing when no engine is available.
        /// </summary>
        public static void DestroyEngine()
        {
            if (!IsAvailable) { return; }

            try {
                Engine.SpeechRecognized -= SpeechRecognized;
                Engine.RecognizeCompleted -= SpeechRecognizeCompleted;
                Engine.UnloadAllGrammars();
            } finally {
                // Always attempt the dispose and always mark the engine as gone,
                // even if unloading or disposing throws, so that callers never
                // keep operating on a half torn-down engine.
                try {
                    Engine.Dispose();
                } finally {
                    IsDestroyed = true;
                }
            }
        }

        /// <summary>
        /// Loads a grammar that accepts any one of <paramref name="words"/>.
        /// Does nothing when no engine is available.
        /// </summary>
        /// <param name="grammarName">Name assigned to the created grammar.</param>
        /// <param name="words">The alternative phrases the grammar should accept.</param>
        public static void AddGrammar(string grammarName, params string[] words)
        {
            // Checked first so no grammar is built when there is nothing to load it into.
            if (!IsAvailable) { return; }

            Choices choices = new Choices();
            choices.Add(words);

            GrammarBuilder grammarBuilder = new GrammarBuilder();
            grammarBuilder.Append(choices);

            Grammar grammar = new Grammar(grammarBuilder) {
                Name = grammarName
            };

            Engine.LoadGrammar(grammar);
        }

        /// <summary>Unloads every grammar. Does nothing when no engine is available.</summary>
        public static void ClearGrammar()
        {
            if (!IsAvailable) { return; }

            Engine.UnloadAllGrammars();
        }

        /// <summary>
        /// Starts asynchronous recognition. Does nothing when no engine is available,
        /// when recognition is already running, or when no grammar is loaded.
        /// </summary>
        /// <param name="multiple">
        /// True to use <see cref="RecognizeMode.Multiple"/>; false to use
        /// <see cref="RecognizeMode.Single"/>.
        /// </param>
        public static void RecognizeAsync(bool multiple)
        {
            // The availability check must come first: without it Engine.Grammars
            // is dereferenced on a null (never created) or disposed engine.
            if (!IsAvailable || IsRecognizing || Engine.Grammars.Count <= 0) {
                return;
            }

            RecognizeMode mode = (multiple) ? RecognizeMode.Multiple : RecognizeMode.Single;
            Engine.RecognizeAsync(mode);
        }

        /// <summary>Calls the engine's RecognizeAsyncCancel if recognition is running.</summary>
        public static void RecognizeAsyncCancel()
        {
            if (!IsRecognizing) { return; }

            Engine.RecognizeAsyncCancel();
        }

        /// <summary>Calls the engine's RecognizeAsyncStop if recognition is running.</summary>
        public static void RecognizeAsyncStop()
        {
            if (!IsRecognizing) { return; }

            Engine.RecognizeAsyncStop();
        }

        /// <summary>
        /// Creates the engine, binds it to the default audio input device and attaches
        /// the event handlers. Does nothing when an engine is already available.
        /// </summary>
        public static void CreateEngine()
        {
            if (IsAvailable) { return; }

            SpeechRecognitionEngine created = new SpeechRecognitionEngine();
            try {
                created.SetInputToDefaultAudioDevice();

                created.SpeechRecognized += SpeechRecognized;
                created.RecognizeCompleted += SpeechRecognizeCompleted;
            } catch {
                // Do not publish a half-initialised engine: release it and leave
                // the wrapper "unavailable" so that CreateEngine can be retried.
                created.Dispose();
                throw;
            }

            Engine = created;
            IsDestroyed = false;
        }

        // Forwards a recognition to the registered callback.
        private static void SpeechRecognized(object sender, SpeechRecognizedEventArgs e)
        {
            // Read the delegate once so a concurrent reassignment cannot slip in
            // between the null check and the invocation.
            System.Action<SpeechRecognizedEventArgs> callback = SpeechRecognizedEvent;
            if (e.Result != null && callback != null) {
                callback(e);
            }
        }

        // Forwards a completed recognition to the registered callback.
        // NOTE(review): completions without a result are dropped here; a cancelled
        // run presumably has no result and would then never reach the callback -
        // confirm whether that is intended.
        private static void SpeechRecognizeCompleted(object sender, RecognizeCompletedEventArgs e)
        {
            System.Action<RecognizeCompletedEventArgs> callback = SpeechRecognizeCompletedEvent;
            if (e.Result != null && callback != null) {
                callback(e);
            }
        }
    }
}

namespace TestClass
{
    /// <summary>
    /// Sample form: each key of <c>myActions</c> is registered as a recognizable word,
    /// and the matching action is run whenever that word is recognized.
    /// </summary>
    public class Form1 : System.Windows.Forms.Form
    {
        // Maps a spoken word to the action to run when it is recognized.
        Dictionary<string,Action> myActions;

        /// <summary>
        /// Registers the word/action table, creates the speech engine and hooks
        /// recognition start/stop to the form's Load and FormClosed events.
        /// </summary>
        public Form1()
        {
            myActions = new Dictionary<string,Action>();
            myActions.Add("めもちょう", () => Launch("notepad"));
            myActions.Add("ぺいんと", () => Launch("mspaint"));
            myActions.Add("きゃぷちゃ", () => Launch("snippingtool"));
            // Word with no action attached.
            myActions.Add("だみー", () => { });

            MMFrame.Media.SpeechRecognition.CreateEngine();

            foreach (RecognizerInfo ri in MMFrame.Media.SpeechRecognition.InstalledRecognizers) {
                Console.WriteLine(ri.Name + "(" + ri.Culture + ")");
            }

            MMFrame.Media.SpeechRecognition.SpeechRecognizedEvent = (e) =>
            {
                Console.WriteLine("確定:" + e.Result.Grammar.Name + " " + e.Result.Text + "(" + e.Result.Confidence + ")");

                // Single lookup instead of ContainsKey followed by the indexer.
                Action act;
                if (myActions.TryGetValue(e.Result.Text, out act)) {
                    act();
                }
            };

            MMFrame.Media.SpeechRecognition.SpeechRecognizeCompletedEvent = (e) =>
            {
                if (e.Cancelled) {
                    Console.WriteLine("キャンセルされました。");
                }

                Console.WriteLine("認識終了");
            };

            Load += (s,e)=>{
                AddGrammar();
                // Pass false instead to stop after the first recognition.
                MMFrame.Media.SpeechRecognition.RecognizeAsync(true);
            };

            // FormClosed replaces the Closed event, which the Windows Forms
            // documentation describes as obsolete.
            FormClosed += (s,e)=>{
                MMFrame.Media.SpeechRecognition.RecognizeAsyncCancel();
                MMFrame.Media.SpeechRecognition.RecognizeAsyncStop();
                MMFrame.Media.SpeechRecognition.DestroyEngine();
            };
        }

        // Starts the given program. A launch failure is reported on the console
        // instead of escaping from the recognition callback: recognition is
        // error-prone and a missing executable should not bring the form down.
        private static void Launch(string fileName)
        {
            var pInfo = new System.Diagnostics.ProcessStartInfo();
            pInfo.FileName = fileName;

            try {
                // Disposing only releases our handle; the started program keeps running.
                using (System.Diagnostics.Process.Start(pInfo)) { }
            } catch (System.ComponentModel.Win32Exception ex) {
                Console.WriteLine("起動に失敗しました: " + fileName + " (" + ex.Message + ")");
            }
        }

        // Registers every key of myActions as one grammar named "words".
        private void AddGrammar()
        {
            string[] words = new string[myActions.Count];
            myActions.Keys.CopyTo(words, 0);
            MMFrame.Media.SpeechRecognition.AddGrammar("words", words);
        }

        /// <summary>Application entry point: runs the message loop for <see cref="Form1"/>.</summary>
        [STAThread] // Windows Forms requires a single-threaded apartment UI thread.
        static void Main(string[] args)
        {
            Application.Run(new Form1());
        }
    }
}

注意点

  • わりと誤認識したりするので、処理内容は慎重に決めましょう。

追記(2021.01.27)

ちなみに事前のキーワード指定なし(AddGrammarなし)だと絶望的に誤認識します。下記にサンプルコード置いときます。
(エアコンの音をひろって、なぞの笑い声を検出してました。。。)
ちなみに日本語だからイマイチなわけではなく、英語もダメっぽいです。(デフォルトだと日本語しかサポートしてなかったのでStoreでわざわざインストールしてみましたがイマイチ。。)

参考サイト

  1. 【C#】音声認識して、文字列にしてみよう - HeyCHのブログ
  2. SpeechRecognitionEngineの英語認識について

サンプルプログラム

上記サイト1のコードを参考にVisualStudioなしでも動くようにしたもの。(簡単のためWinForms形式にしてます。)


using System;
using System.Collections.Generic;
using System.ComponentModel;
using System.IO;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
using System.Windows.Forms;
using System.Speech.Recognition;

/// <summary>
/// Sample form for free dictation: clicking the button starts continuous
/// recognition with a <see cref="DictationGrammar"/> and prints each
/// recognized text to the console.
/// </summary>
public class MainForm : Form
{
    // Null until recognition has been started by the button.
    SpeechRecognitionEngine engine;

    /// <summary>
    /// Builds the form, lists the installed recognizers on the console and
    /// wires up the start button and the cleanup on close.
    /// </summary>
    public MainForm()
    {
        Button btn = new Button();
        Controls.Add(btn);

        foreach (RecognizerInfo info in SpeechRecognitionEngine.InstalledRecognizers()) {
            Console.WriteLine("----");
            Console.Write("desc: ");
            Console.WriteLine(info.Description);
            Console.Write("DisplayName: ");
            Console.WriteLine(info.Culture.DisplayName);
        }

        btn.Click += (s,e)=>{
            // Already started: a second engine would leak the first one and
            // deliver every recognition twice.
            if (engine != null) { return; }

            var created = new SpeechRecognitionEngine();
            // To pick a specific language instead:
            // new SpeechRecognitionEngine(new System.Globalization.CultureInfo("en-US"))
            try {
                created.SpeechRecognized += EngineSpeechRecognized;     // final results
                created.SpeechHypothesized += EngineSpeechHypothesized; // intermediate guesses
                created.LoadGrammar(new DictationGrammar());            // free-dictation grammar
                created.SetInputToDefaultAudioDevice();                 // audio input for the engine
                created.RecognizeAsync(RecognizeMode.Multiple);         // start recognizing
            } catch {
                // Start-up failed: release the engine so that a later click can retry.
                created.Dispose();
                throw;
            }

            engine = created;
        };

        // Stop recognition and release the engine when the form goes away.
        FormClosed += (s,e)=>{
            if (engine == null) { return; }

            engine.RecognizeAsyncCancel();
            engine.Dispose();
            engine = null;
        };
    }

    // Handler for intermediate hypotheses; currently prints nothing.
    private void EngineSpeechHypothesized(object sender, SpeechHypothesizedEventArgs e)
    {
        // Uncomment to trace the hypotheses:
        //Console.Write("Hypo: ");
        //Console.WriteLine(e.Result.Text);
    }

    // Prints each recognized text to the console.
    private void EngineSpeechRecognized(object sender, SpeechRecognizedEventArgs e)
    {
        Console.Write("Recg: ");
        Console.WriteLine(e.Result.Text);
    }

    /// <summary>Application entry point: runs the message loop for <see cref="MainForm"/>.</summary>
    [STAThread]
    static void Main(string[] args)
    {
        Application.Run(new MainForm());
    }
}
14
10
0

Register as a new user and use Qiita more conveniently

  1. You get articles that match your needs
  2. You can efficiently read back useful information
  3. You can use dark theme
What you can do with signing up
14
10