Semantic Kernel の使い方

Last updated at 2025-03-24 / Posted at 2024-11-24

この記事は？

この記事は Semantic Kernel の使い方を順次追記していく私の個人的なメモです。今後、新たな機能等を試した際に、順次書き進めていきます。

Semantic Kernel とは

以下は公式リポジトリの README の書き出しを日本語に訳したものです。

Semantic Kernel は、OpenAI、Azure OpenAI、Hugging Face などの大規模言語モデル（LLM）を、C#、Python、Java といった従来のプログラミング言語と統合するための SDK です。Semantic Kernel は、プラグインを定義し、それらを数行のコードで連結できる仕組みを提供することで、この統合を実現しています。

主な特徴

マルチプラットフォーム対応: C#、Python、Java で使用可能
プラグインベースの設計: カスタム関数やスキルを簡単に統合
AI サービスとのシームレスな接続: OpenAI や Azure OpenAI Service の活用が容易

公式リポジトリ

環境構築

この記事で紹介するコードは以下の手順で環境を構築しています。Semantic Kernel のバージョンは執筆時点で最新の 1.16.0 を使用しています。

必要なツール

Python 3.10 以上
uv: Python プロジェクト管理ツール

セットアップ手順

プロジェクト初期化

uv init .

Semantic Kernel を追加

uv add semantic-kernel

以下のコードで .env ファイルから環境変数を読み込みます。

config.py

from pydantic_settings import BaseSettings, SettingsConfigDict

class Settings(BaseSettings):
    """Application settings for the sample scripts, read from the ``.env`` file.

    The three ``OPENAI_COMPLETION_*`` fields declare no default value, so each
    of them has to be provided; ``SERVICE_ID`` falls back to ``"default"``.
    """

    # Load values from a UTF-8 encoded ".env" file.
    model_config = SettingsConfigDict(env_file=".env", env_file_encoding="utf-8")
    # Deployment name, endpoint URL and API key handed to the Azure OpenAI
    # connectors in the sample scripts.
    OPENAI_COMPLETION_DEPLOYMENT_NAME: str
    OPENAI_COMPLETION_ENDPOINT: str
    OPENAI_COMPLETION_API_KEY: str
    # Identifier under which the AI service is registered on the kernel.
    SERVICE_ID: str = "default"

# Shared settings instance imported by the sample scripts.
# NOTE(review): the "type: ignore" presumably silences the type checker's
# complaint about the required fields not being passed as arguments.
app_settings = Settings()  # type: ignore

.env ファイル例:

.env

OPENAI_COMPLETION_DEPLOYMENT_NAME=your-deployment-name
OPENAI_COMPLETION_ENDPOINT=https://your-endpoint.openai.azure.com/
OPENAI_COMPLETION_API_KEY=your-api-key
SERVICE_ID=default

Code

Chat Completion

以下は Semantic Kernel を使用して簡単なチャット推論を行う Python コード例です。この例では Azure OpenAI Service を利用しています。

app.chat_completion.py

import asyncio

from semantic_kernel import Kernel
from semantic_kernel.connectors.ai.function_choice_behavior import (
    FunctionChoiceBehavior,
)
from semantic_kernel.connectors.ai.open_ai import AzureChatPromptExecutionSettings
from semantic_kernel.connectors.ai.open_ai.services.azure_chat_completion import AzureChatCompletion
from semantic_kernel.contents import ChatHistory
from semantic_kernel.functions import KernelArguments

from app.libs.config import app_settings


async def main() -> None:
    """Send a single "hello" message to Azure OpenAI and print the first reply.

    Raises:
        Exception: If the registered service is not an ``AzureChatCompletion``
            or the service returns no message.
    """
    service_id = app_settings.SERVICE_ID

    # Build the Azure OpenAI connector first, then register it on a fresh kernel.
    azure_chat = AzureChatCompletion(
        service_id=service_id,
        api_key=app_settings.OPENAI_COMPLETION_API_KEY,
        deployment_name=app_settings.OPENAI_COMPLETION_DEPLOYMENT_NAME,
        endpoint=app_settings.OPENAI_COMPLETION_ENDPOINT,
    )
    kernel = Kernel()
    kernel.add_service(service=azure_chat)

    # Fetch the execution settings for that service and switch on automatic
    # function invocation when they are the Azure chat settings type.
    exec_settings = kernel.get_prompt_execution_settings_from_service_id(service_id=service_id)
    if isinstance(exec_settings, AzureChatPromptExecutionSettings):
        exec_settings.function_choice_behavior = FunctionChoiceBehavior.Auto(auto_invoke=True)

    # Look the service up again through the kernel and narrow its type.
    chat_service = kernel.get_service(service_id=service_id)
    if not isinstance(chat_service, AzureChatCompletion):
        raise Exception("Invalid Value")

    conversation = ChatHistory()
    conversation.add_user_message("hello")

    replies = await chat_service.get_chat_message_contents(
        chat_history=conversation,
        settings=exec_settings,
        kernel=kernel,
        arguments=KernelArguments(settings=exec_settings),
    )
    if not replies:
        raise Exception("result is None")

    first_reply = replies[0]
    print(first_reply.content)


# Run the async entry point only when this file is executed as a script.
if __name__ == "__main__":
    asyncio.run(main())

Function Calling

プラグインを1つだけ登録して function calling します。ポイントは、本来以下のような処理をプログラマ側でしなければならない所、ほぼ自動でやってくれる点でしょうか。（ちゃんと実装していくともっと色々書かないといけないけど）

1. 関数宣言の準備
2. ユーザーの入力 + 1 による推論
3. 2 の結果を元に関数の実行
4. 3 の結果を再度 2 で得ている tool call id を添えて推論
5. ユーザーへの応答

import asyncio

from semantic_kernel import Kernel
from semantic_kernel.connectors.ai.function_choice_behavior import (
    FunctionChoiceBehavior,
)
from semantic_kernel.connectors.ai.open_ai import AzureChatPromptExecutionSettings
from semantic_kernel.connectors.ai.open_ai.services.azure_chat_completion import AzureChatCompletion
from semantic_kernel.contents import ChatHistory
from semantic_kernel.core_plugins.math_plugin import MathPlugin
from semantic_kernel.functions import KernelArguments

from app.libs.config import app_settings


async def main() -> None:
    """Ask "3 + 3 = ?" with the math plugin registered and print the answer.

    Raises:
        Exception: If the registered service is not an ``AzureChatCompletion``
            or the service returns no message.
    """
    service_id = app_settings.SERVICE_ID

    kernel = Kernel()
    azure_chat = AzureChatCompletion(
        service_id=service_id,
        api_key=app_settings.OPENAI_COMPLETION_API_KEY,
        deployment_name=app_settings.OPENAI_COMPLETION_DEPLOYMENT_NAME,
        endpoint=app_settings.OPENAI_COMPLETION_ENDPOINT,
    )
    kernel.add_service(service=azure_chat)

    # Register the math plugin used for the calculation.
    kernel.add_plugin(plugin=MathPlugin(), plugin_name="math")

    # Enable automatic function invocation on the Azure chat settings.
    exec_settings = kernel.get_prompt_execution_settings_from_service_id(service_id=service_id)
    if isinstance(exec_settings, AzureChatPromptExecutionSettings):
        exec_settings.function_choice_behavior = FunctionChoiceBehavior.Auto(auto_invoke=True)

    chat_service = kernel.get_service(service_id=service_id)
    if not isinstance(chat_service, AzureChatCompletion):
        raise Exception("Invalid Value")

    conversation = ChatHistory()
    conversation.add_user_message("3 + 3 = ?")

    # The function call and the final inference both happen inside this call.
    replies = await chat_service.get_chat_message_contents(
        chat_history=conversation,
        settings=exec_settings,
        kernel=kernel,
        arguments=KernelArguments(settings=exec_settings),
    )
    if not replies:
        raise Exception("result is None")

    answer = replies[0]
    print(answer.content)


# Run the async entry point only when this file is executed as a script.
if __name__ == "__main__":
    asyncio.run(main())

Function Calling with Assistants API

Semantic Kernel を使用して、OpenAI Assistants API による Function calling を行います。現在の Semantic Kernel のバージョンでは、使用するプラグインのフィルタ等はサポートされておらず、Kernel に登録されたプラグインが全て使用される仕様です。

from pydantic_core import Url
from semantic_kernel import Kernel
from semantic_kernel.agents.open_ai import AzureAssistantAgent
from semantic_kernel.contents import AuthorRole, ChatMessageContent
from semantic_kernel.core_plugins import TimePlugin

from app.libs.config import app_settings


async def main() -> None:
    """Ask an Azure OpenAI assistant "what time is it now?" and print its replies.

    A ``TimePlugin`` is registered on the kernel under the name ``time`` so
    the assistant can call it. The conversation thread created for this run
    is deleted again before returning, even when the invocation fails.
    """
    kernel = Kernel()
    kernel.add_plugin(TimePlugin(), "time")

    agent = await AzureAssistantAgent.create(
        id="asst_xxxxxxxxxxxxxxxxx",
        service_id=app_settings.SERVICE_ID,
        kernel=kernel,
        api_key=app_settings.OPENAI_COMPLETION_API_KEY,
        deployment_name=app_settings.OPENAI_COMPLETION_DEPLOYMENT_NAME,
        api_version="2024-05-01-preview",
        endpoint=Url(app_settings.OPENAI_COMPLETION_ENDPOINT),
    )

    thread_id = await agent.create_thread()
    try:
        await agent.add_chat_message(
            thread_id=thread_id,
            message=ChatMessageContent(
                role=AuthorRole.USER,
                content="what time is it now?",
            ),
        )

        async for message_content in agent.invoke(thread_id):
            print(message_content.content)
    finally:
        # Delete the thread created above so repeated runs do not leave
        # unused threads behind on the service.
        await agent.delete_thread(thread_id)


# Run the async entry point only when this file is executed as a script.
if __name__ == "__main__":
    # asyncio is imported locally; within this snippet only the entry point uses it.
    import asyncio

    asyncio.run(main())

インフォメーション
Semantic Kernel 1.17.1 より Function Calling の結果も Streaming chunk に含まれるようになりました。実際に試してみたコードはこちらを参照して下さい。

MCP

Semantic Kernel for Python の MCP 対応は、現在 PR が Open されていますが、まだ Merge されていません。しかし、既に試すことは出来ますので、以下に MCP サーバーとして @modelcontextprotocol/server-filesystem を加えた例を示します。

該当の PR: gh pr checkout 10778
使用するファイル: python/samples/concepts/mcp/mcp_connector.py

# Copyright (c) Microsoft. All rights reserved.

import asyncio
from typing import TYPE_CHECKING

from mcp.client.stdio import StdioServerParameters

from samples.concepts.setup.chat_completion_services import Services, get_chat_completion_service_and_request_settings
from semantic_kernel import Kernel
from semantic_kernel.connectors.ai.function_choice_behavior import FunctionChoiceBehavior
from semantic_kernel.connectors.mcp.mcp_server_settings import MCPStdioServerSettings
from semantic_kernel.contents import ChatHistory
from semantic_kernel.functions import KernelArguments, KernelPlugin

if TYPE_CHECKING:
    pass

#####################################################################
# This sample demonstrates how to build a conversational chatbot    #
# using Semantic Kernel, featuring auto function calling,           #
# non-streaming responses, and tools from an MCP server plugin.     #
# The chatbot is designed to interact with the user, call functions #
# as needed, and return responses.                                  #
#####################################################################

# System message defining the behavior and persona of the chat bot.
# It is added to the chat history below as the system message.
system_message = """
You are a chat bot. Your name is Mosscap and
you have one goal: figure out what people need.
Your full name, should you need to know it, is
Splendid Speckled Mosscap. You communicate
effectively, but you tend to answer with long
flowery prose. You are also a math wizard,
especially for adding and subtracting.
You also excel at joke telling, where your tone is often sarcastic.
Once you have the answer I am looking for,
you will return a full answer to me as soon as possible.
"""

# Create and configure the kernel.
# This module-level kernel is shared by chat() and main() below.
kernel = Kernel()

# Define a chat function (a template for how to handle user input).
# The prompt is the chat history followed directly by the latest user input.
chat_function = kernel.add_function(
    prompt="{{$chat_history}}{{$user_input}}",
    plugin_name="ChatBot",
    function_name="Chat",
)

# You can select from the following chat completion services that support function calling:
# - Services.OPENAI
# - Services.AZURE_OPENAI
# - Services.AZURE_AI_INFERENCE
# - Services.ANTHROPIC
# - Services.BEDROCK
# - Services.GOOGLE_AI
# - Services.MISTRAL_AI
# - Services.OLLAMA
# - Services.ONNX
# - Services.VERTEX_AI
# - Services.DEEPSEEK
# Please make sure you have configured your environment correctly for the selected chat completion service.
chat_completion_service, request_settings = get_chat_completion_service_and_request_settings(Services.AZURE_OPENAI)

# Configure the function choice behavior. Here, we set it to Auto, where auto_invoke=True by default.
# With `auto_invoke=True`, the model will automatically choose and call functions as needed.
# The filter excludes the "ChatBot" plugin, i.e. the plugin of the chat function registered above.
request_settings.function_choice_behavior = FunctionChoiceBehavior.Auto(filters={"excluded_plugins": ["ChatBot"]})

# Register the selected chat completion service on the kernel.
kernel.add_service(chat_completion_service)

# Pass the request settings to the kernel arguments.
# chat() adds the per-turn "user_input" and "chat_history" entries to this object.
arguments = KernelArguments(settings=request_settings)

# Create a chat history to store the system message, initial messages, and the conversation.
history = ChatHistory()
history.add_system_message(system_message)


async def chat() -> bool:
    """
    Run one turn of the conversation: read a line from the user, invoke the
    chat function and print the assistant's response.

    Returns False when the user types 'exit' or interrupts the prompt
    (KeyboardInterrupt / EOF); returns True otherwise so the caller can
    keep looping.
    """
    try:
        typed = input("User:> ")
    except (KeyboardInterrupt, EOFError):
        typed = None

    # An interrupted prompt and an explicit "exit" end the session the same way.
    if typed is None or typed.lower().strip() == "exit":
        print("\n\nExiting chat...")
        return False

    arguments["user_input"] = typed
    arguments["chat_history"] = history

    # Non-streaming invocation of the chat function.
    answer = await kernel.invoke(chat_function, arguments=arguments)
    if not answer:
        return True

    print(f"Mosscap:> {answer}")
    # Record both sides of the turn; the first value of the result is stored
    # so the full context of the response is captured.
    history.add_user_message(typed)
    history.add_message(answer.value[0])
    return True


async def main() -> None:
    """Register the filesystem MCP server as a kernel plugin and chat until the user exits."""
    # The third entry of ``args`` is the path the server is allowed to access.
    server_params = StdioServerParameters(
        command="npx",
        args=["-y", "@modelcontextprotocol/server-filesystem", "~/Desktop"],
        env=None,
    )
    mcp_settings = MCPStdioServerSettings(server_params=server_params)

    mcp_plugin = await KernelPlugin.from_mcp_server(plugin_name="MCPEverything", settings=mcp_settings)
    kernel.add_plugin(mcp_plugin)

    print("Welcome to the chat bot!\n  Type 'exit' to exit.")
    # chat() returns False once the user asks to leave.
    while await chat():
        pass
    del mcp_settings


# Run the async entry point only when this file is executed as a script.
if __name__ == "__main__":
    asyncio.run(main())

実行すると、ローカルのファイルシステムを確認して回答していることが分かります。

uv run ./samples/concepts/mcp/mcp_connector.py
Secure MCP Filesystem Server running on stdio
Allowed directories: [ '~/Desktop' ]

User:> ここには何が保存されていますか？
Mosscap:> おっと、魅惑的なる貴方のデスクトップには、いくつかのファイルが眠っているようです。そこには以下のものが保存されています:

1. **.DS_Store** - これは、Mac OSがディレクトリのカスタム表示設定を保存するために使用する隠しファイルです。
2. **.localized** - またもや、システムによって管理される隠しファイルで、地域化の設定を保持します。
3. **スクリーンショット 2025-03-21 20.04.20.png** - あなたが撮影した、もしくは保存したスクリーンショットの一つで、時の流れを記録しています。
4. **スクリーンショット 2025-03-21 20.04.20のコピー.png** - おそらく、前述のスクリーンショットのコピーですね、二つの運命を持つファイルです。

これらが、貴方のデスクトップで穏やかに静まっている存在たちでございます。何か特別なことを始めようとしているのでしょうか？それとも、これらのファイルについて詳しく知りたいですか？

例えば playwright mcp に接続する場合は以下の手順で利用できます。

playwright を起動します

npx playwright run-server

環境変数に playwright のサーバー情報を登録します

export PLAYWRIGHT_WS_ENDPOINT="ws://localhost:64709/"

サンプルコードを以下のように修正します

async def main() -> None:
    """Register the Playwright MCP server as a kernel plugin and chat until the user exits."""
    # This is the part changed from the filesystem sample: the command line
    # now launches the Playwright MCP server.
    server_params = StdioServerParameters(
        command="npx",
        args=["-y", "@playwright/mcp@latest", "--vision"],
    )
    mcp_settings = MCPStdioServerSettings(server_params=server_params)

    mcp_plugin = await KernelPlugin.from_mcp_server(plugin_name="MCPEverything", settings=mcp_settings)
    kernel.add_plugin(mcp_plugin)

    print("Welcome to the chat bot!\n  Type 'exit' to exit.")
    # chat() returns False once the user asks to leave.
    while await chat():
        pass
    del mcp_settings

Speech to Text

Semantic Kernel を使用して、Whisper による Speech to text を行う Python コード例です。 wav を直接取り込んでも良いですが、 pyaudio を用いて録音する部分も記載しています。

app.stt.py

import asyncio
import os
import tempfile
import wave

import pyaudio  # type: ignore
from semantic_kernel import Kernel
from semantic_kernel.connectors.ai.open_ai import AzureAudioToText
from semantic_kernel.contents import AudioContent

from app.libs.config import app_settings


async def main() -> None:
    """Record five seconds of audio and print the text returned by the audio-to-text service.

    The temporary WAV file produced by the recording is removed afterwards,
    whether or not the transcription succeeds.

    Raises:
        Exception: If the registered service is not an ``AzureAudioToText``.
    """
    audio_service_id = f"{app_settings.SERVICE_ID}_audio"

    # Register the audio-to-text connector (deployment "whisper") on a fresh kernel.
    kernel = Kernel()
    whisper_service = AzureAudioToText(
        service_id=audio_service_id,
        deployment_name="whisper",
        api_key=app_settings.OPENAI_COMPLETION_API_KEY,
        endpoint=app_settings.OPENAI_COMPLETION_ENDPOINT,
    )
    kernel.add_service(service=whisper_service)

    stt_service = kernel.get_service(service_id=audio_service_id)
    if not isinstance(stt_service, AzureAudioToText):
        raise Exception("Invalid Value")

    wav_file_path: str = record_audio_to_tempfile(5)
    try:
        audio_content = AudioContent.from_audio_file(path=wav_file_path)
        transcript = await stt_service.get_text_content(audio_content)
        print(transcript)
    finally:
        # Always delete the recording, even when transcription fails.
        print(f"一時ファイル '{wav_file_path}' を削除します...")
        os.remove(wav_file_path)


def record_audio_to_tempfile(duration: int) -> str:
    """Record mono 16-bit audio with PyAudio and save it to a temporary WAV file.

    Args:
        duration: Recording length in seconds.

    Returns:
        Path of the temporary ``.wav`` file. The file is created with
        ``delete=False``, so the caller is responsible for removing it.

    Any exception raised while recording or writing propagates to the caller
    after the audio resources have been released; a temporary file that could
    not be written completely is removed first.
    """

    FORMAT = pyaudio.paInt16  # 16-bit samples
    CHANNELS = 1  # mono
    RATE = 44100  # sample rate (44.1 kHz)
    CHUNK = 1024  # frames read per buffer

    audio = pyaudio.PyAudio()
    try:
        sample_width = audio.get_sample_size(format=FORMAT)
        stream = audio.open(format=FORMAT, channels=CHANNELS, rate=RATE, input=True, frames_per_buffer=CHUNK)
        print("録音開始...")
        try:
            frames = [stream.read(num_frames=CHUNK) for _ in range(int(RATE / CHUNK * duration))]
        finally:
            print("録音終了。")
            stream.stop_stream()
            stream.close()
    finally:
        # Runs even when opening the stream fails, so PyAudio is always terminated.
        audio.terminate()

    # Reserve a path and close the handle before reopening it by name: a
    # NamedTemporaryFile cannot be opened a second time on Windows while the
    # original handle is still open.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as temp_file:
        wav_path = temp_file.name

    try:
        with wave.open(f=wav_path, mode="wb") as wf:
            wf.setnchannels(nchannels=CHANNELS)
            wf.setsampwidth(sampwidth=sample_width)
            wf.setframerate(framerate=RATE)
            wf.writeframes(data=b"".join(frames))
    except BaseException:
        # Do not leave a partial file behind; nobody else knows its path yet.
        os.remove(wav_path)
        raise

    return wav_path


# Run the async entry point only when this file is executed as a script.
if __name__ == "__main__":
    asyncio.run(main())

Multi Agent

複数の Agent を定義してグループチャットで自動的に会話し、結論を返却できます。

from pydantic_core import Url
from semantic_kernel import Kernel
from semantic_kernel.agents import Agent, AgentGroupChat
from semantic_kernel.agents.open_ai import AzureAssistantAgent
from semantic_kernel.agents.strategies.termination.termination_strategy import TerminationStrategy
from semantic_kernel.contents import AuthorRole, ChatMessageContent

from app.libs.config import app_settings

# Custom termination logic
class ApprovalTerminationStrategy(TerminationStrategy):
    """Termination strategy whose own check never asks for the chat to stop."""

    async def should_agent_terminate(self, agent: Agent, history: list[ChatMessageContent]) -> bool:
        # Always answers "do not terminate"; ``agent`` and ``history`` are not inspected.
        # NOTE(review): presumably the ``maximum_iterations`` value passed where the
        # strategy is constructed is what ends the chat - confirm against the library.
        return False

# Agent 切り替えロジックをカスタムする場合
# from semantic_kernel.agents.strategies.selection.selection_strategy import SelectionStrategy
# class ApprovalSelectionStrategy(SelectionStrategy):
#     async def next(self, agents: list[Agent], history: list[ChatMessageContent]) -> Agent:
#         return await super().next(agents, history)


async def main() -> None:
    """Run a short group chat between two assistant agents on the topic "banana".

    Two Azure OpenAI assistants are created (a "boke" and a "tsukkomi" comedy
    role), placed in an ``AgentGroupChat`` and invoked; every message produced
    by the chat is printed. The assistants created here are deleted again
    before returning, even when the chat fails.
    """
    kernel = Kernel()
    # Agents created by this run; used for cleanup in the ``finally`` block.
    created_agents: list[AzureAssistantAgent] = []

    try:
        agent_a = await AzureAssistantAgent.create(
            # id="asst_********************", # when connecting to an existing agent
            service_id=app_settings.SERVICE_ID + "_a",
            kernel=kernel,
            api_key=app_settings.OPENAI_COMPLETION_API_KEY,
            deployment_name=app_settings.OPENAI_COMPLETION_DEPLOYMENT_NAME,
            api_version="2024-05-01-preview",
            endpoint=Url(app_settings.OPENAI_COMPLETION_ENDPOINT),
            enable_code_interpreter=False,
            enable_file_search=False,
            instructions="あなたは漫才師であり、ボケ担当です。ボケてください。ツッコミ担当から返事をします。",
            max_completion_tokens=200,
        )
        created_agents.append(agent_a)

        agent_b = await AzureAssistantAgent.create(
            # id="asst_********************", # when connecting to an existing agent
            service_id=app_settings.SERVICE_ID + "_b",
            kernel=kernel,
            api_key=app_settings.OPENAI_COMPLETION_API_KEY,
            deployment_name=app_settings.OPENAI_COMPLETION_DEPLOYMENT_NAME,
            api_version="2024-05-01-preview",
            endpoint=Url(app_settings.OPENAI_COMPLETION_ENDPOINT),
            enable_code_interpreter=False,
            enable_file_search=False,
            instructions="あなたは漫才師であり、ツッコミ担当です。雑談をしつつ、ボケが来たら突っ込みます。",
            max_completion_tokens=200,
        )
        created_agents.append(agent_b)

        group_chat = AgentGroupChat(
            agents=[agent_a, agent_b],
            termination_strategy=ApprovalTerminationStrategy(maximum_iterations=3, automatic_reset=False),
        )
        await group_chat.add_chat_message(ChatMessageContent(role=AuthorRole.USER, content="お題は「バナナ」です。"))

        async for message_content in group_chat.invoke():
            print(">>", message_content.content)
    finally:
        # Delete the assistants created above so repeated runs do not pile up
        # assistants on the service.
        # NOTE: if you enable the ``id=`` argument to attach to an existing
        # assistant you want to keep, remove this cleanup.
        for agent in created_agents:
            await agent.delete()


# Run the async entry point only when this file is executed as a script.
if __name__ == "__main__":
    # asyncio is imported locally; within this snippet only the entry point uses it.
    import asyncio

    asyncio.run(main())

Azure AI Inference

ChatCompletion

Semantic Kernel で Azure AI Inference SDK を使用するように変更することで、Azure AI Service も統合可能です。事前に pip install "semantic-kernel[azure]" を実行してください。

from aiohttp import ClientSession
from azure.ai.inference.aio import ChatCompletionsClient
from azure.core.credentials import AzureKeyCredential
from semantic_kernel import Kernel
from semantic_kernel.connectors.ai.azure_ai_inference import AzureAIInferenceChatCompletion
from semantic_kernel.contents import ChatHistory

from config import app_settings


async def main() -> None:
    """Register two Azure AI Inference chat services and query the "openai" one.

    The first service wraps an explicitly constructed ``ChatCompletionsClient``
    pointing at an Azure OpenAI deployment; the second is configured from a
    model id, API key and endpoint. A single user message is sent to the
    service registered as ``"openai"`` and the reply is printed.

    The aiohttp session is closed on every exit path, including errors.

    Raises:
        ValueError: If the service registered as ``"openai"`` is not an
            ``AzureAIInferenceChatCompletion``.
    """
    # ``async with`` guarantees the session is closed even when an exception
    # is raised further down.
    async with ClientSession() as session:
        client = ChatCompletionsClient(
            endpoint=f"{app_settings.OPENAI_COMPLETION_ENDPOINT.strip('/')}/openai/deployments/{app_settings.OPENAI_COMPLETION_DEPLOYMENT_NAME}",
            credential=AzureKeyCredential(key=app_settings.OPENAI_COMPLETION_API_KEY),
            session=session,
        )

        kernel = Kernel()
        kernel.add_service(
            # When using a model hosted on Azure OpenAI Service
            service=AzureAIInferenceChatCompletion(
                ai_model_id=app_settings.OPENAI_COMPLETION_DEPLOYMENT_NAME,
                service_id="openai",
                client=client,
            )
        )
        # NOTE(review): the AZURE_AI_* attributes below must be declared on the
        # settings object imported from ``config`` - confirm they exist there.
        kernel.add_service(
            # When using a model hosted on Azure AI Service
            service=AzureAIInferenceChatCompletion(
                ai_model_id=app_settings.AZURE_AI_MODEL_ID,
                api_key=app_settings.AZURE_AI_APIKEY,
                endpoint=app_settings.AZURE_AI_MODEL_ENDPOINT,
                service_id="deepseek",
            )
        )

        # From here on: inference with the service registered as "openai".
        history = ChatHistory()
        service = kernel.get_service(service_id="openai")
        settings = kernel.get_prompt_execution_settings_from_service_id(service_id="openai")

        if not isinstance(service, AzureAIInferenceChatCompletion):
            raise ValueError("Service is not AzureAIInferenceChatCompletion")

        history.add_user_message("Hello, how are you?")
        reply = await service.get_chat_message_content(chat_history=history, settings=settings)

        if reply:
            print(reply.content)


# Run the async entry point only when this file is executed as a script.
if __name__ == "__main__":
    # asyncio is imported locally; within this snippet only the entry point uses it.
    import asyncio

    asyncio.run(main())

... 以降工事中 - 順次記載 ...

You get articles that match your needs
You can efficiently read back useful information
You can use dark theme

What you can do with signing up