【実践】JupyterNotebookでAgentic RAGの中身をゴリゴリ作ってみる

Posted at 2025-12-01

【実践編】JupyterNotebookで作るAgentic RAG

今日伝えたいこと

Agentic RAGをライブラリ少な目でゴリゴリ実装する
Query Planning → Tool Use → Self-Reflectionの順で段階的に構築
最後に従来RAGと性能比較してみる

はじめに

前回の記事でAgentic RAGの概念を勉強したミンミン。

今回はJupyterNotebookで実際に実装してみます。

コードを動かしながら、中身を自分の手で作ってみたいと思います。

環境構築

必要なライブラリ

# セル1: ライブラリのインストール
import sys
!{sys.executable} -m pip install langchain langchain-openai langchain-community langgraph chromadb tenacity matplotlib

APIキーの設定

# セル2: 環境変数の設定
import os
import getpass

# Prompt when the key is missing *or* blank: an empty or whitespace-only
# OPENAI_API_KEY would otherwise be treated as already configured.
if not os.environ.get("OPENAI_API_KEY", "").strip():
    os.environ["OPENAI_API_KEY"] = getpass.getpass("OpenAI APIキーを入力してください: ")

print("APIキーが設定されました")

基本インポート

# セル3: 基本インポート
import ast
import json
import operator
import re
from typing import Any, Callable, Dict, List, Optional

from langchain_core.documents import Document
from langchain_core.prompts import ChatPromptTemplate
from langchain_openai import ChatOpenAI, OpenAIEmbeddings

# Initialize the chat model (gpt-4o, temperature 0) that is passed to the
# RAG components constructed in the following cells.
llm = ChatOpenAI(model="gpt-4o", temperature=0)
print("セットアップ完了！")

Step1: 従来RAGを実装する（比較用）

まずは従来RAGを作って、後でAgentic RAGと比較しますね。

ベクトルDBの準備

# セル4: サンプルデータとベクトルDBの準備
from langchain_community.vectorstores import Chroma

# Sample corpus as (text, source tag) pairs; replace with your own data in a
# real project.
sample_documents = [
    Document(page_content=text, metadata={"source": source_tag})
    for text, source_tag in (
        ("Python は汎用プログラミング言語です。機械学習やWeb開発に広く使われています。", "programming"),
        ("RAGはRetrieval-Augmented Generationの略で、検索と生成を組み合わせた手法です。", "ai"),
        ("LangChainはLLMアプリケーション開発のためのフレームワークです。", "ai"),
        ("ベクトルデータベースは、埋め込みベクトルを効率的に検索するためのDBです。", "ai"),
        ("GPT-4oはOpenAIが開発した最新のマルチモーダルLLMです。", "ai"),
    )
]

# Embed the corpus into a Chroma store and expose a retriever that returns
# the top 3 matches per query.
embeddings = OpenAIEmbeddings()
vectorstore = Chroma.from_documents(sample_documents, embeddings)
retriever = vectorstore.as_retriever(search_kwargs=dict(k=3))

print(f"ベクトルDB作成完了！ドキュメント数: {len(sample_documents)}")

従来RAGの実装

# セル5: 従来RAGの実装
class TraditionalRAG:
    """Baseline single-pass RAG: retrieve once, then answer from the hits."""

    # Prompt text; {context} and {question} are filled in on every call.
    _PROMPT_TEXT = """
        以下のコンテキストを参考に質問に答えてください。

        コンテキスト：
        {context}

        質問：{question}

        回答：
        """

    def __init__(self, retriever, llm):
        self.llm = llm
        self.retriever = retriever

    def query(self, question: str) -> Dict[str, Any]:
        """Answer ``question`` from a single retrieval pass.

        Returns a dict with the generated ``answer``, the text of each
        retrieved document (``retrieved_docs``) and ``retrieval_count``,
        which is always 1 for this baseline.
        """
        # 1. Retrieve and keep only the text of each hit.
        hits = self.retriever.invoke(question)
        passages = [hit.page_content for hit in hits]

        # 2. Render the prompt with the joined passages as context.
        template = ChatPromptTemplate.from_template(self._PROMPT_TEXT)
        rendered = template.format(context="\n".join(passages), question=question)

        # 3. Generate the answer.
        reply = self.llm.invoke(rendered)

        return {
            "answer": reply.content,
            "retrieved_docs": passages,
            "retrieval_count": 1,
        }

# Smoke test: ask the baseline RAG one question and print its answer
traditional_rag = TraditionalRAG(retriever, llm)
result = traditional_rag.query("RAGとは何ですか？")
print("【従来RAGの回答】")
print(result["answer"])

Step2: Query Plannerを実装する

複雑なクエリを分解する機能を作ってみます。

クエリ複雑度の分析

# セル6: Query Planner の実装
class QueryPlanner:
    """Analyze a query with the LLM and derive a step-by-step execution plan.

    Both public methods ask the LLM for a JSON object and parse the reply
    leniently: a reply that cannot be read as a JSON object yields an empty
    result instead of raising.
    """

    def __init__(self, llm):
        self.llm = llm

    def analyze_complexity(self, query: str) -> Dict[str, Any]:
        """Classify how complex ``query`` is.

        Returns the parsed JSON object from the LLM (expected keys:
        ``complexity``, ``requires_multi_step``, ``reasoning``), or ``{}``
        when the reply is not a JSON object.
        """

        prompt = ChatPromptTemplate.from_template("""
        以下のクエリを分析してください。

        クエリ：{query}

        以下のJSON形式で返してください（```jsonなどは不要）：
        {{
            "complexity": "simple" または "moderate" または "complex",
            "requires_multi_step": true または false,
            "reasoning": "判断理由"
        }}
        """)

        response = self.llm.invoke(prompt.format(query=query))
        return self._parse_json(response.content)

    def create_plan(self, query: str) -> List[Dict]:
        """Ask the LLM for an execution plan for ``query``.

        Returns the list of step dicts found under ``"steps"``. Entries that
        are not dicts are dropped so callers can safely use ``step.get``;
        an unusable reply yields ``[]``.
        """

        prompt = ChatPromptTemplate.from_template("""
        以下のクエリに対する実行計画を作成してください。

        クエリ：{query}

        以下のJSON形式で返してください（```jsonなどは不要）：
        {{
            "steps": [
                {{
                    "step_number": 1,
                    "action": "実行内容",
                    "search_query": "検索クエリ"
                }}
            ]
        }}
        """)

        response = self.llm.invoke(prompt.format(query=query))
        steps = self._parse_json(response.content).get("steps", [])
        if not isinstance(steps, list):
            return []
        return [step for step in steps if isinstance(step, dict)]

    def _parse_json(self, text: str) -> Dict:
        """Parse an LLM reply into a dict, returning ``{}`` on any failure.

        Markdown code fences are stripped first. If the remaining text is not
        valid JSON, the span from the first ``{`` to the last ``}`` is tried,
        which recovers objects wrapped in explanatory prose. Valid JSON that
        is not an object (for example a list) also yields ``{}``.
        """
        if not isinstance(text, str):
            return {}

        # Remove ```json / ``` fences together with the whitespace after them.
        cleaned = re.sub(r'```(?:json)?\s*', '', text).strip()

        try:
            parsed = json.loads(cleaned)
        except json.JSONDecodeError:
            start, end = cleaned.find("{"), cleaned.rfind("}")
            if start == -1 or end <= start:
                return {}
            try:
                parsed = json.loads(cleaned[start:end + 1])
            except json.JSONDecodeError:
                return {}

        return parsed if isinstance(parsed, dict) else {}

# Exercise the Query Planner
planner = QueryPlanner(llm)
complex_query = "RAGとLangChainの関係を説明し、実装する際の注意点を教えて"

# Simple query
simple_result = planner.analyze_complexity("Pythonとは？")
print(
    "【単純クエリの分析】",
    json.dumps(simple_result, ensure_ascii=False, indent=2),
    sep="\n",
)

# Complex query
complex_result = planner.analyze_complexity(complex_query)
print(
    "\n【複雑クエリの分析】",
    json.dumps(complex_result, ensure_ascii=False, indent=2),
    sep="\n",
)

# Execution plan for the complex query
plan = planner.create_plan(complex_query)
print("\n【実行計画】")
for step in plan:
    print("  Step {}: {}".format(step.get('step_number'), step.get('action')))

この辺の実行計画の作り方とか、AIエージェントの中身に似てるな～って思いながら書いてました。

Step3: Tool Useを実装する

複数のツールから最適なものを選ぶ機能。

ツールの定義

# セル7: ツールの定義
class ToolRegistry:
    """Registry mapping tool names to the callables that implement them.

    Every tool takes a single string and returns a string, so ``execute``
    can dispatch by name without knowing which tool it is calling.
    """

    # Operators the calculator tool may evaluate. Anything else (names,
    # calls, attribute access, ...) is rejected.
    _BINARY_OPERATORS = {
        ast.Add: operator.add,
        ast.Sub: operator.sub,
        ast.Mult: operator.mul,
        ast.Div: operator.truediv,
        ast.FloorDiv: operator.floordiv,
        ast.Mod: operator.mod,
        ast.Pow: operator.pow,
    }
    _UNARY_OPERATORS = {
        ast.UAdd: operator.pos,
        ast.USub: operator.neg,
    }
    # Cap on |exponent| so inputs such as 9**9**9 cannot exhaust CPU/memory.
    _MAX_EXPONENT = 1000

    def __init__(self, retriever):
        self.retriever = retriever
        self.tools = self._register_tools()

    def _register_tools(self) -> Dict[str, Callable[[str], str]]:
        """Return the name -> callable table of available tools."""
        return {
            "vector_search": self._vector_search,
            "web_search": self._web_search_mock,  # mock implementation
            "calculator": self._calculator,
        }

    def _vector_search(self, query: str) -> str:
        """Search the vector DB and return the hits' text joined by newlines."""
        docs = self.retriever.invoke(query)
        return "\n".join(doc.page_content for doc in docs)

    def _web_search_mock(self, query: str) -> str:
        """Return a placeholder web-search result (no network access)."""
        # A real implementation would call a search API such as Serper.
        return f"[Web検索結果] '{query}' に関する最新情報：（実際の実装ではAPIを使用）"

    def _calculator(self, expression: str) -> str:
        """Evaluate an arithmetic expression and return the formatted result.

        SECURITY: this used to call ``eval()`` on the query text, which can
        execute arbitrary code. The expression is now parsed with ``ast`` and
        only numeric literals combined with ``+ - * / // % **`` and unary
        ``+``/``-`` are evaluated. Names and function calls are no longer
        supported; they produce the same error string as any other failure.
        """
        try:
            tree = ast.parse(expression, mode="eval")
            result = self._evaluate_arithmetic(tree.body)
            return f"計算結果: {result}"
        except (SyntaxError, ValueError, TypeError, ArithmeticError, RecursionError):
            return "計算エラー"

    def _evaluate_arithmetic(self, node):
        """Recursively evaluate an AST node limited to plain arithmetic.

        Raises ``ValueError`` for any node type or operator outside the
        allowed tables, and for exponents larger than ``_MAX_EXPONENT``.
        """
        if isinstance(node, ast.Constant):
            value = node.value
            # bool is a subclass of int; reject it along with str, None, etc.
            if isinstance(value, bool) or not isinstance(value, (int, float)):
                raise ValueError("unsupported literal")
            return value

        if isinstance(node, ast.BinOp):
            apply_op = self._BINARY_OPERATORS.get(type(node.op))
            if apply_op is None:
                raise ValueError("unsupported operator")
            left = self._evaluate_arithmetic(node.left)
            right = self._evaluate_arithmetic(node.right)
            if isinstance(node.op, ast.Pow) and abs(right) > self._MAX_EXPONENT:
                raise ValueError("exponent too large")
            return apply_op(left, right)

        if isinstance(node, ast.UnaryOp):
            apply_op = self._UNARY_OPERATORS.get(type(node.op))
            if apply_op is None:
                raise ValueError("unsupported operator")
            return apply_op(self._evaluate_arithmetic(node.operand))

        raise ValueError("unsupported expression")

    def execute(self, tool_name: str, query: str) -> str:
        """Run the tool registered as ``tool_name`` on ``query``.

        Returns the tool's output, or a "tool not found" message when the
        name is not registered.
        """
        tool = self.tools.get(tool_name)
        if tool is None:
            return "ツールが見つかりません"
        return tool(query)

# Smoke tests: run the vector-search tool and the calculator tool by name
tool_registry = ToolRegistry(retriever)
print("【ベクトル検索テスト】")
print(tool_registry.execute("vector_search", "RAGとは"))
print("\n【計算テスト】")
print(tool_registry.execute("calculator", "100 * 1.08"))

ツール選択ロジック

# セル8: ツール選択ロジック
class ToolSelector:
    """Pick tools for a query by simple keyword matching."""

    # category -> keywords that trigger it and the tool to recommend.
    # Rules are checked in this order, which is also the output order.
    SELECTION_RULES = {
        "realtime": {
            "keywords": ["今", "最新", "今日", "現在", "ニュース"],
            "tool": "web_search"
        },
        "calculation": {
            "keywords": ["計算", "合計", "平均", "%", "円"],
            "tool": "calculator"
        },
        "knowledge": {
            "keywords": ["とは", "説明", "方法", "仕組み"],
            "tool": "vector_search"
        }
    }

    def select(self, query: str) -> List[str]:
        """Return the tools recommended for ``query``.

        A tool is recommended when any of its rule's keywords occurs as a
        substring of ``query``. The result contains each tool once, in rule
        order, so it is deterministic across runs (deduplicating through a
        ``set`` is not). Falls back to ``["vector_search"]`` when no rule
        matches.
        """
        matched = [
            rule["tool"]
            for rule in self.SELECTION_RULES.values()
            if any(keyword in query for keyword in rule["keywords"])
        ]

        # Default to vector search when nothing matched.
        if not matched:
            return ["vector_search"]

        # dict.fromkeys removes duplicates while keeping first-seen order.
        return list(dict.fromkeys(matched))

# Try the selector on three sample queries
selector = ToolSelector()
print("【ツール選択テスト】")
for sample_query in ("RAGとは？", "今日の最新ニュース", "100×1.08を計算して"):
    print(f"'{sample_query}' → {selector.select(sample_query)}")

Step4: Self-Reflectionを実装する

回答の品質を自己評価する機能。

# セル9: Self-Reflection の実装
class AnswerEvaluator:
    """Ask the LLM to grade an answer against the question and its context."""

    # Keys whose values are expected to be scores in the range 0.0-1.0.
    _SCORE_KEYS = ("completeness", "accuracy", "overall_score")

    def __init__(self, llm, max_context_chars: Optional[int] = 500):
        """Store the LLM and the context cut-off.

        ``max_context_chars`` limits how much of the reference context is
        sent to the evaluator; ``None`` sends the context untruncated.
        """
        self.llm = llm
        self.max_context_chars = max_context_chars

    def evaluate(self, query: str, answer: str, context: str) -> Dict[str, Any]:
        """Grade ``answer`` and return the evaluator's verdict as a dict.

        The result normally carries ``completeness``, ``accuracy``,
        ``overall_score``, ``needs_improvement``, ``missing_info`` and
        ``improvement_suggestion``. When the reply cannot be parsed, a
        neutral fallback (``needs_improvement=False``, ``overall_score=0.5``)
        is returned instead.
        """

        prompt = ChatPromptTemplate.from_template("""
        以下の回答を評価してください。

        質問：{query}
        回答：{answer}
        参照情報：{context}

        以下のJSON形式で返してください（```jsonなどは不要）：
        {{
            "completeness": 0.0から1.0の数値,
            "accuracy": 0.0から1.0の数値,
            "overall_score": 0.0から1.0の数値,
            "needs_improvement": true または false,
            "missing_info": "不足している情報があれば記載",
            "improvement_suggestion": "改善提案"
        }}
        """)

        response = self.llm.invoke(prompt.format(
            query=query,
            answer=answer,
            # Slicing with None keeps the whole context.
            context=context[:self.max_context_chars]
        ))

        return self._parse_json(response.content)

    def _parse_json(self, text: str) -> Dict:
        """Parse the evaluator reply, falling back to a neutral verdict.

        Code fences are stripped; if the rest is not valid JSON, the span
        from the first ``{`` to the last ``}`` is tried. Non-object JSON and
        unparsable text both yield the fallback. Parsed values are then
        normalized: scores become floats clamped to 0.0-1.0 (unusable ones
        are dropped) and a "true"/"false" string becomes a bool.
        """
        # Built fresh per call so callers cannot mutate a shared dict.
        fallback = {"needs_improvement": False, "overall_score": 0.5}
        if not isinstance(text, str):
            return fallback

        cleaned = re.sub(r'```(?:json)?\s*', '', text).strip()
        try:
            parsed = json.loads(cleaned)
        except json.JSONDecodeError:
            start, end = cleaned.find("{"), cleaned.rfind("}")
            if start == -1 or end <= start:
                return fallback
            try:
                parsed = json.loads(cleaned[start:end + 1])
            except json.JSONDecodeError:
                return fallback

        if not isinstance(parsed, dict):
            return fallback
        return self._normalize(parsed)

    def _normalize(self, parsed: Dict) -> Dict:
        """Coerce score and flag values into the types callers rely on."""
        for key in self._SCORE_KEYS:
            if key not in parsed:
                continue
            try:
                score = float(parsed[key])
            except (TypeError, ValueError):
                # Drop unusable scores so callers' own defaults apply.
                del parsed[key]
                continue
            parsed[key] = min(1.0, max(0.0, score))

        flag = parsed.get("needs_improvement")
        if isinstance(flag, str):
            lowered = flag.strip().lower()
            if lowered in ("true", "false"):
                parsed["needs_improvement"] = lowered == "true"

        return parsed

# Smoke test: grade a short answer against a one-sentence reference context
evaluator = AnswerEvaluator(llm)

test_evaluation = evaluator.evaluate(
    query="RAGとは何ですか？",
    answer="RAGは検索と生成を組み合わせた手法です。",
    context="RAGはRetrieval-Augmented Generationの略で、検索と生成を組み合わせた手法です。"
)
print("【評価結果】")
print(json.dumps(test_evaluation, ensure_ascii=False, indent=2))

Step5: 統合Agentic RAGを完成させる

3つの機能を統合して、完全なAgentic RAGを作成！

# セル10: 統合Agentic RAG
class AgenticRAG:
    """Agentic RAG combining query planning, tool use and self-reflection.

    ``query`` runs three phases:
      1. Planning: analyze complexity and, for multi-step queries, ask the
         planner for a list of steps.
      2. Retrieval: run every selected tool for every plan step and join
         the outputs into one context string.
      3. Generate/evaluate loop: produce an answer, have it graded, and
         retry with the grader's feedback, for at most ``max_iterations``
         passes.
    """

    # Stop refining once the evaluator's overall score reaches this value.
    SCORE_THRESHOLD = 0.8

    # Answer prompt; {previous_feedback} is empty on the first pass.
    _ANSWER_TEMPLATE = """
            以下のコンテキストを参考に質問に答えてください。

            コンテキスト：
            {context}

            質問：{question}

            {previous_feedback}

            回答：
            """

    def __init__(self, retriever, llm, max_iterations: int = 3):
        self.llm = llm
        self.planner = QueryPlanner(llm)
        self.tool_registry = ToolRegistry(retriever)
        self.tool_selector = ToolSelector()
        self.evaluator = AnswerEvaluator(llm)
        self.max_iterations = max_iterations

    def query(self, question: str) -> Dict[str, Any]:
        """Answer ``question`` using the agentic pipeline.

        Returns a dict with ``answer``, ``complexity``, ``iterations`` (the
        number of generation passes), ``execution_log`` (human-readable
        trace) and ``final_score`` (a float, 0.0 if no score was available).
        With ``max_iterations`` below 1 no answer is generated and
        ``answer`` is the empty string.
        """
        execution_log: List[str] = []

        # Phase 1: Query Planning
        complexity = self.planner.analyze_complexity(question)
        if not isinstance(complexity, dict):
            complexity = {}
        execution_log.append(f"複雑度分析: {complexity.get('complexity', 'unknown')}")
        plan = self._build_plan(question, complexity, execution_log)

        # Phase 2: Tool Use & Retrieval
        selected_tools = self.tool_selector.select(question)
        execution_log.append(f"選択ツール: {selected_tools}")
        context = self._gather_context(plan, selected_tools, question)

        # Phase 3: answer generation + self-reflection loop.
        # The template does not change between passes, so build it once.
        prompt = ChatPromptTemplate.from_template(self._ANSWER_TEMPLATE)
        iteration = 0
        current_answer = ""
        evaluation: Dict[str, Any] = {}

        while iteration < self.max_iterations:
            iteration += 1

            # Feed the previous answer and the grader's remarks back in;
            # without them a retry sees exactly the same prompt as before.
            response = self.llm.invoke(prompt.format(
                context=context,
                question=question,
                previous_feedback=self._build_feedback(current_answer, evaluation),
            ))
            current_answer = response.content

            # Self-Reflection
            evaluation = self.evaluator.evaluate(question, current_answer, context)
            if not isinstance(evaluation, dict):
                evaluation = {}
            score = self._coerce_score(evaluation.get("overall_score", 0))
            execution_log.append(f"Iteration {iteration}: score={score:.2f}")

            # Stop as soon as the answer is judged good enough.
            if not evaluation.get("needs_improvement", True):
                break
            if score >= self.SCORE_THRESHOLD:
                break

        return {
            "answer": current_answer,
            "complexity": complexity.get("complexity", "unknown"),
            "iterations": iteration,
            "execution_log": execution_log,
            "final_score": self._coerce_score(evaluation.get("overall_score", 0)),
        }

    def _build_plan(self, question: str, complexity: Dict[str, Any],
                    execution_log: List[str]) -> List[Dict]:
        """Return the plan steps, falling back to one step on the question."""
        single_step = [{"step_number": 1, "search_query": question}]
        if not complexity.get("requires_multi_step", False):
            return single_step

        plan = self.planner.create_plan(question) or []
        execution_log.append(f"実行計画: {len(plan)}ステップ")
        if not plan:
            # An empty plan would skip retrieval entirely and leave the LLM
            # with no context, so search with the original question instead.
            execution_log.append("実行計画が空のため、元の質問で検索します")
            return single_step
        return plan

    def _gather_context(self, plan: List[Dict], selected_tools: List[str],
                        question: str) -> str:
        """Run every selected tool for every step and join the outputs."""
        results = []
        for step in plan:
            step_query = step.get("search_query") if isinstance(step, dict) else None
            # A missing, null or empty search query falls back to the question.
            search_query = step_query or question
            results.extend(
                self.tool_registry.execute(tool, search_query)
                for tool in selected_tools
            )
        return "\n".join(results)

    @staticmethod
    def _build_feedback(previous_answer: str, evaluation: Dict[str, Any]) -> str:
        """Build the feedback section of the prompt for a retry pass.

        Returns an empty string on the first pass (no previous answer).
        """
        if not previous_answer:
            return ""
        lines = [
            "前回の回答への改善提案を反映してください。",
            f"前回の回答：{previous_answer}",
        ]
        missing_info = evaluation.get("missing_info")
        if missing_info:
            lines.append(f"不足している情報：{missing_info}")
        suggestion = evaluation.get("improvement_suggestion")
        if suggestion:
            lines.append(f"改善提案：{suggestion}")
        return "\n".join(lines)

    @staticmethod
    def _coerce_score(value: Any) -> float:
        """Convert an evaluator score to float, using 0.0 if it is unusable."""
        try:
            return float(value)
        except (TypeError, ValueError):
            return 0.0

# Create the Agentic RAG instance (uses the default max_iterations of 3)
agentic_rag = AgenticRAG(retriever, llm)
print("Agentic RAG 準備完了！")

Step6: 従来RAGとの性能比較

# セル11: 性能比較
def truncate_for_display(text: str, limit: int = 200) -> str:
    """Return ``text`` cut to ``limit`` characters for printing.

    The "..." marker is appended only when something was actually cut off,
    so a short answer is not shown as if it had been truncated.
    """
    if len(text) <= limit:
        return text
    return f"{text[:limit]}..."


def compare_rag_systems(question: str):
    """Run ``question`` through both RAG systems and print the results.

    Uses the module-level ``traditional_rag`` and ``agentic_rag`` instances.
    For the agentic run it also prints the complexity, iteration count,
    final score and execution log.
    """

    print("=" * 60)
    print(f"質問: {question}")
    print("=" * 60)

    # Baseline RAG
    print("\n【従来RAG】")
    trad_result = traditional_rag.query(question)
    print(f"回答: {truncate_for_display(trad_result['answer'])}")
    print(f"検索回数: {trad_result['retrieval_count']}")

    # Agentic RAG
    print("\n【Agentic RAG】")
    agent_result = agentic_rag.query(question)
    print(f"回答: {truncate_for_display(agent_result['answer'])}")
    print(f"複雑度: {agent_result['complexity']}")
    print(f"反復回数: {agent_result['iterations']}")
    print(f"最終スコア: {agent_result['final_score']:.2f}")
    print("実行ログ:")
    for log in agent_result['execution_log']:
        print(f"  - {log}")

# Test 1: a simple question
compare_rag_systems("RAGとは何ですか？")

# Test 2: a complex question
compare_rag_systems("RAGとLangChainの関係を説明し、それぞれの特徴を比較してください")

結果、テスト2では反復回数１回で、５つの実行計画が出力されました。
もうちょいちゃんとドキュメントとかいっぱい入れれば、もっとうまくいくかも？

まとめ

ステップ	実装内容
Step1	従来RAG（比較用）
Step2	Query Planner（クエリ分解）
Step3	Tool Use（ツール選択）
Step4	Self-Reflection（自己評価）
Step5	統合Agentic RAG
Step6	性能比較

実装のポイント

段階的に構築することで理解が深まる
Self-Reflectionは品質向上に効果的
Tool選択はキーワードベースでシンプルに始められる
max_iterationsで無限ループを防止

参考リンク

You get articles that match your needs
You can efficiently read back useful information
You can use dark theme

What you can do with signing up