tiktoken完全ガイド - OpenAI APIのトークン管理を極める

Posted at 2025-06-04

はじめに

OpenAI APIを使用する際、トークン数の管理は避けて通れない重要な要素です。
APIの料金はトークン数に基づいて計算され、また各モデルには最大トークン数の制限があります。

この記事では、OpenAIが公式に提供しているtiktokenライブラリを使って、
効率的にトークンを管理する方法を解説します。

tiktokenとは？

tiktokenは、OpenAIが開発したBPE（Byte Pair Encoding）トークナイザーのPythonライブラリです。GPTモデルが実際に使用しているものと同じトークナイザーであるため、正確なトークン数を計算できます。

主な特徴

🚀 高速: コア部分がRustで実装されており、公式READMEによれば同等のオープンソース製トークナイザーと比べて3〜6倍高速
🎯 正確: OpenAIのモデルと同じトークナイザーを使うため、テキストのトークン数を正確に計算可能（チャットAPIではメッセージの書式用トークンが別途加算される点に注意）
🔧 簡単: シンプルなAPIで直感的に使用可能
🌍 多言語対応: 日本語を含む多言語のトークン化に対応

インストール

pip install tiktoken

基本的な使い方

1. トークン数をカウントする

import tiktoken

# Look up the tokenizer that matches the target model.
encoding = tiktoken.encoding_for_model("gpt-4")

# Encode the sample sentence; the token count is the length of the ID list.
text = "こんにちは、世界！今日はいい天気ですね。"
tokens = encoding.encode(text)
token_count = len(tokens)

# Report the input text, its token count, and the raw token IDs.
for report_line in (
    f"テキスト: {text}",
    f"トークン数: {token_count}",
    f"トークン: {tokens}",
):
    print(report_line)

出力:

テキスト: こんにちは、世界！今日はいい天気ですね。
トークン数: 22
トークン: [44293, 3574, 244, 85642, 3574, 244, 162, 97, 222, 9458, 29107, 3574, 227, 29295, 38641, 16616, 3574, 227, 16556, 3574, 223, 1811]

2. トークンをテキストに戻す

# Decode the token IDs back into text. `encoding` and `tokens` come from the
# token-counting snippet earlier in the article; the result is expected to
# match the original input text.
decoded_text = encoding.decode(tokens)
print(f"デコード結果: {decoded_text}")

3. 部分的なデコード

# Decode one token ID on its own. A token is a byte sequence and may end in
# the middle of a multi-byte UTF-8 character (common for Japanese text). In
# that case decode() substitutes U+FFFD for the incomplete bytes, so the raw
# bytes are printed as well to show what the token really contains.
single_token_text = encoding.decode([44293])  # first token ID in the example output above
print(f"単一トークン: {single_token_text}")
print(f"単一トークン（バイト列）: {encoding.decode_single_token_bytes(44293)!r}")

実践的な使用例

1. API料金の事前計算

def calculate_api_cost(text, model="gpt-4", output_ratio=1.5, pricing=None):
    """
    Estimate the API cost of sending ``text`` to ``model``.

    The response length is unknown before the call, so the number of output
    tokens is estimated as ``output_ratio`` times the input token count.

    Args:
        text: Input text.
        model: Model name; it must have an entry in the price table.
        output_ratio: Assumed ratio of output tokens to input tokens.
        pricing: Optional price table mapping a model name to
            ``{"input": usd_per_1k_tokens, "output": usd_per_1k_tokens}``.
            When omitted, a small built-in example table is used.

    Returns:
        dict with the keys ``input_tokens``, ``estimated_output_tokens``,
        ``input_cost``, ``output_cost`` and ``total_cost`` (costs in USD).

    Raises:
        KeyError: If ``model`` has no entry in the price table.
    """
    if pricing is None:
        # Example prices (as of January 2024), in USD per 1K tokens.
        pricing = {
            "gpt-4": {"input": 0.03, "output": 0.06},
            "gpt-3.5-turbo": {"input": 0.001, "output": 0.002},
        }

    # Fail early with a readable message instead of a bare KeyError.
    if model not in pricing:
        raise KeyError(
            f"No pricing data for model {model!r}; known models: {sorted(pricing)}"
        )
    rates = pricing[model]

    encoding = tiktoken.encoding_for_model(model)
    input_tokens = len(encoding.encode(text))

    # Derive the output estimate once so that the reported token count and the
    # reported cost always agree with each other.
    estimated_output_tokens = int(input_tokens * output_ratio)

    input_cost = (input_tokens / 1000) * rates["input"]
    output_cost = (estimated_output_tokens / 1000) * rates["output"]

    return {
        "input_tokens": input_tokens,
        "estimated_output_tokens": estimated_output_tokens,
        "input_cost": input_cost,
        "output_cost": output_cost,
        "total_cost": input_cost + output_cost,
    }

# Example: estimate the cost of a long, repetitive prompt.
long_text = "あなたのプロジェクトの説明..." * 100
cost_estimate = calculate_api_cost(text=long_text, model="gpt-4")
print(f"推定コスト: ${cost_estimate['total_cost']:.4f}")

2. トークン制限に合わせたテキストの切り詰め

def truncate_text_to_token_limit(text, max_tokens=4000, model="gpt-4"):
    """
    Truncate ``text`` so that it keeps at most ``max_tokens`` leading tokens.

    Args:
        text: Input text.
        max_tokens: Maximum number of tokens to keep. A value of zero or less
            yields an empty string.
        model: Model name used to select the encoding.

    Returns:
        ``text`` unchanged when it already fits; otherwise the text decoded
        from the first ``max_tokens`` tokens, with any incomplete trailing
        character removed.
    """
    if max_tokens <= 0:
        # A negative slice bound would drop tokens from the END of the list
        # rather than limit its length, so treat any non-positive budget as
        # "keep nothing".
        return ""

    encoding = tiktoken.encoding_for_model(model)
    tokens = encoding.encode(text)

    if len(tokens) <= max_tokens:
        return text

    # Token boundaries do not always coincide with character boundaries: a
    # multi-byte UTF-8 character (e.g. Japanese) can span several tokens.
    # Decode to bytes and drop an incomplete trailing sequence instead of
    # letting decode() turn it into U+FFFD replacement characters.
    truncated_bytes = encoding.decode_bytes(tokens[:max_tokens])
    return truncated_bytes.decode("utf-8", errors="ignore")

# Example: cut a very long article down to a 1000-token budget.
long_article = "とても長い記事の内容..." * 1000
truncated = truncate_text_to_token_limit(text=long_article, max_tokens=1000)
print(f"切り詰め後の長さ: {len(truncated)}")

3. チャット履歴のトークン管理

def manage_chat_history(messages, max_tokens=4000, model="gpt-4"):
    """
    Keep the most recent chat messages that fit within a token budget.

    Messages are walked from newest to oldest and kept until the next one
    would push the running total above ``max_tokens``; that message and
    everything older is dropped.

    Each message is counted as the tokens of the string ``"role: content"``.
    NOTE(review): this is an approximation; any per-message framing overhead
    added by the chat API is not included -- confirm against the API's
    reported usage if an exact figure is required.

    Args:
        messages: Chat messages, oldest first; each is a dict with ``"role"``
            and ``"content"`` keys.
        max_tokens: Maximum total number of tokens to keep.
        model: Model name used to select the encoding.

    Returns:
        Tuple ``(selected_messages, total_tokens)``: the retained messages in
        their original order, and the sum of their token counts.
    """
    encoding = tiktoken.encoding_for_model(model)

    # Materialize once so any iterable is accepted and can be walked backwards.
    history = list(messages)

    newest_first = []
    total_tokens = 0

    # Encode lazily from the newest message: messages older than the cut-off
    # are never tokenized at all.
    for msg in reversed(history):
        msg_tokens = len(encoding.encode(f"{msg['role']}: {msg['content']}"))
        if total_tokens + msg_tokens > max_tokens:
            break
        newest_first.append(msg)
        total_tokens += msg_tokens

    # Appending and reversing once avoids the O(n^2) cost of insert(0, ...).
    newest_first.reverse()
    return newest_first, total_tokens

# Example: trim a short conversation to a 1000-token budget.
chat_history = [
    {"role": "user", "content": "こんにちは"},
    {"role": "assistant", "content": "こんにちは！何かお手伝いできることはありますか？"},
    {"role": "user", "content": "Pythonについて教えてください"},
    # ... many more messages
]

managed_history, token_count = manage_chat_history(
    messages=chat_history,
    max_tokens=1000,
)
print(f"管理後のメッセージ数: {len(managed_history)}")
print(f"合計トークン数: {token_count}")

4. 日本語と英語のトークン効率比較

def compare_language_efficiency(japanese_text, english_text, model="gpt-4"):
    """
    Print a side-by-side comparison of token efficiency for two texts.

    For each text the character count, the token count and the average
    number of characters per token are printed.

    Args:
        japanese_text: Japanese sample text.
        english_text: English sample text.
        model: Model name used to select the encoding.

    Returns:
        None. The report is written to standard output.
    """
    encoding = tiktoken.encoding_for_model(model)

    # Tokenize both texts before printing anything so that a tokenizer error
    # cannot leave a half-written report behind.
    sections = [
        ("日本語テキスト:", japanese_text, len(encoding.encode(japanese_text))),
        ("\n英語テキスト:", english_text, len(encoding.encode(english_text))),
    ]

    for heading, sample, token_total in sections:
        # Text that produces no tokens (e.g. an empty string) is reported as
        # 0.00 instead of raising ZeroDivisionError.
        chars_per_token = len(sample) / token_total if token_total else 0.0

        print(heading)
        print(f"  文字数: {len(sample)}")
        print(f"  トークン数: {token_total}")
        print(f"  文字/トークン: {chars_per_token:.2f}")

# Example: the same sentence in Japanese and in English.
jp_text = "人工知能は私たちの生活を大きく変えています。"
en_text = "Artificial intelligence is significantly changing our lives."

compare_language_efficiency(japanese_text=jp_text, english_text=en_text)

応用例：RAGシステムでの活用

class TokenAwareRAG:
    """Build the context for a RAG prompt while respecting a token budget."""

    def __init__(self, model="gpt-4", context_token_limit=3000):
        """
        Args:
            model: Model name used to select the tiktoken encoding.
            context_token_limit: Token budget shared by the query and the
                retrieved context.
        """
        self.model = model
        self.encoding = tiktoken.encoding_for_model(model)
        self.context_token_limit = context_token_limit

    def create_context(self, query, retrieved_documents):
        """
        Assemble a context string from retrieved documents within the budget.

        Documents are added in the given order. The first document that does
        not fit is truncated to the remaining budget (only if more than 50
        tokens remain) and processing stops there.

        Args:
            query: The user query; its tokens are deducted from the budget.
            retrieved_documents: Iterable of dicts with a ``"content"`` key.

        Returns:
            Tuple ``(context, token_count)``. ``token_count`` is the sum of
            the token counts of the pieces that were added; the ``"\\n"``
            separators inserted when joining the pieces are not included.
        """
        query_tokens = len(self.encoding.encode(query))
        # Keep 100 tokens of headroom on top of the query itself.
        remaining_tokens = self.context_token_limit - query_tokens - 100

        context_parts = []
        current_tokens = 0

        for doc in retrieved_documents:
            doc_text = f"[文書]: {doc['content']}\n"
            doc_tokens = len(self.encoding.encode(doc_text))

            if current_tokens + doc_tokens <= remaining_tokens:
                context_parts.append(doc_text)
                current_tokens += doc_tokens
                continue

            # The document does not fit as a whole: add a truncated prefix
            # when enough budget is left to make it worthwhile, then stop.
            available_tokens = remaining_tokens - current_tokens
            if available_tokens > 50:
                truncated = self._truncate_to_tokens(doc_text, available_tokens)
                context_parts.append(truncated)
                # Count the truncated piece too; otherwise the reported total
                # understates the size of the returned context.
                current_tokens += len(self.encoding.encode(truncated))
            break

        return "\n".join(context_parts), current_tokens

    def _truncate_to_tokens(self, text, max_tokens):
        """Return ``text`` cut down to at most ``max_tokens`` leading tokens.

        A non-positive ``max_tokens`` yields an empty string. An incomplete
        multi-byte character left at the cut point is dropped.
        """
        if max_tokens <= 0:
            # A negative slice bound would drop tokens from the end instead
            # of limiting the length.
            return ""

        tokens = self.encoding.encode(text)
        if len(tokens) <= max_tokens:
            return text

        # A token boundary may fall inside a multi-byte UTF-8 character;
        # decode to bytes and ignore the incomplete tail rather than letting
        # decode() emit U+FFFD replacement characters.
        truncated_bytes = self.encoding.decode_bytes(tokens[:max_tokens])
        return truncated_bytes.decode("utf-8", errors="ignore")

# Example: build a context from three retrieved documents.
rag = TokenAwareRAG()
documents = [
    {"content": "長い文書1の内容..."},
    {"content": "長い文書2の内容..."},
    {"content": "長い文書3の内容..."},
]

context, token_count = rag.create_context(
    query="質問テキスト",
    retrieved_documents=documents,
)
print(f"コンテキストのトークン数: {token_count}")

各モデルのエンコーディング

# Print every encoding name that tiktoken reports as available.
print("利用可能なエンコーディング:")
for name in tiktoken.list_encoding_names():
    print(f"  - {name}")

# Which encoding each model uses (illustrative subset).
model_encodings = {
    "gpt-4": "cl100k_base",
    "gpt-3.5-turbo": "cl100k_base",
    "text-embedding-ada-002": "cl100k_base",
    "gpt-2": "gpt2",
}

# An encoding can also be requested directly by name instead of by model.
encoding = tiktoken.get_encoding("cl100k_base")

パフォーマンスのヒント

1. エンコーディングの再利用

# ❌ Inefficient: the encoding is looked up again for every single text
def count_tokens_slow(texts):
    """Return the token count of each text, resolving the encoding per item.

    Kept deliberately wasteful as the counter-example to count_tokens_fast.
    NOTE(review): tiktoken appears to cache loaded encodings, so the repeated
    cost may be a lookup rather than a full rebuild -- confirm by measuring.
    """
    return [
        len(tiktoken.encoding_for_model("gpt-4").encode(text))  # resolved on every iteration
        for text in texts
    ]

# ✅ Efficient: resolve the encoding once and reuse it for every text
def count_tokens_fast(texts):
    """Return the token count of each text using a single shared encoding."""
    encode = tiktoken.encoding_for_model("gpt-4").encode  # resolved only once
    return [len(encode(text)) for text in texts]

2. バッチ処理

# Tokenize several texts in one batch call
def batch_tokenize(texts, model="gpt-4", price_per_1k_tokens=0.03):
    """
    Tokenize several texts and summarize each one.

    Args:
        texts: Iterable of strings to tokenize.
        model: Model name used to select the encoding.
        price_per_1k_tokens: Input price in USD per 1K tokens used for the
            cost estimate. The default is the GPT-4 example price; pass the
            matching price when ``model`` is changed.

    Returns:
        List of dicts, one per text, with the keys ``text`` (the text itself,
        or its first 50 characters followed by "..." when longer),
        ``token_count`` and ``estimated_cost``.
    """
    encoding = tiktoken.encoding_for_model(model)

    # encode_batch expects a list, so materialize the iterable once and let
    # tiktoken tokenize all texts in a single batch call.
    text_list = list(texts)
    token_lists = encoding.encode_batch(text_list)

    results = []
    for text, tokens in zip(text_list, token_lists):
        token_count = len(tokens)
        preview = text[:50] + "..." if len(text) > 50 else text
        results.append({
            "text": preview,
            "token_count": token_count,
            "estimated_cost": (token_count / 1000) * price_per_1k_tokens,
        })

    return results

まとめ

tiktokenは、OpenAI APIを使用する際のトークン管理に欠かせないツールです。主な活用場面：

コスト管理: API使用前に料金を正確に見積もり
制限対応: モデルのトークン制限に合わせてテキストを調整
最適化: トークン効率を考慮したプロンプト設計
監視: トークン使用量の追跡とレポート

特にRAGシステムやチャットボットなど、大量のテキストを扱うアプリケーションでは、tiktokenを活用することで、パフォーマンスとコストの両面で大きな改善が期待できます。

参考リンク

You get articles that match your needs
You can efficiently read back useful information
You can use dark theme

What you can do with signing up