ファクトチェックというものをAIモデルで効率的に行う試みです。

Last updated at 2024-11-02 / Posted at 2024-11-02

ファクトチェック。

昨今の情報化社会では、ファクトチェックを必要とする機会がとても多くなっています。
本記事は、ファクトチェックをAIモデルで効率的に行う試みです。

プロンプトの内容が事実であるかどうかを、膨大なテキストデータで訓練されたAIモデルが判定できるのかを検証します。

import http.server
import socketserver
import webbrowser
import os
import json
from transformers import AutoTokenizer, AutoModelForCausalLM

# Load the tokenizer and the causal language model from the same checkpoint.
# The checkpoint id is named once so both loads cannot drift apart.
_MODEL_ID = "Qwen/Qwen2.5-0.5B-Instruct"
tokenizer = AutoTokenizer.from_pretrained(_MODEL_ID, use_fast=False)
model = AutoModelForCausalLM.from_pretrained(_MODEL_ID)

# Text generation helper
def generate_text(prompt, max_length=350, num_return_sequences=1):
    """Generate text from *prompt* with the module-level tokenizer and model.

    Args:
        prompt: Text that is tokenized and passed to ``model.generate``.
        max_length: Forwarded to ``model.generate`` as ``max_length``. Per the
            transformers documentation this bound counts the prompt tokens as
            well as the newly generated ones.
        num_return_sequences: Forwarded to ``model.generate``; number of
            sequences to return.

    Returns:
        A list with one decoded string per returned sequence. Each output
        sequence is decoded in full with special tokens skipped; for a causal
        LM the output sequence starts with the prompt tokens, so the prompt
        text is part of every returned string.
    """
    # Call the tokenizer itself (rather than ``encode``) so the attention mask
    # is produced too; passing it explicitly avoids the "attention mask was
    # not set" warning and any ambiguity between padding and real tokens.
    encoded = tokenizer(prompt, return_tensors='pt')
    outputs = model.generate(
        encoded['input_ids'],
        attention_mask=encoded['attention_mask'],
        max_length=max_length,
        num_return_sequences=num_return_sequences,
        no_repeat_ngram_size=2,
        # top_k / top_p / temperature only take effect in sampling mode, so
        # request it explicitly instead of relying on the checkpoint's
        # default generation config.
        do_sample=True,
        top_k=50,
        top_p=0.95,
        temperature=0.7,
    )
    return [tokenizer.decode(output, skip_special_tokens=True) for output in outputs]

# Fact-check helpers
def _strip_echoed_prompt(generated, full_prompt):
    """Return *generated* without the leading copy of the prompt, if present."""
    if generated.startswith(full_prompt):
        return generated[len(full_prompt):]
    # Decoding may not reproduce the prompt character-for-character; fall back
    # to cutting right after the fixed instruction sentence when it is found.
    _, found, tail = generated.partition("と答えてください:")
    return tail if found else generated


def check_fact(prompt):
    """Ask the model whether *prompt* is factual and interpret its reply.

    Args:
        prompt: The statement to be checked.

    Returns:
        A tuple ``(is_fact, reason)``: ``is_fact`` is ``True`` only when the
        model's reply contains "事実" without a negated form; ``reason`` is a
        user-facing Japanese explanation of the verdict.
    """
    # Build the instruction that asks the model for a verdict.
    fact_check_prompt = f"以下のテキストが事実かどうかを確認し、「事実」または「事実ではない」と答えてください: {prompt}"

    fact_check_result = generate_text(fact_check_prompt, max_length=350, num_return_sequences=1)[0]

    # The generated text echoes the prompt, and the instruction itself contains
    # both "事実" and "事実ではない". Scanning the whole string therefore always
    # finds the negative marker, so only the part after the prompt is examined.
    answer = _strip_echoed_prompt(fact_check_result, fact_check_prompt)

    # Negated forms must be tested first because they contain "事実" too.
    negative_markers = ("事実ではない", "事実ではありません")
    if any(marker in answer for marker in negative_markers):
        return False, "事実ではない可能性があります。"
    if "事実" in answer:
        return True, "事実として認識されました。"
    # No verdict keyword at all: treat as unverified.
    return False, "事実確認ができませんでした。事実ではない可能性があります。"

# HTML page for the chat UI. User input and model output are inserted as text
# nodes (never parsed as markup) so characters such as "<" are displayed
# literally and cannot inject HTML or scripts into the page.
html_content = '''
<!DOCTYPE html>
<html lang="ja">
<head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>Chat with GPT-Qwen2.5-0.5B-Instruct</title>
    <style>
        body { font-family: Arial, sans-serif; background-color: #f4f4f4; }
        .container { width: 80%; margin: auto; overflow: hidden; }
        header { background: blue; color: white; padding: 10px 0; text-align: center; }
        #chat-box { background: white; border: 1px solid #ddd; padding: 10px; margin-top: 20px; max-height: 400px; overflow-y: auto; }
        #chat-input { width: 100%; padding: 10px; margin-top: 10px; box-sizing: border-box; }
        button { padding: 10px; background: blue; color: white; border: none; cursor: pointer; margin-top: 10px; }
    </style>
</head>
<body>
    <header>
        <h1>Chat with GPT-Qwen2.5-0.5B-Instruct</h1>
    </header>
    <div class="container">
        <div id="chat-box"></div>
        <input type="text" id="chat-input" placeholder="メッセージを入力してください...">
        <button onclick="sendMessage()">送信</button>
    </div>

    <script>
        // Append a bold label followed by plain text to `parent`. The text is
        // added as a text node, so any markup in it is shown literally.
        function appendLabeled(parent, label, text) {
            const strong = document.createElement('strong');
            strong.textContent = label;
            parent.appendChild(strong);
            parent.appendChild(document.createTextNode(' ' + text + ' '));
        }

        async function sendMessage() {
            const inputElement = document.getElementById('chat-input');
            const chatBox = document.getElementById('chat-box');
            const prompt = inputElement.value;

            if (prompt.trim() === '') return;

            // Display user's prompt
            const userMessage = document.createElement('div');
            appendLabeled(userMessage, 'あなた:', prompt);
            chatBox.appendChild(userMessage);

            // Send prompt to server and get generated text
            const response = await fetch('http://localhost:8000', {
                method: 'POST',
                headers: { 'Content-Type': 'application/json' },
                body: JSON.stringify({ prompt: prompt })
            });
            const data = await response.json();
            const generatedText = data.generated_text;
            const isFact = data.is_fact ? "事実" : "事実ではない";
            const reason = data.reason;
            const reportMessage = data.report ? "（通報が必要です）" : "";

            // Display the model's response
            const botMessage = document.createElement('div');
            appendLabeled(botMessage, 'Qwen GPT:', generatedText);
            botMessage.appendChild(document.createElement('br'));
            appendLabeled(botMessage, '判定:', isFact + ' ' + reportMessage);
            botMessage.appendChild(document.createElement('br'));
            appendLabeled(botMessage, '理由:', reason);
            chatBox.appendChild(botMessage);

            // Clear the input
            inputElement.value = '';
            chatBox.scrollTop = chatBox.scrollHeight;
        }
    </script>
</body>
</html>
'''

# HTTP server settings: the directory captured from the working directory at
# import time, and the TCP port the server listens on.
DIRECTORY = os.getcwd()
PORT = 8000

class Handler(http.server.SimpleHTTPRequestHandler):
    """Serve static files and answer fact-check requests sent as JSON.

    GET requests are handled by ``SimpleHTTPRequestHandler``. POST requests
    must carry a JSON object with a string ``prompt``; the reply is a JSON
    object with ``generated_text``, ``is_fact``, ``report`` and ``reason``.
    """

    def do_POST(self):
        """Run generation and fact-checking for the posted prompt.

        Replies with 400 when the request body cannot be parsed or has no
        string ``prompt``, with 500 when generation fails, and with 200 and
        the result payload otherwise.
        """
        try:
            content_length = int(self.headers.get('Content-Length', 0))
            if content_length < 0:
                # read(-1) would block until the client closes the socket.
                raise ValueError('negative Content-Length')
            post_data = self.rfile.read(content_length)
            data = json.loads(post_data.decode('utf-8'))
            prompt = data['prompt']
            if not isinstance(prompt, str):
                raise TypeError('prompt must be a string')
        except (KeyError, TypeError, ValueError) as exc:
            # Malformed requests get an explicit answer instead of a dropped
            # connection caused by an unhandled exception.
            self._send_json(400, {'error': f'invalid request: {exc}'})
            return

        try:
            generated_text = generate_text(prompt, max_length=350, num_return_sequences=1)[0]
            # Run the fact check on the same prompt.
            is_fact, reason = check_fact(prompt)
        except Exception as exc:  # request boundary: log and report, keep serving
            self.log_error("text generation failed: %r", exc)
            self._send_json(500, {'error': 'text generation failed'})
            return

        self._send_json(200, {
            'generated_text': generated_text,
            'is_fact': is_fact,
            # Flag the prompt for reporting whenever it was not judged factual.
            'report': not is_fact,
            'reason': reason,
        })

    def _send_json(self, status, payload):
        """Send *payload* as a JSON response with the given HTTP status."""
        body = json.dumps(payload).encode('utf-8')
        self.send_response(status)
        self.send_header('Content-type', 'application/json')
        self.send_header('Content-Length', str(len(body)))
        # The CORS header is added once, in end_headers(); sending it here as
        # well would duplicate it, which browsers reject.
        self.end_headers()
        self.wfile.write(body)

    def end_headers(self):
        """Add the CORS header to every response before closing the headers."""
        self.send_header('Access-Control-Allow-Origin', '*')
        super().end_headers()

def start_server():
    """Write the chat page to disk, start the HTTP server and open the page.

    The function blocks in ``serve_forever`` until the process is
    interrupted; Ctrl+C shuts the server down without a traceback.
    """
    # Switch to the served directory before writing the page, so the file is
    # created in the directory the request handler serves files from.
    os.chdir(DIRECTORY)

    # Write the chat page so it can be fetched as a static file.
    html_file = 'chat.html'
    with open(html_file, 'w', encoding='utf-8') as file:
        file.write(html_content)

    class _ReusableTCPServer(socketserver.TCPServer):
        # Allow rebinding the port right after a restart instead of failing
        # with "Address already in use" while old sockets linger.
        allow_reuse_address = True

    with _ReusableTCPServer(("", PORT), Handler) as httpd:
        print(f"Serving at http://localhost:{PORT}")
        # The socket is already listening at this point, so the browser's
        # first request is queued until serve_forever() picks it up.
        webbrowser.open(f'http://localhost:{PORT}/{html_file}')
        try:
            httpd.serve_forever()
        except KeyboardInterrupt:
            # Ctrl+C is the expected way to stop the server; the ``with``
            # block closes the listening socket.
            pass

# Start the server only when this file is run as a script, not when imported.
if __name__ == "__main__":
    start_server()

「本記事は、技術的な視点から情報を提供することを目的としております。内容については可能な限り正確性を期しておりますが、記事内の見解は執筆者の意見や理解に基づいており、すべての方にとって普遍的な結論を示すものではありません。技術の分野は常に進化し、新たな知見が追加されることもあります。ご意見がある場合には、建設的な対話を歓迎いたしますが、批判的な意見を展開する際も、お互いの尊重を大切にしたコミュニケーションを心がけていただけると幸いです。」

You get articles that match your needs
You can efficiently read back useful information
You can use dark theme

What you can do with signing up