0
0

Delete article

Deleted articles cannot be recovered.

The draft of this article will also be deleted.

Are you sure you want to delete this article?

RAG-Evaluation-Dataset-JAに関する備忘録

Last updated at Posted at 2025-04-10

ダウンロード

git clone https://huggingface.co/datasets/allganize/RAG-Evaluation-Dataset-JA
# %%
#!pip install pandas

# %%
import pandas as pd

# Load the document index from the cloned dataset repository; the download
# loop further down reads its `url` and `file_name` columns.
df = pd.read_csv(filepath_or_buffer="RAG-Evaluation-Dataset-JA/documents.csv")
df  # last expression of the cell: displays the loaded table in a notebook

# %%
import os
import requests

# Download every source document listed in documents.csv.
#
# Files are stored under the cloned repository's data/ directory because that
# is where the context-extraction cell looks for them
# ("RAG-Evaluation-Dataset-JA/data/<file_name>").
data_dir = "RAG-Evaluation-Dataset-JA/data"
os.makedirs(data_dir, exist_ok=True)  # open() below does not create directories

for index, row in df.iterrows():
    url = row['url']
    file_name = row['file_name']
    print(url)
    file = os.path.join(data_dir, file_name)
    # Skip files fetched by an earlier run so the cell can be re-executed.
    if os.path.exists(file):
        continue

    try:
        buffer = requests.get(url=url, timeout=10)
        # Without this check an HTTP error page (404, 500, ...) would be
        # written to disk and later be mistaken for the real document.
        buffer.raise_for_status()
        with open(file=file, mode="wb") as f:
            f.write(buffer.content)
    except (requests.RequestException, OSError) as e:
        # Best effort: report the failure and continue with the next document.
        print(f"{e}")

# %%

Contextの追加

# %%
import pandas as pd

# Load the evaluation rows; the context-extraction loop further down reads the
# `target_file_name` and `target_page_no` columns of this table.
df = pd.read_csv(filepath_or_buffer="RAG-Evaluation-Dataset-JA/rag_evaluation_result.csv")
df  # last expression of the cell: displays the loaded table in a notebook
# %%
import os
from langchain_community.document_loaders import PyPDFLoader

# Collect the text of the referenced PDF page for every evaluation row.
#
# `contexts` ends up with exactly one entry per row of `df`, in row order.
# Rows whose page cannot be extracted (missing file, unreadable PDF, invalid
# page number) get "" so the list stays aligned with the DataFrame.
contexts = []
# file_path -> list of per-page Documents, so each PDF is parsed only once
# even when many rows point at the same file.
pages_by_file = {}
for index, row in df.iterrows():
    target_file_name = row['target_file_name']
    target_page_no = row['target_page_no']
    file_path = f"RAG-Evaluation-Dataset-JA/data/{target_file_name}"
    print(file_path)
    if not os.path.exists(file_path):
        contexts.append("")
        continue

    try:
        if file_path not in pages_by_file:
            # load() returns one Document per PDF page. load_and_split() would
            # additionally run a text splitter, after which list positions no
            # longer correspond to page numbers.
            pages_by_file[file_path] = PyPDFLoader(file_path=file_path).load()
        documents = pages_by_file[file_path]
        page_index = int(target_page_no) - 1  # target_page_no is treated as 1-based
        if not 0 <= page_index < len(documents):
            # Reject out-of-range pages explicitly; a negative index would
            # otherwise silently select a page counted from the end.
            raise IndexError(
                f"page {target_page_no} is outside 1..{len(documents)}"
            )
        context = documents[page_index].page_content
    except Exception as e:
        # Best effort: report which file failed and keep the row with an
        # empty context instead of aborting the whole run.
        print(f"{file_path}: {e}")
        context = ""
    contexts.append(context)
len(contexts)
# %%
len(contexts)  # sanity check: must equal the number of rows in df for the assignment below

# %%
# Attach the extracted page texts as a new `context` column.
df["context"] = contexts
df

# %%
import csv

# Write the enriched table next to the cloned repository, quoting every field.
df.to_csv(
    path_or_buf="RAG-Evaluation-Dataset-JA.csv",
    index=False,
    quoting=csv.QUOTE_ALL,
)

Generate_textの追加

# %%
import os
from dotenv import load_dotenv

# Load settings from a .env file; override=True lets values from the file
# replace variables that are already set in the process environment.
load_dotenv(override=True)

# %%
from azure.identity import ClientSecretCredential, get_bearer_token_provider

# Service-principal settings are read from the environment; a missing variable
# yields None here and is passed to ClientSecretCredential unchanged.
tenant_id = os.getenv('AZURE_TENANT_ID')
client_id = os.getenv('AZURE_CLIENT_ID')
client_secret = os.getenv('AZURE_CLIENT_SECRET')
credential = ClientSecretCredential(
    client_secret=client_secret,
    client_id=client_id,
    tenant_id=tenant_id,
)

# %%
# Token provider for the Cognitive Services scope; it is handed to the chat
# model as `azure_ad_token_provider`.
scopes = "https://cognitiveservices.azure.com/.default"
azure_ad_token_provider = get_bearer_token_provider(credential, scopes)

# %%
from langchain_openai import AzureChatOpenAI

# Connection settings come from the environment (None when a variable is unset).
azure_endpoint = os.getenv('AZURE_OPENAI_ENDPOINT')
azure_deployment = os.getenv("AZURE_OPENAI_DEPLOYMENT_NAME")
api_version = os.getenv("API_VERSION")

# Generation settings passed to the chat model.
temperature = 0
max_tokens = 4096

# Chat model authenticated through the Azure AD token provider created above.
llm = AzureChatOpenAI(
    azure_ad_token_provider=azure_ad_token_provider,
    azure_endpoint=azure_endpoint,
    azure_deployment=azure_deployment,
    api_version=api_version,
    max_tokens=max_tokens,
    temperature=temperature,
)

# %%
from langchain_core.prompts import PromptTemplate

# Prompt with two placeholders, {context} and {question}; the text instructs
# the model (in Japanese) to answer the question based on the given context.
template = """# 指示:
与えられた文脈をもとに質問に回答してください。

# 文脈:
{context}

# 質問:
{question}

# 回答:
"""
prompt_template = PromptTemplate.from_template(template)

# %%
import pandas as pd

# Reload the table that was saved with the `context` column.
filepath = "RAG-Evaluation-Dataset-JA.csv"
df = pd.read_csv(filepath)
df

# %%
# Drop rows that have no context, in place.
# NOTE(review): the "" placeholders written by the context step are presumably
# read back as NaN by read_csv's default NA handling — confirm.
df.dropna(inplace=True, subset=['context'])
df

# %%
import csv

# Ask the model to answer every question from its context.
#
# `generated_text` receives exactly one entry per row of `df`, in row order
# ("" when the call fails), so it can be assigned back as a column below.
generated_text = []
for index, row in df.iterrows():
    print(index)
    context = row['context']
    question = row['question']
    try:
        response = llm.invoke(input=prompt_template.format(context=context, question=question))
        generated_text.append(response.content)
    except Exception as e:
        # Best effort: keep going, but report which row failed and why
        # instead of silently recording an empty answer.
        print(f"row {index}: generation failed: {e}")
        generated_text.append("")
len(generated_text)

# %%
# Store the answers alongside the inputs and write the result, quoting every field.
df['generated_text'] = generated_text
df.to_csv("RAG-Evaluation-Dataset-JA-generated_text.csv", index=False, quoting=csv.QUOTE_ALL)
0
0
0

Register as a new user and use Qiita more conveniently

  1. You get articles that match your needs
  2. You can efficiently read back useful information
  3. You can use dark theme
What you can do by signing up
0
0

Delete article

Deleted articles cannot be recovered.

The draft of this article will also be deleted.

Are you sure you want to delete this article?