Downloading the dataset
git clone https://huggingface.co/datasets/allganize/RAG-Evaluation-Dataset-JA
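If git (with git-lfs) is unavailable, the same dataset repository can be fetched from Python with huggingface_hub. This is an alternative to the clone above, not a step from the original walkthrough:
# %%
#!pip install huggingface_hub
# %%
from huggingface_hub import snapshot_download
# Downloads the dataset repo into the directory the git clone would create
snapshot_download(
    repo_id="allganize/RAG-Evaluation-Dataset-JA",
    repo_type="dataset",
    local_dir="RAG-Evaluation-Dataset-JA",
)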
Downloading the documents
download_documents.py
# %%
#!pip install pandas
#!pip install requests
#!mkdir data
# %%
import pandas as pd
df = pd.read_csv("RAG-Evaluation-Dataset-JA/documents.csv")
df
# %%
import os
import requests
for index, row in df.iterrows():
    url = row['url']
    file_name = row['file_name']
    print(url)
    file = f"data/{file_name}"
    if os.path.exists(file):
        continue
    try:
        buffer = requests.get(url=url, timeout=10)
        buffer.raise_for_status()  # avoid saving HTTP error pages as documents
        with open(file=file, mode="wb") as f:
            f.write(buffer.content)
    except Exception as e:
        print(f"{e}")
# %%
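# %%
# Optional check (not part of the original flow): list manifest entries
# that did not make it into data/
missing = [n for n in df['file_name'] if not os.path.exists(f"data/{n}")]
print(f"{len(missing)} of {len(df)} files missing")
# %%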
Extracting context from the documents
extract_context.py
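The scripts below expect a context.csv with question and context columns. A minimal sketch of how such a file could be produced is shown here; the pypdf usage and the dataset layout (a rag_evaluation_result.csv with target_file_name and target_page_no columns mapping each question to its source page) are assumptions, so adjust the names to the actual dataset:
# %%
#!pip install pandas
#!pip install pypdf
# %%
import csv
import pandas as pd
from pypdf import PdfReader

# Assumption: the dataset ships a CSV mapping each question to a source
# document and page; adjust file and column names to the actual layout
qa = pd.read_csv("RAG-Evaluation-Dataset-JA/rag_evaluation_result.csv")

contexts = []
for index, row in qa.iterrows():
    try:
        reader = PdfReader(f"data/{row['target_file_name']}")
        # Page numbers in the CSV are assumed to be 1-based
        page = reader.pages[int(row['target_page_no']) - 1]
        contexts.append(page.extract_text())
    except Exception as e:
        contexts.append(None)
        print(f"{e}")

qa["context"] = contexts
qa.to_csv(path_or_buf="context.csv", index=False, quoting=csv.QUOTE_ALL)
# %%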
Generating answers (= reference values) with Azure OpenAI / GPT-3.5-turbo
templates.py
# %%
template = """# 指示:
与えられた文脈にもとづいて質問に回答してください。
# 文脈:
{context}
# 質問:
{question}
# 回答:
"""
reference_text.py
# %%
#!pip install python-dotenv
#!pip install azure.identity
#!pip install langchain_openai
# %%
from dotenv import load_dotenv
load_dotenv(override=True)
# %%
import os
AZURE_TENANT_ID = os.environ.get("AZURE_TENANT_ID")
AZURE_CLIENT_ID = os.environ.get("AZURE_CLIENT_ID")
AZURE_CLIENT_SECRET = os.environ.get("AZURE_CLIENT_SECRET")
AZURE_OPENAI_ENDPOINT = os.environ.get('AZURE_OPENAI_ENDPOINT')
AZURE_OPENAI_DEPLOYMENT_NAME = os.environ.get("AZURE_OPENAI_DEPLOYMENT_NAME")
API_VERSION = os.environ.get("API_VERSION")
# %%
from azure.identity import ClientSecretCredential, get_bearer_token_provider
credential = ClientSecretCredential(
    tenant_id=AZURE_TENANT_ID,
    client_id=AZURE_CLIENT_ID,
    client_secret=AZURE_CLIENT_SECRET
)
# %%
scopes = "https://cognitiveservices.azure.com/.default"
azure_ad_token_provider = get_bearer_token_provider(credential, scopes)
# %%
from langchain_openai import AzureChatOpenAI
temperature = 0
max_tokens = 4096
llm = AzureChatOpenAI(
    azure_endpoint=AZURE_OPENAI_ENDPOINT,
    api_version=API_VERSION,
    azure_deployment=AZURE_OPENAI_DEPLOYMENT_NAME,
    azure_ad_token_provider=azure_ad_token_provider,
    temperature=temperature,
    max_tokens=max_tokens,
)
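# %%
# Optional smoke test (not in the original): confirms the AAD token provider
# and deployment are wired up before running the long loop below
print(llm.invoke(input="こんにちは").content)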
# %%
from templates import template
from langchain_core.prompts import PromptTemplate
prompt_template = PromptTemplate.from_template(template=template)
# %%
import pandas as pd
df = pd.read_csv("context.csv")
df
# %%
df.dropna(subset=["context"], inplace=True)
df.shape
# %%
reference_text = []
for index, row in df.iterrows():
    print(index)
    question = row["question"]
    context = row["context"]
    prompt = prompt_template.format(question=question, context=context)
    print(prompt)
    try:
        response = llm.invoke(input=prompt)
        reference_text.append(response.content)
    except Exception as e:
        reference_text.append("")
        print(f"{e}")
# %%
reference_text
# %%
df["reference_text"] = reference_text
df
# %%
import csv
df.to_csv(path_or_buf="reference_text.csv", index=False, quoting=csv.QUOTE_ALL)
# %%
Generating answers (= predicted values) with watsonx.ai / Mistral Large
generated_text.py
# %%
#!pip install python-dotenv
#!pip install langchain_ibm
# %%
from dotenv import load_dotenv
load_dotenv(override=True)
# %%
import os
# Assumption: PROJECT_ID and API_KEY are read from .env, mirroring the
# pattern used for the Azure settings above
PROJECT_ID = os.environ.get("PROJECT_ID")
API_KEY = os.environ.get("API_KEY")
# %%
from langchain_ibm import WatsonxLLM
model_id = 'mistralai/mistral-large'
project_id = PROJECT_ID
url = 'https://us-south.ml.cloud.ibm.com/'
apikey = API_KEY
params = {
    'decoding_method': 'greedy',
    'max_new_tokens': 16384,
    'min_new_tokens': 1,
    'repetition_penalty': 1.02,
    "stop_sequences": ["</s>"]
}
llm = WatsonxLLM(
    model_id=model_id,
    project_id=project_id,
    url=url,
    params=params,
    apikey=apikey
)
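# %%
# Optional smoke test (not in the original); WatsonxLLM returns a plain
# string rather than a message object
print(llm.invoke(input="こんにちは"))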
# %%
import pandas as pd
df = pd.read_csv("reference_text.csv")
df
# %%
from templates import template
from langchain_core.prompts import PromptTemplate
prompt_template = PromptTemplate.from_template(template=template)
# %%
generated_text = []
for index, row in df.iterrows():
    print(index)
    question = row["question"]
    context = row["context"]
    prompt = prompt_template.format(question=question, context=context)
    # print(prompt)
    try:
        response = llm.invoke(input=prompt)
        generated_text.append(response)
    except Exception as e:
        generated_text.append("")
        print(f"{e}")
# %%
generated_text
# %%
df["generated_text"] = generated_text
df
# %%
import csv
df.to_csv(path_or_buf="generated_text.csv", index=False, quoting=csv.QUOTE_ALL)
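# %%
# Optional check (not in the original): failed calls leave empty strings,
# which would skew the evaluation below
blank = df[df["generated_text"].fillna("").eq("") | df["reference_text"].fillna("").eq("")]
print(f"{len(blank)} blank rows out of {len(df)}")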
# %%
Evaluation
Evaluating the predicted values (Mistral Large) against the reference values (GPT-3.5-turbo)
evaluate.py
# %%
#!pip install -U ibm_watson_openscale
#!pip install -U "ibm-metrics-plugin[generative-ai-quality]~=3.0.11"
#!pip install --upgrade shap==0.42.1
# %%
#import spacy
#spacy.cli.download("en_core_web_sm")
#spacy.cli.download("ja_core_news_sm")
#!python -m nltk.downloader punkt
# %%
from dotenv import load_dotenv
load_dotenv(override=True)
# %%
import os
CPD_URL = os.environ.get("CPD_URL")
CPD_USERNAME = os.environ.get("CPD_USERNAME")
CPD_API_KEY = os.environ.get("CPD_API_KEY")
# %%
from ibm_cloud_sdk_core.authenticators import CloudPakForDataAuthenticator
from ibm_watson_openscale import *
from ibm_watson_openscale.supporting_classes.enums import *
from ibm_watson_openscale.supporting_classes import *
authenticator = CloudPakForDataAuthenticator(
    url=CPD_URL,
    username=CPD_USERNAME,
    apikey=CPD_API_KEY,
    disable_ssl_verification=True
)
wos_client = APIClient(
    service_url=CPD_URL,
    authenticator=authenticator,
)
data_mart_id = wos_client.service_instance_id
print(data_mart_id)
print(wos_client.version)
# %%
import pandas as pd
df = pd.read_csv("generated_text.csv").iloc[:10]
df
# %%
sources = df[["context", "question"]].copy()
sources
# %%
predictions = df[["generated_text"]].copy()
predictions
# %%
references = df[["reference_text"]].copy()
references
# %%
language_code = "ja"
configuration = {
    "configuration": {
        "record_level": False,
        "context_columns": ["context"],
        "question_column": "question",
        "retrieval_augmented_generation": {
            "content_analysis": {},
            "faithfulness": {},
            "answer_relevance": {},
            "unsuccessful_requests": {},
            "hap_score": {},
            "pii": {
                "language_code": language_code
            }
        },
        "language_code": language_code
    }
}
# %%
print(configuration)
# %%
result = wos_client.llm_metrics.compute_metrics(configuration, sources, predictions, references)
result
# %%
import json
import time
metrics = wos_client.llm_metrics.get_metrics_result(configuration, result)
while "in_progress" in json.dumps(metrics):
    print("in_progress")
    time.sleep(10)  # poll gently while the evaluation job runs
    metrics = wos_client.llm_metrics.get_metrics_result(configuration, result)
metrics
# %%
{'answer_relevance': {'total_records': 10,
  'max': 0.9669,
  'mean': 0.9135,
  'metric_value': 0.9135,
  'min': 0.7612},
 'faithfulness': {'total_records': 10,
  'max': 0.0668,
  'mean': 0.0409,
  'metric_value': 0.0409,
  'min': 0.0151},
 'hap_score': {'total_records': 10,
  'max': 0.03231289982795715,
  'mean': 0.0074,
  'metric_value': 0.0074,
  'min': 0.002128486754372716},
 'pii': {'total_records': 10,
  'max': 0,
  'mean': 0.0,
  'metric_value': 0.0,
  'min': 0},
 'content_analysis': {'coverage': {'metric_value': 0.0416,
   'mean': 0.0416,
   'min': 0.0,
   'max': 0.2,
   'std': 0.0621},
  'density': {'metric_value': 0.001,
   'mean': 0.001,
   'min': 0.0,
   'max': 0.0067,
   'std': 0.002},
  'abstractness': {'metric_value': 0.7719,
   'mean': 0.7719,
   'min': 0.4194,
   'max': 1.0,
   'std': 0.2401}},
 'unsuccessful_requests': {'metric_value': 0.0,
  'mean': 0.0,
  'min': 0,
  'max': 0,
  'std': 0.0}}
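To flatten the nested result into a table for reporting, a small sketch (assuming metrics matches the dictionary shape shown above):
# %%
import pandas as pd
rows = []
for metric, stats in metrics.items():
    if "metric_value" in stats:
        rows.append({"metric": metric, **stats})
    else:  # content_analysis nests its sub-metrics one level deeper
        for sub, substats in stats.items():
            rows.append({"metric": f"{metric}.{sub}", **substats})
pd.DataFrame(rows)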