Detached Prompt Templateの作成
create_detached_prompt_template.py
# %%
# One-time dependency installation (uncomment when running in a notebook):
#!pip install ibm_aigov_facts_client setuptools
#!pip install tabulate
#!pip install python-dotenv
# %%
# Load settings from a .env file; override=True replaces variables that are
# already set in the process environment.
from dotenv import load_dotenv

load_dotenv(override=True)
# %%
import os

# Cloud Pak for Data connection settings and the target project.
CPD_URL = os.getenv("CPD_URL")
CPD_USERNAME = os.getenv("CPD_USERNAME")
CPD_API_KEY = os.getenv("CPD_API_KEY")
PROJECT_ID = os.getenv("PROJECT_ID")

# Azure OpenAI deployment that the detached prompt template will point at.
AZURE_OPENAI_DEPLOYMENT_NAME = os.getenv("AZURE_OPENAI_DEPLOYMENT_NAME")
AZURE_OPENAI_ENDPOINT = os.getenv("AZURE_OPENAI_ENDPOINT")
# %%
from ibm_aigov_facts_client import AIGovFactsClient
from ibm_aigov_facts_client import CloudPakforDataConfig

# Credentials for the Cloud Pak for Data instance (API-key based).
creds = CloudPakforDataConfig(
    service_url=CPD_URL,
    username=CPD_USERNAME,
    api_key=CPD_API_KEY,
)
# %%
# Facts client scoped to the project identified by PROJECT_ID.
facts_client = AIGovFactsClient(
    cloud_pak_for_data_configs=creds,
    container_type="project",
    container_id=PROJECT_ID,
    disable_tracing=True,
)
# %%
from ibm_aigov_facts_client import DetachedPromptTemplate, PromptTemplate

# Metadata describing the externally hosted model behind the prompt: the
# Azure OpenAI deployment name doubles as model id and model name.
detached_information = DetachedPromptTemplate(
    prompt_id="rag",
    prompt_url="none",
    prompt_additional_info={"model_owner": "openai"},
    model_provider="azure",
    model_id=AZURE_OPENAI_DEPLOYMENT_NAME,
    model_name=AZURE_OPENAI_DEPLOYMENT_NAME,
    model_url=AZURE_OPENAI_ENDPOINT,
)
# %%
# Reuse the prompt text defined in rag.py so both scripts stay in sync.
from rag import template

# The template exposes two variables, `context` and `question`, both
# registered here with empty default values.
prompt_template = PromptTemplate(
    input=template,
    prompt_variables={"context": "", "question": ""},
    input_prefix="",
    output_prefix="",
)
# %%
# Create the detached prompt template asset in the project.
response = facts_client.assets.create_detached_prompt(
    name="rag",
    description="rag",
    task_id="retrieval_augmented_generation",
    model_id=AZURE_OPENAI_DEPLOYMENT_NAME,
    detached_information=detached_information,
    prompt_details=prompt_template,
)
# %%
print(response)
# %%
評価指標の設定
execute_prompt_setup.py
# %%
# One-time dependency installation (uncomment when running in a notebook):
#!pip install ibm_watson_openscale
#!pip install "pandas<=2.1.9"
# %%
# Load settings from a .env file; override=True replaces variables that are
# already set in the process environment.
from dotenv import load_dotenv

load_dotenv(override=True)
# %%
import os

# Cloud Pak for Data connection settings, target project and the id of the
# prompt template asset to configure.
CPD_URL = os.getenv("CPD_URL")
CPD_USERNAME = os.getenv("CPD_USERNAME")
CPD_API_KEY = os.getenv("CPD_API_KEY")
PROJECT_ID = os.getenv("PROJECT_ID")
PROMPT_TEMPLATE_ASSET_ID = os.getenv("PROMPT_TEMPLATE_ASSET_ID")
# %%
from ibm_cloud_sdk_core.authenticators import CloudPakForDataAuthenticator
from ibm_watson_openscale import *
from ibm_watson_openscale.supporting_classes.enums import *
from ibm_watson_openscale.supporting_classes import *

# API-key authentication against Cloud Pak for Data.
# NOTE(review): TLS certificate verification is switched off here; confirm
# this is acceptable for the target cluster.
authenticator = CloudPakForDataAuthenticator(
    url=CPD_URL,
    username=CPD_USERNAME,
    apikey=CPD_API_KEY,
    disable_ssl_verification=True,
)

wos_client = APIClient(service_url=CPD_URL, authenticator=authenticator)

# The service instance id is used as the data mart id in later calls.
data_mart_id = wos_client.service_instance_id
data_mart_id
# %%
wos_client.version
# %%
from ibm_watson_openscale.base_classes import ApiRequestFailure

# Map the OpenScale service instance to the project.
try:
    wos_client.wos.add_instance_mapping(
        service_instance_id=data_mart_id,
        project_id=PROJECT_ID,
    )
except ApiRequestFailure as arf:
    # HTTP 409 means the instance mapping already exists, which is fine;
    # every other failure is propagated to the caller.
    if arf.response.status_code != 409:
        raise arf
# %%
wos_client.data_marts.show()
# %%
# Language code handed to the PII metrics.
language_code = "ja"

# Metrics that are enabled with an empty configuration.
metrics_configuration = {
    metric: {}
    for metric in (
        "faithfulness",
        "answer_relevance",
        "rouge_score",
        "exact_match",
        "bleu",
        "unsuccessful_requests",
        "hap_input_score",
        "hap_score",
    )
}
# The PII metrics (on output and on input) additionally carry the language code.
metrics_configuration["pii"] = {"language_code": language_code}
metrics_configuration["pii_input"] = {"language_code": language_code}
metrics_configuration["retrieval_quality"] = {}

# Monitor configuration passed to the prompt setup call.
supporting_monitors = {
    "generative_ai_quality": {
        "parameters": {
            "min_sample_size": 10,
            "metrics_configuration": metrics_configuration,
        }
    }
}
# %%
import json

# Pretty-print the configuration; ensure_ascii=False keeps non-ASCII text readable.
print(json.dumps(supporting_monitors, indent=2, ensure_ascii=False))
# %%
# Locale used for both the input data and the generated output.
language_code = "ja"

# Submit the prompt setup for the prompt template asset in the project's
# "development" space, using the monitor configuration built above.
response = wos_client.wos.execute_prompt_setup(
    prompt_template_asset_id=PROMPT_TEMPLATE_ASSET_ID,
    project_id=PROJECT_ID,
    operational_space_id="development",
    problem_type="retrieval_augmented_generation",
    input_data_type="unstructured_text",
    # Column names expected in the evaluation data.
    label_column="reference_text",
    context_fields=["context"],
    question_field="question",
    data_input_locale=[language_code],
    generated_output_locale=[language_code],
    supporting_monitors=supporting_monitors,
    background_mode=True,
)
response.result.to_dict()
# %%
# Read back the prompt setup for the same asset.
response = wos_client.wos.get_prompt_setup(
    prompt_template_asset_id=PROMPT_TEMPLATE_ASSET_ID,
    project_id=PROJECT_ID,
)
response.result.to_dict()
# %%
評価データセットの作成
git clone https://huggingface.co/datasets/allganize/RAG-Evaluation-Dataset-JA
mkdir RAG-Evaluation-Dataset-JA/data
download_documents.py
# %%
# One-time dependency installation (uncomment when running in a notebook):
#!pip install pandas
#!pip install requests
#!mkdir data
# %%
import pandas as pd

# documents.csv lists the source documents; the `url` and `file_name`
# columns are the ones used below.
df = pd.read_csv("RAG-Evaluation-Dataset-JA/documents.csv")
df
# %%
import os
import requests

# Create the download directory up front so that open() below cannot fail
# just because the folder is missing.
os.makedirs("data", exist_ok=True)

for _, row in df.iterrows():
    url = row["url"]
    file_name = row["file_name"]
    print(url)
    file = f"data/{file_name}"
    # Files fetched by a previous run are not downloaded again.
    if os.path.exists(file):
        continue
    try:
        buffer = requests.get(url=url, timeout=5)
        # Reject HTTP 4xx/5xx responses; otherwise the error page would be
        # saved under the document's name and skipped on every later run.
        buffer.raise_for_status()
        with open(file=file, mode="wb") as f:
            f.write(buffer.content)
    except (requests.RequestException, OSError) as e:
        # Best effort: report the failure and move on to the next document.
        print(f"{e}")
# %%
extract_context.py
# %%
# One-time dependency installation (uncomment when running in a notebook):
#!pip install langchain_community
#!pip install pypdf
# %%
import pandas as pd

# Evaluation rows; `target_file_name` and `target_page_no` identify the PDF
# page each row refers to.
filepath = "RAG-Evaluation-Dataset-JA/rag_evaluation_result.csv"
df = pd.read_csv(filepath_or_buffer=filepath)
df
# %%
from langchain_community.document_loaders import PyPDFLoader

# Parsed pages per PDF path, so a file referenced by many rows is parsed once.
pages_by_file = {}

# One entry per row of df: the page text, or "" when extraction failed.
context = []
for index, row in df.iterrows():
    print(index)
    file_name = row["target_file_name"]
    page_no = row["target_page_no"]
    file_path = f"data/{file_name}"
    try:
        if file_path not in pages_by_file:
            # load() yields one Document per PDF page. load_and_split() would
            # additionally re-chunk long pages with a text splitter, so list
            # positions would no longer line up with page numbers.
            pages_by_file[file_path] = PyPDFLoader(file_path=file_path).load()
        documents = pages_by_file[file_path]
        # NOTE(review): page_no is used directly as a 0-based list index;
        # confirm how the dataset numbers target_page_no.
        document = documents[page_no].page_content.replace("\n", "").replace(" ", "")
        context.append(document)
    except Exception as e:
        # Best effort: missing files, unreadable PDFs and out-of-range pages
        # all produce an empty context so that `context` stays aligned with df.
        context.append("")
        print(f"{e}")
# %%
context
# %%
# Attach the extracted page text to the evaluation rows.
df["context"] = context
df
# %%
# Rows for which page text could actually be extracted.
has_context = df["context"] != ""
has_context
# %%
import csv

# Keep only rows with a non-empty context; every field is quoted in the CSV.
df[has_context].to_csv(
    path_or_buf="context.csv",
    index=False,
    quoting=csv.QUOTE_ALL,
)
# %%
rag.py
# %%
# One-time dependency installation (uncomment when running in a notebook):
#!pip install python-dotenv
#!pip install azure.identity
#!pip install langchain_openai
# %%
# Load settings from a .env file; override=True replaces variables that are
# already set in the process environment.
from dotenv import load_dotenv

load_dotenv(override=True)
# %%
import os

# Azure OpenAI endpoint, API version and deployment to call.
AZURE_OPENAI_ENDPOINT = os.getenv("AZURE_OPENAI_ENDPOINT")
API_VERSION = os.getenv("API_VERSION")
AZURE_OPENAI_DEPLOYMENT_NAME = os.getenv("AZURE_OPENAI_DEPLOYMENT_NAME")

# Service principal used to obtain tokens.
AZURE_CLIENT_ID = os.getenv("AZURE_CLIENT_ID")
AZURE_CLIENT_SECRET = os.getenv("AZURE_CLIENT_SECRET")
AZURE_TENANT_ID = os.getenv("AZURE_TENANT_ID")
# %%
from azure.identity import ClientSecretCredential, get_bearer_token_provider

credential = ClientSecretCredential(
    tenant_id=AZURE_TENANT_ID,
    client_id=AZURE_CLIENT_ID,
    client_secret=AZURE_CLIENT_SECRET,
)
# %%
# Token provider for the Cognitive Services scope, handed to the chat model below.
scopes = "https://cognitiveservices.azure.com/.default"
azure_ad_token_provider = get_bearer_token_provider(credential, scopes)
# %%
from langchain_openai import AzureChatOpenAI

# Generation settings.
temperature = 0
max_tokens = 4096

# Chat model authenticated through the token provider instead of an API key.
llm = AzureChatOpenAI(
    azure_endpoint=AZURE_OPENAI_ENDPOINT,
    azure_deployment=AZURE_OPENAI_DEPLOYMENT_NAME,
    api_version=API_VERSION,
    azure_ad_token_provider=azure_ad_token_provider,
    temperature=temperature,
    max_tokens=max_tokens,
)
# %%
from langchain_core.prompts import PromptTemplate

# Prompt text with two placeholders, {context} and {question}. Other scripts
# import `template` and `prompt_template` from this module, so the names and
# the text must stay as they are.
template = """# 指示:
与えられた文脈にもとづいて質問に回答してください。
# 文脈:
{context}
# 質問:
{question}
# 回答:
"""

prompt_template = PromptTemplate.from_template(template)
# %%
#question = "私の名前は?"
#context = "はじめまして。私の名前はonoyu1012です。"
#response = llm.invoke(input=prompt_template.format(context=context, question=question))
#print(response)
# %%
generated_text.py
# %%
from rag import prompt_template, llm
# %%
import sys

# The first two command-line arguments give the row range [x0, x1) to process.
cli_args = sys.argv
x0 = int(cli_args[1])
x1 = int(cli_args[2])
print(x0, x1)
# %%
import pandas as pd

# Load context.csv and keep only the requested positional slice of rows.
filepath = "context.csv"
all_rows = pd.read_csv(filepath_or_buffer=filepath)
df = all_rows.iloc[x0:x1]
df
# %%
# One generated answer per row of df; "" marks a row whose generation failed.
generated_text = []
for index, row in df.iterrows():
    print(index)
    question = row.question
    context = row.context
    try:
        # Fill the prompt with this row's context and question, then call the model.
        response = llm.invoke(
            input=prompt_template.format(context=context, question=question)
        )
        generated_text.append(response.content)
    except Exception as e:
        # Best effort: log the error and keep the list aligned with df.
        print(f"{e}")
        generated_text.append("")
# %%
df["generated_text"] = generated_text
df
# %%
df.columns
# %%
import csv

# Export the columns used for evaluation; `target_answer` is written out as
# `reference_text`, and every field is quoted.
export_columns = ["question", "context", "target_answer", "generated_text"]
evaluation_df = df[export_columns].rename(columns={"target_answer": "reference_text"})
evaluation_df.to_csv(path_or_buf="evaluate.csv", index=False, quoting=csv.QUOTE_ALL)
# %%
python generated_text.py 0 10
評価の実行
evaluate_risk.py
# %%
# Load settings from a .env file; override=True replaces variables that are
# already set in the process environment.
from dotenv import load_dotenv

load_dotenv(override=True)
# %%
import os

# Cloud Pak for Data connection settings, target project and subscription.
CPD_URL = os.getenv("CPD_URL")
CPD_USERNAME = os.getenv("CPD_USERNAME")
CPD_PASSWORD = os.getenv("CPD_PASSWORD")
# NOTE(review): CPD_API_KEY is read here, but the authenticator below signs in
# with CPD_PASSWORD — confirm which credential is intended.
CPD_API_KEY = os.getenv("CPD_API_KEY")
PROJECT_ID = os.getenv("PROJECT_ID")
SUBSCRIPTION_ID = os.getenv("SUBSCRIPTION_ID")
# %%
from ibm_cloud_sdk_core.authenticators import CloudPakForDataAuthenticator
from ibm_watson_openscale import *
from ibm_watson_openscale.supporting_classes.enums import *
from ibm_watson_openscale.supporting_classes import *

# Username/password authentication against Cloud Pak for Data.
# NOTE(review): TLS certificate verification is switched off here; confirm
# this is acceptable for the target cluster.
authenticator = CloudPakForDataAuthenticator(
    url=CPD_URL,
    username=CPD_USERNAME,
    password=CPD_PASSWORD,
    disable_ssl_verification=True,
)

wos_client = APIClient(service_url=CPD_URL, authenticator=authenticator)

# The service instance id is used as the data mart id in later calls.
data_mart_id = wos_client.service_instance_id
print(data_mart_id)
print(wos_client.version)
# %%
# Show the monitor instances attached to the subscription.
wos_client.monitor_instances.show(target_target_id=SUBSCRIPTION_ID)
# %%
# Look up the "mrm" monitor instance of the subscription.
monitor_definition_id = "mrm"
target_target_id = SUBSCRIPTION_ID
response = wos_client.monitor_instances.list(
    data_mart_id=data_mart_id,
    project_id=PROJECT_ID,
    monitor_definition_id=monitor_definition_id,
    target_target_id=target_target_id,
)
response.result.to_dict()
# %%
# Take the id of the first monitor instance in the listing.
listed_instances = response.result.to_dict()["monitor_instances"]
mrm_monitor_instance_id = listed_instances[0]["metadata"]["id"]
mrm_monitor_instance_id
# %%
# Upload evaluate.csv to the mrm monitor instance for risk evaluation.
test_data_path = "evaluate.csv"
test_data_set_name = "data"
content_type = "multipart/form-data"

response = wos_client.monitor_instances.mrm.evaluate_risk(
    monitor_instance_id=mrm_monitor_instance_id,
    project_id=PROJECT_ID,
    test_data_path=test_data_path,
    test_data_set_name=test_data_set_name,
    content_type=content_type,
    body=None,
    # The CSV already contains the model output, so it is flagged as included.
    includes_model_output=True,
    background_mode=True,
)
response.result.to_dict()
# %%
# Fetch the risk evaluation for the same monitor instance.
response = wos_client.monitor_instances.mrm.get_risk_evaluation(
    monitor_instance_id=mrm_monitor_instance_id,
    project_id=PROJECT_ID,
)
response.result.to_dict()
# %%
#monitor_definition_id = "generative_ai_quality"
#response = wos_client.monitor_instances.list(
# data_mart_id=data_mart_id,
# monitor_definition_id=monitor_definition_id,
# target_target_id=target_target_id,
# project_id=PROJECT_ID
#)
#response.result.to_dict()
# %%
#gaiq_monitor_instance_id = response.result.to_dict()["monitor_instances"][0]["metadata"]["id"]
#gaiq_monitor_instance_id
# %%
#wos_client.monitor_instances.show_metrics(
# monitor_instance_id=gaiq_monitor_instance_id,
# project_id=PROJECT_ID
#)
# %%
#response = wos_client.data_sets.list(
# target_target_id=SUBSCRIPTION_ID,
# target_target_type="subscription",
# type="gen_ai_quality_metrics"
#)
#response.result.to_dict()
# %%
#gaiq_data_set_id = response.result.to_dict()["data_sets"][0]["metadata"]["id"]
#gaiq_data_set_id
# %%
#wos_client.data_sets.show_records(data_set_id = gaiq_data_set_id)
# %%
評価結果の確認