0
0

Delete article

Deleted articles cannot be recovered.

The draft of this article will also be deleted.

Are you sure you want to delete this article?

watsonx.governanceでLLMアプリケーション(RAG)の評価

Last updated at Posted at 2025-05-15

Detached Prompt Templateの作成

create_detached_prompt_template.py
# %%
#!pip install ibm_aigov_facts_client setuptools
#!pip install tabulate
#!pip install python-dotenv

# %%
from dotenv import load_dotenv

load_dotenv(override=True)

# %%
import os

# Cloud Pak for Data connection settings, read from the process environment.
# Each value is None when the corresponding variable is not set.
CPD_URL = os.getenv("CPD_URL")
CPD_USERNAME = os.getenv("CPD_USERNAME")
CPD_API_KEY = os.getenv("CPD_API_KEY")
PROJECT_ID = os.getenv("PROJECT_ID")

# Azure OpenAI deployment referenced by the detached prompt template.
AZURE_OPENAI_DEPLOYMENT_NAME = os.getenv("AZURE_OPENAI_DEPLOYMENT_NAME")
AZURE_OPENAI_ENDPOINT = os.getenv("AZURE_OPENAI_ENDPOINT")

# %%
from ibm_aigov_facts_client import AIGovFactsClient
from ibm_aigov_facts_client import CloudPakforDataConfig

# API-key credentials for the Cloud Pak for Data instance.
creds = CloudPakforDataConfig(
    service_url=CPD_URL, username=CPD_USERNAME, api_key=CPD_API_KEY
)

# %%
# Facts client scoped to the project given by PROJECT_ID, with tracing disabled.
_facts_client_options = {
    "cloud_pak_for_data_configs": creds,
    "container_id": PROJECT_ID,
    "container_type": "project",
    "disable_tracing": True,
}
facts_client = AIGovFactsClient(**_facts_client_options)

# %%
from ibm_aigov_facts_client import DetachedPromptTemplate, PromptTemplate

# The Azure deployment name is used both as the model id and as the model name.
_model_fields = {
    "model_id": AZURE_OPENAI_DEPLOYMENT_NAME,
    "model_provider": "azure",
    "model_name": AZURE_OPENAI_DEPLOYMENT_NAME,
    "model_url": AZURE_OPENAI_ENDPOINT,
}

# Description of the externally hosted ("detached") prompt and the model behind it.
detached_information = DetachedPromptTemplate(
    prompt_id="rag",
    **_model_fields,
    prompt_url="none",
    prompt_additional_info={"model_owner": "openai"},
)

# %%
from rag import template

# Prompt text comes from rag.py; both template variables start out empty.
prompt_template = PromptTemplate(
    input=template,
    prompt_variables=dict.fromkeys(("context", "question"), ""),
    input_prefix="",
    output_prefix="",
)

# %%
# Register the detached prompt template as a RAG-task asset named "rag".
_create_args = {
    "name": "rag",
    "model_id": AZURE_OPENAI_DEPLOYMENT_NAME,
    "task_id": "retrieval_augmented_generation",
    "detached_information": detached_information,
    "description": "rag",
    "prompt_details": prompt_template,
}
response = facts_client.assets.create_detached_prompt(**_create_args)

# %%
print(response)

# %%

評価指標の設定

execute_prompt_setup.py
# %%
#!pip install ibm_watson_openscale
#!pip install "pandas<=2.1.9"

# %%
from dotenv import load_dotenv

load_dotenv(override=True)

# %%
import os

# Cloud Pak for Data connection settings, read from the process environment.
# Each value is None when the corresponding variable is not set.
CPD_URL = os.getenv("CPD_URL")
CPD_USERNAME = os.getenv("CPD_USERNAME")
CPD_API_KEY = os.getenv("CPD_API_KEY")
PROJECT_ID = os.getenv("PROJECT_ID")

# Asset id of the prompt template that the evaluation is configured for.
PROMPT_TEMPLATE_ASSET_ID = os.getenv("PROMPT_TEMPLATE_ASSET_ID")

# %%
from ibm_cloud_sdk_core.authenticators import CloudPakForDataAuthenticator
from ibm_watson_openscale import *
from ibm_watson_openscale.supporting_classes.enums import *
from ibm_watson_openscale.supporting_classes import *

# API-key authentication against Cloud Pak for Data.
# NOTE(review): disable_ssl_verification=True presumably turns off TLS
# certificate checks - confirm this is acceptable outside a sandbox.
_auth_options = {
    "url": CPD_URL,
    "username": CPD_USERNAME,
    "apikey": CPD_API_KEY,
    "disable_ssl_verification": True,
}
authenticator = CloudPakForDataAuthenticator(**_auth_options)

wos_client = APIClient(service_url=CPD_URL, authenticator=authenticator)

# The service instance id of the client is used as the data mart id below.
data_mart_id = wos_client.service_instance_id
data_mart_id

# %%
wos_client.version

# %%
from ibm_watson_openscale.base_classes import ApiRequestFailure

# Map the service instance to the project; re-running this cell is harmless.
try:
    wos_client.wos.add_instance_mapping(
        service_instance_id=data_mart_id,
        project_id=PROJECT_ID,
    )
except ApiRequestFailure as arf:
    # HTTP 409 means the instance mapping already exists; anything else is
    # a real failure and is propagated unchanged.
    if arf.response.status_code != 409:
        raise

# %%
wos_client.data_marts.show()

# %%
language_code = "ja"

# Metrics that are enabled with an empty (default) configuration.
_default_metrics = (
    "faithfulness",
    "answer_relevance",
    "rouge_score",
    "exact_match",
    "bleu",
    "unsuccessful_requests",
    "hap_input_score",
    "hap_score",
)

# Monitor configuration passed to the prompt setup; only the two PII metrics
# carry an explicit setting (the language code).
supporting_monitors = {
    "generative_ai_quality": {
        "parameters": {
            "min_sample_size": 10,
            "metrics_configuration": {
                **{metric: {} for metric in _default_metrics},
                "pii": {"language_code": language_code},
                "pii_input": {"language_code": language_code},
                "retrieval_quality": {},
            },
        },
    },
}

# %%
import json

# Pretty-print the configuration; ensure_ascii=False keeps non-ASCII text readable.
print(json.dumps(supporting_monitors, indent=2, ensure_ascii=False))

# %%
language_code = "ja"

# Arguments for the prompt setup: a RAG problem on unstructured text, with
# "reference_text" as the label column, "context" as the context field and
# "question" as the question field.
_prompt_setup_args = {
    "prompt_template_asset_id": PROMPT_TEMPLATE_ASSET_ID,
    "label_column": "reference_text",
    "operational_space_id": "development",
    "problem_type": "retrieval_augmented_generation",
    "input_data_type": "unstructured_text",
    "data_input_locale": [language_code],
    "generated_output_locale": [language_code],
    "project_id": PROJECT_ID,
    "context_fields": ["context"],
    "question_field": "question",
    "supporting_monitors": supporting_monitors,
    "background_mode": True,
}
response = wos_client.wos.execute_prompt_setup(**_prompt_setup_args)
response.result.to_dict()

# %%
# Fetch the prompt setup for the same asset and project.
response = wos_client.wos.get_prompt_setup(
    prompt_template_asset_id=PROMPT_TEMPLATE_ASSET_ID,
    project_id=PROJECT_ID,
)
response.result.to_dict()

# %%

評価データセットの作成

git clone https://huggingface.co/datasets/allganize/RAG-Evaluation-Dataset-JA
mkdir RAG-Evaluation-Dataset-JA/data
download_documents.py
# %%
#!pip install pandas
#!pip install requests
#!mkdir data

# %%
import pandas as pd

# Document list from the dataset; the loop below reads its `url` and
# `file_name` columns.
df = pd.read_csv("RAG-Evaluation-Dataset-JA/documents.csv")
df

# %%
import os
import requests

# Create the download directory up front; without it every write below fails.
os.makedirs("data", exist_ok=True)

for index, row in df.iterrows():
    url = row['url']
    file_name = row['file_name']
    print(url)
    file = f"data/{file_name}"
    if os.path.exists(file):
        # Already downloaded by a previous run.
        continue
    try:
        buffer = requests.get(url=url, timeout=5)
        # Reject 4xx/5xx responses: otherwise an HTTP error page would be
        # saved under the document's name and, because of the existence
        # check above, never be re-downloaded.
        buffer.raise_for_status()
        with open(file=file, mode="wb") as f:
            f.write(buffer.content)
    except (requests.RequestException, OSError) as e:
        # Best effort: report the failure and carry on with the next document.
        print(f"{e}")

# %%
extract_context.py
# %%
#!pip install langchain_community
#!pip install pypdf

# %%
import pandas as pd

# Evaluation rows; the loop below reads `target_file_name` and `target_page_no`.
filepath = "RAG-Evaluation-Dataset-JA/rag_evaluation_result.csv"
df = pd.read_csv(filepath_or_buffer=filepath)
df

# %%
from langchain_community.document_loaders import PyPDFLoader

# Parsed pages per PDF file name, so a document referenced by many rows is
# loaded and parsed only once.
pages_by_file = {}

context = []
for index, row in df.iterrows():
    print(index)
    file_name = row["target_file_name"]
    page_no = row["target_page_no"]

    file_path = f"data/{file_name}"
    try:
        if file_name not in pages_by_file:
            loader = PyPDFLoader(file_path=file_path)
            # load() returns one Document per PDF page. load_and_split() runs
            # a text splitter on top, so its list holds chunks rather than
            # pages and indexing it by page number picks the wrong text.
            pages_by_file[file_name] = loader.load()
        documents = pages_by_file[file_name]
        # NOTE(review): the list is 0-based; confirm whether target_page_no
        # in the dataset is 0- or 1-based.
        document = documents[page_no].page_content.replace("\n", "").replace(" ", "")
        context.append(document)
    except Exception as e:
        # Best effort: keep row alignment with an empty context and report why.
        context.append("")
        print(f"{e}")

# %%
context

# %%
df["context"] = context
df

# %%
df["context"] != ""

# %%
import csv

# Keep only the rows for which a context could be extracted.
df[df["context"] != ""].to_csv(path_or_buf="context.csv", index=False, quoting=csv.QUOTE_ALL)

# %%
rag.py
# %%
#!pip install python-dotenv
#!pip install azure.identity
#!pip install langchain_openai

# %%
from dotenv import load_dotenv

load_dotenv(override=True)

# %%
import os

# Azure OpenAI endpoint, API version and deployment, read from the environment.
# Each value is None when the corresponding variable is not set.
AZURE_OPENAI_ENDPOINT = os.getenv("AZURE_OPENAI_ENDPOINT")
API_VERSION = os.getenv("API_VERSION")
AZURE_OPENAI_DEPLOYMENT_NAME = os.getenv("AZURE_OPENAI_DEPLOYMENT_NAME")

# Service-principal credentials used for token-based authentication.
AZURE_CLIENT_ID = os.getenv("AZURE_CLIENT_ID")
AZURE_CLIENT_SECRET = os.getenv("AZURE_CLIENT_SECRET")
AZURE_TENANT_ID = os.getenv("AZURE_TENANT_ID")

# %%
from azure.identity import ClientSecretCredential, get_bearer_token_provider

# Service-principal credential built from the tenant id, client id and secret.
_principal = {
    "tenant_id": AZURE_TENANT_ID,
    "client_id": AZURE_CLIENT_ID,
    "client_secret": AZURE_CLIENT_SECRET,
}
credential = ClientSecretCredential(**_principal)

# %%
# Token provider for the Cognitive Services scope; handed to the chat model below.
scopes = "https://cognitiveservices.azure.com/.default"
azure_ad_token_provider = get_bearer_token_provider(credential, scopes)

# %%
from langchain_openai import AzureChatOpenAI

temperature, max_tokens = 0, 4096

# Chat model bound to the configured Azure deployment, authenticated through
# the token provider rather than an API key.
llm = AzureChatOpenAI(
    azure_endpoint=AZURE_OPENAI_ENDPOINT,
    api_version=API_VERSION,
    azure_deployment=AZURE_OPENAI_DEPLOYMENT_NAME,
    azure_ad_token_provider=azure_ad_token_provider,
    temperature=temperature,
    max_tokens=max_tokens,
)

# %%
from langchain_core.prompts import PromptTemplate

# Raw prompt text with two placeholders, {context} and {question}.
# Other scripts in this article import names from this module: `template`
# when registering the detached prompt template, and `prompt_template` and
# `llm` when generating answers. The literal is runtime text sent to the
# model, so it must not be reformatted.
template = """# 指示:
与えられた文脈にもとづいて質問に回答してください。

# 文脈:
{context}

# 質問:
{question}

# 回答:
"""

# LangChain prompt object built from the raw text above; callers fill it with
# prompt_template.format(context=..., question=...).
prompt_template = PromptTemplate.from_template(template=template)
# %%
#question = "私の名前は?"
#context = "はじめまして。私の名前はonoyu1012です。"
#response = llm.invoke(input=prompt_template.format(context=context, question=question))
#print(response)
# %%

generated_text.py
# %%
from rag import prompt_template, llm

# %%
import sys

# Start and end row positions of context.csv, taken from the first two
# command-line arguments and used as an iloc slice below.
argvs = sys.argv
x0, x1 = int(argvs[1]), int(argvs[2])
print(x0, x1)


# %%
import pandas as pd

filepath = "context.csv"
df = pd.read_csv(filepath_or_buffer=filepath).iloc[x0:x1]
df

# %%
# One answer per row; a failed model call is recorded as an empty string so
# the list stays aligned with the rows of df.
generated_text = []
for index, row in df.iterrows():
    print(index)
    question, context = row.question, row.context
    try:
        prompt = prompt_template.format(context=context, question=question)
        answer = llm.invoke(input=prompt).content
    except Exception as e:
        print(f"{e}")
        answer = ""
    generated_text.append(answer)

# %%
df['generated_text'] = generated_text
df

# %%
df.columns

# %%
import csv

# Export the evaluation columns, renaming target_answer to reference_text.
_export_columns = ["question", "context", "target_answer", "generated_text"]
_export = df[_export_columns].rename(columns={"target_answer": "reference_text"})
_export.to_csv(path_or_buf="evaluate.csv", index=False, quoting=csv.QUOTE_ALL)

# %%
python generated_text.py 0 10

image.png

評価の実行

evaluate_risk.py
# %%
from dotenv import load_dotenv

load_dotenv(override=True)

# %%
import os

# Cloud Pak for Data connection settings, read from the process environment.
# Each value is None when the corresponding variable is not set.
CPD_URL = os.getenv("CPD_URL")
CPD_USERNAME = os.getenv("CPD_USERNAME")
CPD_PASSWORD = os.getenv("CPD_PASSWORD")
CPD_API_KEY = os.getenv("CPD_API_KEY")

# Project and subscription that the risk evaluation targets.
PROJECT_ID = os.getenv("PROJECT_ID")
SUBSCRIPTION_ID = os.getenv("SUBSCRIPTION_ID")

# %%
from ibm_cloud_sdk_core.authenticators import CloudPakForDataAuthenticator
from ibm_watson_openscale import *
from ibm_watson_openscale.supporting_classes.enums import *
from ibm_watson_openscale.supporting_classes import *

# Username/password authentication against Cloud Pak for Data.
# NOTE(review): disable_ssl_verification=True presumably turns off TLS
# certificate checks - confirm this is acceptable outside a sandbox.
_auth_options = {
    "url": CPD_URL,
    "username": CPD_USERNAME,
    "password": CPD_PASSWORD,
    "disable_ssl_verification": True,
}
authenticator = CloudPakForDataAuthenticator(**_auth_options)

wos_client = APIClient(service_url=CPD_URL, authenticator=authenticator)

# The service instance id of the client is used as the data mart id below.
data_mart_id = wos_client.service_instance_id
print(data_mart_id)
print(wos_client.version)

# %%
# Show the monitor instances attached to the subscription.
wos_client.monitor_instances.show(target_target_id=SUBSCRIPTION_ID)

# %%
# Look up the "mrm" monitor instance of this subscription.
monitor_definition_id = "mrm"
target_target_id = SUBSCRIPTION_ID
_list_args = {
    "data_mart_id": data_mart_id,
    "monitor_definition_id": monitor_definition_id,
    "target_target_id": target_target_id,
    "project_id": PROJECT_ID,
}
response = wos_client.monitor_instances.list(**_list_args)
response.result.to_dict()

# %%
# The first listed instance is taken; this raises IndexError when none exists.
_first_instance = response.result.to_dict()["monitor_instances"][0]
mrm_monitor_instance_id = _first_instance["metadata"]["id"]
mrm_monitor_instance_id

# %%
# Evaluation input: the CSV written by generated_text.py, uploaded as
# multipart form data under the data set name "data".
test_data_path = "evaluate.csv"
test_data_set_name = "data"
content_type = "multipart/form-data"

_mrm = wos_client.monitor_instances.mrm

# Start the risk evaluation for the MRM monitor instance.
_evaluate_args = {
    "monitor_instance_id": mrm_monitor_instance_id,
    "test_data_set_name": test_data_set_name,
    "test_data_path": test_data_path,
    "content_type": content_type,
    "body": None,
    "project_id": PROJECT_ID,
    "includes_model_output": True,
    "background_mode": True,
}
response = _mrm.evaluate_risk(**_evaluate_args)
response.result.to_dict()

# %%
# Fetch the risk evaluation for the same monitor instance.
response = _mrm.get_risk_evaluation(
    monitor_instance_id=mrm_monitor_instance_id,
    project_id=PROJECT_ID,
)
response.result.to_dict()

# %%
#monitor_definition_id = "generative_ai_quality"
#response = wos_client.monitor_instances.list(
#  data_mart_id=data_mart_id,
#  monitor_definition_id=monitor_definition_id,
#  target_target_id=target_target_id,
#  project_id=PROJECT_ID
#)
#response.result.to_dict()

# %%
#gaiq_monitor_instance_id = response.result.to_dict()["monitor_instances"][0]["metadata"]["id"]
#gaiq_monitor_instance_id

# %%
#wos_client.monitor_instances.show_metrics(
#  monitor_instance_id=gaiq_monitor_instance_id,
#  project_id=PROJECT_ID
#)

# %%
#response = wos_client.data_sets.list(
#  target_target_id=SUBSCRIPTION_ID,
#  target_target_type="subscription",
#  type="gen_ai_quality_metrics"
#)
#response.result.to_dict()

# %%
#gaiq_data_set_id = response.result.to_dict()["data_sets"][0]["metadata"]["id"]
#gaiq_data_set_id
# %%
#wos_client.data_sets.show_records(data_set_id = gaiq_data_set_id)

# %%

評価結果の確認

image.png

image.png

0
0
0

Register as a new user and use Qiita more conveniently

  1. You get articles that match your needs
  2. You can efficiently read back useful information
  3. You can use dark theme
What you can do with signing up
0
0

Delete article

Deleted articles cannot be recovered.

The draft of this article will also be deleted.

Are you sure you want to delete this article?