0
0

Delete article

Deleted articles cannot be recovered.

The draft of this article will also be deleted.

Are you sure you want to delete this article?

watsonxによるプロンプトを日本語で評価

Last updated at Posted at 2025-04-10

準備

# %%
# Install the packages this notebook uses (IPython shell magics):
# python-dotenv, the watsonx.ai SDK, and the Watson OpenScale SDK with setuptools.
!pip install python-dotenv
!pip install ibm_watsonx_ai
!pip install ibm_watson_openscale setuptools

import os
from dotenv import load_dotenv

# Load variables from a .env file into the process environment.
# override=True lets values from the file replace variables that are already set.
load_dotenv(override=True)

# %%
# Connection settings for the watsonx.ai client, read from the environment
# variables URL and CLOUD_API_KEY. A variable that is not set yields None.
WML_CREDENTIALS = {
    "url": os.environ.get("URL"),
    "apikey": os.environ.get("CLOUD_API_KEY"),
}
# Echo the settings for a quick visual check, but redact the API key so the
# secret is never written into the notebook's saved cell output.
{
    key: "********" if key == "apikey" and value else value
    for key, value in WML_CREDENTIALS.items()
}

# %%
# Project identifier, read from the PROJECT_ID environment variable.
project_id = os.environ.get("PROJECT_ID")
project_id

Prompt templateの作成 ← GUIでも可能

from ibm_watsonx_ai.foundation_models.prompts import PromptTemplateManager
from ibm_watsonx_ai.foundation_models.prompts import PromptTemplate
from ibm_watsonx_ai.foundation_models.utils.enums import ModelTypes

# Manager for prompt templates, bound to the credentials and project above.
prompt_mgr = PromptTemplateManager(credentials=WML_CREDENTIALS, project_id=project_id)

# A single worked example for the template: an insurance claim text followed
# by the summary expected for it.
example_claim = "2023年11月1日午前11時、私の車両「日産 マキシマ 1998年式」は、ニューヨークで重大事故に遭いました。私の運転手は制限速度を守って走行していましたが、前の車が急停止したために前方から衝突しました。激しい衝撃により双方の車が大破し、私の運転手はすぐに救急に連絡をして、その場で応急処置を受けました。私の車両は前のバンパー、ボンネット、フロントガラスに損傷を受けました。私の運転手は重傷を負い、現在治療を受けています。私はあなたの代理人にただちに連絡し、事故の詳細と警察の報告書、医療診察書を添えて請求書を提出しました。また目撃者の供述書と破損した車両の写真も提出します。"
example_summary = "私の車両はニューヨークで重大事故に遭いました。私の運転手は重傷を負い、現在治療を受けています。"

# Summarization template with one input variable, {input_text}.
prompt_template = PromptTemplate(
    name="MLS_Car_Insurance_Summarization_Ja_Demo",
    model_id=ModelTypes.GRANITE_13B_INSTRUCT_V2,
    task_ids=["summarization"],
    instruction="以下の保険請求を最大 3 文で要約してください",
    input_text="{input_text}",
    input_variables=["input_text"],
    examples=[[example_claim, example_summary]],
)

# Store the template through the manager and show the stored object.
stored_prompt_template = prompt_mgr.store_prompt(prompt_template=prompt_template)
stored_prompt_template

# The stored template's prompt_id is used as the asset id in the later setup calls.
prompt_template_asset_id = stored_prompt_template.prompt_id
prompt_template_asset_id

image.png

Prompt templateの設定 ← 日本語で行う場合、現状はPython SDKを使用

# %%
from ibm_cloud_sdk_core.authenticators import IAMAuthenticator
from ibm_watson_openscale import *
from ibm_watson_openscale.supporting_classes.enums import *
from ibm_watson_openscale.supporting_classes import *

# Service instance identifier, read from the SERVICE_INSTANCE_ID environment variable.
service_instance_id = os.environ.get("SERVICE_INSTANCE_ID")
# Authenticate with the API key held in the CLOUD_API_KEY environment variable.
authenticator = IAMAuthenticator(apikey=os.environ.get("CLOUD_API_KEY"))
wos_client = APIClient(authenticator=authenticator, service_instance_id=service_instance_id)
# The client's service instance id is reused as the data mart id in later calls.
data_mart_id = wos_client.service_instance_id
print(data_mart_id)
print(wos_client.version)

# Display the data marts known to this client.
wos_client.data_marts.show()

# Locale code used for both the input data and the generated output.
language_code = "ja"

# Per-metric settings for the generative AI quality monitor. It is left empty
# here; the entries below are kept, disabled, as a reference of metric names
# that can be listed individually.
#     "rouge_score": {},
#     "normalized_precision": {},
#     "normalized_f1": {},
#     "normalized_recall": {},
#     "cosine_similarity": {},
#     "jaccard_similarity": {},
#     "sari": {},
#     "meteor": {},
#     "hap_score": {},
#     "pii": {"language_code": language_code},
metrics_configuration = {}

generative_ai_quality_parameters = {
    "min_sample_size": 10,
    "metrics_configuration": metrics_configuration,
}

# Monitor configuration handed to the prompt setup call.
supporting_monitors = {
    "generative_ai_quality": {"parameters": generative_ai_quality_parameters},
}

# Remaining prompt setup arguments.
problem_type = "summarization"
input_data_type = "unstructured_text"
label_column = "reference_summary"
operational_space_id = "development"

# Run the prompt setup for the stored template with the monitor configuration
# and the Japanese locale for both input and generated output.
response = wos_client.wos.execute_prompt_setup(
    prompt_template_asset_id=prompt_template_asset_id,
    project_id=project_id,
    label_column=label_column,
    operational_space_id=operational_space_id,
    problem_type=problem_type,
    input_data_type=input_data_type,
    supporting_monitors=supporting_monitors,
    data_input_locale=[language_code],
    generated_output_locale=[language_code],
    # Wait for the setup to finish: the statements below read subscription_id
    # from the setup result straight away, which is only reliable once the
    # setup has completed. With background_mode=True this call returns
    # immediately and the lookup can run against an unfinished setup.
    background_mode=False,
)
response.result.to_dict()

# Fetch the setup record for this template and show it.
response = wos_client.wos.get_prompt_setup(
    prompt_template_asset_id=prompt_template_asset_id,
    project_id=project_id,
)
response.result.to_dict()

# The subscription id from the setup result is the target for all later
# monitor and data set lookups.
subscription_id = response.result.to_dict()['subscription_id']
subscription_id

# Display the monitor instances attached to that subscription.
wos_client.monitor_instances.show(target_target_id=subscription_id)

検証

# Local name of the sample CSV file.
filename = "llm_content_summarization_ja.csv"
# Remove any earlier copy, then download the sample file (IPython shell magics).
!rm -fr "llm_content_summarization_ja.csv"
!wget "https://raw.githubusercontent.com/IBM/watson-openscale-samples/refs/heads/main/IBM%20Cloud/WML/assets/data/watsonx/Multi_Lingual_Support/llm_content_summarization_ja.csv"


# %%
import pandas as pd

# Load the downloaded sample CSV and display it.
df = pd.read_csv(filename)
df

# %%
# Rename the 'generated_predictions' column to 'generated_text'.
df = df.rename(columns={'generated_predictions': 'generated_text'})
df

# %%
# Write the renamed data to a new CSV without the index column; its path is
# the test data file passed to the risk evaluation later on.
new_summarization_filename = "new_summarization_data_ja.csv"
df.to_csv(new_summarization_filename, index=False)
test_data_path = new_summarization_filename

# %%
import pandas as pd

# Read the written file back and show its first rows as a check.
df = pd.read_csv(test_data_path)
df.head()
# %% Read the MRM monitor instance id
# List the "mrm" monitor instances that target the prompt's subscription.
monitor_definition_id = "mrm"
target_target_id = subscription_id
response = wos_client.monitor_instances.list(
    data_mart_id=data_mart_id,
    monitor_definition_id=monitor_definition_id,
    target_target_id=target_target_id,
    project_id=project_id,
)
response.result.to_dict()

# %%
# Take the id of the first listed instance.
first_mrm_instance = response.result.to_dict()["monitor_instances"][0]
mrm_monitor_instance_id = first_mrm_instance["metadata"]["id"]
mrm_monitor_instance_id
# %%
# Upload the prepared CSV as the "data" test set and run the risk evaluation
# on the MRM monitor instance.
test_data_set_name = "data"
content_type = "multipart/form-data"

response = wos_client.monitor_instances.mrm.evaluate_risk(
    monitor_instance_id=mrm_monitor_instance_id,
    test_data_set_name=test_data_set_name,
    test_data_path=test_data_path,
    content_type=content_type,
    body=None,
    # NOTE(review): presumably signals that the uploaded file already carries
    # the model output (the 'generated_text' column) - confirm against SDK docs.
    includes_model_output=True,
    project_id=project_id,
    # Wait for the evaluation to finish: the following cells read the
    # evaluation result and the computed metrics straight away. With
    # background_mode=True this call returns immediately and those reads can
    # run before any result exists.
    background_mode=False,
)
response.result.to_dict()

# %%
# Fetch the risk evaluation record for the monitor instance and show it.
response = wos_client.monitor_instances.mrm.get_risk_evaluation(
    monitor_instance_id=mrm_monitor_instance_id,
    project_id=project_id,
)
response.result.to_dict()

生成AI性能指標の可視化

# %% Visualize the generative AI quality metrics
# List the "generative_ai_quality" monitor instances for the same target.
monitor_definition_id = "generative_ai_quality"
response = wos_client.monitor_instances.list(
    data_mart_id=data_mart_id,
    monitor_definition_id=monitor_definition_id,
    target_target_id=target_target_id,
    project_id=project_id,
)
response.result.to_dict()

# %%
# Take the id of the first listed instance.
first_gaiquality_instance = response.result.to_dict()["monitor_instances"][0]
gaiquality_monitor_instance_id = first_gaiquality_instance["metadata"]["id"]
gaiquality_monitor_instance_id

# %%
# Display the metrics recorded for that monitor instance.
wos_client.monitor_instances.show_metrics(
    monitor_instance_id=gaiquality_monitor_instance_id,
    project_id=project_id,
)

Display record level metrics for Generative AI Quality

# %% Display record level metrics for Generative AI Quality
# List the data sets of type "gen_ai_quality_metrics" for the subscription.
response = wos_client.data_sets.list(
    target_target_id=subscription_id,
    target_target_type="subscription",
    type="gen_ai_quality_metrics",
)
response.result.to_dict()

# %%
# Take the id of the first listed data set.
first_gaiquality_data_set = response.result.to_dict()["data_sets"][0]
gaiquality_dataset_id = first_gaiquality_data_set["metadata"]["id"]
gaiquality_dataset_id

# %%
# Display the records stored in that data set.
wos_client.data_sets.show_records(data_set_id=gaiquality_dataset_id)
0
0
0

Register as a new user and use Qiita more conveniently

  1. You get articles that match your needs
  2. You can efficiently read back useful information
  3. You can use dark theme
What you can do with signing up
0
0

Delete article

Deleted articles cannot be recovered.

The draft of this article will also be deleted.

Are you sure you want to delete this article?