Notes on testing HAP & PII in Japanese with watsonx.governance

Posted at 2025-04-16
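
All of the scripts below load their credentials and IDs from a .env file via python-dotenv. As a quick sanity check, a hypothetical check_env.py (not part of the original set) can verify that the expected keys are present; the key names simply mirror the os.environ.get calls in the scripts that follow, and the values are site-specific.

check_env.py
# %%
import os

from dotenv import load_dotenv

load_dotenv(override=True)

# Keys mirrored from the scripts below; values are site-specific.
REQUIRED_KEYS = [
    "CPD_URL", "CPD_USERNAME", "CPD_PASSWORD", "CPD_API_KEY",
    "PROJECT_ID", "SPACE_ID", "DEPLOYMENT_ID",
    "PROMPT_TEMPLATE_ASSET_ID", "PROMPT_TEMPLATE_ASSET_ID_PRODUCTION",
    "SUBSCRIPTION_ID_PRODUCTION",
    "AZURE_OPENAI_DEPLOYMENT_NAME", "AZURE_OPENAI_ENDPOINT",
    "CLOUD_API_KEY", "SERVICE_INSTANCE_ID",
]
missing = [key for key in REQUIRED_KEYS if not os.environ.get(key)]
print("missing keys:", missing or "none")
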
templates.py
# %%
template_rag = """# 指示:
与えられた文脈にもとづいて質問に回答してください。

# 文脈:
{context}

# 質問:
{question}

# 回答:
"""
# %%
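
As a quick local check, the two placeholders in template_rag can be filled with str.format; watsonx.governance later substitutes the same names as prompt variables, so this snippet only illustrates the expected shape.

# %%
# Local illustration only: fill the {context} and {question} placeholders.
from templates import template_rag

print(template_rag.format(
    context="watsonx.governance monitors generative AI quality metrics such as HAP and PII.",
    question="Which metrics does watsonx.governance monitor?",
))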

create_detached_prompte_template.py
# %%
#!pip install ibm_aigov_facts_client
#!pip install tabulate

# %%
from dotenv import load_dotenv

load_dotenv(override=True)

# %%
import os

CPD_URL = os.environ.get("CPD_URL")
CPD_USERNAME = os.environ.get("CPD_USERNAME")
CPD_API_KEY = os.environ.get("CPD_API_KEY")
PROJECT_ID = os.environ.get("PROJECT_ID")

# %%
from ibm_aigov_facts_client import AIGovFactsClient
from ibm_aigov_facts_client import CloudPakforDataConfig

creds = CloudPakforDataConfig(
    service_url=CPD_URL,
    username=CPD_USERNAME,
    api_key=CPD_API_KEY
)

facts_client = AIGovFactsClient(
    cloud_pak_for_data_configs=creds,
    container_id=PROJECT_ID,
    container_type="project",
    disable_tracing=True
)

# %%
from ibm_aigov_facts_client import DetachedPromptTemplate, PromptTemplate

AZURE_OPENAI_DEPLOYMENT_NAME = os.environ.get("AZURE_OPENAI_DEPLOYMENT_NAME")
AZURE_OPENAI_ENDPOINT = os.environ.get("AZURE_OPENAI_ENDPOINT")

detached_information = DetachedPromptTemplate(
    prompt_id="rag-azure-openai-gpt-35-turbo",
    model_id=AZURE_OPENAI_DEPLOYMENT_NAME,
    model_provider="azure-ml-openai",
    model_name=AZURE_OPENAI_DEPLOYMENT_NAME,
    model_url=AZURE_OPENAI_ENDPOINT,
    prompt_url="prompt_url",
    prompt_additional_info={"model_owner": "Microsoft"}
)

# %%
from templates import template_rag

prompt_template = PromptTemplate(
    input=template_rag,
    prompt_variables={"context": "", "question": ""},
    input_prefix="",
    output_prefix="",
)

# %%
response = facts_client.assets.create_detached_prompt(
    name="rag-azure-openai-gpt-35-turbo",
    model_id=AZURE_OPENAI_DEPLOYMENT_NAME,
    task_id="retrieval_augmented_generation",
    detached_information=detached_information,
    description="RAG based on Azure OpenAI GPT-3.5-turbo",
    prompt_details=prompt_template)

# %%
response
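
# %%
# Hedged follow-up (not in the original): the OpenScale scripts later read
# PROMPT_TEMPLATE_ASSET_ID from .env, so record the ID of the detached prompt
# asset created above. Where exactly the ID appears in `response` is an
# assumption; inspect the printed structure and copy the asset ID into .env.
asset_info = response.to_dict() if hasattr(response, "to_dict") else response
print(asset_info)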

# %%


execute_detached_prompte_template.py
# %%
#!pip install ibm_watson_openscale
#!pip install "pandas<=2.1.9"

# %%
from dotenv import load_dotenv

load_dotenv(override=True)

# %%
import os

CPD_URL = os.environ.get("CPD_URL")
CPD_USERNAME = os.environ.get("CPD_USERNAME")
CPD_PASSWORD = os.environ.get("CPD_PASSWORD")
CPD_API_KEY = os.environ.get("CPD_API_KEY")

PROJECT_ID = os.environ.get("PROJECT_ID")
PROMPT_TEMPLATE_ASSET_ID = os.environ.get("PROMPT_TEMPLATE_ASSET_ID")

# %%
from ibm_cloud_sdk_core.authenticators import CloudPakForDataAuthenticator
from ibm_watson_openscale import *
from ibm_watson_openscale.supporting_classes.enums import *
from ibm_watson_openscale.supporting_classes import *

authenticator = CloudPakForDataAuthenticator(
    url=CPD_URL,
    username=CPD_USERNAME,
    password=CPD_PASSWORD,
    disable_ssl_verification=True
)

wos_client = APIClient(
    service_url=CPD_URL,
    authenticator=authenticator,
)
data_mart_id = wos_client.service_instance_id
print(data_mart_id)
print(wos_client.version)

# %%
from ibm_watson_openscale.base_classes import ApiRequestFailure

try:
    wos_client.wos.add_instance_mapping(
        service_instance_id=data_mart_id,
        project_id=PROJECT_ID
    )
except ApiRequestFailure as arf:
    if arf.response.status_code == 409:
        # Instance mapping already exists
        pass
    else:
        raise

# %%
wos_client.data_marts.show()

# %%
from ibm_metrics_plugin.metrics.llm.utils.constants import LLMTextMetricGroup, LLMCommonMetrics, LLMRAGMetrics, RetrievalQualityMetric

supporting_monitors = {
    "generative_ai_quality": {
        "parameters": {
            "min_sample_size": 10,
            "metrics_configuration": {
            }
        }
    }
}
import json
print(json.dumps(supporting_monitors, indent=2, ensure_ascii=False))
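
# %%
# Hedged sketch (not in the original): HAP and PII can be requested explicitly
# instead of relying on the defaults by naming them in metrics_configuration
# via the constants imported above. The layout mirrors the configuration used
# later in compute_metrics.py; whether execute_prompt_setup accepts exactly
# this shape is an assumption, so verify it against your OpenScale version.
supporting_monitors_explicit = {
    "generative_ai_quality": {
        "parameters": {
            "min_sample_size": 10,
            "metrics_configuration": {
                LLMTextMetricGroup.RAG.value: {
                    LLMCommonMetrics.HAP_SCORE.value: {},
                    LLMCommonMetrics.PII_DETECTION.value: {"language_code": "ja"},
                },
            },
        }
    }
}
print(json.dumps(supporting_monitors_explicit, indent=2, ensure_ascii=False))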

# %%
language_code = "ja"

response = wos_client.wos.execute_prompt_setup(
    prompt_template_asset_id=PROMPT_TEMPLATE_ASSET_ID,
    label_column="reference_text",
    operational_space_id="development",
    problem_type="retrieval_augmented_generation",
    input_data_type="unstructured_text",
    data_input_locale=[language_code],
    generated_output_locale=[language_code],
    project_id=PROJECT_ID,
    context_fields=["context"],
    question_field="question",
    supporting_monitors=supporting_monitors,
    background_mode=True)
response.result.to_dict()

# %%
response = wos_client.wos.get_prompt_setup(
    prompt_template_asset_id=PROMPT_TEMPLATE_ASSET_ID,
    project_id=PROJECT_ID)
response.result.to_dict()
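
# %%
# Hedged sketch (not in the original): with background_mode=True the setup runs
# asynchronously, so poll get_prompt_setup until the serialized result stops
# reporting an in-progress state. Matching on the raw JSON string (the same
# pattern as the polling loop in compute_metrics.py) avoids guessing the exact
# status field name.
import time

status = response.result.to_dict()
while "in_progress" in json.dumps(status) or "running" in json.dumps(status):
    time.sleep(10)
    status = wos_client.wos.get_prompt_setup(
        prompt_template_asset_id=PROMPT_TEMPLATE_ASSET_ID,
        project_id=PROJECT_ID).result.to_dict()
print(json.dumps(status, indent=2, ensure_ascii=False))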

# %%

execute_detached_prompte_template_production.py
# %%
from dotenv import load_dotenv

load_dotenv(override=True)

# %%
import os

CPD_URL = os.environ.get("CPD_URL")
CPD_USERNAME = os.environ.get("CPD_USERNAME")
CPD_PASSWORD = os.environ.get("CPD_PASSWORD")
CPD_API_KEY = os.environ.get("CPD_API_KEY")

PROJECT_ID = os.environ.get("PROJECT_ID")

SPACE_ID = os.environ.get("SPACE_ID")
PROMPT_TEMPLATE_ASSET_ID = os.environ.get("PROMPT_TEMPLATE_ASSET_ID_PRODUCTION")
DEPLOYMENT_ID = os.environ.get("DEPLOYMENT_ID")

# %%
from ibm_cloud_sdk_core.authenticators import CloudPakForDataAuthenticator
from ibm_watson_openscale import *
from ibm_watson_openscale.supporting_classes.enums import *
from ibm_watson_openscale.supporting_classes import *

authenticator = CloudPakForDataAuthenticator(
    url=CPD_URL,
    username=CPD_USERNAME,
    password=CPD_PASSWORD,
    disable_ssl_verification=True
)

wos_client = APIClient(
    service_url=CPD_URL,
    authenticator=authenticator,
)
data_mart_id = wos_client.service_instance_id
print(data_mart_id)
print(wos_client.version)

# %%
from ibm_watson_openscale.base_classes import ApiRequestFailure

try:
    wos_client.wos.add_instance_mapping(
        service_instance_id=data_mart_id,
        space_id=SPACE_ID     # <- This is the important part!
    )
except ApiRequestFailure as arf:
    if arf.response.status_code == 409:
        # Instance mapping already exists
        pass
    else:
        raise

# %%
supporting_monitors = {
    "generative_ai_quality": {
        "parameters": {
            "min_sample_size": 10,
            "metrics_configuration": {
            }
        }
    }
}

# %%
language_code = "ja"

response = wos_client.wos.execute_prompt_setup(
    prompt_template_asset_id=PROMPT_TEMPLATE_ASSET_ID,
    space_id=SPACE_ID,
    deployment_id=DEPLOYMENT_ID,
    label_column="reference_text",
    operational_space_id="production",
    problem_type="retrieval_augmented_generation",
    input_data_type="unstructured_text",
    data_input_locale=[language_code],
    generated_output_locale=[language_code],
    context_fields=["context"],
    question_field="question",
    supporting_monitors=supporting_monitors,
    background_mode=True)
response.result.to_dict()

# %%
response = wos_client.monitor_instances.mrm.get_prompt_setup(
    prompt_template_asset_id=PROMPT_TEMPLATE_ASSET_ID,
    deployment_id=DEPLOYMENT_ID,
    space_id=SPACE_ID)
response.result.to_dict()

# %%

compute_metrics.py
# %%
!pip install -U ibm_watson_openscale
!pip install -U "ibm-metrics-plugin[generative-ai-quality]~=3.0.11"
!pip install python-dotenv
!pip install certifi

# %%
import spacy

spacy.cli.download("en_core_web_sm")
spacy.cli.download("ja_core_news_sm")
#!python -m nltk.downloader punkt
 
# %%
from dotenv import load_dotenv

load_dotenv(override=True)

# %%
import os

CLOUD_API_KEY = os.environ.get("CLOUD_API_KEY")
SERVICE_INSTANCE_ID = os.environ.get("SERVICE_INSTANCE_ID")

# %%
import certifi

os.environ["REQUESTS_CA_BUNDLE"] = certifi.where()

# %%
from ibm_cloud_sdk_core.authenticators import IAMAuthenticator
from ibm_watson_openscale import *
from ibm_watson_openscale.supporting_classes.enums import *
from ibm_watson_openscale.supporting_classes import *

authenticator = IAMAuthenticator(apikey=CLOUD_API_KEY)
client = APIClient(authenticator=authenticator, service_instance_id=SERVICE_INSTANCE_ID)
client.version

# %%
from ibm_metrics_plugin.metrics.llm.utils.constants import LLMTextMetricGroup, LLMCommonMetrics, LLMRAGMetrics, RetrievalQualityMetric

# %%
import json

language_code = "ja"

configuration = {
    "configuration": {
        "record_level": False,
        "context_columns": ["context"],
        "question_column": "generated_text",
        LLMTextMetricGroup.RAG.value: {
            LLMCommonMetrics.HAP_SCORE.value: {},
            LLMCommonMetrics.PII_DETECTION.value: {
                "language_code": language_code
            }
        },
        "language_code": language_code
    },
}
print(json.dumps(configuration, indent=2, ensure_ascii=False))

# %%
#import pandas as pd

#try:
#    file_path = "pii_high.json"
#    with open(file_path, "r", encoding="utf-8") as f:
#        data = json.load(f)
#    dataset = pd.json_normalize(data=data)
#except Exception as e:
#    print(f"{e}")
#dataset

# %%
#sources = dataset[["context", "question"]].copy()
#predictions = dataset[["generated_text"]].copy()
#references = dataset[["reference_text"]].copy()

# %%
#result = client.llm_metrics.compute_metrics(configuration, sources, predictions, references)
#result

# %%
#metrics = client.llm_metrics.get_metrics_result(configuration, result)
#metrics

# %%
#import time

#while "in_progress" in json.dumps(metrics):
#    print("in_progress")
#    time.sleep(4)
#    metrics = client.llm_metrics.get_metrics_result(configuration, result)
#metrics

# %%
#print(json.dumps(metrics, indent=2, ensure_ascii=False))
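
The commented-out cells above read their records from pii_high.json. For a self-contained run, the same column layout can be built in memory; a minimal sketch follows (the sample record is made up, and the original memo evaluates Japanese text rather than this English placeholder).

# %%
import time

import pandas as pd

# Made-up sample with the same columns the commented cells above expect.
records = [{
    "context": "Taro Yamada's phone number is 090-1234-5678.",
    "question": "What is Taro Yamada's phone number?",
    "generated_text": "Taro Yamada's phone number is 090-1234-5678.",
    "reference_text": "090-1234-5678",
}]
dataset = pd.json_normalize(records)

sources = dataset[["context", "question"]].copy()
predictions = dataset[["generated_text"]].copy()
references = dataset[["reference_text"]].copy()

# Same call pattern as the commented cells above.
result = client.llm_metrics.compute_metrics(configuration, sources, predictions, references)
metrics = client.llm_metrics.get_metrics_result(configuration, result)
while "in_progress" in json.dumps(metrics):
    time.sleep(4)
    metrics = client.llm_metrics.get_metrics_result(configuration, result)
print(json.dumps(metrics, indent=2, ensure_ascii=False))
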
payload.py
# %%
from dotenv import load_dotenv

load_dotenv(override=True)

# %%
import os

CPD_URL = os.environ.get("CPD_URL")
CPD_USERNAME = os.environ.get("CPD_USERNAME")
CPD_PASSWORD = os.environ.get("CPD_PASSWORD")
CPD_API_KEY = os.environ.get("CPD_API_KEY")

PROJECT_ID = os.environ.get("PROJECT_ID")

SPACE_ID = os.environ.get("SPACE_ID")
SUBSCRIPTION_ID = os.environ.get("SUBSCRIPTION_ID_PRODUCTION")

# %%
from ibm_cloud_sdk_core.authenticators import CloudPakForDataAuthenticator
from ibm_watson_openscale import *
from ibm_watson_openscale.supporting_classes.enums import *
from ibm_watson_openscale.supporting_classes import *

authenticator = CloudPakForDataAuthenticator(
    url=CPD_URL,
    username=CPD_USERNAME,
    password=CPD_PASSWORD,
    disable_ssl_verification=True
)

wos_client = APIClient(
    service_url=CPD_URL,
    authenticator=authenticator,
)
data_mart_id = wos_client.service_instance_id
data_mart_id

# %%
wos_client.version

# %%
from ibm_watson_openscale.supporting_classes.enums import *

data_set_id = None
response = wos_client.data_sets.list(
    type=DataSetTypes.PAYLOAD_LOGGING,
    target_target_id=SUBSCRIPTION_ID,
    target_target_type=TargetTypes.SUBSCRIPTION)
response.result.to_dict()

# %%
data_set_id = response.result.to_dict()['data_sets'][1]['metadata']['id']
data_set_id
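
# %%
# Hedged sketch (not in the original): instead of hard-coding index [1] above,
# list every returned data set with its ID and target so the payload-logging
# set can be picked explicitly. The 'entity'/'target' keys are an assumption
# about the response layout; adjust after inspecting response.result.to_dict().
for ds in response.result.to_dict().get("data_sets", []):
    meta = ds.get("metadata", {})
    entity = ds.get("entity", {})
    target = entity.get("target", {})
    print(meta.get("id"), entity.get("type"), target.get("target_id"))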

# %%
#import json
#import pandas as pd

#with open(file="hap_low_pii_low.json", mode="r", encoding="utf-8") as f:
#    data = json.load(f)
#dataset = pd.json_normalize(data=data)
#dataset

#request_body = []
#for index, row in dataset.iterrows():
#    print(index)
#    question = row.question
#    context = row.context
#    generated_text = row.generated_text
#    tmp = {
#        "request": {
#            "parameters": {
#                "template_variables": {
#                    "context": context,
#                    "question": question
#                }
#            }
#        },
#        "response": {
#            "results": [
#                {
#                    "generated_text": generated_text
#                }
#            ]
#        }
#    }
#    request_body.append(tmp)

# %%
#print(json.dumps(request_body, indent=2, ensure_ascii=False))

# %%
#response = wos_client.data_sets.store_records(
#   data_set_id=data_set_id,
#   request_body=request_body,
#   background_mode=True)
#response.result.to_dict()

# %%
#wos_client.data_sets.get_records_count(data_set_id=data_set_id)

# %%

app.py
# %%
#!pip install gradio

# %%
import gradio as gr
import json
import pandas as pd
from payload import wos_client, data_set_id

# %%
def rag(file_path):
    dataset = pd.DataFrame()  # returned unchanged if loading fails below
    try:
        with open(file_path, "r", encoding="utf-8") as f:
            data = json.load(f)
        dataset = pd.json_normalize(data=data)

        # %%
        request_body = []
        for index, row in dataset.iterrows():
            #print(index)
            question = row.question
            context = row.context
            generated_text = row.generated_text
            tmp = {
                "request": {
                    "parameters": {
                        "template_variables": {
                            "context": context,
                            "question": question
                        }
                    }
                },
                "response": {
                    "results": [
                        {
                            "generated_text": generated_text
                        }
                    ]
                }
            }
            request_body.append(tmp)

        # %%
        #print(json.dumps(request_body, indent=2, ensure_ascii=False))

        # %%
        response = wos_client.data_sets.store_records(
           data_set_id=data_set_id,
           request_body=request_body,
           background_mode=True)
        print(response.result.to_dict())

        # %%
        print(wos_client.data_sets.get_records_count(data_set_id=data_set_id))

    except Exception as e:
        print(f"{e}")
    return dataset

# %%
with gr.Blocks() as app:
    filepath = gr.File(label="json", file_types=[".json"])
    button = gr.Button(value="compute_metrics")
    dataset = gr.DataFrame(label="dataset", wrap=True)
#    status = gr.Json(label="status")
    button.click(fn=rag, inputs=[filepath], outputs=[dataset])

# %%
app.launch(server_name="0.0.0.0")
#app.launch(server_name="0.0.0.0", server_port=8080)
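
The uploaded file must be a JSON array of records with context, question, and generated_text fields, which is what rag() reads (reference_text is not required here). A minimal sample file, with a made-up record and a hypothetical file name:

# %%
# Write a minimal sample file in the shape rag() expects.
import json

sample = [{
    "context": "Hanako Suzuki lives at 1-2-3 Nihonbashi, Chuo-ku, Tokyo.",
    "question": "Where does Hanako Suzuki live?",
    "generated_text": "Hanako Suzuki lives at 1-2-3 Nihonbashi, Chuo-ku, Tokyo.",
}]
with open("sample_payload.json", "w", encoding="utf-8") as f:
    json.dump(sample, f, ensure_ascii=False, indent=2)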

My impression is that the PII detection is a little lenient.
