Evaluating an OSS LLM with watsonx.governance, using a commercial LLM (memo)


Downloading the dataset

git clone https://huggingface.co/datasets/allganize/RAG-Evaluation-Dataset-JA
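
If git is not available in the environment, the same dataset repository can also be pulled with huggingface_hub (an alternative to the clone above, not one of the original steps):

from huggingface_hub import snapshot_download

# Download the dataset repo into the directory name the scripts below expect
snapshot_download(
    repo_id="allganize/RAG-Evaluation-Dataset-JA",
    repo_type="dataset",
    local_dir="RAG-Evaluation-Dataset-JA",
)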

Downloading the documents

download_documents.py
# %%
#!pip install pandas
#!pip install requests
#!mkdir data

# %%
import pandas as pd

df = pd.read_csv("RAG-Evaluation-Dataset-JA/documents.csv")
df

# %%
import os
import requests

for index, row in df.iterrows():
    url = row['url']
    file_name = row['file_name']
    print(url)
    file = f"data/{file_name}"
    if os.path.exists(file):
        continue
    try:
        buffer = requests.get(url=url, timeout=10)
        with open(file=file, mode="wb") as f:
            f.write(buffer.content)
    except Exception as e:
        print(f"{e}")

# %%
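
Since individual downloads can fail silently, it helps to check what actually arrived. A small add-on sketch, assuming the same documents.csv and data/ layout as above:

import os

import pandas as pd

df = pd.read_csv("RAG-Evaluation-Dataset-JA/documents.csv")
# List documents that are missing or empty after the download loop
for _, row in df.iterrows():
    path = f"data/{row['file_name']}"
    if not os.path.exists(path) or os.path.getsize(path) == 0:
        print("missing or empty:", path)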

Extracting context from the documents

extract_context.py
# %%
#!pip install pandas
#!pip install requests
#!mkdir data

# %%
import pandas as pd

df = pd.read_csv("RAG-Evaluation-Dataset-JA/documents.csv")
df

# %%
import os
import requests

for index, row in df.iterrows():
    url = row['url']
    file_name = row['file_name']
    print(url)
    file = f"data/{file_name}"
    if os.path.exists(file):
        continue

    try:
        buffer = requests.get(url=url, timeout=10)
        with open(file=file, mode="wb") as f:
            f.write(buffer.content)
    except Exception as e:
        print(f"{e}")

# %%
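
The generation scripts below read a context.csv with question and context columns. A minimal sketch of one way such a file could be assembled from the downloaded documents, assuming pypdf for text extraction and that the dataset ships a question list naming the source document; the file name rag_evaluation_result.csv and the target_file_name column are assumptions and may need adjusting to the dataset's actual layout.

# %%
#!pip install pypdf

# %%
import os

import pandas as pd
from pypdf import PdfReader

# Assumed question file and column names; adjust to the dataset's actual layout
questions = pd.read_csv("RAG-Evaluation-Dataset-JA/rag_evaluation_result.csv")

contexts = []
for _, row in questions.iterrows():
    path = f"data/{row['target_file_name']}"
    text = None
    if os.path.exists(path) and path.lower().endswith(".pdf"):
        try:
            reader = PdfReader(path)
            # Concatenate the extracted text of all pages into one context string
            text = "".join(page.extract_text() or "" for page in reader.pages)
        except Exception as e:
            print(f"{e}")
    contexts.append(text)

questions["context"] = contexts

# %%
import csv

questions.to_csv(path_or_buf="context.csv", index=False, quoting=csv.QUOTE_ALL)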

Generating answers with Azure OpenAI / GPT-3.5-turbo (= reference values)

templates.py
# %%
template = """# 指示:
与えられた文脈にもとづいて質問に回答してください。

# 文脈:
{context}

# 質問:
{question}

# 回答:
"""
reference_text.py
# %%
#!pip install azure.identity
#!pip install langchain_openai

# %%
from dotenv import load_dotenv

load_dotenv(override=True)

# %%
import os

AZURE_TENANT_ID = os.environ.get("AZURE_TENANT_ID")
AZURE_CLIENT_ID = os.environ.get("AZURE_CLIENT_ID")
AZURE_CLIENT_SECRET = os.environ.get("AZURE_CLIENT_SECRET")
AZURE_OPENAI_ENDPOINT = os.environ.get('AZURE_OPENAI_ENDPOINT')
AZURE_OPENAI_DEPLOYMENT_NAME = os.environ.get("AZURE_OPENAI_DEPLOYMENT_NAME")
API_VERSION = os.environ.get("API_VERSION")
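# Expected .env entries read above via load_dotenv (all values are placeholders):
# AZURE_TENANT_ID=<tenant id>
# AZURE_CLIENT_ID=<service principal client id>
# AZURE_CLIENT_SECRET=<service principal secret>
# AZURE_OPENAI_ENDPOINT=https://<resource-name>.openai.azure.com/
# AZURE_OPENAI_DEPLOYMENT_NAME=<GPT-3.5-turbo deployment name>
# API_VERSION=<Azure OpenAI API version>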

# %%
from azure.identity import ClientSecretCredential, get_bearer_token_provider

credential = ClientSecretCredential(
    tenant_id=AZURE_TENANT_ID,
    client_id=AZURE_CLIENT_ID,
    client_secret=AZURE_CLIENT_SECRET
)

# %%
scopes = "https://cognitiveservices.azure.com/.default"
azure_ad_token_provider = get_bearer_token_provider(credential, scopes)

# %%
from langchain_openai import AzureChatOpenAI

temperature = 0
max_tokens = 4096
llm = AzureChatOpenAI(
    azure_endpoint=AZURE_OPENAI_ENDPOINT,
    api_version=API_VERSION,
    azure_deployment=AZURE_OPENAI_DEPLOYMENT_NAME,
    azure_ad_token_provider=azure_ad_token_provider,
    temperature=temperature,
    max_tokens=max_tokens,
)

# %%
from templates import template
from langchain_core.prompts import PromptTemplate

prompt_template = PromptTemplate.from_template(template=template)

# %%
import pandas as pd

df = pd.read_csv("context.csv")
df

# %%
df.dropna(subset=["context"], inplace=True)
df.shape

# %%
reference_text = []
for index, row in df.iterrows():
    print(index)
    question = row["question"]
    context = row["context"]
    input = prompt_template.format(question=question, context=context)
    print(input)
    try:
        response = llm.invoke(input=input)
        reference_text.append(response.content)
    except Exception as e:
        # Append an empty string on failure so the list stays aligned with the DataFrame rows
        reference_text.append("")
        print(f"{e}")

# %%
reference_text

# %%
df["reference_text"] = reference_text
df

# %%
import csv

df.to_csv(path_or_buf="reference_text.csv", index=False, quoting=csv.QUOTE_ALL)

# %%

Generating answers with watsonx.ai / Mistral Large (= predicted values)

generated_text.py
# %%
import os

from dotenv import load_dotenv
from langchain_ibm import WatsonxLLM

load_dotenv(override=True)

# The watsonx.ai project ID and IBM Cloud API key are assumed to be supplied via the
# environment (PROJECT_ID / API_KEY), following the dotenv pattern of the other scripts.
model_id = 'mistralai/mistral-large'
project_id = os.environ.get("PROJECT_ID")
url = 'https://us-south.ml.cloud.ibm.com/'
apikey = os.environ.get("API_KEY")
params = {
    'decoding_method': 'greedy',
    'max_new_tokens': 16384,
    'min_new_tokens': 1,
    'repetition_penalty': 1.02,
    "stop_sequences": ["</s>"]
}
llm = WatsonxLLM(
    model_id=model_id,
    project_id=project_id,
    url=url,
    params=params,
    apikey=apikey
)
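
# %%
# Optional smoke test of the watsonx.ai connection; the prompt is an arbitrary example
# and not part of the evaluation data.
print(llm.invoke("こんにちは。自己紹介してください。"))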

# %%
import pandas as pd

df = pd.read_csv("reference_text.csv")
df

# %%
from templates import template
from langchain_core.prompts import PromptTemplate

prompt_template = PromptTemplate.from_template(template=template)

# %%
generated_text = []
for index, row in df.iterrows():
    print(index)
    question = row["question"]
    context = row["context"]
    input = prompt_template.format(question=question, context=context)
#    print(input)
    try:
        response = llm.invoke(input=input)
        generated_text.append(response)
    except Exception as e:
        generated_text.append("")
        print(f"{e}")

# %%
generated_text

# %%
df["generated_text"] = generated_text
df

# %%
import csv

df.to_csv(path_or_buf="generated_text.csv", index=False, quoting=csv.QUOTE_ALL)

# %%
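
Before moving on to the evaluation, a few reference/generated pairs can be spot-checked; a minimal sketch using the columns written to generated_text.csv above:

import pandas as pd

df = pd.read_csv("generated_text.csv")
# Show the first few question / reference / prediction triples side by side
for _, row in df[["question", "reference_text", "generated_text"]].head(3).iterrows():
    print("Q   :", row["question"])
    print("REF :", str(row["reference_text"])[:200])
    print("GEN :", str(row["generated_text"])[:200])
    print("-" * 40)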

Evaluation

Evaluating the predicted values (Mistral Large 2) against the reference values (GPT-3.5-turbo)


evaluate.py
# %%
#!pip install -U ibm_watson_openscale
#!pip install -U "ibm-metrics-plugin[generative-ai-quality]~=3.0.11"
#!pip install --upgrade shap==0.42.1

# %%
#import spacy

#spacy.cli.download("en_core_web_sm")
#spacy.cli.download("ja_core_news_sm")
#!python -m nltk.downloader punkt

# %%
from dotenv import load_dotenv

load_dotenv(override=True)

# %%
import os

CPD_URL = os.environ.get("CPD_URL")
CPD_USERNAME = os.environ.get("CPD_USERNAME")
CPD_API_KEY = os.environ.get("CPD_API_KEY")

# %%
from ibm_cloud_sdk_core.authenticators import CloudPakForDataAuthenticator
from ibm_watson_openscale import *
from ibm_watson_openscale.supporting_classes.enums import *
from ibm_watson_openscale.supporting_classes import *

authenticator = CloudPakForDataAuthenticator(
    url=CPD_URL,
    username=CPD_USERNAME,
    apikey=CPD_API_KEY,
    disable_ssl_verification=True
)

wos_client = APIClient(
    service_url=CPD_URL,
    authenticator=authenticator,
)
data_mart_id = wos_client.service_instance_id
print(data_mart_id)
print(wos_client.version)

# %%
import pandas as pd

df = pd.read_csv("generated_text.csv").iloc[:10]
df

# %%
sources = df[["context", "question"]].copy()
sources

# %%
predictions = df[["generated_text"]].copy()
predictions

# %%
references = df[["reference_text"]].copy()
references

# %%
language_code = "ja"

configuration = {
    "configuration": {
        "record_level":False,
        "context_columns":["context"],
        "question_column": "question",
        "retrieval_augmented_generation": {
            "content_analysis": {},
            "faithfulness": {},
            "answer_relevance": {},
            "unsuccessful_requests": {},
            "hap_score": {},
            "pii": {
                "language_code" : language_code
            }
        },
        "language_code" : language_code
    }
}

# %%
print(configuration)

# %%
result = wos_client.llm_metrics.compute_metrics(configuration, sources, predictions, references)
result

# %%
import json
import time

# Poll until the metrics computation has finished
metrics = wos_client.llm_metrics.get_metrics_result(configuration, result)
while "in_progress" in json.dumps(metrics):
    print("in_progress")
    time.sleep(10)
    metrics = wos_client.llm_metrics.get_metrics_result(configuration, result)
metrics

# %%
{'answer_relevance': {'total_records': 10,
  'max': 0.9669,
  'mean': 0.9135,
  'metric_value': 0.9135,
  'min': 0.7612},
 'faithfulness': {'total_records': 10,
  'max': 0.0668,
  'mean': 0.0409,
  'metric_value': 0.0409,
  'min': 0.0151},
 'hap_score': {'total_records': 10,
  'max': 0.03231289982795715,
  'mean': 0.0074,
  'metric_value': 0.0074,
  'min': 0.002128486754372716},
 'pii': {'total_records': 10,
  'max': 0,
  'mean': 0.0,
  'metric_value': 0.0,
  'min': 0},
 'content_analysis': {'coverage': {'metric_value': 0.0416,
   'mean': 0.0416,
   'min': 0.0,
   'max': 0.2,
   'std': 0.0621},
  'density': {'metric_value': 0.001,
   'mean': 0.001,
   'min': 0.0,
   'max': 0.0067,
   'std': 0.002},
  'abstractness': {'metric_value': 0.7719,
   'mean': 0.7719,
   'min': 0.4194,
   'max': 1.0,
   'std': 0.2401}},
 'unsuccessful_requests': {'metric_value': 0.0,
  'mean': 0.0,
  'min': 0,
  'max': 0,
  'std': 0.0}}
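
The nested result above can be flattened into a small summary table for easier comparison across runs; a minimal sketch, assuming metrics holds the dict shown above:

import pandas as pd

rows = []
for metric, values in metrics.items():
    if metric == "content_analysis":
        # content_analysis nests one dict per sub-metric (coverage, density, abstractness)
        for sub, sub_values in values.items():
            rows.append({"metric": f"content_analysis.{sub}", **sub_values})
    else:
        rows.append({"metric": metric, **values})

summary = pd.DataFrame(rows).set_index("metric")
print(summary[["metric_value", "min", "max"]])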