まず、watsonx.ai / Watson Studio 上で、context、質問(question)、回答(answer)、生成された回答(generated_text)を含む検証データを作成します。以下では、neural-bridge/rag-dataset-12000 の context と question から回答を生成し、検証データを作成しています。
!pip install --upgrade openai azure-identity
from datasets import load_dataset
import pandas as pd

# Load the RAG dataset used as the source of contexts and questions.
ds = load_dataset("neural-bridge/rag-dataset-12000")
ds

# Keep only the first 10 rows of the "test" split.
# `head()` returns a slice of the parent frame; a `generated_text` column is
# added to `df` later, and assigning to a slice can raise pandas'
# SettingWithCopyWarning. Taking an explicit copy makes `df` independent.
df = pd.DataFrame(ds['test']).head(n=10).copy()
df
# Azure OpenAI / Microsoft Entra ID settings.
# These are placeholder values to be replaced by the user. Elsewhere in this
# file each one is passed as the fallback to `os.environ.get(<same name>, ...)`,
# so an environment variable of the same name takes precedence when it is set.
AZURE_OPENAI_ENDPOINT = "<INPUT YOUR AZURE_OPENAI_ENDPOINT>"
AZURE_OPENAI_DEPLOYMENT_NAME = "<INPUT YOUR AZURE_OPENAI_DEPLOYMENT_NAME>"
# Service-principal credentials (client id / secret / tenant).
AZURE_CLIENT_ID = "<INPUT YOUR AZURE_CLIENT_ID>"
AZURE_CLIENT_SECRET = "<INPUT YOUR AZURE_CLIENT_SECRET>"
AZURE_TENANT_ID = "<INPUT YOUR AZURE_TENANT_ID>"
# Prompt sent to the model for every row. `{context}` and `{question}` are
# filled in with `str.format`. The template is assembled line by line so that
# it carries no leading or trailing whitespace.
PROMPT_TEMPLATE = "\n".join(
    (
        "# System:",
        "Answer the question based on the context.",
        "# Context:",
        "{context}",
        "# Question",
        "{question}",
        "# Answer:",
    )
)
import os

from azure.identity import ClientSecretCredential, get_bearer_token_provider

# Build a client-secret credential. Each value is read from the environment
# variable of the same name, falling back to the constant defined in this file.
credential = ClientSecretCredential(
    tenant_id=os.getenv('AZURE_TENANT_ID', AZURE_TENANT_ID),
    client_id=os.getenv('AZURE_CLIENT_ID', AZURE_CLIENT_ID),
    client_secret=os.getenv('AZURE_CLIENT_SECRET', AZURE_CLIENT_SECRET),
)

# Scope for which bearer tokens are requested.
scope = "https://cognitiveservices.azure.com/.default"

# Token provider built from the credential and scope; it is handed to the
# AzureOpenAI client as `azure_ad_token_provider`.
azure_ad_token_provider = get_bearer_token_provider(credential, scope)
from openai import AzureOpenAI

# Endpoint comes from the environment when set, otherwise from the constant
# defined in this file.
azure_endpoint = os.getenv('AZURE_OPENAI_ENDPOINT', AZURE_OPENAI_ENDPOINT)

# Azure OpenAI client that authenticates via the token provider created above
# rather than an API key.
client = AzureOpenAI(
    azure_endpoint=azure_endpoint,
    api_version="2024-02-15-preview",
    azure_ad_token_provider=azure_ad_token_provider,
)
# Upper bound on the number of tokens generated per answer.
max_tokens = 4096

# Generated answers, collected in the same order as the rows of `df`.
tmp = []
for index, context, question in zip(df.index, df['context'], df['question']):
    print(index)  # progress indicator: index label of the row being processed
    # Deployment name: environment variable first, file constant as fallback.
    deployment = os.getenv('AZURE_OPENAI_DEPLOYMENT_NAME', AZURE_OPENAI_DEPLOYMENT_NAME)
    prompt = PROMPT_TEMPLATE.format(context=context, question=question)
    completion = client.chat.completions.create(
        model=deployment,
        messages=[{"role": "user", "content": prompt}],
        max_tokens=max_tokens,
    )
    # Keep only the text of the first returned choice.
    tmp.append(completion.choices[0].message.content)
tmp

# Attach the generated answers as a new column next to the reference data.
df['generated_text'] = tmp
df
from ibm_watson_studio_lib import access_project_or_space

# NOTE(review): TOKEN is not defined in this section of the file; it must be
# provided before this point (presumably a project access token — confirm).
wslib = access_project_or_space({'token': TOKEN})

# Write the validation data to a local CSV (without the index column), then
# upload that file through wslib, replacing any existing file of that name.
csv_path = "test.csv"
df.to_csv(csv_path, index=False)
res = wslib.upload_file(csv_path, overwrite=True)
print(res)