Pythonパッケージazure-ai-textanalyticsの6.0.0b2を使ってみました。
以下には5.Xの書き方があるのですが、6.X の書き方が見つかりにくかったので、記録に残しておきます。
シナリオ
以下の3つのAPIを使っています。5.Xだと「個人を特定できる情報の検出」でカテゴリ(名前や組織等)の指定ができなかったので、プレリリースながら6.Xを使いました。
- キーフレーズの抽出
- 感情分析
- 個人を特定できる情報の検出
Python
Ubuntu 24.04でPython 3.14.2を使っています。
前提としてMicrosoft Foundryリソースを作成しています。
5.Xと比べると、すべてのAPIがanalyze_text関数に集約されていて、bodyに渡す内容でコントロールしているのがわかります。
from azure.ai.textanalytics import TextAnalysisClient
from azure.core.credentials import AzureKeyCredential
from azure.ai.textanalytics.models import (
AnalyzeTextKeyPhraseResult,
ConfidenceScoreThreshold,
EntityMaskPolicyType,
KeyPhraseActionContent,
PiiActionContent,
MultiLanguageTextInput,
MultiLanguageInput,
TextPiiEntitiesRecognitionInput,
TextKeyPhraseExtractionInput,
AnalyzeTextPiiResult,
TextSentimentAnalysisInput,
AnalyzeTextSentimentResult,
)
# Placeholder key and endpoint read by authenticate_client(); substitute the
# values of your own resource for "<key>" and "<resource>" before running.
language_key = "<key>"
language_endpoint = "https://<resource>.cognitiveservices.azure.com/"
# Authenticate the client using your key and endpoint
def authenticate_client():
    """Create the text analysis client from the module-level settings.

    Returns:
        A TextAnalysisClient built with ``language_endpoint`` as its endpoint
        and an AzureKeyCredential wrapping ``language_key`` as its credential.
    """
    return TextAnalysisClient(
        endpoint=language_endpoint,
        credential=AzureKeyCredential(language_key),
    )
# Module-level client instance passed to analyze_text() by the demo calls
client = authenticate_client()
def pre_analyze_text(kind: str, text: str, language: str = "ja") -> tuple:
    """
    Build the request body and expected result type for one analysis kind.

    Args:
        kind: Type of analysis - "sentiment", "key_phrase", or "pii"
        text: Text to analyze
        language: Language code attached to the single input document.
            Defaults to "ja", the value that used to be hard-coded here.

    Returns:
        A ``(body, result_type)`` tuple. ``body`` is the input object meant
        for ``client.analyze_text(body=...)``; ``result_type`` is the result
        class the caller can check the response against.

    Raises:
        ValueError: If ``kind`` is not one of the supported values. The
            message names the rejected kind and lists the accepted ones.
    """
    # Validate up front so a bad kind fails before any request object is built
    if kind not in ("sentiment", "key_phrase", "pii"):
        raise ValueError(
            f"Invalid analysis kind: {kind!r}. "
            "Use 'sentiment', 'key_phrase', or 'pii'."
        )

    # Common input structure: one document with a fixed id of "1"
    common_input = MultiLanguageTextInput(
        multi_language_inputs=[
            MultiLanguageInput(id="1", text=text, language=language)
        ]
    )

    # Build body based on kind
    if kind == "sentiment":
        body = TextSentimentAnalysisInput(text_input=common_input)
        result_type = AnalyzeTextSentimentResult
    elif kind == "key_phrase":
        body = TextKeyPhraseExtractionInput(
            text_input=common_input,
            action_content=KeyPhraseActionContent(model_version="latest"),
        )
        result_type = AnalyzeTextKeyPhraseResult
    else:  # kind == "pii" (guaranteed by the validation above)
        body = TextPiiEntitiesRecognitionInput(
            text_input=common_input,
            action_content=PiiActionContent(
                # Restrict detection to these categories
                pii_categories=[
                    "Organization",
                    "Email",
                    "URL",
                    "PhoneNumber",
                    "Person",
                    "Address",
                ],
                redaction_policies=[
                    EntityMaskPolicyType(policy_name="entityMask", is_default=True),
                ],
                # NOTE(review): presumably entities scoring below 0.5 are
                # ignored - confirm against the SDK documentation
                confidence_score_threshold=ConfidenceScoreThreshold(default=0.5),
            ),
        )
        result_type = AnalyzeTextPiiResult
    return body, result_type
# Unified text analysis function
def analyze_text(client, kind: str, text: str):
    """
    Run one kind of text analysis and return a printable result.

    Args:
        client: TextAnalysisClient instance
        kind: Type of analysis - "sentiment", "key_phrase", or "pii"
        text: Text to analyze

    Returns:
        - "sentiment": the ``sentiment.value`` of the first document
        - "key_phrase": ``str()`` of the first document's key phrases, or
          None when there are none
        - "pii": the first document's ``redacted_text``
        - the raw result object when the response has an unexpected type or
          contains no documents
        - None when any exception is raised (the error is printed instead)
    """
    try:
        request_body, expected_type = pre_analyze_text(kind, text)
        response = client.analyze_text(body=request_body)

        usable = (
            isinstance(response, expected_type)
            and response.results
            and response.results.documents
        )
        if not usable:
            print("No documents in the response or unexpected result type.")
            return response

        # Only the first document is inspected
        document = response.results.documents[0]
        print(f"\nDocument ID: {document.id}")

        # Output based on kind
        if kind == "sentiment":
            return document.sentiment.value
        if kind == "key_phrase":
            return str(document.key_phrases) if document.key_phrases else None
        if kind == "pii":
            return document.redacted_text
    except Exception as exc:
        # Best-effort boundary: report the failure and signal it with None
        print(f"Error executing analysis: {exc}")
        return None
# Test the unified function
text_sample = "Microsoft 人事部の山田さんには080-1234-5678かyamada@test.comで連絡して。"

# Each entry pairs the heading to print with the analysis kind to run
demo_runs = (
    ("=== Sentiment Analysis ===", "sentiment"),
    ("\n=== Key Phrase Extraction ===", "key_phrase"),
    ("\n=== PII Recognition ===", "pii"),
)
for heading, demo_kind in demo_runs:
    print(heading)
    print(analyze_text(client, demo_kind, text_sample))
以下は実行結果です。
=== Sentiment Analysis ===
Document ID: 1
neutral
=== Key Phrase Extraction ===
Document ID: 1
['Microsoft 人事部', '山田さん']
=== PII Recognition ===
Document ID: 1
[ORGANIZATION-1] 人事部の[PERSON-2]さんには[PHONENUMBER-3]か[EMAIL-4]で連絡して。