はじめに
海外ドラマやYouTubeの英会話チャネルを見ているときに「あっ、この表現かっこいい。覚えよう。」と思ったけれど、数日後には「あれ、何を覚えようとしていたっけ。」となったりしませんか?そんなあなた(自分)のために、覚えたい単語、フレーズをしつこく教えてくれる、あなただけの英会話講師(Parrot Tutor)をAlexaに召喚します。
Parrot Tutorはどのようなスキル?
Student: "Alexa, talk to parrot tutor."
Alexa: "Welcome, I can help you remember words or phrases. Please ask me to add what you want to remember."
Student: "Add a word, righteous."
Alexa: "Righteous. Is it ok?"
Student: "Yes."
Alexa: "I remember the word, righteous, which means 正義の."
Student: "I want to learn 3 words."
Alexa: "Righteous, 正義の. Excellent, ..."
手順
スキルの作成
Alexa developer consoleにログインし、新しいスキルを作成します。デフォルトの言語は英語(米国)を選択します。バックエンドリソースは、ユーザー定義のプロビジョニングを選択します。
呼び出し名
呼び出し名を変更しておきます。
モデルの作成
Alexa developer console上で対話モデルを作成します。
{
"interactionModel": {
"languageModel": {
"invocationName": "parrot tutor",
"intents": [
{
"name": "AMAZON.CancelIntent",
"samples": []
},
{
"name": "AMAZON.HelpIntent",
"samples": []
},
{
"name": "AMAZON.StopIntent",
"samples": []
},
{
"name": "AMAZON.NavigateHomeIntent",
"samples": []
},
{
"name": "AMAZON.FallbackIntent",
"samples": []
},
{
"name": "AddWordIntent",
"slots": [
{
"name": "wordSlot",
"type": "AMAZON.SearchQuery"
}
],
"samples": [
"add a word {wordSlot}",
"append a word {wordSlot}",
"store a word {wordSlot}",
"save a word {wordSlot}",
"remember a word {wordSlot}"
]
},
{
"name": "LearnIntent",
"slots": [
{
"name": "countSlot",
"type": "AMAZON.NUMBER"
},
{
"name": "headingTypeSlot",
"type": "PluralHeadingType"
}
],
"samples": [
"I want to learn {headingTypeSlot}",
"learn {headingTypeSlot}",
"speak {headingTypeSlot}",
"tell me {headingTypeSlot}",
"tell me {countSlot} {headingTypeSlot}",
"speak {countSlot} {headingTypeSlot}",
"learn {countSlot} {headingTypeSlot}",
"I want to learn {countSlot} {headingTypeSlot}"
]
},
{
"name": "AddPhraseIntent",
"slots": [
{
"name": "phraseSlot",
"type": "AMAZON.SearchQuery"
}
],
"samples": [
"remember a phrase {phraseSlot}",
"save a phrase {phraseSlot}",
"store a phrase {phraseSlot}",
"append a phrase {phraseSlot}",
"add a phrase {phraseSlot}"
]
}
],
"types": [
{
"name": "PluralHeadingType",
"values": [
{
"name": {
"value": "phrases"
}
},
{
"name": {
"value": "words"
}
}
]
}
]
},
"dialog": {
"intents": [
{
"name": "AddWordIntent",
"confirmationRequired": true,
"prompts": {
"confirmation": "Confirm.Intent.321024708461"
},
"slots": [
{
"name": "wordSlot",
"type": "AMAZON.SearchQuery",
"confirmationRequired": false,
"elicitationRequired": false,
"prompts": {}
}
]
},
{
"name": "AddPhraseIntent",
"confirmationRequired": true,
"prompts": {
"confirmation": "Confirm.Intent.334458378973"
},
"slots": [
{
"name": "phraseSlot",
"type": "AMAZON.SearchQuery",
"confirmationRequired": false,
"elicitationRequired": false,
"prompts": {}
}
]
}
],
"delegationStrategy": "ALWAYS"
},
"prompts": [
{
"id": "Confirm.Intent.321024708461",
"variations": [
{
"type": "PlainText",
"value": "{wordSlot} . Is it ok?"
}
]
},
{
"id": "Confirm.Intent.334458378973",
"variations": [
{
"type": "PlainText",
"value": "{phraseSlot} . Is it ok?"
}
]
}
]
}
}
Lambda環境の構築
Alexa-hostedスキルとして作成すると、AWSアカウントがなくてもAlexa開発者コンソールだけで作成、編集、公開が完結します。しかし、今回はTranslate APIなどAWSの機能を使いたいので、サービスのエンドポイントは独自に構築したAWS Lambdaでホストします。
Lambda Layerパッケージの作成
$ mkdir parrot && cd parrot
$ pyenv local 3.8.10
$ python -m venv venv
$ . venv/bin/activate
$ mkdir -p layer/python && cd layer
$ pip install -t python -r python/requirements.txt
$ zip -r python.zip python
ask-sdk-core==1.11.0
ask-sdk-dynamodb-persistence-adapter==1.15.0
boto3==1.9.216
Lambda関数の作成
AWSマネジメントコンソールからLambdaサービスにアクセスして、Lambda関数を作成します。
レイヤーパッケージのアップロード
先程作成したpython.zip をアップロードしてカスタムレイヤーを作成します。
レイヤーの追加
作成したLambda関数を選択して、レイヤーを追加します。
トリガーの設定
Alexa developer consoleのAlexaスキル一覧で、「スキルIDをコピー」を選択します。Lambdaサービスで、Lambda関数にトリガーを追加します。スキルID検証を有効にして、コピーしたスキルIDをペーストします。
lambda_function.pyの実装
# -*- coding: utf-8 -*-
import boto3
import decimal
import learning_db
import logging
import random
import ask_sdk_core.utils as ask_utils
from ask_sdk_core.skill_builder import SkillBuilder
from ask_sdk_core.dispatch_components import AbstractRequestHandler
from ask_sdk_core.dispatch_components import AbstractExceptionHandler
from ask_sdk_core.handler_input import HandlerInput
from ask_sdk_model import Response
from datetime import datetime
logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)
class LaunchRequestHandler(AbstractRequestHandler):
    """Respond to the skill being launched with a welcome message."""

    def can_handle(self, handler_input):
        """Return whether the incoming request is a ``LaunchRequest``."""
        is_launch = ask_utils.is_request_type("LaunchRequest")
        return is_launch(handler_input)

    def handle(self, handler_input):
        """Build a response that speaks the welcome text and reuses it as the reprompt."""
        welcome = "Welcome, I can help you remember words or phrases. Please ask me to add what you want to remember."
        builder = handler_input.response_builder
        return builder.speak(welcome).ask(welcome).response
class AddIntentHandler(AbstractRequestHandler):
    """Handler for Add Item Intent.

    One instance serves one heading type. The type passed to the constructor
    selects the intent name (``Add<heading_type>Intent``) and the slot name
    (``<heading_type lowercased>Slot``) this instance works with.
    """

    def __init__(self, heading_type):
        """Store the heading type (e.g. ``"Word"`` or ``"Phrase"``) for this instance."""
        self.heading_type = heading_type

    def can_handle(self, handler_input):
        """Return whether the request is the ``Add<heading_type>Intent`` intent."""
        return ask_utils.is_intent_name(f"Add{self.heading_type}Intent")(handler_input)

    def handle(self, handler_input):
        """Translate the spoken heading to Japanese, store it, and read it back.

        Nothing is stored when the user rejected the confirmation prompt or
        when the slot carries no value. Uses the module-level ``db`` and
        ``translate`` objects.
        """
        # Function-scope import so the block does not depend on file-level imports.
        from xml.sax.saxutils import escape

        reprompt = "Add another item or learn to memorize?"
        builder = handler_input.response_builder
        kind = self.heading_type.lower()

        # The dialog model asks "Is it ok?" before this handler runs; a "no"
        # answer arrives here with a DENIED confirmation status and must not
        # be persisted. The status may be an enum or a plain string, so
        # compare on its value.
        intent = handler_input.request_envelope.request.intent
        status = getattr(intent, "confirmation_status", None)
        if getattr(status, "value", status) == "DENIED":
            return (
                builder
                .speak(f"OK, I won't add the {kind}.")
                .ask(reprompt)
                .response
            )

        heading = ask_utils.get_slot_value(handler_input, f"{kind}Slot")
        if not heading:
            # Without text there is nothing to translate or store.
            return (
                builder
                .speak(f"Sorry, I didn't catch the {kind}. Please try again.")
                .ask(reprompt)
                .response
            )

        user_id = ask_utils.get_user_id(handler_input)
        response = translate.translate_text(
            Text=heading,
            SourceLanguageCode="en",
            TargetLanguageCode="ja"
        )
        translation = response['TranslatedText']
        db.put_item(user_id, heading, self.heading_type.upper(), translation=translation, sequence=0, next_sequence=0)

        # Escape XML special characters (&, <, >) so the text cannot break the SSML markup.
        translation_output = f"<voice name=\"Joanna\"><lang xml:lang=\"ja-JP\">{escape(translation)}</lang></voice>"
        speak_output = f"I remember the {self.heading_type}, {escape(heading)}, which means {translation_output}."
        return (
            builder
            .speak(speak_output)
            .ask(reprompt)
            .response
        )
class LearnIntentHandler(AbstractRequestHandler):
    """Handler for Learn Intent.

    Reads back stored words or phrases, starting with the ones that have the
    lowest ``learned_count``, and increments the count of each item read.
    """

    # Number of items to read when the user gives no usable count.
    DEFAULT_COUNT = 3

    def can_handle(self, handler_input):
        """Return whether the request is the ``LearnIntent`` intent."""
        return ask_utils.is_intent_name("LearnIntent")(handler_input)

    @classmethod
    def _parse_count(cls, count_slot):
        """Convert the raw ``countSlot`` value to a positive int, else the default."""
        try:
            count = int(count_slot)
        except (TypeError, ValueError):
            # None (slot not filled) or a non-numeric value such as "?".
            return cls.DEFAULT_COUNT
        return count if count > 0 else cls.DEFAULT_COUNT

    def handle(self, handler_input):
        """Pick up to the requested number of items, speak them, and record the review.

        Uses the module-level ``db`` object. Any ``headingTypeSlot`` value
        other than ``"words"`` is treated as phrases.
        """
        # Function-scope import so the block does not depend on file-level imports.
        from xml.sax.saxutils import escape

        user_id = ask_utils.get_user_id(handler_input)
        count = self._parse_count(ask_utils.get_slot_value(handler_input, "countSlot"))
        heading_type_slot = ask_utils.get_slot_value(handler_input, "headingTypeSlot")
        heading_type = "word" if heading_type_slot == "words" else "phrase"
        heading_type_keyword = heading_type.upper()
        max_learned_count = db.get_max_learned_count(user_id, heading_type_keyword)

        # Walk the learned_count buckets from least reviewed upwards and
        # shuffle inside each bucket, stopping once enough items are collected.
        candidates = []
        for learned_count in range(max_learned_count + 1):
            bucket = db.query_item(user_id, heading_type_keyword, learned_count)
            candidates.extend(random.sample(bucket, len(bucket)))
            if len(candidates) >= count:
                break

        selected = candidates[:count]
        total = len(selected)
        outputs = []
        for item in selected:
            heading = item['heading']
            # Escape XML special characters so stored text cannot break the SSML markup.
            translation = f"<voice name=\"Joanna\"><lang xml:lang=\"ja-JP\">{escape(item['translation'])}</lang></voice>"
            outputs.append(f"{escape(heading)}. {translation}")
            # The item is about to move to learned_count + 1; keep the
            # recorded maximum at least that high so the bucket stays reachable.
            if item['learned_count'] >= max_learned_count:
                max_learned_count = int(item['learned_count']) + 1
            db.increment_learned_count(user_id, heading)

        if total > 0:
            db.update_max_learned_count(user_id, heading_type_keyword, max_learned_count)
            plural = "s" if total > 1 else ""
            if total != count:
                speak_output = f"You've added only {total} {heading_type}{plural}. " + ". ".join(outputs)
            else:
                speak_output = f"Let's learn {total} {heading_type}{plural}. " + ". ".join(outputs)
        else:
            speak_output = f"You haven't added any {heading_type} yet."

        reprompt = "Add another item or learn to memorize?"
        return (
            handler_input.response_builder
            .speak(speak_output)
            .ask(reprompt)
            .response
        )
class HelpIntentHandler(AbstractRequestHandler):
    """Answer ``AMAZON.HelpIntent`` with a summary of the supported utterances."""

    def can_handle(self, handler_input):
        """Return whether the request is the ``AMAZON.HelpIntent`` intent."""
        is_help = ask_utils.is_intent_name("AMAZON.HelpIntent")
        return is_help(handler_input)

    def handle(self, handler_input):
        """Build a response that speaks the help text and reuses it as the reprompt."""
        help_text = "You can say add a word something or add a phrase something or learn words or learn phrases. What would you like to do?"
        builder = handler_input.response_builder
        return builder.speak(help_text).ask(help_text).response
class CancelOrStopIntentHandler(AbstractRequestHandler):
    """Say goodbye for both the Cancel and the Stop built-in intents."""

    def can_handle(self, handler_input):
        """Return whether the request is ``AMAZON.CancelIntent`` or ``AMAZON.StopIntent``."""
        return any(
            ask_utils.is_intent_name(intent_name)(handler_input)
            for intent_name in ("AMAZON.CancelIntent", "AMAZON.StopIntent")
        )

    def handle(self, handler_input):
        """Build a response that only speaks the farewell (no reprompt is set)."""
        farewell = "Goodbye!"
        builder = handler_input.response_builder
        return builder.speak(farewell).response
class FallbackIntentHandler(AbstractRequestHandler):
    """Answer ``AMAZON.FallbackIntent`` with a hint about what the user can say."""

    def can_handle(self, handler_input):
        """Return whether the request is the ``AMAZON.FallbackIntent`` intent."""
        is_fallback = ask_utils.is_intent_name("AMAZON.FallbackIntent")
        return is_fallback(handler_input)

    def handle(self, handler_input):
        """Log the fallback and build a response with a hint and a separate reprompt."""
        logger.info("In FallbackIntentHandler")
        hint = "Hmm, I'm not sure. You can say Add, Learn, or Help. What would you like to do?"
        retry = "I didn't catch that. What can I help you with?"
        return (
            handler_input.response_builder
            .speak(hint)
            .ask(retry)
            .response
        )
class SessionEndedRequestHandler(AbstractRequestHandler):
    """Handle the end of a session; no speech is added to the response."""

    def can_handle(self, handler_input):
        """Return whether the incoming request is a ``SessionEndedRequest``."""
        is_session_end = ask_utils.is_request_type("SessionEndedRequest")
        return is_session_end(handler_input)

    def handle(self, handler_input):
        """Return the response builder's response as is."""
        # Any clean-up logic would go here; there is none at the moment.
        builder = handler_input.response_builder
        return builder.response
class IntentReflectorHandler(AbstractRequestHandler):
    """Echo the name of any intent request that reaches this handler."""

    def can_handle(self, handler_input):
        """Return whether the incoming request is an ``IntentRequest`` of any kind."""
        is_intent_request = ask_utils.is_request_type("IntentRequest")
        return is_intent_request(handler_input)

    def handle(self, handler_input):
        """Build a response that speaks the triggered intent's name (no reprompt is set)."""
        triggered = ask_utils.get_intent_name(handler_input)
        message = "".join(("You just triggered ", triggered, "."))
        builder = handler_input.response_builder
        return builder.speak(message).response
class CatchAllExceptionHandler(AbstractExceptionHandler):
    """Log any exception and apologise to the user."""

    def can_handle(self, handler_input, exception):
        """Accept every exception."""
        return True

    def handle(self, handler_input, exception):
        """Log the exception with its traceback and build an apology that is also the reprompt."""
        logger.error(exception, exc_info=True)
        apology = "Sorry, I had trouble doing what you asked. Please try again."
        builder = handler_input.response_builder
        return builder.speak(apology).ask(apology).response
# Module-level objects that the intent handlers read as globals.
db = learning_db.LearningDB()
translate = boto3.client(service_name='translate', region_name='us-west-1', use_ssl=True)

sb = SkillBuilder()

# Handlers are registered in this order. IntentReflectorHandler accepts every
# IntentRequest, so it must stay last or it would override the custom intent
# handlers listed before it.
for _request_handler in (
    LaunchRequestHandler(),
    AddIntentHandler("Word"),
    AddIntentHandler("Phrase"),
    LearnIntentHandler(),
    HelpIntentHandler(),
    CancelOrStopIntentHandler(),
    FallbackIntentHandler(),
    SessionEndedRequestHandler(),
    IntentReflectorHandler(),
):
    sb.add_request_handler(_request_handler)

sb.add_exception_handler(CatchAllExceptionHandler())

# Entry point to configure on the Lambda function.
lambda_handler = sb.lambda_handler()
DynamoDB
フレーズや単語は、DynamoDBに格納します。まず、Lambda関数からDynamoDBにアクセスできるようにIAMでロールにポリシーを追加します。Lambda関数のロールはLambda関数を作成した際に自動的に生成されています。
グローバルセカンダリインデックス(GSI)のキーにuser_idとlearned_count(学習回数)を用いることで、学習回数でクエリできるようにします。ユーザーには学習回数の少ないものから指定個数を復習対象として提示します。
import decimal
import json
import logging
from datetime import datetime

import boto3
from boto3.dynamodb.conditions import Key, Attr
from botocore.exceptions import ClientError
class LearningDB():
    """DynamoDB-backed storage for the headings a user wants to memorise.

    Two tables are used:

    * the textbook table, keyed by ``user_id`` + ``heading``, holding one
      item per stored word/phrase, with a ``learned_count_index`` global
      secondary index keyed by ``user_id`` + ``learned_count``;
    * the learning record table, keyed by ``user_id`` + ``heading_type``,
      holding ``max_learned_count`` per user and heading type.
    """

    TEXTBOOK_TABLE_NAME = "ask.parrot_tutor.learning_db.textbook_table"
    LEARNING_RECORD_TABLE_NAME = "ask.parrot_tutor.learning_db.learning_record_table"

    _logger = logging.getLogger(__name__)

    def __init__(self):
        """Create both tables, or attach to them when creation fails."""
        self.create_textbook_table()
        self.create_learning_record_table()

    def _create_or_attach(self, table_name, **definition):
        """Try to create ``table_name``; on a client error return a handle to it instead.

        The fallback is kept for every ``ClientError`` (best effort), but any
        error other than "table already exists" is logged so that it is not
        silently hidden.
        """
        dynamodb = boto3.resource('dynamodb')
        try:
            # NOTE(review): a table that was just created may not be usable
            # right away — confirm whether the first run needs to wait.
            return dynamodb.create_table(TableName=table_name, **definition)
        except ClientError as e:
            error_code = e.response.get('Error', {}).get('Code')
            if error_code != 'ResourceInUseException':
                self._logger.warning(
                    "create_table failed for %s (%s); using the table handle as is",
                    table_name, error_code)
            return dynamodb.Table(table_name)

    def create_textbook_table(self):
        """Set ``self.textbook_table`` to the textbook table, creating it if possible."""
        self.textbook_table = self._create_or_attach(
            self.TEXTBOOK_TABLE_NAME,
            KeySchema=[
                {'AttributeName': 'user_id', 'KeyType': 'HASH'},
                {'AttributeName': 'heading', 'KeyType': 'RANGE'},
            ],
            AttributeDefinitions=[
                {'AttributeName': 'user_id', 'AttributeType': 'S'},
                {'AttributeName': 'heading', 'AttributeType': 'S'},
                {'AttributeName': 'learned_count', 'AttributeType': 'N'},
            ],
            ProvisionedThroughput={
                'ReadCapacityUnits': 5,
                'WriteCapacityUnits': 5
            },
            GlobalSecondaryIndexes=[
                {
                    'IndexName': 'learned_count_index',
                    'KeySchema': [
                        {'AttributeName': 'user_id', 'KeyType': 'HASH'},
                        {'AttributeName': 'learned_count', 'KeyType': 'RANGE'},
                    ],
                    'Projection': {
                        'ProjectionType': 'INCLUDE',
                        'NonKeyAttributes': [
                            'heading_type',
                            'learned_at',
                            'translation'
                        ]
                    },
                    'ProvisionedThroughput': {
                        'ReadCapacityUnits': 5,
                        'WriteCapacityUnits': 5
                    }
                }
            ]
        )

    def create_learning_record_table(self):
        """Set ``self.learning_record_table`` to the learning record table, creating it if possible."""
        self.learning_record_table = self._create_or_attach(
            self.LEARNING_RECORD_TABLE_NAME,
            KeySchema=[
                {'AttributeName': 'user_id', 'KeyType': 'HASH'},
                {'AttributeName': 'heading_type', 'KeyType': 'RANGE'},
            ],
            AttributeDefinitions=[
                {'AttributeName': 'user_id', 'AttributeType': 'S'},
                {'AttributeName': 'heading_type', 'AttributeType': 'S'},
            ],
            ProvisionedThroughput={
                'ReadCapacityUnits': 5,
                'WriteCapacityUnits': 5
            }
        )

    def put_item(self, user_id, heading, heading_type, translation="", sequence=0, next_sequence=0):
        """Write a heading to the textbook table with ``learned_count`` set to 0.

        ``created_at``, ``updated_at`` and ``learned_at`` are all set to the
        current timestamp.
        """
        ts = decimal.Decimal(datetime.now().timestamp())
        item = {
            'user_id': user_id,
            'heading': heading,
            'created_at': ts,
            'updated_at': ts,
            'learned_at': ts,
            'learned_count': 0,
            'heading_type': heading_type,
            'translation': translation,
            'sequence': sequence,
            'next_sequence': next_sequence
        }
        self.textbook_table.put_item(
            Item=item
        )

    def query_item(self, user_id, heading_type, max_learned_count):
        """Return the user's items of ``heading_type`` whose ``learned_count`` equals ``max_learned_count``.

        Despite its name, ``max_learned_count`` is matched exactly. All
        result pages are collected before returning.
        """
        query_args = {
            'IndexName': "learned_count_index",
            'KeyConditionExpression': (
                Key('user_id').eq(user_id) & Key('learned_count').eq(max_learned_count)
            ),
            # Exact match: a substring test would also accept any type whose
            # name merely contains the requested one.
            'FilterExpression': Attr('heading_type').eq(heading_type),
        }
        items = []
        while True:
            response = self.textbook_table.query(**query_args)
            items.extend(response.get('Items', []))
            # A LastEvaluatedKey means the query stopped before the end of
            # the result set; continue from that key.
            last_key = response.get('LastEvaluatedKey')
            if not last_key:
                return items
            query_args['ExclusiveStartKey'] = last_key

    def get_max_learned_count(self, user_id, heading_type):
        """Return the stored ``max_learned_count``, creating the record with 0 when absent.

        Returns 0 when DynamoDB reports a client error.
        """
        key = {
            'user_id': user_id,
            'heading_type': heading_type
        }
        try:
            response = self.learning_record_table.get_item(Key=key)
            if 'Item' in response:
                return int(response['Item']['max_learned_count'])
            self.learning_record_table.put_item(
                Item={
                    'user_id': user_id,
                    'heading_type': heading_type,
                    'max_learned_count': 0
                })
            return 0
        except ClientError:
            self._logger.warning(
                "Could not read max_learned_count; falling back to 0", exc_info=True)
            return 0

    def increment_learned_count(self, user_id, heading):
        """Add 1 to the heading's ``learned_count`` and set ``learned_at`` to now."""
        ts = decimal.Decimal(datetime.now().timestamp())
        self.textbook_table.update_item(
            Key={
                'user_id': user_id,
                'heading': heading
            },
            UpdateExpression='set learned_at=:learned_at, learned_count=learned_count + :one',
            ExpressionAttributeValues={
                ':learned_at': ts,
                ':one': 1
            },
            ReturnValues="UPDATED_NEW"
        )

    def update_max_learned_count(self, user_id, heading_type, count):
        """Set ``max_learned_count`` for the user and heading type to ``count``."""
        self.learning_record_table.update_item(
            Key={
                'user_id': user_id,
                'heading_type': heading_type
            },
            UpdateExpression='set max_learned_count=:max_learned_count',
            ExpressionAttributeValues={
                ':max_learned_count': count
            },
            ReturnValues="UPDATED_NEW"
        )
Translate
登録した単語やフレーズは、Translate APIを使用して翻訳します。DynamoDBと同様にIAMでロールにTranslate APIのポリシーを登録します。
Translate APIの呼び出しは簡単で、次のようにboto3.clientのインスタンスを作り、translate_textを呼び出すだけです。
import boto3

# Client for the Translate service.
translate = boto3.client(
    service_name='translate',
    region_name='us-west-1',
    use_ssl=True,
)

# Translate the English heading to Japanese and keep only the translated text.
translation = translate.translate_text(
    Text=heading,
    SourceLanguageCode="en",
    TargetLanguageCode="ja",
)['TranslatedText']
日本語の翻訳部分は次のようなSSMLで記述して英語と日本語を両方発話できるようにします。
# Wrap the translated text in SSML <voice> and <lang xml:lang="ja-JP"> tags.
f"<voice name=\"Joanna\"><lang xml:lang=\"ja-JP\">{translation}</lang></voice>"
エンドポイントの設定
最後に、AWS Lambda関数のARNをスキルのエンドポイントに設定します。
Alexa Parrot Tutorスキルのエンドポイントを変更する。
テスト
Alexa developer consoleのシミュレータで、動かしてみます。
まとめ
単語やフレーズを登録してAlexaに繰り返し話してもらうスキルを作成しました。登録の削除ができなかったり、翻訳が適当だったりと改良点はまだまだあるので、もっと効果的な英会話アプリに育ててから公開予定です。