使う関数
register_arm([腕(string)])
register_reward([腕を回す人(string)], [腕(string)], [報酬(float)])
[次に回す腕(string)] = select_arm([腕を回す人(string)])
Config(UCB1の場合)
method には ucb1 / softmax / exp3 / epsilon_greedy のいずれかを指定できる
ucb1.json
{
"method": "ucb1",
"parameter": {
}
}
起動
jubatus-0.7.0/config/bandit/ にサンプルの JSON が入っているので、それを使うのが楽
jubabandit -f /usr/local/src/jubatus-installer-master/download/jubatus-0.7.0/config/bandit/ucb1.json
pythonサンプル
bandit.py
#!/usr/bin/env python
# coding: utf-8
# Connection settings handed to jubatus.Bandit() when the script runs.
host = "127.0.0.1"  # address of the bandit server (local machine)
port = 9199  # port the client connects to
name = "test"  # third argument of jubatus.Bandit(); presumably the instance name -- TODO confirm
import sys
import json
import random
import jubatus
from jubatus.common import Datum
def train(client):
    """Register each sample arm and report one simulated reward for it.

    For every arm the function calls ``client.register_arm``, draws a
    reward (1.0 in 30% of draws, 0.0 otherwise), reports it for the player
    ``bandit`` through ``client.register_reward``, and echoes the arm name
    and the reward to stdout, one value per line.

    Args:
        client: Bandit client exposing ``register_arm(arm)`` and
            ``register_reward(player, arm, reward)``.
    """
    arms = [
        u'ヤフー',
        u'グーグル',
        u'楽天',
    ]
    player = u'bandit'
    for arm in arms:
        client.register_arm(arm)
        # randint(1, 100) <= 70 is true for 70 of the 100 possible values,
        # so the simulated reward is 0.0 in 70% of draws and 1.0 otherwise.
        reward = 0.0 if random.randint(1, 100) <= 70 else 1.0
        client.register_reward(player, arm, reward)
        # Python 2: the arm is a ``unicode`` object and must be encoded
        # before being written. Python 3: stdout is a text stream and
        # rejects bytes, so the native ``str`` is written as-is.
        label = arm if isinstance(arm, str) else arm.encode('utf-8')
        sys.stdout.write(label)
        sys.stdout.write('\n')
        sys.stdout.write(str(reward))
        sys.stdout.write('\n')
def predict(client):
    """Ask the bandit which arm the player should pull next and print it.

    Calls ``client.select_arm`` for the player ``bandit`` and writes the
    stringified result to stdout followed by a newline.

    Args:
        client: Bandit client exposing ``select_arm(player)``.
    """
    chosen = client.select_arm(u'bandit')
    sys.stdout.write(str(chosen) + '\n')
if __name__ == '__main__':
    # Create the bandit client from the module-level host/port/name settings.
    bandit_client = jubatus.Bandit(host, port, name)
    # Run the example: report rewards first, then ask for the next arm.
    for step in (train, predict):
        step(bandit_client)