以下にはデータを入れる方法が書かれていないので、データを入れるための手順をメモしておきます。
データセットはこちらにあります。
事前準備
- kuromojiをデフォルトとして使う
http://dev.classmethod.jp/cloud/aws/use-elasticsearch-1-use-kuromoji/
mapping.json
{
"settings": {
"analysis": {
"analyzer": {
"ngram_analyzer": {
"tokenizer": "ngram_tokenizer"
}
},
"tokenizer": {
"ngram_tokenizer": {
"type": "nGram",
"min_gram": "2",
"max_gram": "3",
"token_chars": [
"letter",
"digit"
]
}
}
}
},
"mappings": {
"restaurant": {
"properties": {
"restaurant_id": {
"type": "integer"
},
"name": {
"type": "string",
"analyzer": "ngram_analyzer"
},
"name_alphabet": {
"type": "string",
"analyzer": "ngram_analyzer"
},
"name_kana": {
"type": "string",
"analyzer": "ngram_analyzer"
},
"address": {
"type": "string",
"analyzer": "ngram_analyzer"
},
"description": {
"type": "string",
"analyzer": "ngram_analyzer"
},
"purpose": {
"type": "string",
"analyzer": "ngram_analyzer"
},
"category": {
"type": "string",
"analyzer": "whitespace"
},
"photo_count": {
"type": "integer"
},
"menu_count": {
"type": "integer"
},
"access_count": {
"type": "integer"
},
"closed": {
"type": "boolean"
},
"location": {
"type": "geo_point",
"store": "yes"
}
}
},
"rating": {
"properties": {
"rating_id": {
"type": "integer"
},
"total": {
"type": "integer"
},
"food": {
"type": "integer"
},
"service": {
"type": "integer"
},
"atmosphere": {
"type": "integer"
},
"cost_performance": {
"type": "integer"
},
"title": {
"type": "string",
"analyzer": "ngram_analyzer"
},
"body": {
"type": "string",
"analyzer": "ngram_analyzer"
},
"purpose": {
"type": "string",
"analyzer": "ngram_analyzer"
}
}
}
}
}
index, typeを作る
curl -XPUT 'localhost:9200/ldgourmet' -d @mapping.json
データを整形する
変換出来るスクリプトを作る(さくっとかいてます)
#!/usr/bin/env ruby
require 'csv'
require 'json'
require 'securerandom'
# The first line of standard input is the CSV header row; its fields are
# parsed into an array of column names.
header = CSV.parse_line(STDIN.gets.chomp)

# Names of the Elasticsearch index and mapping type this data is destined for.
INDEX = "ldgourmet"
TYPE = "restaurant"
# Converts a dotted "A.B.C" coordinate string into a single decimal value.
#
# The first number is taken as whole units, the second is divided by 60 and
# the remainder (which may itself contain a decimal point) by 3600, i.e. the
# input is read as degrees.minutes.seconds.
#
# @param string [String, nil] coordinate text such as "35.40.12.345"
# @return [String] the decimal value rendered as a String (not a Float);
#   "0.0" when the input is nil or does not match the expected pattern
def string_to_float(string)
  match = /([0-9]+)\.([0-9]+)\.(.+)/.match(string)
  # No match leaves all three parts nil, and nil.to_f is 0.0.
  degrees, minutes, seconds = match ? match.captures : []
  total = degrees.to_f + (minutes.to_f / 60) + (seconds.to_f / 3600)
  total.to_s
end
# Convert every remaining CSV row on standard input into one JSON document
# per line on standard output.
#
# Only the document itself is printed; no bulk action/metadata line is
# emitted, so no per-row index hash or UUID is built here.
CSV(STDIN).each do |row|
  # Pair each header column name with the value at the same position.
  hash = header.zip(row).to_h
  # Add a "location" object derived from the two coordinate columns; the
  # original columns stay in the document as well.
  hash["location"] = {
    "lat" => string_to_float(hash["north_latitude"]),
    "lon" => string_to_float(hash["east_longitude"])
  }
  puts JSON.dump(hash)
end
変換する
cat restaurants.csv | ruby csv2json.rb > restaurants.simple.json
データを入れる
stream2esをダウンロード
curl -O download.elasticsearch.org/stream2es/stream2es; chmod +x stream2es
インポート
cat restaurants.simple.json | ./stream2es stdin --target http://localhost:9200/ldgourmet/restaurant
Inquisitorで見てみる
はいっていそうですね。
その他
間違えてしまってindexを消したいときは、以下を実行します(Sense使用)
DELETE ldgourmet