elasticsearchと戯れることがあるので、メモ。
install
macでやる場合はbrewで一発
brew update
brew uninstall elasticsearch  # 古いやつを消す
brew install elasticsearch
macでない場合は、公式サイトからdownloadしてきて入れるのがいい
pluginインストール
- plugin -install mobz/elasticsearch-head  # クラスタ管理
- plugin -install elasticsearch/marvel/latest  # 起動してみると、kibanaっぽい画面が見られる。7日間の無料トライアル
- plugin -install polyfractal/elasticsearch-inquisitor  # Queryデバッグ
pluginをインストールするときはバージョンに注意。例えば日本語対応のために入れるkuromojiなどは、elasticsearchのバージョンによって対応するプラグインのバージョンが違う。
今回出てくる用語(ざっくり)
- index: RDBのデータベースのようなもの
- type: RDBのテーブルのようなもの
- document: データ(RDBのレコードに相当)
- mapping: テーブル定義みたいなもの
使い方
- ちゃんと動いているかは以下のコマンドで確認
curl http://localhost:9200/
- indexを作成
curl -XPUT 'http://localhost:9200/test/'
kuromojiを指定しての作成
curl -XPUT http://localhost:9200/test_kuromoji -d '{ "index": { "analysis": { "tokenizer": { "kuromoji_user_dict" : { "type":"kuromoji_tokenizer" } }, "analyzer": { "analyzer": { "type":"custom", "tokenizer": "kuromoji_user_dict" }}}}}'
- indexの統計情報(stats)確認
curl -X GET 'localhost:9200/photo_index/_stats?pretty=true'
{
"_shards" : {
"total" : 10,
"successful" : 5,
"failed" : 0
},
"_all" : {
"primaries" : {
"docs" : {
"count" : 0,
"deleted" : 0
},
"store" : {
"size_in_bytes" : 795,
"throttle_time_in_millis" : 0
},
"indexing" : {
"index_total" : 0,
"index_time_in_millis" : 0,
"index_current" : 0,
"index_failed" : 0,
"delete_total" : 0,
"delete_time_in_millis" : 0,
"delete_current" : 0,
"noop_update_total" : 0,
"is_throttled" : false,
"throttle_time_in_millis" : 0
},
"get" : {
"total" : 0,
"time_in_millis" : 0,
"exists_total" : 0,
"exists_time_in_millis" : 0,
"missing_total" : 0,
"missing_time_in_millis" : 0,
"current" : 0
},
"search" : {
"open_contexts" : 0,
"query_total" : 0,
"query_time_in_millis" : 0,
"query_current" : 0,
"fetch_total" : 0,
"fetch_time_in_millis" : 0,
"fetch_current" : 0,
"scroll_total" : 0,
"scroll_time_in_millis" : 0,
"scroll_current" : 0
},
"merges" : {
"current" : 0,
"current_docs" : 0,
"current_size_in_bytes" : 0,
"total" : 0,
"total_time_in_millis" : 0,
"total_docs" : 0,
"total_size_in_bytes" : 0,
"total_stopped_time_in_millis" : 0,
"total_throttled_time_in_millis" : 0,
"total_auto_throttle_in_bytes" : 104857600
},
"refresh" : {
"total" : 5,
"total_time_in_millis" : 0
},
"flush" : {
"total" : 5,
"total_time_in_millis" : 0
},
"warmer" : {
"current" : 0,
"total" : 15,
"total_time_in_millis" : 2
},
"query_cache" : {
"memory_size_in_bytes" : 0,
"total_count" : 0,
"hit_count" : 0,
"miss_count" : 0,
"cache_size" : 0,
"cache_count" : 0,
"evictions" : 0
},
"fielddata" : {
"memory_size_in_bytes" : 0,
"evictions" : 0
},
"percolate" : {
"total" : 0,
"time_in_millis" : 0,
"current" : 0,
"memory_size_in_bytes" : -1,
"memory_size" : "-1b",
"queries" : 0
},
"completion" : {
"size_in_bytes" : 0
},
"segments" : {
"count" : 0,
"memory_in_bytes" : 0,
"terms_memory_in_bytes" : 0,
"stored_fields_memory_in_bytes" : 0,
"term_vectors_memory_in_bytes" : 0,
"norms_memory_in_bytes" : 0,
"doc_values_memory_in_bytes" : 0,
"index_writer_memory_in_bytes" : 0,
"index_writer_max_memory_in_bytes" : 2560000,
"version_map_memory_in_bytes" : 0,
"fixed_bit_set_memory_in_bytes" : 0
},
"translog" : {
"operations" : 0,
"size_in_bytes" : 215
},
"suggest" : {
"total" : 0,
"time_in_millis" : 0,
"current" : 0
},
"request_cache" : {
"memory_size_in_bytes" : 0,
"evictions" : 0,
"hit_count" : 0,
"miss_count" : 0
},
"recovery" : {
"current_as_source" : 0,
"current_as_target" : 0,
"throttle_time_in_millis" : 0
}
},
"total" : {
"docs" : {
"count" : 0,
"deleted" : 0
},
"store" : {
"size_in_bytes" : 795,
"throttle_time_in_millis" : 0
},
"indexing" : {
"index_total" : 0,
"index_time_in_millis" : 0,
"index_current" : 0,
"index_failed" : 0,
"delete_total" : 0,
"delete_time_in_millis" : 0,
"delete_current" : 0,
"noop_update_total" : 0,
"is_throttled" : false,
"throttle_time_in_millis" : 0
},
"get" : {
"total" : 0,
"time_in_millis" : 0,
"exists_total" : 0,
"exists_time_in_millis" : 0,
"missing_total" : 0,
"missing_time_in_millis" : 0,
"current" : 0
},
"search" : {
"open_contexts" : 0,
"query_total" : 0,
"query_time_in_millis" : 0,
"query_current" : 0,
"fetch_total" : 0,
"fetch_time_in_millis" : 0,
"fetch_current" : 0,
"scroll_total" : 0,
"scroll_time_in_millis" : 0,
"scroll_current" : 0
},
"merges" : {
"current" : 0,
"current_docs" : 0,
"current_size_in_bytes" : 0,
"total" : 0,
"total_time_in_millis" : 0,
"total_docs" : 0,
"total_size_in_bytes" : 0,
"total_stopped_time_in_millis" : 0,
"total_throttled_time_in_millis" : 0,
"total_auto_throttle_in_bytes" : 104857600
},
"refresh" : {
"total" : 5,
"total_time_in_millis" : 0
},
"flush" : {
"total" : 5,
"total_time_in_millis" : 0
},
"warmer" : {
"current" : 0,
"total" : 15,
"total_time_in_millis" : 2
},
"query_cache" : {
"memory_size_in_bytes" : 0,
"total_count" : 0,
"hit_count" : 0,
"miss_count" : 0,
"cache_size" : 0,
"cache_count" : 0,
"evictions" : 0
},
"fielddata" : {
"memory_size_in_bytes" : 0,
"evictions" : 0
},
"percolate" : {
"total" : 0,
"time_in_millis" : 0,
"current" : 0,
"memory_size_in_bytes" : -1,
"memory_size" : "-1b",
"queries" : 0
},
"completion" : {
"size_in_bytes" : 0
},
"segments" : {
"count" : 0,
"memory_in_bytes" : 0,
"terms_memory_in_bytes" : 0,
"stored_fields_memory_in_bytes" : 0,
"term_vectors_memory_in_bytes" : 0,
"norms_memory_in_bytes" : 0,
"doc_values_memory_in_bytes" : 0,
"index_writer_memory_in_bytes" : 0,
"index_writer_max_memory_in_bytes" : 2560000,
"version_map_memory_in_bytes" : 0,
"fixed_bit_set_memory_in_bytes" : 0
},
"translog" : {
"operations" : 0,
"size_in_bytes" : 215
},
"suggest" : {
"total" : 0,
"time_in_millis" : 0,
"current" : 0
},
"request_cache" : {
"memory_size_in_bytes" : 0,
"evictions" : 0,
"hit_count" : 0,
"miss_count" : 0
},
"recovery" : {
"current_as_source" : 0,
"current_as_target" : 0,
"throttle_time_in_millis" : 0
}
}
},
"indices" : {
"photo_index" : {
"primaries" : {
"docs" : {
"count" : 0,
"deleted" : 0
},
"store" : {
"size_in_bytes" : 795,
"throttle_time_in_millis" : 0
},
"indexing" : {
"index_total" : 0,
"index_time_in_millis" : 0,
"index_current" : 0,
"index_failed" : 0,
"delete_total" : 0,
"delete_time_in_millis" : 0,
"delete_current" : 0,
"noop_update_total" : 0,
"is_throttled" : false,
"throttle_time_in_millis" : 0
},
"get" : {
"total" : 0,
"time_in_millis" : 0,
"exists_total" : 0,
"exists_time_in_millis" : 0,
"missing_total" : 0,
"missing_time_in_millis" : 0,
"current" : 0
},
"search" : {
"open_contexts" : 0,
"query_total" : 0,
"query_time_in_millis" : 0,
"query_current" : 0,
"fetch_total" : 0,
"fetch_time_in_millis" : 0,
"fetch_current" : 0,
"scroll_total" : 0,
"scroll_time_in_millis" : 0,
"scroll_current" : 0
},
"merges" : {
"current" : 0,
"current_docs" : 0,
"current_size_in_bytes" : 0,
"total" : 0,
"total_time_in_millis" : 0,
"total_docs" : 0,
"total_size_in_bytes" : 0,
"total_stopped_time_in_millis" : 0,
"total_throttled_time_in_millis" : 0,
"total_auto_throttle_in_bytes" : 104857600
},
"refresh" : {
"total" : 5,
"total_time_in_millis" : 0
},
"flush" : {
"total" : 5,
"total_time_in_millis" : 0
},
"warmer" : {
"current" : 0,
"total" : 15,
"total_time_in_millis" : 2
},
"query_cache" : {
"memory_size_in_bytes" : 0,
"total_count" : 0,
"hit_count" : 0,
"miss_count" : 0,
"cache_size" : 0,
"cache_count" : 0,
"evictions" : 0
},
"fielddata" : {
"memory_size_in_bytes" : 0,
"evictions" : 0
},
"percolate" : {
"total" : 0,
"time_in_millis" : 0,
"current" : 0,
"memory_size_in_bytes" : -1,
"memory_size" : "-1b",
"queries" : 0
},
"completion" : {
"size_in_bytes" : 0
},
"segments" : {
"count" : 0,
"memory_in_bytes" : 0,
"terms_memory_in_bytes" : 0,
"stored_fields_memory_in_bytes" : 0,
"term_vectors_memory_in_bytes" : 0,
"norms_memory_in_bytes" : 0,
"doc_values_memory_in_bytes" : 0,
"index_writer_memory_in_bytes" : 0,
"index_writer_max_memory_in_bytes" : 2560000,
"version_map_memory_in_bytes" : 0,
"fixed_bit_set_memory_in_bytes" : 0
},
"translog" : {
"operations" : 0,
"size_in_bytes" : 215
},
"suggest" : {
"total" : 0,
"time_in_millis" : 0,
"current" : 0
},
"request_cache" : {
"memory_size_in_bytes" : 0,
"evictions" : 0,
"hit_count" : 0,
"miss_count" : 0
},
"recovery" : {
"current_as_source" : 0,
"current_as_target" : 0,
"throttle_time_in_millis" : 0
}
},
"total" : {
"docs" : {
"count" : 0,
"deleted" : 0
},
"store" : {
"size_in_bytes" : 795,
"throttle_time_in_millis" : 0
},
"indexing" : {
"index_total" : 0,
"index_time_in_millis" : 0,
"index_current" : 0,
"index_failed" : 0,
"delete_total" : 0,
"delete_time_in_millis" : 0,
"delete_current" : 0,
"noop_update_total" : 0,
"is_throttled" : false,
"throttle_time_in_millis" : 0
},
"get" : {
"total" : 0,
"time_in_millis" : 0,
"exists_total" : 0,
"exists_time_in_millis" : 0,
"missing_total" : 0,
"missing_time_in_millis" : 0,
"current" : 0
},
"search" : {
"open_contexts" : 0,
"query_total" : 0,
"query_time_in_millis" : 0,
"query_current" : 0,
"fetch_total" : 0,
"fetch_time_in_millis" : 0,
"fetch_current" : 0,
"scroll_total" : 0,
"scroll_time_in_millis" : 0,
"scroll_current" : 0
},
"merges" : {
"current" : 0,
"current_docs" : 0,
"current_size_in_bytes" : 0,
"total" : 0,
"total_time_in_millis" : 0,
"total_docs" : 0,
"total_size_in_bytes" : 0,
"total_stopped_time_in_millis" : 0,
"total_throttled_time_in_millis" : 0,
"total_auto_throttle_in_bytes" : 104857600
},
"refresh" : {
"total" : 5,
"total_time_in_millis" : 0
},
"flush" : {
"total" : 5,
"total_time_in_millis" : 0
},
"warmer" : {
"current" : 0,
"total" : 15,
"total_time_in_millis" : 2
},
"query_cache" : {
"memory_size_in_bytes" : 0,
"total_count" : 0,
"hit_count" : 0,
"miss_count" : 0,
"cache_size" : 0,
"cache_count" : 0,
"evictions" : 0
},
"fielddata" : {
"memory_size_in_bytes" : 0,
"evictions" : 0
},
"percolate" : {
"total" : 0,
"time_in_millis" : 0,
"current" : 0,
"memory_size_in_bytes" : -1,
"memory_size" : "-1b",
"queries" : 0
},
"completion" : {
"size_in_bytes" : 0
},
"segments" : {
"count" : 0,
"memory_in_bytes" : 0,
"terms_memory_in_bytes" : 0,
"stored_fields_memory_in_bytes" : 0,
"term_vectors_memory_in_bytes" : 0,
"norms_memory_in_bytes" : 0,
"doc_values_memory_in_bytes" : 0,
"index_writer_memory_in_bytes" : 0,
"index_writer_max_memory_in_bytes" : 2560000,
"version_map_memory_in_bytes" : 0,
"fixed_bit_set_memory_in_bytes" : 0
},
"translog" : {
"operations" : 0,
"size_in_bytes" : 215
},
"suggest" : {
"total" : 0,
"time_in_millis" : 0,
"current" : 0
},
"request_cache" : {
"memory_size_in_bytes" : 0,
"evictions" : 0,
"hit_count" : 0,
"miss_count" : 0
},
"recovery" : {
"current_as_source" : 0,
"current_as_target" : 0,
"throttle_time_in_millis" : 0
}
}
}
}
}
- analyzerの指定
curl -XGET 'http://localhost:9200/test/_analyze?pretty=true&analyzer=kuromoji' -d '東京都目黒区'
出力結果
{
"tokens" : [ {
"token" : "東京",
"start_offset" : 0,
"end_offset" : 2,
"type" : "word",
"position" : 1
}, {
"token" : "都",
"start_offset" : 2,
"end_offset" : 3,
"type" : "word",
"position" : 2
}, {
"token" : "目黒",
"start_offset" : 3,
"end_offset" : 5,
"type" : "word",
"position" : 3
}, {
"token" : "区",
"start_offset" : 5,
"end_offset" : 6,
"type" : "word",
"position" : 4
} ]
}
- ドキュメントの追加・更新
curl -XPUT http://localhost:9200/test/hoge_type/1 -d '{ "title":"としの名前", "text":"渋谷区だ" }'
curl -XPUT http://localhost:9200/test/hoge_type/1 -d '{ "title":"と都市名前", "text":"渋新宿区だ" }'
- ドキュメントの検索 kuromojiを使ってない場合
curl -XGET http://localhost:9200/test/hoge_type/_search -d '{ "query": { "match": { "title":"都市" } } }'
出力結果
{"took":17,"timed_out":false,"_shards":{"total":5,"successful":5,"failed":0},"hits":{"total":1,"max_score":0.18985549,"hits":[{"_index":"test","_type":"hoge_type","_id":"1","_score":0.18985549,"_source":{ "title":"と都市名前", "text":"渋新宿区だ" }}]}}
kuromojiを使っている場合
curl -XPUT http://localhost:9200/test_kuromoji/test/1 -d '{ "title":"メモ1", "text":"ラーメンは飲み物" }'
curl -XPUT http://localhost:9200/test_kuromoji/test/2 -d '{ "title":"メモ2", "text":"ラーメンライスは和食" }'
上記のデータを検索する場合の例
curl -XGET http://localhost:9200/test_kuromoji/test/_search -d '{"query":{"match":{"text":"ラーメン"}}}'
curl -XGET http://localhost:9200/test_kuromoji/test/_search -d '{"query":{"match":{"text":"ラーメンは"}}}'
{"took":9,"timed_out":false,"_shards":{"total":5,"successful":5,"failed":0},"hits":{"total":2,"max_score":0.18985549,"hits":[{"_index":"test_kuromoji","_type":"test","_id":"1","_score":0.18985549,"_source":{ "title":"メモ1", "text":"ラーメンは飲み物" }},{"_index":"test_kuromoji","_type":"test","_id":"2","_score":0.02250402,"_source":{ "title":"メモ2", "text":"ラーメンライスは和食" }}]}}
- マッピング テスト用のindexを作る
curl -XPUT http://localhost:9200/blog -d '{ "index": { "analysis": { "tokenizer": { "kuromoji_user_dict" : { "type":"kuromoji_tokenizer" } }, "analyzer": { "analyzer": { "type":"custom", "tokenizer": "kuromoji_user_dict" }}}}}'
データ投入
curl -XPUT 'localhost:9200/blog/story/1' -d '{
"title" : "Elasticsearch 特徴まとめ",
"subtitle" : "Elasticsearch Features — 主にシステムを中心とした特徴まとめ",
"contents": "Elasticsearch とは?簡単に説明すると、クラウド向けに構築された、RESTful な APIを提供する分散型のサーチエンジンアプリケーション。オープンソースで提供されています。",
"tags" : ["Elasticsearch", "Search-Engine"],
"pub_date" : "2014-04-10T01:40:00",
"author" : "Kunihiko Kido",
"views" : 82,
"reads": 60,
"read_ratio": 0.73,
"enabled": true
}'
マッピングの確認
curl -XGET 'localhost:9200/blog/_mapping/story?pretty=true'
結果
{
"blog" : {
"mappings" : {
"story" : {
"properties" : {
"author" : {
"type" : "string"
},
"contents" : {
"type" : "string"
},
"enabled" : {
"type" : "boolean"
},
"pub_date" : {
"type" : "date",
"format" : "dateOptionalTime"
},
"read_ratio" : {
"type" : "double"
},
"reads" : {
"type" : "long"
},
"subtitle" : {
"type" : "string"
},
"tags" : {
"type" : "string"
},
"title" : {
"type" : "string"
},
"views" : {
"type" : "long"
}
}
}
}
}
}
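上記は自動マッピング(dynamic mapping)の結果。
でも大抵の場合は手動でマッピングする必要がある。以下のようにindex作成時にマッピングを指定する場合、同名のindexがすでに存在するとエラーになるので、先に curl -XDELETE 'localhost:9200/blog' で削除しておく。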
curl -XPUT 'localhost:9200/blog' -d '
{
"mappings" : {
"story" : {
"properties" : {
"author" : {
"type" : "string"
},
"contents" : {
"type" : "string",
"analyzer": "kuromoji"
},
"enabled" : {
"type" : "boolean"
},
"pub_date" : {
"type" : "date",
"format" : "dateOptionalTime"
},
"read_ratio" : {
"type" : "double"
},
"reads" : {
"type" : "long"
},
"subtitle" : {
"type" : "string",
"analyzer": "kuromoji"
},
"title" : {
"type" : "string",
"analyzer": "kuromoji"
},
"views" : {
"type" : "long"
}
}
}
}
}'
設定したマッピングで動作確認
curl -XGET 'localhost:9200/blog/_analyze?field=author&pretty=true' -d "Search-Engine"
{
"tokens" : [ {
"token" : "search",
"start_offset" : 0,
"end_offset" : 6,
"type" : "<ALPHANUM>",
"position" : 1
}, {
"token" : "engine",
"start_offset" : 7,
"end_offset" : 13,
"type" : "<ALPHANUM>",
"position" : 2
} ]
}
- cluster: こちらは別ページにまとめる。