LoginSignup
6

More than 5 years have passed since last update.

Elasticsearchチュートリアルでデータを入れる

Last updated at Posted at 2016-08-16

以下のチュートリアルにはデータを入れる方法が書かれていないので、データを投入するための手順をメモしておきます。

データセットはこちらにあります。

事前準備

mapping.json

{
  "settings": {
    "analysis": {
      "analyzer": {
        "ngram_analyzer": {
          "tokenizer": "ngram_tokenizer"
        }
      },
      "tokenizer": {
        "ngram_tokenizer": {
          "type": "nGram",
          "min_gram": "2",
          "max_gram": "3",
          "token_chars": [
            "letter",
            "digit"
          ]
        }
      }
    }
  },
  "mappings": {
    "restaurant": {
      "properties": {
        "restaurant_id": {
          "type": "integer"
        },
        "name": {
          "type": "string",
          "analyzer": "ngram_analyzer"
        },
        "name_alphabet": {
          "type": "string",
          "analyzer": "ngram_analyzer"
        },
        "name_kana": {
          "type": "string",
          "analyzer": "ngram_analyzer"
        },
        "address": {
          "type": "string",
          "analyzer": "ngram_analyzer"
        },
        "description": {
          "type": "string",
          "analyzer": "ngram_analyzer"
        },
        "purpose": {
          "type": "string",
          "analyzer": "ngram_analyzer"
        },
        "category": {
          "type": "string",
          "analyzer": "whitespace"
        },
        "photo_count": {
          "type": "integer"
        },
        "menu_count": {
          "type": "integer"
        },
        "access_count": {
          "type": "integer"
        },
        "closed": {
          "type": "boolean"
        },
        "location": {
          "type": "geo_point",
          "store": "yes"
        }
      }
    },
    "rating": {
      "properties": {
        "rating_id": {
          "type": "integer"
        },
        "total": {
          "type": "integer"
        },
        "food": {
          "type": "integer"
        },
        "service": {
          "type": "integer"
        },
        "atmosphere": {
          "type": "integer"
        },
        "cost_performance": {
          "type": "integer"
        },
        "title": {
          "type": "string",
          "analyzer": "ngram_analyzer"
        },
        "body": {
          "type": "string",
          "analyzer": "ngram_analyzer"
        },
        "purpose": {
          "type": "string",
          "analyzer": "ngram_analyzer"
        }
      }
    }
  }
}

index, typeを作る

curl -XPUT 'localhost:9200/ldgourmet' -d @mapping.json

データを整形する

変換出来るスクリプトを作る(さくっとかいてます)

#!/usr/bin/env ruby

require 'csv'
require 'json'
require 'securerandom'

# The first line of standard input is consumed separately: it is parsed as a
# CSV row and its fields become the keys of every JSON document emitted later.
# `to_s` turns the nil returned by `gets` at EOF into an empty string so that
# empty input is reported below instead of raising NoMethodError on `chomp`.
line = STDIN.gets.to_s.chomp
csv = CSV.new(line)
header = csv.to_a[0]

# An empty input (or a blank first line) yields no header row; stop with a
# clear message rather than failing later when the header is zipped with rows.
abort "no CSV header row found on STDIN" if header.nil?

# Index and type names used when building the per-row action metadata.
INDEX = "ldgourmet"
TYPE  = "restaurant"

# Converts a coordinate written as "degrees.minutes.seconds" (the seconds part
# may itself contain a decimal point, e.g. "35.40.12.345") to decimal degrees.
#
# A minus sign directly in front of the degrees negates the whole value, so
# coordinates south of the equator / west of the prime meridian keep their
# sign instead of being silently turned positive.
#
# @param string [String, nil] the coordinate text
# @return [String] the decimal-degree value formatted with Float#to_s;
#   "0.0" when the input is nil or does not contain the expected pattern
def string_to_float(string)
  parts = /(?<sign>-?)(?<deg>[0-9]+)\.(?<min>[0-9]+)\.(?<sec>.+)/.match(string.to_s)
  return "0.0" unless parts

  # degrees + minutes/60 + seconds/3600
  degrees = parts[:deg].to_f + (parts[:min].to_f / 60) + (parts[:sec].to_f / 60**2)
  degrees = -degrees if parts[:sign] == "-"
  degrees.to_s
end


# Convert every remaining CSV row on standard input into one JSON document
# per line on standard output. Column values are keyed by the header fields
# read earlier in the script.
CSV(STDIN).each do |row|
  # Optional action/metadata line, left disabled as in the original script
  # (NOTE(review): looks like the Elasticsearch bulk format — confirm before
  # enabling). It is no longer built for every row, which avoided generating
  # a UUID that was never written out:
  #   puts JSON.dump("index" => { "_index" => INDEX, "_type" => TYPE, "_id" => SecureRandom.uuid })
  hash = header.zip(row).to_h

  # Add a "location" object derived from the two coordinate columns.
  hash["location"] = {
    "lat" => string_to_float(hash["north_latitude"]),
    "lon" => string_to_float(hash["east_longitude"]),
  }

  puts JSON.dump(hash)
end

変換する

cat restaurants.csv | ruby csv2json.rb > restaurants.simple.json 

データを入れる

stream2esをダウンロード

curl -O download.elasticsearch.org/stream2es/stream2es; chmod +x stream2es

インポート

cat restaurants.simple.json | ./stream2es stdin --target http://localhost:9200/ldgourmet/restaurant 

Inquisitorで見てみる

はいっていそうですね。

image

その他

間違えてしまってindexを消したいときは、以下を実行します(Sense使用)

DELETE ldgourmet

参考

Register as a new user and use Qiita more conveniently

  1. You get articles that match your needs
  2. You can efficiently read back useful information
  3. You can use dark theme
What you can do with signing up
6