LoginSignup
2
6

More than 5 years have passed since last update.

ElasticSearch基礎編 アナライザ

Posted at

日本語検索(形態素解析)、nGram検索(2文字とかで分割して検索するやつ)をやってみたメモです。
環境は「ElasticSearchで日本語検索するためのローカル環境構築」を使いました。

準備

設定とマッピングを登録する

index test

PUT http://localhost:9200/test

{
  "settings": {
    "index": {
      "analysis": {
        "tokenizer": {
          "ja_text_tokenizer": {
            "type": "kuromoji_tokenizer",
            "mode": "search"
          },
          "ngram_tokenizer": {
            "type": "nGram",
            "min_gram" : 2,
            "max_gram" : 2,
            "token_chars": [
                "letter",
                "digit"
            ]
          }
        },
        "analyzer": {
          "ja_text_analyzer": {
            "tokenizer": "ja_text_tokenizer",
            "type": "custom"
          },
          "ngram_analyzer": {
            "tokenizer": "ngram_tokenizer",
            "type": "custom"
          }
        }
      }
    }
  },
  "mappings": {
    "account": {
      "properties": {
        "name": {
          "type": "string",
          "analyzer": "ja_text_analyzer"
        },
        "userId": {
          "type": "string",
          "analyzer": "ngram_analyzer"
        },
        "id": {
          "type": "long"
        },
        "description": {
          "type": "string",
          "analyzer": "ngram_analyzer"
        }
      }
    }
  }
}

データを登録する(以下の3件のドキュメントを1件ずつPOSTする)

POST http://localhost:9200/test/account

{
    "id" : 1,
    "name" : "nattyナッティ",
    "userId" : "natty420",
    "description" : "ナッティと言います。よろしくお願いします。"
}
{
    "id" : 2,
    "name" : "もりちゃん",
    "userId" : "morichan",
    "description" : "冷麺の季節ですね"
}
{
    "id" : 3,
    "name" : "どんちゃん",
    "userId" : "donchan",
    "description" : "@morichanとは大の仲良し。また一緒に渋谷で冷麺食べにいきたいね!もりちゃん隊"
}

検索してみる

account内にて「name」に「ちゃん」が含まれているもの

POST http://localhost:9200/test/account/_search
{ 
  "query": { 
    "match": {
       "name" : "ちゃん" 
    }
  }
}

結果

{
    "took": 1,
    "timed_out": false,
    "_shards": {
        "total": 5,
        "successful": 5,
        "failed": 0
    },
    "hits": 
    {
        "total": 2,
        "max_score": 0.25811607,
        "hits": [
        {
            "_index": "test",
            "_type": "account",
            "_id": "AV0qOYLCNr31zgqLSH0o",
            "_score": 0.25811607,
            "_source": {
                "id": "3",
                "name": "どんちゃん",
                "userId": "donchan",
                "description": "@morichanとは大の仲良し。また一緒に渋谷で冷麺食べにいきたいね!もりちゃん隊"
            }
        },
          {
            "_index": "test",
            "_type": "account",
            "_id": "AV0qOWMrNr31zgqLSH0n",
            "_score": 0.25811607,
            "_source": {
                "id": "2",
                "name": "もりちゃん",
                "userId": "morichan",
                "description": "冷麺の季節ですね"
            }
        }
    ]
    }
}

account内にて「description」に「冷麺食べに」が含まれているもの

POST http://localhost:9200/test/account/_search
{ 
  "query": { 
    "match": {
       "description" : "冷麺食べに" 
    }
  }
}

結果

{
    "took": 1,
    "timed_out": false,
    "_shards": {
        "total": 5,
        "successful": 5,
        "failed": 0
    },
    "hits": 
    {
        "total": 2,
        "max_score": 1.3991506,
        "hits": [
        {
            "_index": "test",
            "_type": "account",
            "_id": "AV0qOYLCNr31zgqLSH0o",
            "_score": 1.3991506,
            "_source": {
                "id": "3",
                "name": "どんちゃん",
                "userId": "donchan",
                "description": "@morichanとは大の仲良し。また一緒に渋谷で冷麺食べにいきたいね!もりちゃん隊"
            }
        },
          {
            "_index": "test",
            "_type": "account",
            "_id": "AV0qOWMrNr31zgqLSH0n",
            "_score": 0.51623213,
            "_source": {
                "id": "2",
                "name": "もりちゃん",
                "userId": "morichan",
                "description": "冷麺の季節ですね"
            }
        }
    ]
    }
}

account内にて「userId」に「ch」が含まれているもの

POST http://localhost:9200/test/account/_search
{ 
  "query": { 
    "match": {
       "userId" : "ch" 
    }
  }
}

結果

{
    "took": 2,
    "timed_out": false,
    "_shards": {
        "total": 5,
        "successful": 5,
        "failed": 0
    },
    "hits": {
        "total": 2,
        "max_score": 0.28582606,
        "hits": [
        {
            "_index": "test",
            "_type": "account",
            "_id": "AV0qOWMrNr31zgqLSH0n",
            "_score": 0.28582606,
            "_source": {
                "id": "2",
                "name": "もりちゃん",
                "userId": "morichan",
                "description": "冷麺の季節ですね"
            }
        },
        {
            "_index": "test",
            "_type": "account",
            "_id": "AV0qOYLCNr31zgqLSH0o",
            "_score": 0.26742277,
            "_source": {
                "id": "3",
                "name": "どんちゃん",
                "userId": "donchan",
                "description": "@morichanとは大の仲良し。また一緒に渋谷で冷麺食べにいきたいね!もりちゃん隊"
            }
        }
        ]
    }
}
2
6
0

Register as a new user and use Qiita more conveniently

  1. You get articles that match your needs
  2. You can efficiently read back useful information
  3. You can use dark theme
What you can do with signing up
2
6