ES中添加 IK 分词器

原创
2019/08/21 15:27
阅读数 1.6W

1. 从 GitHub 下载 IK 分词器,一定要注意插件版本必须和 ES 的版本一致

https://github.com/medcl/elasticsearch-analysis-ik/releases

 

2. 下载之后解压到 ES 的 plugins 目录下的单独子目录中(例如 plugins/ik),然后重启 ES 服务

 

测试:向 http://localhost:9200/blog1/_analyze 发送 POST 请求,请求体如下:

{
   "text":"中华人民共和国MN","tokenizer": "ik_max_word"
}

结果:

{
    "tokens": [
        {
            "token": "中华人民共和国",
            "start_offset": 0,
            "end_offset": 7,
            "type": "CN_WORD",
            "position": 0
        },
        {
            "token": "中华人民",
            "start_offset": 0,
            "end_offset": 4,
            "type": "CN_WORD",
            "position": 1
        },
        {
            "token": "中华",
            "start_offset": 0,
            "end_offset": 2,
            "type": "CN_WORD",
            "position": 2
        },
        {
            "token": "华人",
            "start_offset": 1,
            "end_offset": 3,
            "type": "CN_WORD",
            "position": 3
        },
        {
            "token": "人民共和国",
            "start_offset": 2,
            "end_offset": 7,
            "type": "CN_WORD",
            "position": 4
        },
        {
            "token": "人民",
            "start_offset": 2,
            "end_offset": 4,
            "type": "CN_WORD",
            "position": 5
        },
        {
            "token": "共和国",
            "start_offset": 4,
            "end_offset": 7,
            "type": "CN_WORD",
            "position": 6
        },
        {
            "token": "共和",
            "start_offset": 4,
            "end_offset": 6,
            "type": "CN_WORD",
            "position": 7
        },
        {
            "token": "国",
            "start_offset": 6,
            "end_offset": 7,
            "type": "CN_CHAR",
            "position": 8
        },
        {
            "token": "mn",
            "start_offset": 7,
            "end_offset": 9,
            "type": "ENGLISH",
            "position": 9
        }
    ]
}
  1. ik_max_word 和 ik_smart 有什么区别?

ik_max_word: 会将文本做最细粒度的拆分,比如会将“中华人民共和国国歌”拆分为“中华人民共和国,中华人民,中华,华人,人民共和国,人民,人,民,共和国,共和,和,国国,国歌”,会穷尽各种可能的组合,适合 Term Query;

ik_smart: 会做最粗粒度的拆分,比如会将“中华人民共和国国歌”拆分为“中华人民共和国,国歌”,适合 Phrase 查询。

# 测试分词器
GET _analyze
{
  "analyzer": "ik_smart",
  "text": "我爱你中国"
}

GET _analyze
{
  "analyzer": "ik_max_word",
  "text": "我爱你中国"
}


# 存储数据
PUT /test3/_doc/1
{
  "name":"施爷",
  "age":13,
  "birth":"2020-07-05"
}

# 修改数据(PUT 为全量覆盖:请求体中没有携带 birth 字段,所以该字段会被删除)
PUT /test3/_doc/1
{
  "name":"施爷222",
  "age":13
}

# 局部更新:只会修改 doc 中给出的 name 属性,其他字段保持不变
# 注意:ES 7.x 起带类型的 /_doc/{id}/_update 写法已弃用,推荐使用 POST /test3/_update/1
POST /test3/_doc/1/_update
{
  "doc":{
    "name":"我是用post方式进行了修改"
  }
}


# 获取对象结构
GET /test3

# 通过id来获取文档
GET /test3/_doc/1

# 查看数据库中全部存储的统计信息
GET _cat/indices?v

# 删除数据
DELETE /test3/_doc/1


# 存储、更新数据
PUT /shiye/user/6
{
  "name":"shiye施爷成绩好",
  "age":30,
  "desc":"一看操作猛如虎,一战战绩0-5",
  "tags":["靓仔","旅游","爬山"]
}

# 通过 id 来查询数据(A001 只是示例 id,需替换为实际存在的文档 id,例如上面写入的 6)
GET /shiye/user/A001

 # 通过名称搜索
 GET /shiye/user/_search?q=name:shiye
 
 # 通过构建复杂查询,查询指定属性_source
 GET /shiye/user/_search
 {
   "query": {
     "match": {
       "name": "shiye"
     }
   },
   "_source":["name","desc","age"]
 }

# must查询 相当于 and
GET /shiye/user/_search
{
  "query": {
    "bool": {
      "must": [
        {
          "match": {
            "name": "shiye"
          }
        },{
          "match": {
            "name": "施爷"
          }
        }
      ]
    }
  }
}


# should 查询,相当于or
GET /shiye/user/_search
{
  "query": {
    "bool": {
      "should": [
        {
          "match": {
            "name": "shiye"
          }
        },{
          "match": {
            "name": "施爷"
          }
        }
      ]
    }
  }
}

# 加上 must+filter 
GET /shiye/user/_search
{
  "query": {
    "bool": {
      "must": [
        {
          "match": {
            "name": "shiye"
          }
        }
      ],
      "filter": {
        "range": {
          "age": {
            "gte": 20,
            "lte": 40
          }
        }
      }
    }
  }
}

# 查询 tags 中匹配到山的
GET /shiye/user/_search
{
  "query": {
    "match": {
      "tags": "山"
    }
  }
}

#############测试 text,keyword ###############
# text     可以分词
# keyword  不分词
# 指定索引各个字段的创建规则
PUT testdb
{
  "mappings": {
    "properties":{
      "name":{
        "type":"text"
      },
      "desc":{
        "type": "keyword"
      }
    }
  }
}

# 添加数据
PUT testdb/_doc/2
{
  "name":"武松",
  "desc":"打老虎"
}

# 查询:desc 是 keyword 类型,不分词,必须与完整字段值精确匹配才能命中
# (此处的 "军事" 不会命中上面写入的 "打老虎")
GET testdb/_search
{
  "query": {
    "match": {
      "desc": "军事"
    }
  }
}

# 查询 + 高亮
GET testdb/_search
{
  "query": {
    "match": {
      "name": "施"
    }
  },
  "highlight": {
    "pre_tags": "<p style='color:red'>", 
    "post_tags": "</p>", 
    "fields": {"name":{}}
  }
}



 

 

展开阅读全文
打赏
0
0 收藏
分享
加载中
更多评论
打赏
0 评论
0 收藏
0
分享
返回顶部
顶部