Elasticsearch是一个基于Lucene库的搜索引擎,属于面向文档的数据库。它提供了一个分布式、支持多用户的全文搜索引擎,具有HTTP Web接口和无模式JSON文档。Elasticsearch是用Java开发的,但它通过HTTP接口对外提供服务,因此其他编程语言同样可以使用。
elasticsearch的搜索原理是利用倒排索引,也被称为反向索引,被用来存储在全文搜索下某个单词在一个文档或者一组文档中的存储位置的映射。它是文档检索系统中最常用的数据结构。
elasticsearch与关系型数据库做个对比:
关系型数据库 | elasticsearch |
---|---|
databases | indices |
tables | types |
rows | documents |
columns | fields |
elasticsearch(集群)中可以包含多个索引,每个索引中可以包含多个类型,每个类型下又包含多个文档,每个文档中又包含多个字段。
逻辑设计:索引 》 类型 》 文档
GET _cat/indices?v
health status index uuid pri rep docs.count docs.deleted store.size pri.store.size
yellow open t1 U0RWARsvTUunQXLAN8l61A 5 1 1 0 4.9kb 4.9kb
字段类型动态映射(没有提前定义映射时,elasticsearch会根据写入的值自动推断字段类型)
PUT t1/doc/1
{
"name":"张三",
"age":20,
"birthday":"2002-10-01"
}
GET t1 #查看创建信息,通过restful的风格返回
{
"t1": {
"aliases": {},
"mappings": {
"doc": {
"properties": {
"age": {
"type": "long"
},
"birthday": {
"type": "date"
},
"name": {
"type": "text",# 字段随机映射成相应的类型
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
}
}
}
},
"settings": {
"index": {
"creation_date": "1661426202423",
"number_of_shards": "5",
"number_of_replicas": "1",
"uuid": "U0RWARsvTUunQXLAN8l61A",
"version": {
"created": "6030299"
},
"provided_name": "t1"
}
}
}
}
提前映射,跟mysql一样先创建表结构
PUT t2
{
"mappings": {
"doc":{
"properties": {
"name":{
"type": "text"
},
"age":{
"type": "integer"
},
"birthday":{
"type": "date"
}
}
}
}
}
查看指定的文档
GET t2/doc/2
查看该索引类型下所有文档
GET t2/doc/_search
使用put修改是整体覆盖,必须带上所有字段,否则没有带上的字段会丢失
PUT t2/doc/1
{
"name": "张三",
"age": 18,
"birthday":"2002-10-01"
}
post+_update,可以只修改某个字段(局部更新)
POST t2/doc/1/_update
{
"doc": {
"birthday":"2004-10-01"
}
}
DELETE t2/doc/2
**字符串查询**
GET t2/doc/_search?q=age:22
**结构化查询**
GET t2/doc/_search
{
"query":{
"match": {
"age": 22
}
}
}
# 查询全部文档
GET t2/doc/_search
{
"query":{
"match_all": {}
}
}
#结果
{
"took": 20,
"timed_out": false,
"_shards": {
"total": 5,
"successful": 5,
"skipped": 0,
"failed": 0
},
"hits": {
"total": 5,
"max_score": 1,
"hits": [
{
"_index": "t2",
"_type": "doc",
"_id": "5",
"_score": 1,
"_source": {
"name": "刘七",
"age": 22,
"birthday": "1998-08-01",
"desc": "猫是老虎的师父"
}
},
{
"_index": "t2",
"_type": "doc",
"_id": "4",
"_score": 1,
"_source": {
"name": "赵六",
"age": 24,
"birthday": "1998-08-01",
"desc": "波斯猫不吃老鼠,也不爱啃骨头"
}
},
{
"_index": "t2",
"_type": "doc",
"_id": "2",
"_score": 1,
"_source": {
"name": "李四",
"age": 20,
"birthday": "2002-10-01",
"desc": "老鼠喜欢吃大米"
}
},
{
"_index": "t2",
"_type": "doc",
"_id": "1",
"_score": 1,
"_source": {
"name": "张三",
"age": 18,
"birthday": "2002-10-01",
"desc": "猫喜欢吃老鼠"
}
},
{
"_index": "t2",
"_type": "doc",
"_id": "3",
"_score": 1,
"_source": {
"name": "王五",
"age": 22,
"birthday": "2004-11-01",
"desc": "狗喜欢啃骨头"
}
}
]
}
}
elasticsearch默认的standard分词器对中文是一个字一个字地分词,match匹配会根据分词后的结果去查询文档,比如查王五,则会把王八也查询出来,而match_phrase会按短语查询
GET t2/doc/_search
{
"query":{
"match_phrase": {
"name": "王五" #只会查询王五
}
}
}
# slop指定查询词之间最多允许间隔的位置数,这里查询出词与词之间间隔不超过2的文档
GET t2/doc/_search
{
"query": {
"match_phrase": {
"desc": {
"query": "狗啃骨头",#将狗喜欢啃骨头查询出来
"slop":2
}
}
}
}
# term精确查询,查询关键字不会经过分词分析
# term应与keyword类型字段配合使用;对text类型字段使用term通常查不到结果(text字段的内容已被分词),text字段请用match
GET t3/doc/_search
{
"query":{
"term":{ # 精确查询
"desc":"猫是老虎的师父" # keyword类型
}
}
}
GET t2/doc/_search
{
"query": {
"bool": {
"must": [
{
"match": {
"name":"王五"
}
},
{
"match": {
"age":22
}
}
]
}
}
}
GET t2/doc/_search
{
"query": {
"bool": {
"should": [
{
"match": {
"name":"王五"
}
},
{
"match": {
"age":22
}
}
]
}
}
}
GET t2/doc/_search
{
"query": {
"bool": {
"must_not": [
{
"match": {
"name":"王五"
}
},
{
"match": {
"age":22
}
}
]
}
}
}
# 过滤(filter),和must、should同级
GET t2/doc/_search
{
"query": {
"bool": {
"should": [
{
"match": {
"name":"王五"
}
},
{
"match": {
"age":22
}
}
],
"filter": {
"range": { #gt小于gte小于或等于lt大于lte大于或等于
"age": {
"gte": 21,
"lte": 30
}
}
}
}
}
}
# asc 升序 desc降序
GET t2/doc/_search
{
"sort": [
{
"age": {
"order": "desc"
}
}
]
}
GET t2/doc/_search
{
"query": {
"match_all": {}
},
"_source": ["name"] #只返回name字段
}
# 从0开始,查询出2条记录
GET t2/doc/_search
{
"from":0,
"size":2
}
# 聚合函数有:avg max min sum
# 对age字段求平均值
GET t2/doc/_search
{
"aggs": {
"myAGV": { # 自定义名
"avg": { #聚合函数
"field": "age" # 按什么字段分组
}
}
},
"_source": false # 只显示分组聚合的值
}
# 分组查询
GET t2/doc/_search
{
"aggs":{
"myGroup":{ # 分组名
"range": {
"field": "age", #分组字段
"ranges": [
{
"from": 20, #范围分组
"to": 22
},
{
"from": 22,
"to": 24
},
{
"from": 24,
"to": 26
}
]
}
}
},
"_source": false
}
GET t2/doc/_search
{
"query": {
"match": {
"name": "刘七"
}
},
"highlight": {
"pre_tags": "<b class='a1' style={color:red}>",
"post_tags": "</b>",
"fields": {
"name": {}
}
}
}
# 返回结果
highlight": {
"name": [
"<b class='a1' style={color:red}>刘</b><b class='a1' style={color:red}>七</b>"
]
}
PUT t3
{
"mappings": {
"doc":{
"properties": {
"name":{
"type": "keyword"
}
}
}
},
"settings": {
"number_of_replicas": 1,
"number_of_shards": 5
}
}
POST _analyze
{
"analyzer": "standard",
"text":"波斯猫不吃老鼠&也不爱啃骨头"
}
# 一个一个中文进行分词
{
"tokens": [
{
"token": "波",
"start_offset": 0,
"end_offset": 1,
"type": "<IDEOGRAPHIC>",
"position": 0
},
{
"token": "斯",
"start_offset": 1,
"end_offset": 2,
"type": "<IDEOGRAPHIC>",
"position": 1
},
{
"token": "猫",
"start_offset": 2,
"end_offset": 3,
"type": "<IDEOGRAPHIC>",
"position": 2
},
{
"token": "不",
"start_offset": 3,
"end_offset": 4,
"type": "<IDEOGRAPHIC>",
"position": 3
},
{
"token": "吃",
"start_offset": 4,
"end_offset": 5,
"type": "<IDEOGRAPHIC>",
"position": 4
},
{
"token": "老",
"start_offset": 5,
"end_offset": 6,
"type": "<IDEOGRAPHIC>",
"position": 5
},
{
"token": "鼠",
"start_offset": 6,
"end_offset": 7,
"type": "<IDEOGRAPHIC>",
"position": 6
},
{
"token": "也",
"start_offset": 8,
"end_offset": 9,
"type": "<IDEOGRAPHIC>",
"position": 7
},
{
"token": "不",
"start_offset": 9,
"end_offset": 10,
"type": "<IDEOGRAPHIC>",
"position": 8
},
{
"token": "爱",
"start_offset": 10,
"end_offset": 11,
"type": "<IDEOGRAPHIC>",
"position": 9
},
{
"token": "啃",
"start_offset": 11,
"end_offset": 12,
"type": "<IDEOGRAPHIC>",
"position": 10
},
{
"token": "骨",
"start_offset": 12,
"end_offset": 13,
"type": "<IDEOGRAPHIC>",
"position": 11
},
{
"token": "头",
"start_offset": 13,
"end_offset": 14,
"type": "<IDEOGRAPHIC>",
"position": 12
}
]
}
POST _analyze
{
"analyzer": "ik_max_word",
"text":"波斯猫不吃老鼠&也不爱啃骨头"
}
# 根据中文词组分
{
"tokens": [
{
"token": "波斯猫",
"start_offset": 0,
"end_offset": 3,
"type": "CN_WORD",
"position": 0
},
{
"token": "波斯",
"start_offset": 0,
"end_offset": 2,
"type": "CN_WORD",
"position": 1
},
{
"token": "猫",
"start_offset": 2,
"end_offset": 3,
"type": "CN_CHAR",
"position": 2
},
{
"token": "不吃",
"start_offset": 3,
"end_offset": 5,
"type": "CN_WORD",
"position": 3
},
{
"token": "老鼠",
"start_offset": 5,
"end_offset": 7,
"type": "CN_WORD",
"position": 4
},
{
"token": "也",
"start_offset": 8,
"end_offset": 9,
"type": "CN_CHAR",
"position": 5
},
{
"token": "不爱",
"start_offset": 9,
"end_offset": 11,
"type": "CN_WORD",
"position": 6
},
{
"token": "啃骨头",
"start_offset": 11,
"end_offset": 14,
"type": "CN_WORD",
"position": 7
},
{
"token": "骨头",
"start_offset": 12,
"end_offset": 14,
"type": "CN_WORD",
"position": 8
}
]
}
POST _analyze
{
"analyzer": "simple",
"text":"波斯猫 不吃 老鼠&也不爱啃骨头"
}
# simple分词器遇到非字母字符(如空格、&)就切分
{
"tokens": [
{
"token": "波斯猫",
"start_offset": 0,
"end_offset": 3,
"type": "word",
"position": 0
},
{
"token": "不吃",
"start_offset": 4,
"end_offset": 6,
"type": "word",
"position": 1
},
{
"token": "老鼠",
"start_offset": 7,
"end_offset": 9,
"type": "word",
"position": 2
},
{
"token": "也不爱啃骨头",
"start_offset": 10,
"end_offset": 16,
"type": "word",
"position": 3
}
]
}
PUT t1
{
"mappings": {
"doc":{
"properties": {
"name":{
"type": "text"
},
"age":{
"type": "integer"
},
"birthday":{
"type": "date"
},
"desc":{
"type": "text",
"analyzer": "ik_max_word"
}
}
}
}
}
GET t4/doc/_search
{
"suggest":{
"mySuggest":{ # 自定义建议器名称
"text": "dgo", # 建议内容,关键字
"term":{ #建议类型,词条建议器
"field":"desc" #建议字段,根据某个字段进行建议
}
}
}
}
PUT t6
{
"mappings": {
"doc":{
"properties": {
"name":{
"type": "text",
"copy_to": "all" # copy_to允许我们将多个字段的值复制到组字段中,然后将组字段作为单个字段进行查询
},
"age":{
"type": "integer"
},
"birthday":{
"type": "date"
},
"desc":{
"type": "text",
"copy_to": "all"
},
"all": { #copy_to到all这个字段
"type": "text",
"analyzer": "ik_max_word"
}
}
}
}
}
GET t1/doc/_search
{
"suggest":{
"mySuggest":{
"text": "dgo",#进行纠错,dgo => dog
"phrase":{
"field":"desc"
}
}
}
}
completion提供自动完成、补全搜索功能,可以在用户输入时引导用户查看相关结果,从而提高搜索精度
PUT t7
{
"mappings": {
"doc":{
"properties": {
"name":{
"type": "text"
},
"age":{
"type": "integer"
},
"birthday":{
"type": "date"
},
"desc":{
"type": "completion", # 完成建议器
"analyzer": "ik_max_word"
}
}
}
}
}
GET t7/doc/_search
{
"suggest":{
"mySuggest":{
"text": "do", #自动补全,比如dog
"completion":{
"field":"desc"
}
}
}
}
GET t7/doc/_search
{
"suggest":{
"mySuggest":{
"prefix": "c",#自动补全,cat
"completion":{
"field":"desc"
}
}
}
}
from elasticsearch import Elasticsearch
# With no arguments the client defaults to host=localhost, port=9200.
cluster_hosts = ["192.168.1.1", "192.168.1.2", "192.168.1.3"]  # nodes of the cluster to connect to
sniff_options = {
    "sniff_on_start": True,  # check the cluster before connecting
    "sniff_on_connection_fail": True,  # refresh the node list when a node stops responding
    "sniff_timeout": 60,  # timeout setting for sniffing
}
es = Elasticsearch(cluster_hosts, **sniff_options)

# Add a document to the given index, or replace it if the id already exists
# (the body must carry every field). A missing index is created first.
person_doc = {"name": "张三", "age": 18}
es.index(index='t1', doc_type='doc', id='1', body=person_doc)

# Create an index with the field types mapped up front.
field_types = {
    "name": {"type": "text"},
    "age": {"type": "integer"},
    "birthday": {"type": "date"},
}
mapping = {"mappings": {"doc": {"properties": field_types}}}
create_result = es.indices.create("t2", body=mapping)
print(create_result)

# create() only inserts; running it again for the same id raises an error.
es.create(
    index="t2",
    doc_type="doc",
    id=1,
    body={"name": "张三", "age": 20, "birthday": "2000-10-01"},
)

# index() overwrites the document when the id already exists.
es.index(
    index='t1',
    doc_type='doc',
    id='1',
    body={"name": "张三", "age": 18},
)

# Delete one specific document.
es.delete(index='t2', doc_type='doc', id='1')

# Delete every document matched by the query.
es.delete_by_query(
    index='t1',
    doc_type='doc',
    body={"query": {"match": {"age": 20}}},
)

# Delete the whole index.
es.indices.delete(index='t1')

# Fetch one specific document from an index.
es.get(index='t2', doc_type='doc', id=1)

# Run a match query.
es.search(
    index='t2',
    doc_type='doc',
    body={"query": {"match": {"age": 20}}},
)