本文基于《Python操作ElasticSearch——文档增删改查》进行创作
# 搜索全部商品
GET /ecommerce/product/_search
import json
import requests
# Fetch every product through the plain REST search endpoint (no query string).
# A timeout is passed because requests otherwise waits indefinitely on an
# unresponsive node.
response = requests.get("http://ip:9200/ecommerce/product/_search", timeout=10)
# indent / ensure_ascii=False keep the output readable and print non-ASCII
# field values as-is, matching the other json.dumps calls in this file.
print(json.dumps(response.json(), indent=2, ensure_ascii=False))
"""
took:耗费了几毫秒
timed_out:是否超时,这里是没有
_shards:数据拆成了5个分片,所以对于搜索请求,会打到所有的primary shard(或者是它的某个replica shard也可以)
hits.total:查询结果的数量,3个document
hits.max_score:score的含义,就是document对于一个search的相关度的匹配分数,越相关,就越匹配,分数也高
hits.hits:包含了匹配搜索的document的详细数据
query string search的由来,因为search参数都是以http请求的query string来附带的
搜索商品名称中包含yagao的商品,而且按照售价降序排序:GET /ecommerce/product/_search?q=name:yagao&sort=price:desc
适用于临时的在命令行使用一些工具,比如curl,快速的发出请求,来检索想要的信息;但是如果查询请求很复杂,是很难去构建的
在生产环境中,很少使用query string search
"""
# pip install elasticsearch-dsl==6.0.0
"""
DSL:Domain Specific Language,特定领域的语言
http request body:请求体,可以用json的格式来构建查询语法,比较方便,可以构建各种复杂的语法,比query string search肯定强大多了
"""
from elasticsearch import Elasticsearch
from elasticsearch_dsl import Search
# Client for the cluster; replace "ip:port" with the real node address.
es = Elasticsearch(hosts="ip:port")
# Build the Search object step by step: bind the client, then the index and
# the document type. Passing using=/index=/doc_type= straight to Search() is
# the equivalent one-call form.
s = Search().using(es).index("ecommerce").doc_type("product")
查询所有的商品
GET /ecommerce/product/_search
{
"query": { "match_all": {} }
}
# Run a match_all query against the index/type bound to `s` and dump the raw
# response as a plain dict.
s_dsl = s.query("match_all")
response = s_dsl.execute()
response_dict = response.to_dict()
print(response_dict)
查询名称包含yagao的商品,同时按照价格降序排序
GET /ecommerce/product/_search
{
"query" : {
"match" : {
"name" : "yagao"
}
},
"sort": [
{ "price": "desc" }
]
}
# Products whose name matches "yagao", ordered by price from high to low.
name_search = s.query("match", name="yagao")
s_dsl = name_search.sort({"price": "desc"})
response = s_dsl.execute()
print(response.to_dict())
分页查询商品,总共3条商品,假设每页就显示1条商品,现在显示第2页,所以就查出来第2个商品
GET /ecommerce/product/_search
{
"query": { "match_all": {} },
"from": 1,
"size": 1
}
# Page 2 with one product per page. Slicing a Search follows Python slice
# semantics: [start:stop] becomes from=start, size=stop-start, so [1:2]
# yields {"from": 1, "size": 1} (from=0 would be the first hit). A [1:1]
# slice would request size 0 and return no hits.
s_dsl = s.query("match_all")[1:2]
response = s_dsl.execute()
print(response.to_dict())
指定要查询出来商品的名称和价格就可以
GET /ecommerce/product/_search
{
"query": { "match_all": {} },
"_source": ["name", "price"]
}
# Return only the name and price fields of each matching product.
wanted_fields = ["name", "price"]
all_products = s.query("match_all")
s_dsl = all_products.source(wanted_fields)
response = s_dsl.execute()
print(response.to_dict())
搜索商品名称包含yagao,而且售价大于25元的商品
GET /ecommerce/product/_search
{
"query" : {
"bool" : {
"must" : {
"match" : {
"name" : "yagao"
}
},
"filter" : {
"range" : {
"price" : { "gt" : 25 }
}
}
}
}
}
# Option 1: load the raw request body with update_from_dict.
# update_from_dict modifies the Search it is called on in place, so it is
# applied to a fresh Search instead of the shared `s`; otherwise this bool
# query would stay attached to `s` and be AND-ed into every later s.query().
s_dsl = Search(using=es, index="ecommerce", doc_type="product").update_from_dict({
    "query": {
        "bool": {
            "must": {
                "match": {
                    "name": "yagao"
                }
            },
            "filter": {
                "range": {
                    "price": {"gt": 25}
                }
            }
        }
    }
})
print(s_dsl.execute().to_dict())
# Option 2: compose the same bool query from Q objects.
from elasticsearch_dsl import Q

# Scoring clause: the name must match "yagao".
name_clause = Q('match', name='yagao')
# Non-scoring filter: price strictly greater than 25.
price_clause = Q("range", price={"gt": 25})
q = Q("bool", must=[name_clause], filter=[price_clause])
s_dsl = s.query(q)
response_dict = s_dsl.execute().to_dict()
print(json.dumps(response_dict, indent=2, ensure_ascii=False))
GET /ecommerce/product/_search
{
"query" : {
"match" : {
"producer" : "yagao producer"
}
}
}
# Full-text search on producer. The request body has to be passed explicitly;
# without it es.search() sends no query and returns every document.
# With a match query the search string is analyzed into terms, and a document
# containing any of them ("yagao" or "producer") is a hit.
body = {
    "query": {
        "match": {
            "producer": "yagao producer"
        }
    }
}
result = es.search(index="ecommerce", doc_type="product", body=body)
print(json.dumps(result, indent=2, ensure_ascii=False))
"""
producer这个字段,会先被拆解,建立倒排索引
special 4
yagao 4
producer 1,2,3,4
gaolujie 1
zhonghua 3
jiajieshi 2
"""
GET /ecommerce/product/_search
{
"query" : {
"match_phrase" : {
"producer" : "yagao producer"
}
}
}
"""
phrase search跟全文检索相对应,也正好相反:全文检索会将输入的搜索串拆解开来,去倒排索引里面去一一匹配,只要能匹配上任意一个拆解后的单词,就可以作为结果返回
phrase search,要求输入的搜索串,必须在指定的字段文本中,完全包含一模一样的,才可以算匹配,才能作为结果返回
"""
# Phrase search: the producer field must contain the exact phrase
# "yagao producer", not just one of its terms.
phrase_clause = {"producer": "yagao producer"}
body = {"query": {"match_phrase": phrase_clause}}
result = es.search(index="ecommerce", doc_type="product", body=body)
formatted = json.dumps(result, indent=2, ensure_ascii=False)
print(formatted)
GET /ecommerce/product/_search
{
"query" : {
"match" : {
"producer" : "producer"
}
},
"highlight": {
"fields" : {
"producer" : {"fragment_size": 50}
}
}
}
# Match "producer" in the producer field and request highlighting for that
# field with fragment_size set to 50.
producer_search = s.query("match", producer="producer")
s_dsl = producer_search.highlight("producer", fragment_size=50)
response = s_dsl.execute()
print(json.dumps(response.to_dict(), indent=2, ensure_ascii=False))
# 条件1 查询关键词必须包含Acrylic Fabric,关键词的id为3548, 29310需要排除在外
# 对条件1的结果进行过滤,关键词中不能出现"Polyester",关键词中要有"Wool"或"100%",只看前面1条数据
GET /index/type/_search
{
"query": {
"bool": {
"must": {
"bool": {
"should": [
{
"match_phrase": {
"keyword": "Acrylic Fabric"
}
}
]
}
},
"must_not": {
"terms": {
"id": [
3548,
29310
]
}
},
"filter": {
"bool": {
"must_not": [
{
"bool": {
"should": [
[
{
"match_phrase": {
"keyword": "Polyester"
}
}
]
]
}
}
],
"must": [
{
"bool": {
"should": [
[
{
"match_phrase": {
"keyword": "Wool"
}
}
],
[
{
"match_phrase": {
"keyword": "100%"
}
}
]
]
}
}
]
}
}
}
},
"from": "0",
"size": "1"
}
# 查询translation字段为"" 或者为 null 的数据
{
"query": {
"bool": {
"should": [
{
"term": {
"translation.keyword": ""
}
},
{
"bool": {
"must_not": [
{
"exists": {
"field": "translation"
}
}
]
}
}
]
}
}
}
# elasticsearch过滤纯数字
# 过滤多条件 or 或
{
"query": {
"bool": {
"must_not": [
{"regexp": {
"keyword.keyword": "[0-9]+"
}}
],
"must": {
"bool": {
"should": [
{
"match_phrase": {
"keyword": "11"
}
}
]
}
},
"filter": {
"bool": {
"should": [
{
"bool": {
"must_not": [
{
"term": {
"competition.keyword": ""
}
}
]
}
},
{
"bool": {
"must_not": [
{
"term": {
"avg_monthly_search": 0
}
}
]
}
},
{
"bool": {
"must_not": [
{
"term": {
"search_result_sum": 0
}
}
]
}
}
]
}
}
}
}
}
# function_score实现自定义排序 按competition 高中低排序
{
"query": {
"function_score": {
"query": {
"match_all": {}
},
"functions": [
{
"filter": {
"term": {
"competition": "高"
}
},
"weight": 100
},
{
"filter": {
"term": {
"competition": "中"
}
},
"weight": 99
},
{
"filter": {
"term": {
"competition": "低"
}
},
"weight": 98
}
],
"score_mode": "first"
}
}
}
# 查询不重复
GET /dw_gofish_article/_search
{
"query": {
"term": {
"text_id.keyword": "302f75d9b009086d61b345314611850c"
}
},
"collapse": {
"field": "text_id.keyword"
}
}
# https://www.jianshu.com/p/62bed9cc8349
# 更新指定字段,ID存在则更新,不存在则插入
POST /dw_temp_test/_update/1
{
"doc": {
"name": "xxx"
},
"upsert": {
"name": "xxx"
}
}
# 查找重复的documents
GET /my_index/_search
{
"track_total_hits": true,
"size": 0,
"_source": ["my_field"],
"aggs": {
"duplicateCount": {
"terms": {
"field": "my_field.keyword",
"min_doc_count": 2
},
"aggs": {
"duplicateDocuments": {
"top_hits": {}
}
}
}
}
}
ElasticSearch-Query查询方式有以下几种
1、query string search
2、query DSL
3、query filter
4、full-text search
5、phrase search
6、highlight search
参考链接 https://elasticsearch-dsl.readthedocs.io/en/latest/index.html