Elasticsearch 调用(Python 客户端)

相旭
2023-12-01

创建索引(相当于建数据库)

import elasticsearch

# Client for the cluster; "ip" and 80 stand in for the real host and port.
es = elasticsearch.Elasticsearch(hosts=[{"host": "ip", "port": 80}])
# Variant with basic auth and explicit timeout/retry settings:
# es = elasticsearch.Elasticsearch(
#     hosts=[{"host": "ip", "port": 80}],
#     http_auth=('elastic', 'password'),
#     timeout=10,
#     max_retries=1,
#     retry_on_timeout=False,
# )

# Name of the index to create; later snippets refer to this variable.
myindex = "shigebaobao"

# "question" is analyzed with the analyzer named "index_ansj"
# (presumably supplied by an ansj analysis plugin -- confirm it is installed).
question_field = {"type": "text", "analyzer": "index_ansj"}
# Dates are stored as "yyyy-MM-dd HH:mm:ss" strings.
update_time_field = {"type": "date", "format": "yyyy-MM-dd HH:mm:ss"}

field_mappings = {
    "id": {"type": "integer"},
    "question": question_field,
    "answer": {"type": "text"},
    "domain": {"type": "keyword"},
    "update_time": update_time_field,
}
faqbody = {"mappings": {"properties": field_mappings}}

# ignore=400 makes the client return the error body instead of raising on
# HTTP 400 (for example when the index already exists).
res = es.indices.create(index=myindex, ignore=400, body=faqbody)
print(res)

批量插入数据

import datetime

import elasticsearch
import pandas as pd  # was missing: pd.read_excel below raised NameError
from elasticsearch import helpers

es = elasticsearch.Elasticsearch(hosts=[{"host": "ip", "port": 80}])

# Number of documents sent per bulk request.
BATCH_SIZE = 1000

# Load the spreadsheet and normalise it before indexing.
data = pd.read_excel("all_data.xlsx")
now = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')
data.fillna({"images": "", "files": ""}, inplace=True)
data.rename(columns={"Products": "products"}, inplace=True)
data["update_time"] = now   # same timestamp for every row of this import
data["is_deleted"] = 0      # soft-delete flag, 0 = live document

rows = data.shape[0]
for i in range(0, rows, BATCH_SIZE):
    # Convert one batch of rows at a time instead of building a one-row
    # DataFrame per document. read_excel is called without index_col, so the
    # index is the default RangeIndex and position == label.
    batch = data.iloc[i:i + BATCH_SIZE].to_dict("records")
    action = []
    for k, d in enumerate(batch, start=i):
        d["id"] = k
        d["flag"] = int(d["flag"])
        # NOTE(review): eval executes whatever text is in the spreadsheet
        # cell. If the column only holds Python literals, ast.literal_eval
        # is the safe replacement -- confirm and switch.
        d["products"] = list(eval(d["products"]))
        action.append({
            # `myindex` is the index name defined in the index-creation snippet.
            "_index": myindex,
            # NOTE(review): mapping types are deprecated in Elasticsearch 7
            # and removed in 8; drop "_type" when upgrading.
            "_type": "_doc",
            "_id": str(k),
            "_source": d,
        })
    helpers.bulk(es, action)

逐条插入

from tqdm import tqdm

# One dict per DataFrame row. `df` is not defined in this snippet and must
# already be loaded by the caller.
ysj_dict = df.to_dict("records")

# Index the documents one request at a time, using the row position as the
# document id; tqdm shows a progress bar over the records.
for i, d in enumerate(tqdm(ysj_dict)):
    # Pass the `myindex` variable (the index created earlier). The original
    # passed the literal string 'myindex', which writes to a different index.
    result = es.index(index=myindex, doc_type='_doc', body=d, id=i)

print("DONE!!!!!")

查询数据

import elasticsearch

es = elasticsearch.Elasticsearch(hosts=[{"host": "ip", "port": 80}])

# Full-text match on "question", restricted to documents whose is_deleted
# field is 0, returning at most 5 hits.
dsl = {
        "query": {
            "bool": {
                "must": [
                    {"match": {"question": {"query": "陆大人的袁宝宝", "analyzer": "index_ansj"}}}
                ],
                "filter": [
                    {"term": {"is_deleted": 0}}
                ]
            }
        },
        "size": 5
}

res = es.search(index="shigebaobao", body=dsl)

# Collect (score, question) pairs so the hits stay available after printing.
# The original initialised `result` and read `_score` but never used either.
result = []
for d in res["hits"]["hits"]:
    score = d["_score"]
    question = d["_source"]["question"]
    result.append((score, question))
    print(question)

更新数据

修改已知 id 的文档的某个字段值(此处将 is_deleted 置为 1,做逻辑删除)

from elasticsearch import Elasticsearch

# "ip:80" stands in for the real host and port.
client = Elasticsearch(
    "ip:80", timeout=10, max_retries=1, retry_on_timeout=False
)

# Partial-document update body: set is_deleted to 1.
dsl = {
    "doc": {"is_deleted": 1}
}

# Ids in the range [18721, 18938) are the documents to be marked as deleted.
for k in range(18721, 18938):
    # Keyword arguments keep the call independent of positional parameter
    # order, which differs between major versions of the client.
    client.update(index="shigebaobao", id=k, body=dsl)
 类似资料: