import elasticsearch

# Client for the cluster reachable at host "ip", port 80 (no authentication).
es = elasticsearch.Elasticsearch(hosts=[{"host": "ip", "port": 80}])
# Variant with basic auth and explicit timeout/retry settings:
# es = elasticsearch.Elasticsearch(
#     hosts=[{"host": "ip", "port": 80}],
#     http_auth=('elastic', 'password'),
#     timeout=10,
#     max_retries=1,
#     retry_on_timeout=False,
# )

# --- Create the index -------------------------------------------------------
myindex = "shigebaobao"

# Explicit mapping for the FAQ documents. Only "question" names a custom
# analyzer ("index_ansj" -- presumably provided by an analysis plugin
# installed on the cluster; verify before running).
faqbody = {
    "mappings": {
        "properties": {
            "id": {"type": "integer"},
            "question": {"type": "text", "analyzer": "index_ansj"},
            "answer": {"type": "text"},
            "domain": {"type": "keyword"},
            "update_time": {"type": "date", "format": "yyyy-MM-dd HH:mm:ss"},
        }
    }
}

# ignore=400 asks the client not to raise on an HTTP 400 response (e.g. the
# index already exists); the response body is printed either way.
res = es.indices.create(index=myindex, ignore=400, body=faqbody)
print(res)
import ast
import datetime

import elasticsearch
import pandas as pd  # was missing: `pd` is used below
from elasticsearch import helpers

es = elasticsearch.Elasticsearch(hosts=[{"host": "ip", "port": 80}])

# --- Bulk-load the spreadsheet into the index -------------------------------
# Number of documents sent per bulk request.
BATCH_SIZE = 1000

data = pd.read_excel("all_data.xlsx")
# Timestamp string in the same layout as the "update_time" mapping format
# (yyyy-MM-dd HH:mm:ss).
now = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')
data.fillna({"images": "", "files": ""}, inplace=True)
data.rename(columns={"Products": "products"}, inplace=True)
data["update_time"] = now
data["is_deleted"] = 0

# Convert the frame to row dicts once instead of slicing out a one-row
# DataFrame for every record.
records = data.to_dict("records")
rows = len(records)
for i in range(0, rows, BATCH_SIZE):
    action = []
    for k in range(i, min(i + BATCH_SIZE, rows)):
        d = records[k]
        # The row position doubles as the document id.
        d["id"] = k
        d["flag"] = int(d["flag"])
        # SECURITY: this used to be eval() on spreadsheet content, which
        # executes arbitrary code. literal_eval only accepts Python literals
        # (the "products" cell is expected to hold a list/tuple literal).
        d["products"] = list(ast.literal_eval(d["products"]))
        action.append({
            "_index": myindex,
            "_type": "_doc",
            "_id": str(k),
            "_source": d,
        })
    # One bulk request per batch.
    helpers.bulk(es, action)
# Alternative to the bulk helper: index the rows one request at a time.
# NOTE(review): `df` is not defined in the visible part of this file; it must
# be a DataFrame created before this point -- confirm.
ysj_dict = df.to_dict("records")  # one dict per DataFrame row

from tqdm import tqdm

# The position in the list is used as the document id.
for i, d in enumerate(tqdm(ysj_dict)):
    # Target the index named by the `myindex` variable; the original passed
    # the literal string 'myindex', which writes to a different index than
    # the one created, searched and updated elsewhere in this script.
    result = es.index(index=myindex, doc_type='_doc', body=d, id=i)
print("DONE!!!!!")
import elasticsearch

es = elasticsearch.Elasticsearch(hosts=[{"host": "ip", "port": 80}])

# --- Search -----------------------------------------------------------------
# Full-text match on "question" (analyzed with "index_ansj"), restricted to
# documents whose "is_deleted" is 0; return at most 5 hits.
dsl = {
    "query": {
        "bool": {
            "must": [
                {
                    "match": {
                        "question": {
                            "query": "陆大人的袁宝宝",
                            "analyzer": "index_ansj",
                        }
                    }
                },
            ],
            "filter": [
                {"term": {"is_deleted": 0}},
            ],
        }
    },
    "size": 5,
}
res = es.search(index="shigebaobao", body=dsl)

# NOTE(review): `result` and `score` are assigned but never read in the
# visible code; only the matched question text is printed.
result = []
hits = res["hits"]["hits"]
for hit in hits:
    score = hit["_score"]
    source = hit["_source"]
    print(source["question"])
# Update the value of a field on documents whose ids are already known
from elasticsearch import Elasticsearch

client = Elasticsearch(
    "ip:80",
    timeout=10,
    max_retries=1,
    retry_on_timeout=False,
)

# Partial-document update that sets the "is_deleted" field to 1.
dsl = {"doc": {"is_deleted": 1}}

# Documents whose ids fall in this range are the ones to be deleted
# (18721 inclusive up to 18938 exclusive).
ids_to_delete = range(18721, 18938)
for k in ids_to_delete:
    # One update request per document id.
    client.update(index="shigebaobao", id=k, body=dsl)