1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
# -*- coding: utf-8 -*-
"""
Created on Thu Aug 9 16:47:12 2018
@author: zhouyujiang
"""
import elasticsearch
import datetime
from elasticsearch.helpers import scan
import elasticsearch_7 as e7
# import pandas as pd
import json
# from func_cal_doc_id import cal_doc_id
# from urllib import parse
# Connection settings for the cluster that the script reads from
# (the `es2` client is the one handed to `scan` further down).
hosts = '172.16.31.17'
port = 9000
es2 = elasticsearch.Elasticsearch(hosts=hosts, port=port)
# Client for the cluster that receives the bulk writes, built from the
# `elasticsearch_7` package (imported as `e7`).
# NOTE(review): the basic-auth credentials are committed in plain text here;
# consider loading them from the environment or a secrets store.
es7 = e7.Elasticsearch(hosts="172.16.52.27", port=9200,http_auth=("elastic","gengmei!@#"))
# Alternative target kept for reference (same host/port as the `es2` client above):
# es7 = e7.Elasticsearch(hosts="172.16.31.17", port=9000)
# Index name and doc_type used for the searches, the scan and the bulk writes.
target_index = 'gm-dbmw-doctor-read'
target_type = 'doctor'
# NOTE(review): `from_index` / `from_type` are not referenced anywhere in the
# visible part of this script -- confirm whether they are still needed.
from_index = 'gm-dbmw-doctor-read'
from_type = '_doc'
# Text buffer for the bulk request payload; starts out empty.
bulk_all_body = ''
# Query shared by the probe searches below and by the scan that follows:
# a bool query whose filter list is empty, i.e. no restricting clauses.
search_body = {"query": {"bool": {"filter": []}}}

# Run the same query once against each cluster. Only the response of the
# es7 client is printed; the es2 response is kept in `es2_re`.
es2_re = es2.search(
    body=search_body,
    index=target_index,
    doc_type=target_type,
)
es7_re = es7.search(
    body=search_body,
    index=target_index,
    doc_type=target_type,
)
print(es7_re)
#
# q3_total = es2_re['hits']['total']
# Iterate over every hit that `search_body` matches on the es2 client.
# `scan` is the elasticsearch helper that pages through the result set with
# the scroll API (scroll window of 5 minutes, 100 s request timeout).
write_into_scan = scan(
    client=es2,
    query=search_body,
    index=target_index,
    doc_type=target_type,
    scroll='5m',
    request_timeout=100,
)

# `count` stays 0 when the scan yields nothing.
count = 0
# Bulk payload lines (action line + source line per document). They are
# collected in a list and joined once per batch instead of growing one big
# string with `+=` on every document.
bulk_lines = []
for count, one_scan in enumerate(write_into_scan, start=1):
    line = one_scan['_source']
    doc_id = one_scan['_id']

    # Turn the flag into a real boolean: values equal to 1 become True,
    # everything else becomes False. Documents that do not carry the field
    # are left untouched (explicit check instead of a bare `except: pass`).
    if 'facevideo_is_assistant' in line:
        line['facevideo_is_assistant'] = line['facevideo_is_assistant'] == 1

    # Build the action line with json.dumps so that quotes or backslashes
    # inside the id are escaped and the line stays valid JSON.
    bulk_lines.append(json.dumps({"index": {"_id": doc_id}}, ensure_ascii=False))
    bulk_lines.append(json.dumps(line, ensure_ascii=False))

    # Flush a batch to the es7 client after every 500 documents.
    if count % 500 == 0:
        # The bulk body is newline-delimited and must end with a newline.
        bulk_all_body = '\n'.join(bulk_lines) + '\n'
        bulk_lines = []
        eror_dic = es7.bulk(index=target_index,
                            body=bulk_all_body, request_timeout=200)
        if eror_dic['errors'] is True:
            print(eror_dic['items'])
            # Print the payload *before* it is cleared; previously the buffer
            # was reset first, so this always printed an empty string.
            print(bulk_all_body)
        bulk_all_body = ''
        # Progress indicator: number of documents handled so far.
        print(count)

# Send whatever is left over from the last, incomplete batch.
if bulk_lines:
    bulk_all_body = '\n'.join(bulk_lines) + '\n'
    eror_dic = es7.bulk(body=bulk_all_body,
                        index=target_index,
                        request_timeout=200)
    if eror_dic['errors'] is True:
        print(eror_dic)
bulk_all_body = ''