1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
# -*- coding: utf-8 -*-
"""
Created on Wed Jun 6 18:13:14 2018
@author: hanye
"""
import json #, redis
import random
from elasticsearch import Elasticsearch
from elasticsearch.helpers import scan
# Elasticsearch connection settings.
# NOTE(review): the host address and credentials are hard-coded in source;
# consider loading them from configuration or environment variables.
hosts = '172.18.52.14'
port = 9200
HTTP_AUTH = ("elastic", "gm_test")
# Module-level client object, constructed when this module is imported and
# shared by bulk_write_target_releasers and get_releaserUrls_from_es.
es_framework = Elasticsearch(hosts=hosts, port=port, http_auth=HTTP_AUTH)
# Name of the index used as the default write target for releaser documents.
index_target_releaser = 'target_releasers'
def bulk_write_target_releasers(dict_Lst,
                                index=index_target_releaser,):
    """Index releaser documents into Elasticsearch through the bulk API.

    Every document is written with the id ``"<platform>_<releaser>"``.
    Documents are flushed to Elasticsearch after every 1000 input items and
    once more after the last item.

    Args:
        dict_Lst: Sized collection (``len()`` is called on it) of dicts.
            Each dict must contain the keys ``'releaser'`` and ``'platform'``
            and be JSON serializable; items that are not are reported on
            stdout and skipped.
        index: Name of the index to write to. Defaults to
            ``index_target_releaser``.

    Returns:
        None.
    """
    total = len(dict_Lst)
    # NDJSON lines (action, source, action, source, ...) of the current batch.
    batch_lines = []
    for write_counter, line in enumerate(dict_Lst, start=1):
        try:
            releaser = line['releaser']
            platform = line['platform']
            doc_id_releaser = '%s_%s' % (platform, releaser)
            # json.dumps escapes quotes/backslashes in the id, which plain
            # string formatting would leave as invalid JSON.
            action_str = json.dumps(
                {"index": {"_index": index, "_id": doc_id_releaser}},
                ensure_ascii=False)
            data_str = json.dumps(line, ensure_ascii=False)
        except (KeyError, TypeError, ValueError):
            # Missing key, non-dict item, or a value json cannot serialize.
            print('ill-formed data', line)
        else:
            batch_lines.append(action_str)
            batch_lines.append(data_str)

        if write_counter % 1000 == 0 or write_counter == total:
            print('Writing into es %d/%d' % (write_counter, total))
            if batch_lines:
                # The bulk body must end with a newline.
                bulk_write_body = '\n'.join(batch_lines) + '\n'
                es_framework.bulk(index=index,
                                  body=bulk_write_body,
                                  request_timeout=100)
                # Start a fresh batch so already-written documents are not
                # sent again with the next flush.
                batch_lines = []
def get_releaserUrls_from_es(platform,
                             releaser=None,
                             frequency=None,
                             target_index=None,
                             project_tags=None):
    """Collect (releaserUrl, releaser) pairs stored in Elasticsearch.

    A ``size=0`` search is issued first to learn whether anything matches;
    only when it reports hits are the matching documents streamed with the
    ``scan`` helper.

    Args:
        platform: Value the ``platform`` field must equal.
        releaser: Optional value the ``releaser`` field must equal. When
            ``None`` no releaser filter is applied.
        frequency: Accepted for interface compatibility; currently ignored.
        target_index: Index name handed to the Elasticsearch client as-is.
            NOTE(review): with the default ``None`` the client presumably
            searches every index -- confirm this is intended.
        project_tags: Accepted for interface compatibility; currently
            ignored.

    Returns:
        list of ``(releaserUrl, releaser)`` tuples, one per matching document
        that has both fields; empty when nothing matched.
    """
    filters = [{"term": {"platform": platform}}]
    if releaser is not None:
        filters.append({"term": {"releaser": releaser}})
    search_body = {"query": {"bool": {"filter": filters}}}
    # NOTE: filtering on `frequency` ({"range": {"frequency": {"gte": ...}}})
    # and on `project_tags` ({"terms": {"project_tags.keyword": [...]}}) is
    # disabled at the moment, so both arguments have no effect.
    print(search_body)

    search_resp = es_framework.search(index=target_index,
                                      body=search_body,
                                      size=0,
                                      request_timeout=100)
    total_hit = search_resp['hits']['total']
    if isinstance(total_hit, dict):
        # Elasticsearch 7+ reports the total as {"value": ..., "relation": ...};
        # older servers (or rest_total_hits_as_int) return a plain integer.
        total_hit = total_hit['value']
    print(search_resp)

    releaserUrl_Lst = []
    if total_hit <= 0:
        print('Got zero hits.')
        return releaserUrl_Lst

    print('Got %d releaserUrls for platform %s.' % (total_hit, platform))
    scan_resp = scan(client=es_framework,
                     query=search_body,
                     index=target_index,
                     request_timeout=200)
    for hit in scan_resp:
        try:
            source = hit['_source']
            # Local names differ from the `releaser` parameter on purpose so
            # the argument is not overwritten while iterating.
            url_and_name = (source['releaserUrl'], source['releaser'])
        except (KeyError, TypeError):
            # Document lacks one of the expected fields; report and skip it.
            print('error in :', hit)
            continue
        releaserUrl_Lst.append(url_and_name)
    return releaserUrl_Lst