es_target_releasers.py 3.16 KB
# -*- coding: utf-8 -*-
"""
Created on Wed Jun  6 18:13:14 2018

@author: hanye
"""
import json #, redis
import random
from elasticsearch import Elasticsearch
from elasticsearch.helpers import scan

hosts = '172.18.52.14'
port = 9200

HTTP_AUTH = ("elastic", "gm_test")
es_framework = Elasticsearch(hosts=hosts, port=port, http_auth=HTTP_AUTH)

index_target_releaser = 'target_releasers'


def bulk_write_target_releasers(dict_Lst,
                                index=index_target_releaser,):
    bulk_write_body=''
    write_counter=0
    for line in dict_Lst:
        write_counter+=1
        try:
            releaser=line['releaser']
            platform=line['platform']
            doc_id_releaser='%s_%s' % (platform, releaser)
            action_str=('{ "index" : { "_index" : "%s","_id" : "%s" } }'
                        % (index_target_releaser, doc_id_releaser))
            data_str=json.dumps(line, ensure_ascii=False)
            line_body = action_str + '\n' + data_str + '\n'
            bulk_write_body += line_body
        except:
            print('ill-formed data', line)
        if write_counter%1000==0 or write_counter==len(dict_Lst):
            print('Writing into es %d/%d' % (write_counter, len(dict_Lst)))
            if bulk_write_body!='':
                es_framework.bulk(index=index_target_releaser,body=bulk_write_body, request_timeout=100)


def get_releaserUrls_from_es(platform,
                             releaser=None,
                             frequency=None,
                             target_index=None,
                             project_tags=[]):
    search_body = {"query": {"bool": {"filter": [{"term": {"platform": platform}}]}}}
    if releaser is not None:
        releaser_dict = {"term": {"releaser": releaser}}
        search_body['query']['bool']['filter'].append(releaser_dict)
    # if frequency is not None:
    #     frequency_dict = {"range": {"frequency": {"gte": frequency}}}
    #     search_body['query']['bool']['filter'].append(frequency_dict)
    # if project_tags:
    #     frequency_dict = {"terms":{"project_tags.keyword":project_tags}}
    #     search_body['query']['bool']['filter'].append(frequency_dict)
    # print(target_index,doc_type_target_releaser,search_body)
    print(search_body)
    search_resp= es_framework.search(index=target_index,
                                     body=search_body,
                                     size=0,
                                     request_timeout=100)
    total_hit = search_resp['hits']['total']["value"]
    print(search_resp)
    releaserUrl_Lst = []
    if total_hit > 0:
        print('Got %d releaserUrls for platform %s.' % (total_hit, platform))
        scan_resp = scan(client=es_framework, query=search_body,
                         index=target_index,
                         request_timeout=200)
        for line in scan_resp:
            try:
                releaserUrl = line['_source']['releaserUrl']
                releaser = line['_source']['releaser']

                releaserUrl_Lst.append((releaserUrl,releaser))
            except:
                print('error in :', line)
                continue
    else:
        print('Got zero hits.')
    return releaserUrl_Lst