# -*- coding: utf-8 -*-
"""
Created on Wed Jun  6 18:18:09 2018

@author: hanye
"""
#import redis
#from crawler_sys.framework.platform_redis_register import get_redis_list_name
from crawler.crawler_sys.framework.es_crawler import scan_crawler_url_register

#rds = redis.StrictRedis(host='192.168.17.60', port=6379, db=0)

def pull_url_from_es(platform, release_time_lower_bdr=None):
    """
    Just pull urls from es index crawler-url-register.
    Url reforming things will be done in the method who
    is responsible for pushing urls into redis.
    Just return url and its platform 
    """
    if release_time_lower_bdr is None:
        release_time_lower_bdr = 0
    else:
        pass
    search_body = {"query": {"bool": {"filter": [{"range": {"release_time":
                                                 {"gte": release_time_lower_bdr}}},
                                                 {"term": {"platform.keyword": platform}}]}}}
    total_hit, scan_resp = scan_crawler_url_register(search_body)
    batch_url_Lst = []
    if total_hit > 0:
        line_counter = 0
        for line in scan_resp:
            line_counter += 1
            line_d = line['_source']
            url = line_d['url']
            batch_url_Lst.append(url)
    else:
        pass
    return batch_url_Lst


#def url_reformer(platform, url):
#    """
#    to reform url according to platform, in the future.
#    Say, a url of http://www.toutiao.com/group/1234567890123456789
#    as a string is different from http://www.365yg.com/u/1234567890123456789,
#    but they point to the same resource. They should be reformed
#    to one unique url before pushing into redis for futher crawling.
#    """
#    reformed_url = url
#    return reformed_url
#
#def feed_url_into_redis(dict_Lst, platform,
#                        release_time_lower_bdr=None,
#                        batch_str=None):
#    """
#    release_time_lower_bdr must be an int value represent
#    timestamp in milliseconds if given.
#    All url that is released before release_time_lower_bdr
#    will not be pushed into redis. If argument release_time_lower_bdr
#    is not given when call this function, all urls will be
#    pushed into redis.
#    """
#    redis_list_name = get_redis_list_name(platform, batch_str)
#    if redis_list_name is None:
#        print('Failed to get correct redis list name '
#              'in platform_redis_register for platform: '
#              % platform)
#        return (None, None)
#    else:
#        print('Feeding url into redis list %s ...' % redis_list_name)
#        url_counter = 0
#        for data_dict in dict_Lst:
#            try:
#                url = data_dict['url']
#                url_reformed = url_reformer(platform, url)
#                if release_time_lower_bdr is None:
#                    sadd_c = rds.sadd(redis_list_name, url_reformed)
#                    url_counter += sadd_c
#                else:
#                    url_release_time = data_dict['release_time']
#                    if url_release_time >= release_time_lower_bdr:
#                        sadd_c = rds.sadd(redis_list_name, url_reformed)
#                        url_counter += sadd_c
#            except:
#                print('Failed to push url into redis, '
#                      'might because of lack of url field '
#                      'or lack of release_time field, or '
#                      'has wrong typed release_time value. '
#                      'The failed data dict is: \n %s' % data_dict)
#        print('Pushed %d urls into redis' % url_counter)
#        return (redis_list_name, url_counter)