# -*- coding: utf-8 -*-
"""
Created on Wed Apr 11 18:24:51 2018

1. Build the URL set in redis, as a separate process.
2. Use redis as a queue: start several processes or threads to get URLs
and search for the latest data in a one-day range; each process writes
the data it gets into a data queue.
3. Start another process to take data out of data_queue,
form the bulk write body, and write it into es.


@author: hanye
"""

from elasticsearch import Elasticsearch
from elasticsearch.helpers import scan
import datetime, sys
#from func_calculate_newTudou_video_id import calculate_newTudou_video_id
#from func_calculate_toutiao_video_id import calculate_toutiao_video_id
from func_cal_doc_id import cal_doc_id
import json

def func_write_daily_from_TK_by_fetch_time(fetch_day_str, seconds_from_00=25200,
                                           data_provider='fhtech',
                                           f_log=sys.stdout):
    """Copy time-track documents fetched early on one day into daily-url.

    Searches index 'short-video-time-track' (type 'time-track') for documents
    of ``data_provider`` whose ``fetch_time`` lies between 00:00 of
    ``fetch_day_str`` and ``seconds_from_00`` seconds later (both ends
    inclusive), then bulk-indexes each of them into index
    'short-video-production' (type 'daily-url') under the id returned by
    ``cal_doc_id``.

    Args:
        fetch_day_str: Day to process, formatted 'YYYY-MM-DD'. It is parsed
            as a naive datetime, so the window boundaries follow the local
            timezone of the running process.
        seconds_from_00: Length of the window in seconds, counted from 00:00
            of the given day.
        data_provider: Value matched exactly against ``data_provider.keyword``.
        f_log: Writable text stream that receives the progress messages.

    Raises:
        ValueError: If ``fetch_day_str`` is not in 'YYYY-MM-DD' format.
        Errors raised by the Elasticsearch client are not caught here.
    """
    es = Elasticsearch(host='192.168.17.11', port=9200)

    index_TK = 'short-video-time-track'
    doc_type_TK = 'time-track'
    index_pro = 'short-video-production'
    doc_type_daily = 'daily-url'
    batch_size = 1000  # documents per bulk request

    fetch_time_start_T = datetime.datetime.strptime(fetch_day_str, '%Y-%m-%d')
    fetch_time_end_T = fetch_time_start_T + datetime.timedelta(seconds=seconds_from_00)
    # fetch_time is handled as epoch milliseconds throughout this function.
    fetch_time_start_ts = int(fetch_time_start_T.timestamp()*1e3)
    fetch_time_end_ts = int(fetch_time_end_T.timestamp()*1e3)

    print('Will calculate daily-url based on data from time-track within %.1f hours from 00:00' % (seconds_from_00/3600),
          datetime.datetime.now(), file=f_log)
    print('fetch_time_start: %s, fetch_time_end: %s' % (fetch_time_start_T, fetch_time_end_T),
          datetime.datetime.now(), file=f_log)

    search_body = {
        "query": {
            "bool": {
                "filter": [
                    {"range": {"fetch_time": {"gte": fetch_time_start_ts,
                                              "lte": fetch_time_end_ts}}},
                    {"term": {"data_provider.keyword": data_provider}},
                ]
            }
        }
    }

    # size=0: only the hit count is needed here; it is used for progress
    # reporting. The documents themselves are streamed by scan() below.
    search_resp = es.search(index=index_TK, doc_type=doc_type_TK, body=search_body, size=0,
                            request_timeout=100)
    total_hit = search_resp['hits']['total']
    print('Got total_hit: %d,' % (total_hit),
          datetime.datetime.now(), file=f_log)
    scan_resp = scan(client=es, query=search_body, preserve_order=False,
                     index=index_TK, doc_type=doc_type_TK,
                     request_timeout=300)

    # Alternating action / source lines of the pending bulk request.
    bulk_lines = []
    line_counter = 0
    for line in scan_resp:
        line_counter += 1

        line_dict = line['_source']
        url = line_dict['url']
        platform = line_dict['platform']
        if platform == "haokan":
            # A haokan 'url' that parses as an integer is rewritten into a
            # full page URL and the document is flagged with 'wrong_url'.
            try:
                int(url)
            except (TypeError, ValueError):
                pass
            else:
                url = "https://sv.baidu.com/videoui/page/videoland?context=%7B%22nid%22%3A%22sv_{0}%22%7D".format(url)
                line_dict['url'] = url
                line_dict['wrong_url'] = "true"

        fetch_time_T = datetime.datetime.fromtimestamp(line_dict['fetch_time']/1e3)
        fetch_time_day_str = fetch_time_T.isoformat()[:10]

        # Stamp every document with the moment it was processed (epoch ms).
        line_dict['timestamp'] = int(datetime.datetime.now().timestamp()*1e3)
        id_daily = cal_doc_id(platform=platform, url=url,
                              doc_id_type='daily-url',
                              fetch_day_str=fetch_time_day_str,
                              data_dict=line_dict)

        # json.dumps keeps the action line valid even if the id contains
        # quotes or backslashes; str() mirrors the former '%s' formatting.
        bulk_lines.append(json.dumps({"index": {"_id": str(id_daily)}},
                                     ensure_ascii=False))
        bulk_lines.append(json.dumps(line_dict, ensure_ascii=False))

        if line_counter % batch_size == 0:
            _flush_daily_bulk(es, index_pro, doc_type_daily, bulk_lines,
                              line_counter, total_hit, f_log)
            bulk_lines = []

    # Write whatever is left. This must not depend on total_hit: the count
    # was taken by a separate request and may differ from what scan() yields.
    if bulk_lines:
        _flush_daily_bulk(es, index_pro, doc_type_daily, bulk_lines,
                          line_counter, total_hit, f_log)

    print('All done. Written %d lines.' % (line_counter),
          datetime.datetime.now(), file=f_log)


def _flush_daily_bulk(es, index, doc_type, bulk_lines, line_counter, total_hit, f_log):
    """Send the pending action/source lines as one bulk request and log progress."""
    # The bulk body is newline-delimited and must end with a newline.
    bulk_body = '\n'.join(bulk_lines) + '\n'
    t1 = datetime.datetime.now()
    bulk_resp = es.bulk(index=index, doc_type=doc_type,
                        body=bulk_body, request_timeout=200)
    td = datetime.datetime.now() - t1
    # Avoid ZeroDivisionError when the preliminary count reported no hits.
    percent = (line_counter/total_hit)*100 if total_hit else 100.0
    print('written %d/%d [%.2f%%], costs %s,' % (line_counter, total_hit,
                                               percent, td),
          datetime.datetime.now(), file=f_log)
    # A bulk request can succeed as a whole while individual items fail.
    if isinstance(bulk_resp, dict) and bulk_resp.get('errors'):
        print('bulk write reported item-level errors near line %d,' % (line_counter),
              datetime.datetime.now(), file=f_log)
    
        
    
    