import datetime
import copy
import elasticsearch
from elasticsearch.helpers import scan
import json




# now - 2 days
# NOTE(review): this comment is detached from any code nearby; presumably it
# refers to the day offsets used when the script runs — confirm or remove.

# Connection settings for the module-level Elasticsearch client `es`, which
# every query and bulk write in this file goes through.
# NOTE(review): the host and credentials are hard-coded in source; consider
# loading them from configuration or the environment instead.
hosts = '192.168.17.11'
port = 80
user = 'zhouyujiang'
passwd = '8tM9JDN2LVxM'
http_auth = (user, passwd)
es = elasticsearch.Elasticsearch(hosts=hosts, port=port, http_auth=http_auth)


class NameIdDict:
    """Collect, for one fetch day, the names seen for each key releaser id.

    Constructing an instance immediately queries Elasticsearch: it reads the
    releaser ids flagged as key releasers from the ``target_releasers`` index,
    then scans the ``daily-url`` documents of ``short-video-production`` whose
    ``fetch_time`` lies inside the fetch day, and stores
    ``releaser_id_str -> {releaser, ...}`` in ``name_id_dict``.
    """

    # Maximum number of releaser ids sent in a single ``terms`` filter.
    ID_BATCH_SIZE = 1000

    def __init__(self, fh_datetime, re_datetime):
        """Store the day windows and load the id -> names mapping.

        Args:
            fh_datetime: ``datetime`` marking the start of the fetch day; the
                window is ``[fh_datetime, fh_datetime + 1 day)``.
            re_datetime: ``datetime`` marking the start of the release day.
                Its window is computed and stored, but the query built in
                ``find_name_id`` does not currently filter on it.
        """
        one_day = datetime.timedelta(1)
        self.re_datetime_st = re_datetime
        self.re_datetime_et = re_datetime + one_day
        self.fh_datetime_st = fh_datetime
        self.fh_datetime_et = fh_datetime + one_day
        self.re_datetime = re_datetime
        self.fh_datetime = fh_datetime
        self.splits = 1
        # Window bounds as integer milliseconds, the unit used in the
        # range filter below.
        self.re_datetime_st_ts = int(self.re_datetime_st.timestamp() * 1000)
        self.re_datetime_et_ts = int(self.re_datetime_et.timestamp() * 1000)
        self.fh_datetime_st_ts = int(self.fh_datetime_st.timestamp() * 1000)
        self.fh_datetime_et_ts = int(self.fh_datetime_et.timestamp() * 1000)
        self.name_id_dict = {}
        self.change_list = []
        self.find_name_id()

    def func_list_to_dict(self, scan_re):
        """Merge scan hits into ``name_id_dict``.

        Args:
            scan_re: iterable of hits, each a mapping with a ``'_source'``
                mapping. Hits whose source lacks ``'releaser'`` or
                ``'releaser_id_str'`` are skipped.

        Names are added to whatever set already exists for the id, so names
        gathered by an earlier call are kept rather than replaced.
        """
        for hit in scan_re:
            source = hit['_source']
            try:
                name = source['releaser']
                releaser_id = source['releaser_id_str']
            except KeyError:
                continue
            self.name_id_dict.setdefault(releaser_id, set()).add(name)

    @staticmethod
    def _iter_id_batches(id_list, size):
        """Yield consecutive slices of ``id_list`` holding at most ``size`` ids.

        An empty list yields nothing, and no empty trailing slice is produced
        when the length is an exact multiple of ``size``.
        """
        for start in range(0, len(id_list), size):
            yield id_list[start:start + size]

    def find_name_id(self):
        """Scan the fetch-day documents of all key releasers into ``name_id_dict``.

        The ids are queried in batches of ``ID_BATCH_SIZE`` so that a single
        ``terms`` filter never carries the whole id list.
        """
        id_list = self.find_target_releaser_id()
        print(len(id_list), "id的长度")
        # Only the fetch-time window is applied; the release-time window
        # stored on the instance is not part of the query.
        fetch_window = {
            "range": {
                "fetch_time": {
                    "gte": self.fh_datetime_st_ts,
                    "lt": self.fh_datetime_et_ts,
                }
            }
        }
        for id_batch in self._iter_id_batches(id_list, self.ID_BATCH_SIZE):
            search_body = {
                "query": {
                    "bool": {
                        "filter": [
                            fetch_window,
                            {"terms": {"releaser_id_str": id_batch}},
                        ]
                    }
                }
            }
            scan_re = scan(client=es, index='short-video-production',
                           doc_type='daily-url', query=search_body)
            self.func_list_to_dict(scan_re)

    def func_find_change(self, class_name_dict):
        """List the releasers of this instance whose names are gone in another.

        ``self`` is treated as the older snapshot and ``class_name_dict`` as
        the newer one. An id is reported when at least one of its names in
        ``self`` is absent from the newer snapshot (including the case where
        the id is missing there altogether).

        Args:
            class_name_dict: object exposing a ``name_id_dict`` mapping of
                releaser id to a set of names.

        Returns:
            A list of dicts with keys ``'releaser_id'``,
            ``'releaser_name_old'`` (all old names, sorted and comma-joined)
            and ``'releaser_name_new'``. The new name is taken from the names
            that appear only in the newer snapshot when there are any,
            otherwise from the newer names as a whole; it is ``""`` when the
            newer snapshot has no names for the id. The same list is stored
            in ``self.change_list``.
        """
        newer_names_by_id = class_name_dict.name_id_dict
        change_list = []
        for releaser_id, old_names in self.name_id_dict.items():
            new_names = newer_names_by_id.get(releaser_id, set())
            if not old_names - new_names:
                continue
            # Prefer a name that is genuinely new; sorting makes the choice
            # reproducible instead of depending on set iteration order.
            candidates = sorted((new_names - old_names) or new_names, key=str)
            change_list.append({
                'releaser_id': releaser_id,
                'releaser_name_old': ','.join(sorted(old_names, key=str)),
                'releaser_name_new': candidates[-1] if candidates else "",
            })
        self.change_list = change_list
        return change_list

    @staticmethod
    def find_target_releaser_id():
        """Return the ``releaser_id_str`` of every key releaser.

        Scans the ``target_releasers`` index for documents whose
        ``key_releaser.keyword`` equals ``"True"``.

        Raises:
            KeyError: if a matching document has no ``releaser_id_str``.
        """
        body = {
            "query": {
                "bool": {
                    "filter": [
                        {"term": {"key_releaser.keyword": "True"}}
                    ]
                }
            }
        }
        return [
            hit['_source']['releaser_id_str']
            for hit in scan(client=es, index='target_releasers', query=body)
        ]


def func_write_into_es(change_list, fhday):
    """Index each detected name change into the ``change_name`` index.

    One bulk request is sent per change. The document id is
    ``<releaser_id>_<day>``, where ``<day>`` is the first ten characters of
    ``str(fhday)``, so writing the same releaser for the same day targets the
    same document id.

    Args:
        change_list: iterable of dicts with keys ``'releaser_id'``,
            ``'releaser_name_old'`` and ``'releaser_name_new'``. Empty
            entries are skipped.
        fhday: date-like object providing ``day``, ``month`` and ``year``.

    When a bulk response reports errors, the response items and the request
    body are printed; nothing is raised for them.
    """
    timestamp = int(datetime.datetime.now().timestamp() * 1000)
    change_day_str = str(fhday)[0:10]
    for one_c in change_list:
        if not one_c:
            continue
        releaser_id_str = one_c['releaser_id']
        doc_id = '{}_{}'.format(releaser_id_str, change_day_str)
        document = {
            'timestamp': timestamp,
            'old_name': one_c['releaser_name_old'],
            'new_name': one_c['releaser_name_new'],
            'data_day': fhday.day,
            'data_month': fhday.month,
            'data_year': fhday.year,
            'releaser_id_str': releaser_id_str,
        }
        # Serialize the action line with json so that quotes or backslashes
        # in the id are escaped instead of producing invalid JSON.
        bulk_head = json.dumps({"index": {"_id": doc_id}}, ensure_ascii=False)
        data_str = json.dumps(document, ensure_ascii=False)
        bulk_one_body = bulk_head + '\n' + data_str + '\n'

        eror_dic = es.bulk(index='change_name', doc_type='doc',
                           body=bulk_one_body, request_timeout=200)
        if eror_dic['errors'] is True:
            print(eror_dic['items'])
            print(bulk_one_body)


if __name__ == "__main__":
    run_started = datetime.datetime.now()
    # Midnight of the current day; every window below is offset from it.
    today = datetime.datetime(run_started.year, run_started.month, run_started.day)
    one_day = datetime.timedelta(1)

    # Newer snapshot: fetched one day back, released two days back.
    newer_snapshot = NameIdDict(today - one_day, today - 2 * one_day)
    # Older snapshot: fetched two days back, released three days back.
    older_snapshot = NameIdDict(today - 2 * one_day, today - 3 * one_day)

    # Compare the older snapshot against the newer one and store the result,
    # stamped with the moment this run started.
    change_list = older_snapshot.func_find_change(newer_snapshot)
    func_write_into_es(change_list=change_list, fhday=run_started)










