# -*- coding:utf-8 -*-
# @Time : 2019/7/26 14:33 
# @Author : litao
# -*- coding: utf-8 -*-
"""
Created on Mon Dec 17 10:05:18 2018

@author: zhouyujiang

Write releaser accounts from a CSV file into the target_releasers index.

"""
import json, re
import datetime, copy
from elasticsearch import Elasticsearch

try:
    from write_data_into_es.func_get_releaser_id import *
except:
    from .func_get_releaser_id import *
import redis
import hashlib

# Elasticsearch connection settings; the client is created at import time.
# NOTE(review): the user name and password are hard-coded in source; consider
# loading them from configuration or the environment instead.
hosts = '192.168.17.11'
port = 80
user = 'zhouyujiang'
passwd = '8tM9JDN2LVxM'
http_auth = (user, passwd)
es = Elasticsearch(hosts=hosts, port=port, http_auth=http_auth)
# Index / doc-type names kept as module constants.
index = 'short-video-production'
doc_type = 'daily-url'
# Redis client (db 2); decode_responses=True makes replies come back as str.
pool = redis.ConnectionPool(host='192.168.17.60', port=6379, db=2, decode_responses=True)
rds = redis.Redis(connection_pool=pool)

# Date helpers computed once at import time: subtracting one day from the
# first day of the current month lands in the previous month, whose month and
# year are stored in l_month / l_year.
today = datetime.datetime.now()
first_day = datetime.datetime(today.year, today.month, 1)
day_before_first_day = first_day - datetime.timedelta(1)
l_month = day_before_first_day.month
l_year = day_before_first_day.year
# Module-level counter, initialised to zero.
count = 0


# Deletes "\r", "\n", "\t" and " " in a single pass.
_STRIP_WHITESPACE = str.maketrans("", "", "\r\n\t ")
# First http(s) URL embedded in a cell.
_URL_PATTERN = re.compile(
    r"http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+~]|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+")


def _note_line(acc, line_no):
    """Append ``"<line_no>,"`` to *acc* unless it is already the last entry.

    Callers report rows in ascending order, so looking at the tail of the
    accumulator is enough to stop one row from being listed several times.
    """
    token = str(line_no) + ","
    if acc == token or acc.endswith("," + token):
        return acc
    return acc + token


def parse_line_dict(line, line_dict, blank_space_error, new_line_error, err_id_line):
    """Clean the string values of one input row in place and record problems.

    Every ``str`` value has spaces, carriage returns, newlines and tabs
    removed.  The ``releaserUrl`` value is additionally reduced to the first
    http(s) URL it contains.  Non-string values are left untouched.

    :param line: zero-based index of the row; problems are reported as
        ``line + 2`` (one-based numbering plus the header row).
    :param line_dict: mapping of column name to value; modified in place.
    :param blank_space_error: comma-terminated row numbers that contained
        spaces, accumulated over previous calls.
    :param new_line_error: same, for rows containing ``\\r``, ``\\n`` or
        ``\\t``.
    :param err_id_line: same, for rows whose ``releaserUrl`` holds no URL.
    :return: ``(line_dict, blank_space_error, new_line_error, err_id_line)``
        with the accumulators extended; each row number is added at most once
        per accumulator.
    """
    line_no = line + 2
    for k in line_dict:
        value = line_dict[k]
        if not isinstance(value, str):
            # Lists, numbers, None ... carry nothing to strip.
            continue
        if " " in value:
            blank_space_error = _note_line(blank_space_error, line_no)
        if "\r" in value or "\n" in value or "\t" in value:
            new_line_error = _note_line(new_line_error, line_no)
        value = value.translate(_STRIP_WHITESPACE)
        if k == "releaserUrl":
            found = _URL_PATTERN.search(value)
            if found:
                value = found.group(0)
            else:
                # Keep the cleaned text but flag the row as having a bad URL.
                err_id_line = _note_line(err_id_line, line_no)
        line_dict[k] = value
    return line_dict, blank_space_error, new_line_error, err_id_line


def _is_purchased_flag(value):
    """Interpret a row's ``is_purchased`` field as a boolean.

    CSV cells arrive as strings, so "0", "false" and "" must be treated as
    "not purchased" explicitly; any other value uses normal truthiness.
    """
    if isinstance(value, str):
        return value.strip().lower() not in ("", "0", "false")
    return bool(value)


def _parse_purchase_end_time(value):
    """Convert ``YYYY-MM-DD`` or ``YYYY/MM/DD`` text to epoch milliseconds.

    Returns ``None`` when the text uses neither separator or is not a valid
    calendar date.
    """
    if "-" in value:
        fmt = '%Y-%m-%d'
    elif "/" in value:
        fmt = '%Y/%m/%d'
    else:
        return None
    try:
        return int(datetime.datetime.strptime(value, fmt).timestamp() * 1e3)
    except ValueError:
        return None


def _send_target_releasers_bulk(bulk_lines):
    """Send the queued action/source lines to ``target_releasers``; print failures."""
    eror_dic = es.bulk(index='target_releasers', doc_type='doc',
                       body="\n".join(bulk_lines) + "\n")
    if eror_dic['errors'] is True:
        print(eror_dic)


def _write_releaser_rows(rows, push_to_redis, update, extra_dic, post_by):
    """Index every row yielded by *rows* and return the textual report.

    *rows* yields ``(raw, row_dict)`` pairs: ``raw`` is only printed as a
    progress trace, ``row_dict`` is the mapping that gets processed.
    """
    bulk_lines = []
    err_id_line = ""
    blank_space_error = ""
    new_line_error = ""
    error_msg_list = []
    bluk_purchase_list = []
    count = 0
    last_row = {}  # last row that passed the platform check

    for line, (raw, line_dict) in enumerate(rows):
        print(raw)

        try:
            platform = line_dict['platform']
            if platform == "short_video":
                line_dict['platform'] = line_dict['releaser_platform']
                platform = line_dict['releaser_platform']
        except (KeyError, TypeError):
            # Row has no usable platform column (or is not a mapping): skip it.
            new_line_error += str(line + 2) + ","
            continue
        line_dict, blank_space_error, new_line_error, err_id_line = parse_line_dict(
            line, line_dict, blank_space_error, new_line_error, err_id_line)
        line_dict.pop("", None)
        last_row = line_dict

        releaserUrl = line_dict['releaserUrl']
        if platform == 'new_tudou' and isinstance(releaserUrl, str) and releaserUrl.endswith('=='):
            releaserUrl = releaserUrl + '/videos'
            line_dict['releaserUrl'] = releaserUrl

        if extra_dic:
            # Deep copy so list values (tags, departments) are not shared
            # between rows; they are extended in place further down.
            line_dict.update(copy.deepcopy(extra_dic))

        # Resolved once per row and reused below.
        releaser_id = get_releaser_id(platform=platform, releaserUrl=releaserUrl)
        line_dict["releaser_id"] = releaser_id
        if releaser_id:
            doc_id = platform + '_' + releaser_id
        else:
            # No id could be derived from the URL: fall back to the name.
            doc_id = platform + '_' + line_dict['releaser']
            err_id_line += str(line + 2) + ","

        find_exist = {
            "query": {
                "bool": {
                    "filter": [
                        {"term": {"_id": doc_id}}
                    ]
                }
            }
        }
        search_re = es.search(index='target_releasers', doc_type='doc', body=find_exist)
        if search_re['hits']['total'] > 0:
            search_source = search_re['hits']['hits'][0]['_source']
            for tag_key in ("project_tags", "department_tags"):
                if not search_source.get(tag_key):
                    continue
                try:
                    # Union of the row's tags and the stored tags.
                    line_dict[tag_key].extend(search_source.get(tag_key))
                    line_dict[tag_key] = list(set(line_dict[tag_key]))
                    search_source.pop(tag_key, 0)
                except (KeyError, AttributeError, TypeError):
                    # Row has no list under this key; the stored value is
                    # kept as-is and applied by the update below.
                    pass
            if update:
                # Stored fields take precedence over the row's fields.
                line_dict.update(search_source)
            line_dict["post_time"] = search_source.get("post_time")

        if not line_dict.get("post_time"):
            line_dict['post_time'] = int(datetime.datetime.timestamp(datetime.datetime.now()) * 1000)

        try:
            line_dict["releaser_id"] = releaser_id
            if platform != "weixin" and platform != "weibo":
                line_dict["releaser_id_str"] = platform + "_" + releaser_id
            else:
                line_dict["releaser_id_str"] = releaser_id
            line_dict["is_valid"] = "true"
        except TypeError:
            # releaser_id is not a string (e.g. None): mark the row invalid.
            line_dict["releaser_id"] = ""
            line_dict["releaser_id_str"] = ""
            line_dict["is_valid"] = "false"
        if post_by:
            line_dict["post_by"] = post_by

        if platform == "weixin" and not line_dict.get("del_departments"):
            line_dict["is_purchased"] = True
        if not line_dict.get("project_tags"):
            line_dict["project_tags"] = []
        if not line_dict.get("department_tags"):
            line_dict["department_tags"] = []
        if line_dict.get("add_departments"):
            line_dict["department_tags"].extend(line_dict.get("add_departments"))
            line_dict["department_tags"] = list(set(line_dict["department_tags"]))
        if line_dict.get("del_departments"):
            for key in line_dict.get("del_departments"):
                try:
                    line_dict["department_tags"].remove(key)
                except (ValueError, AttributeError):
                    continue
        if line_dict.get("add_project_tags"):
            line_dict["project_tags"].extend(line_dict.get("add_project_tags"))
            line_dict["project_tags"] = list(set(line_dict["project_tags"]))
        if line_dict.get("del_project_tags"):
            for key in line_dict.get("del_project_tags"):
                try:
                    line_dict["project_tags"].remove(key)
                except (ValueError, AttributeError):
                    continue

        row_platform = line_dict.get("platform")
        bulk_dic = {
            "releaser": line_dict.get("releaser"),
            "releaserUrl": line_dict.get("releaserUrl"),
            "platform": row_platform,
            "releaser_id": line_dict.get("releaser_id"),
            "releaser_id_str": line_dict.get("releaser_id_str"),
            "post_by": line_dict.get("post_by"),
            "Nov_2018": line_dict.get("Nov_2018"),
            "post_time": line_dict.get("post_time"),
            "frequency": 3 if line_dict.get("project_tags") else 1,
            "key_releaser": line_dict.get("key_releaser"),
            "is_valid": line_dict.get("is_valid"),
            "systematic": row_platform if row_platform in ("weixin", "weibo", "app", "tv") else "short_video",
            "has_data": line_dict.get("has_data") or 0,
            "project_tags": line_dict.get("project_tags"),
            "department_tags": line_dict.get("department_tags"),
            'timestamp': int(datetime.datetime.timestamp(datetime.datetime.now()) * 1000),
            'media_type': line_dict.get("media_type") or "",
            'tv_station': line_dict.get("tv_station") or "",
            'releaser_type': line_dict.get("releaser_type") or "",
            'channel': line_dict.get("channel") or "",
            'channel_type': line_dict.get("channel_type") or "",
            'program': line_dict.get("program") or "",
            'tv_type': line_dict.get("tv_type") or "",
        }
        if _is_purchased_flag(line_dict.get("is_purchased")):
            bulk_dic["is_purchased"] = True
            end_time = line_dict.get("purchase_end_time")
            if isinstance(end_time, str):
                end_ms = _parse_purchase_end_time(end_time)
                if end_ms is None:
                    error_msg_list.append("第%s行 日期格式错误,请修改后重试" % (line + 2))
                else:
                    bulk_dic["purchase_end_time"] = end_ms
            else:
                # No end date supplied: fixed far-future timestamp (ms).
                bulk_dic["purchase_end_time"] = 7258089600000
            bluk_purchase_list.append(bulk_dic)

        if push_to_redis:
            rds.lpush("releaser_doc_id_list", doc_id)
        # json.dumps keeps the action line valid even if doc_id contains quotes.
        bulk_lines.append(json.dumps({"index": {"_id": doc_id}}, ensure_ascii=False))
        bulk_lines.append(json.dumps(bulk_dic, ensure_ascii=False))
        count = count + 1
        if count % 500 == 0:
            _send_target_releasers_bulk(bulk_lines)
            bulk_lines = []

    if bulk_lines:
        _send_target_releasers_bulk(bulk_lines)
    if not push_to_redis:
        # Department changes are taken from the last processed row.
        if last_row.get("add_departments"):
            purchase_releaser_add(bluk_purchase_list, last_row.get("add_departments"))
        if last_row.get("del_departments"):
            purchase_releaser_add(bluk_purchase_list, last_row.get("del_departments"), if_add=False)
    error_msg_list.append("%s条 写入成功" % count)
    if err_id_line:
        error_msg_list.append("第%s行 releaserUrl错误" % err_id_line[:-1])
    if blank_space_error:
        error_msg_list.append("第%s行 发现存在空格" % blank_space_error[:-1])
    if new_line_error:
        error_msg_list.append("第%s行 发现存在换行符" % new_line_error[:-1])
    return error_msg_list


def write_to_es(file, push_to_redis=True, update=True, key_releaser=False, update_dic=None, extra_dic=None, **kwargs):
    """Import releaser rows into the ``target_releasers`` index.

    :param file: path of a gb18030-encoded, comma-separated file whose first
        line is the header, or a list of row dicts.  Cells are split on every
        comma; quoted fields are not supported.
    :param push_to_redis: when true, each document id is pushed onto the
        ``releaser_doc_id_list`` Redis list.  When false, the department
        purchase log is updated instead for rows carrying
        ``add_departments`` / ``del_departments``.
    :param update: when true and a document with the same id already exists,
        its stored fields overwrite the row's fields before re-indexing.
    :param key_releaser: currently unused; the indexed ``key_releaser`` value
        is read from the row.
    :param update_dic: currently unused.
    :param extra_dic: extra fields merged into every row.  The mapping is
        copied, so the caller's object is never modified.  Empty
        ``project_tags`` / ``department_tags`` entries are ignored.
    :param kwargs: ``post_by`` (str) is stored on every row; other keys are
        ignored.

    Row fields with special handling (usually supplied via *extra_dic*):
    ``add_departments`` / ``del_departments`` and ``add_project_tags`` /
    ``del_project_tags`` (lists applied to the tag lists), ``is_purchased``
    (0/1) and ``purchase_end_time`` (``%Y-%m-%d`` or ``%Y/%m/%d``).

    :return: list of human-readable messages: the number of rows written
        followed by the row numbers that had problems.
    :raises OSError: if *file* is a path that cannot be opened.
    """
    extra_dic = dict(extra_dic) if extra_dic else {}
    # Drop empty tag lists up front so every row is treated the same way.
    for tag_key in ("project_tags", "department_tags"):
        if not extra_dic.get(tag_key):
            extra_dic.pop(tag_key, None)
    post_by = kwargs.get("post_by")

    if isinstance(file, list):
        rows = ((row, row) for row in file)
        return _write_releaser_rows(rows, push_to_redis, update, extra_dic, post_by)

    with open(file, 'r', encoding="gb18030") as handle:
        head_list = handle.readline().strip().split(',')
        rows = ((raw, dict(zip(head_list, raw.strip().split(',')))) for raw in handle)
        return _write_releaser_rows(rows, push_to_redis, update, extra_dic, post_by)


def _purchase_log_doc(dic, department, is_purchased, default_ms, **get_kwargs):
    """Build the ``department_purchase_log`` entry for one releaser/department.

    Works on a copy of *dic*, so the caller's dict is not modified.  If an
    entry with the same id already exists, its fields are merged in and its
    ``purchase_start_time`` replaces *default_ms*.

    :return: ``(_id, doc)``
    :raises KeyError, TypeError: if ``releaser_id_str`` is missing or the id
        cannot be concatenated.
    """
    _id = department + "_" + dic["releaser_id_str"]
    doc = dict(dic)
    moment = default_ms
    try:
        res = es.get_source(index="department_purchase_log", doc_type="doc", id=_id, **get_kwargs)
        moment = res["purchase_start_time"]
        doc.update(res)
    except Exception:
        # Best effort: a missing log entry (or any lookup failure) means the
        # entry is created from scratch with the default time.
        pass
    doc["department"] = department
    doc["is_purchased"] = is_purchased
    if is_purchased:
        doc["purchase_start_time"] = moment
    else:
        # NOTE(review): when a stored entry exists, the end time is set to its
        # purchase_start_time rather than to "now" - confirm this is intended.
        doc["purchase_end_time"] = moment
    doc["timestamp"] = moment
    return _id, doc


def _send_purchase_log_bulk(bulk_lines):
    """Send queued action/source lines to ``department_purchase_log``; print failures."""
    eror_dic = es.bulk(index='department_purchase_log', doc_type='doc',
                       body="\n".join(bulk_lines) + "\n")
    if eror_dic['errors'] is True:
        print(eror_dic)


def purchase_releaser_add(bluk_dic, departments, if_add=True):
    """Record purchase state changes in the ``department_purchase_log`` index.

    :param bluk_dic: list of releaser documents; each needs a
        ``releaser_id_str`` entry.  The dicts are not modified.
    :param departments: department names to apply the change to.  When empty
        or ``None``, every releaser flagged ``is_purchased`` is instead
        recorded as purchased for each of its own ``department_tags``.
    :param if_add: ``True`` marks the releasers as purchased for the given
        departments, ``False`` marks them as no longer purchased.
    :return: ``None``
    """
    now_ms = int(datetime.datetime.now().timestamp() * 1e3)
    pending = []

    def enqueue(_id, doc):
        pending.append(json.dumps({"index": {"_id": _id}}, ensure_ascii=False))
        pending.append(json.dumps(doc, ensure_ascii=False))
        if len(pending) >= 1000:  # two lines per document -> 500 documents
            _send_purchase_log_bulk(pending)
            del pending[:]

    for department in departments or []:
        for dic in bluk_dic:
            if if_add:
                # NOTE(review): only this lookup passes timeout="1m"; confirm
                # the client accepts it, since a failure here is swallowed and
                # the stored entry would then never be merged.
                _id, doc = _purchase_log_doc(dic, department, True, now_ms, timeout="1m")
            else:
                _id, doc = _purchase_log_doc(dic, department, False, now_ms)
            enqueue(_id, doc)

    if not departments:
        for dic in bluk_dic:
            try:
                if not dic.get("is_purchased"):
                    continue
                entries = [_purchase_log_doc(dic, department, True, now_ms)
                           for department in dic["department_tags"]]
            except (KeyError, TypeError, AttributeError):
                # Document lacks department_tags / releaser_id_str: skip it.
                continue
            for _id, doc in entries:
                enqueue(_id, doc)

    if pending:
        _send_purchase_log_bulk(pending)


def _ronghe_doc_id(dic):
    """Derive the ``target_releasers`` id for *dic* from its ``systematic`` value.

    Returns ``None`` for an unsupported ``systematic``.  Raises ``KeyError`` or
    ``TypeError`` when a field needed for the id is missing or not a string.
    """
    systematic = dic.get("systematic")
    if systematic == "short_video":
        return dic["releaser_id_str"]
    if systematic in ("weibo", "weixin"):
        return "%s_%s" % (systematic, dic["releaser_id_str"])
    if systematic == "tv":
        # The id is the SHA-1 of
        # tv_station + systematic + channel + program + releaser + platform + releaserUrl.
        eid = dic["tv_station"] + dic["systematic"] + dic["channel"] + dic["program"] + dic.get(
            "releaser") + dic.get("platform") + dic["releaserUrl"]
        print(eid)
        return hashlib.sha1(eid.encode("utf8")).hexdigest()
    if systematic == "app":
        return "app_%s" % dic["releaser"]
    return None


def _send_ronghe_bulk(bulk_lines, target_index, target_type):
    """Send queued action/source lines to the target index; print failures."""
    eror_dic = es.bulk(index=target_index, doc_type=target_type,
                       body="\n".join(bulk_lines) + "\n", request_timeout=200)
    if eror_dic['errors'] is True:
        print(eror_dic)


def _ronghe_write_rows(rows):
    """Index every row dict yielded by *rows*; return the textual report."""
    target_index = 'target_releasers'
    target_type = 'doc'
    bulk_lines = []
    err_id_line = ""
    blank_space_error = ""
    new_line_error = ""
    error_msg_list = []
    count = 0

    for line, line_dict in enumerate(rows):
        dic, blank_space_error, new_line_error, err_id_line = parse_line_dict(
            line, line_dict, blank_space_error, new_line_error, err_id_line)

        print("line %s" % line)
        # Zero-pad the numeric id columns to their fixed widths.
        for key, width in (("channel_id", 4), ("live_type_id", 3), ("time_shift_type_id", 3)):
            if dic.get(key):
                dic[key] = dic[key].zfill(width)
        try:
            if dic.get("releaserUrl") and dic["systematic"] == "short_video":
                dic["releaser_id_str"] = dic["platform"] + "_" + get_releaser_id(
                    platform=dic["platform"], releaserUrl=dic["releaserUrl"])
            else:
                dic["releaser_id_str"] = get_releaser_id(platform=dic["platform"],
                                                         releaserUrl=dic["releaserUrl"])
        except Exception:
            # Best effort: rows whose id does not depend on releaser_id_str
            # (tv / app) can still be written.
            print("error_url", dic.get("releaserUrl"))

        if dic.get("releaser_type") == "电视新闻类":
            dic["media_type"] = ["traditional_media", "tv_news"]

        try:
            _id = _ronghe_doc_id(dic)
        except (KeyError, TypeError):
            # A field needed for the id is missing: report the row and skip it.
            token = str(line + 2) + ","
            if not err_id_line.endswith(token):
                err_id_line += token
            continue
        if _id is None:
            continue

        find_exist = {
            "query": {
                "bool": {
                    "filter": [
                        {"term": {"_id": _id}}
                    ]
                }
            }
        }
        search_re = es.search(index='target_releasers', doc_type='doc', body=find_exist)
        if search_re['hits']['total'] > 0:
            # Start from the stored document and let the row's fields win.
            search_source = search_re['hits']['hits'][0]['_source']
            search_source.update(dic)
            dic = search_source
        dic["is_ronghe_target_releaser"] = 1
        dic["is_removed"] = 0
        dic["is_valid"] = "true"
        dic["ronghe"] = "true"
        if not dic.get("department_tags"):
            dic["department_tags"] = []
        dic["department_tags"].append("CCR")
        dic["department_tags"] = list(set(dic["department_tags"]))
        if dic.get("project_tags"):
            try:
                dic["project_tags"].append("融合指数-CCR")
                dic["project_tags"] = list(set(dic["project_tags"]))
            except (AttributeError, TypeError) as e:
                print(e)
                print(dic["project_tags"])
        else:
            dic["project_tags"] = ["融合指数-CCR"]

        bulk_lines.append(json.dumps({"index": {"_id": _id}}, ensure_ascii=False))
        bulk_lines.append(json.dumps(dic, ensure_ascii=False))
        count += 1
        # Flush on the number of queued documents, not the row index, so a run
        # of skipped rows can never trigger an empty bulk request.
        if count % 500 == 0:
            _send_ronghe_bulk(bulk_lines, target_index, target_type)
            bulk_lines = []
            print(line)

    if bulk_lines:
        _send_ronghe_bulk(bulk_lines, target_index, target_type)
    error_msg_list.append("%s条 写入成功" % count)
    if err_id_line:
        error_msg_list.append("第%s行 releaserUrl错误" % err_id_line[:-1])
    if blank_space_error:
        error_msg_list.append("第%s行 发现存在空格" % blank_space_error[:-1])
    if new_line_error:
        error_msg_list.append("第%s行 发现存在换行符" % new_line_error[:-1])
    return error_msg_list


def ronghe_releaser_write_es(target_file, extra_dic=None, post_by=None):
    """Import convergence-index ("ronghe") releaser rows into ``target_releasers``.

    Each row is cleaned with ``parse_line_dict``, given an id derived from its
    ``systematic`` value (rows with an unsupported value are skipped), merged
    over any stored document with the same id, tagged with the ``CCR``
    department and the ``融合指数-CCR`` project tag, and bulk-indexed.

    :param target_file: path of a gb18030-encoded, comma-separated file whose
        first line is the header, or a list of row dicts.
    :param extra_dic: currently unused; kept for interface compatibility.
    :param post_by: currently unused; kept for interface compatibility.
    :return: list of human-readable messages: the number of rows written
        followed by the row numbers that had problems.
    :raises OSError: if *target_file* is a path that cannot be opened.
    """
    if isinstance(target_file, list):
        return _ronghe_write_rows(target_file)
    with open(target_file, 'r', encoding="gb18030") as handle:
        head_list = handle.readline().strip().split(',')
        rows = (dict(zip(head_list, raw.strip().split(','))) for raw in handle)
        return _ronghe_write_rows(rows)
    # write_to_es(short_video_list,extra_dic=extra_dic,post_by=post_by)


# Manual entry point: imports one hard-coded CSV file with write_to_es.
if __name__ == "__main__":
    # Only the last uncommented assignment to `file` takes effect; the earlier
    # one is overwritten immediately.
    file = r'D:\work_file\发布者账号\融媒账号列表\【3月使用的】电视新闻融合指数（仅省级台）对照表0401 的副本.csv'
    file = r'D:\work_file\发布者账号\一次性需求附件\【1季度】电视新闻融合指数（仅省级台）对照表-微博.csv'
    # file = r'D:\wxfile\WeChat Files\litaolemo\FileStorage\File\2020-04\导入账号模板-客服测试(1).csv'
    # file = r'D:\work_file\发布者账号\一次性需求附件\【2月使用的】电视新闻融合指数（仅省级台）对照表0309 的副本.csv'
    # Extra fields merged into every row; every option is currently disabled,
    # so an empty dict is passed.
    extra_dic = {
            # "department_tags":["客服部"],
            #  'key_releaser': "True",
            #  'frequency': 3,
            #  "project_tags":["城市媒体融合"],
            # "is_purchased": True,
            # "del_departments": ["客服部"],
            # "purchase_end_time": "2020-04-28"
    }

    # csv_type = {"SMG": [], "an_hui": [], "ronghe": [], "su_zhou": []}
    #ronghe_releaser_write_es(file, post_by="litao")
    # The returned message list is discarded here.
    write_to_es(file, post_by="litao", extra_dic=extra_dic, push_to_redis=True)
