# -*- coding:utf-8 -*-
# @Time : 2019/7/26 14:33 
# @Author : litao
# -*- coding: utf-8 -*-
"""
Created on Mon Dec 17 10:05:18 2018

@author: zhouyujiang

Write releaser records from a CSV file (or a list of row dicts) into the
Elasticsearch ``target_releasers`` index.

"""
import json, re
import datetime, copy
from elasticsearch import Elasticsearch

from write_data_into_es.func_get_releaser_id import get_releaser_id
import redis
import hashlib

# Connection settings for the Elasticsearch cluster that holds target_releasers.
hosts = '172.18.52.14'
port = 9200
HTTP_AUTH = ("elastic", "gm_test")

# Module-wide client; write_to_es sends its bulk requests through it.
es = Elasticsearch(hosts=hosts, port=port, http_auth=HTTP_AUTH)
# pool = redis.ConnectionPool(host='192.168.17.60', port=6379, db=2, decode_responses=True)
# rds = redis.Redis(connection_pool=pool)

# Calendar helpers evaluated once at import time: midnight on the first day of
# the current month, the day before it, and that previous day's year / month.
today = datetime.datetime.now()
first_day = datetime.datetime.combine(
    datetime.date(today.year, today.month, 1),
    datetime.time.min,
)
day_before_first_day = first_day - datetime.timedelta(days=1)
l_year, l_month = day_before_first_day.year, day_before_first_day.month
count = 0


def parse_line_dict(line, line_dict, blank_space_error, new_line_error, err_id_line):
    """Remove whitespace from every string value of a row and record the row number.

    Spaces, carriage returns, newlines and tabs are deleted from each string
    value of ``line_dict`` (the dict is modified in place). Values that are not
    strings are left untouched.

    :param line: zero-based index of the row; it is reported as ``line + 2``.
    :param line_dict: mapping of column name to value for one row.
    :param blank_space_error: comma-terminated row numbers that contained spaces.
    :param new_line_error: comma-terminated row numbers that contained a
        carriage return, newline or tab.
    :param err_id_line: passed through unchanged.
    :return: ``(line_dict, blank_space_error, new_line_error, err_id_line)``;
        each error string gains the row number at most once per call, no
        matter how many fields of the row were affected.
    """
    found_blank = False
    found_control = False
    for key, value in line_dict.items():
        # Only text can carry stray whitespace; numbers, lists, None, ... are skipped.
        if not isinstance(value, str):
            continue
        if " " in value:
            found_blank = True
        if "\r" in value or "\n" in value or "\t" in value:
            found_control = True
        # Re-assigning an existing key does not resize the dict, so this is
        # safe while iterating over it.
        line_dict[key] = value.replace("\r", "").replace("\n", "").replace("\t", "").replace(" ", "")

    if found_blank or found_control:
        row_label = str(line + 2) + ","
        if found_blank:
            blank_space_error += row_label
        if found_control:
            new_line_error += row_label
    return line_dict, blank_space_error, new_line_error, err_id_line


def _iter_csv_rows(handle):
    """Yield ``(raw_line, row_dict)`` for every data line of an open CSV file."""
    # The first line is the header. Fields are split on bare commas, so quoted
    # fields containing commas are not supported.
    head_list = handle.readline().strip().split(',')
    for raw_line in handle:
        yield raw_line, dict(zip(head_list, raw_line.strip().split(',')))


def _send_bulk_to_es(actions):
    """Send the accumulated bulk actions to ``target_releasers`` and print reported errors."""
    response = es.bulk(index='target_releasers', body="".join(actions))
    if response['errors'] is True:
        print(response)


def _index_releaser_rows(rows, extra_dic, post_by):
    """Turn ``(raw, row_dict)`` pairs into bulk index actions and return the report messages."""
    pending = []
    err_id_line = ""
    blank_space_error = ""
    new_line_error = ""
    count = 0

    for line, (raw, line_dict) in enumerate(rows):
        print(raw)

        try:
            if line_dict['platform'] == "short_video":
                line_dict['platform'] = line_dict['releaser_platform']
        except (KeyError, TypeError):
            # Rows without a usable platform (or list items that are not
            # mappings) are skipped and reported in the newline category.
            new_line_error += str(line + 2) + ","
            continue

        line_dict, blank_space_error, new_line_error, err_id_line = parse_line_dict(
            line, line_dict, blank_space_error, new_line_error, err_id_line)
        line_dict.pop("", None)
        # Read the platform only after the whitespace cleanup so that the
        # document id and the id lookup use the same value that gets stored.
        platform = line_dict['platform']

        if 'releaserUrl' not in line_dict:
            # Without a URL no id can be resolved: report the row and move on
            # instead of aborting the whole import with a KeyError.
            err_id_line += str(line + 2) + ","
            continue
        releaserUrl = line_dict['releaserUrl']

        if extra_dic:
            line_dict.update(extra_dic)

        releaser_id = get_releaser_id(platform=platform, releaserUrl=releaserUrl)
        if releaser_id:
            doc_id = platform + '_' + releaser_id
            line_dict["releaser_id"] = releaser_id
            line_dict["releaser_id_str"] = doc_id
            line_dict["is_valid"] = "true"
        else:
            err_id_line += str(line + 2) + ","
            if 'releaser' not in line_dict:
                # Neither an id nor a name: there is nothing to build _id from.
                continue
            # Fall back to the releaser name for the document id; the row is
            # still written, but flagged as invalid.
            doc_id = platform + '_' + line_dict['releaser']
            line_dict["releaser_id"] = ""
            line_dict["releaser_id_str"] = ""
            line_dict["is_valid"] = "false"

        now_ms = int(datetime.datetime.now().timestamp() * 1000)
        if not line_dict.get("post_time"):
            line_dict['post_time'] = now_ms
        if post_by:
            line_dict["post_by"] = post_by
        if not line_dict.get("project_tags"):
            line_dict["project_tags"] = []
        if not line_dict.get("department_tags"):
            line_dict["department_tags"] = []

        bulk_dic = {
            "releaser": line_dict.get("releaser"),
            "releaserUrl": line_dict.get("releaserUrl"),
            "platform": line_dict.get("platform"),
            "releaser_id": line_dict.get("releaser_id"),
            "releaser_id_str": line_dict.get("releaser_id_str"),
            "post_by": line_dict.get("post_by"),
            "post_time": line_dict.get("post_time"),
            "frequency": 3 if line_dict.get("project_tags") else 1,
            "key_releaser": line_dict.get("key_releaser"),
            "is_valid": line_dict.get("is_valid"),
            "project_tags": line_dict.get("project_tags"),
            "department_tags": line_dict.get("department_tags"),
            'timestamp': now_ms,
            'media_type': line_dict.get("media_type") or "",
        }

        # Serialise the action line with json.dumps so that ids containing
        # quotes or backslashes still produce valid NDJSON.
        bulk_head = json.dumps({"index": {"_id": doc_id}}, ensure_ascii=False)
        pending.append(bulk_head + '\n' + json.dumps(bulk_dic, ensure_ascii=False) + '\n')
        count += 1
        if count % 500 == 0:
            _send_bulk_to_es(pending)
            pending = []

    if pending:
        _send_bulk_to_es(pending)

    error_msg_list = ["%s条 写入成功" % count]
    if err_id_line:
        error_msg_list.append("第%s行 releaserUrl错误" % err_id_line[:-1])
    if blank_space_error:
        error_msg_list.append("第%s行 发现存在空格" % blank_space_error[:-1])
    if new_line_error:
        error_msg_list.append("第%s行 发现存在换行符" % new_line_error[:-1])
    return error_msg_list


def write_to_es(file, push_to_redis=True, update=True, key_releaser=False, update_dic=None, extra_dic=None, **kwargs):
    """Bulk-index releaser rows from a CSV file or a list of dicts into ``target_releasers``.

    Each row becomes one ``index`` action whose ``_id`` is
    ``<platform>_<releaser_id>``, or ``<platform>_<releaser>`` when no
    releaser id can be resolved from ``releaserUrl``. Actions are sent in
    batches of 500. Reported row numbers are the zero-based row index plus 2,
    i.e. the spreadsheet row when the CSV header is row 1.

    :param file: either a list of row dicts, or the path of a gb18030-encoded
        CSV file whose first line is the header. CSV fields are split on bare
        commas. List items are modified in place while they are processed.
    :param push_to_redis: accepted for backward compatibility; not used here
        (the redis push is disabled).
    :param update: accepted for backward compatibility; not used here.
    :param key_releaser: accepted for backward compatibility; not used here.
        The stored ``key_releaser`` value is taken from the row or ``extra_dic``.
    :param update_dic: accepted for backward compatibility; not used here.
    :param extra_dic: extra fields merged into every row, overriding the row's
        own values. Empty ``project_tags`` / ``department_tags`` entries are
        ignored so they cannot wipe tags supplied by the rows. The dict itself
        is never modified. Only the fields of the indexed document body are
        written; ``frequency`` is always derived from ``project_tags``.
    :param kwargs: only ``post_by`` is read; when truthy it is stored on every row.
    :return: list of messages. The first one gives the number of rows queued
        for indexing (item-level errors reported by Elasticsearch are only
        printed); the following ones list the rows with an unusable
        releaserUrl, rows that contained spaces, and rows that contained
        newline/tab characters or had no usable ``platform``.
    :raises OSError: if ``file`` is a path that cannot be opened.
    """
    # Work on a pruned private copy so that neither the caller's dict nor a
    # shared default is mutated, and so every row sees the same extra fields.
    extra_fields = dict(extra_dic) if extra_dic else {}
    for tag_field in ("project_tags", "department_tags"):
        if not extra_fields.get(tag_field):
            extra_fields.pop(tag_field, None)
    post_by = kwargs.get("post_by")

    if isinstance(file, list):
        return _index_releaser_rows(((item, item) for item in file), extra_fields, post_by)

    # The context manager guarantees the CSV handle is closed, even on errors.
    with open(file, 'r', encoding="gb18030") as handle:
        return _index_releaser_rows(_iter_csv_rows(handle), extra_fields, post_by)


if __name__ == "__main__":
    # Manual one-off import: load a local CSV and stamp every row with the
    # extra fields below.
    extra_dic = dict(
        department_tags=["运营"],
        key_releaser=True,
        frequency=3,
    )
    file = r"D:\work_file\gengmei\litao\temp.csv"

    # csv_type = {"SMG": [], "an_hui": [], "ronghe": [], "su_zhou": []}
    # ronghe_releaser_write_es(file, post_by="litao")
    write_to_es(
        file,
        extra_dic=extra_dic,
        post_by="litao",
        push_to_redis=False,
    )