# -*- coding:UTF-8 -*-
# @Time  : 2020/11/27 10:53
# @File  : new_user_behavior_analysis.py
# @email : litao@igengmei.com
# @author : litao

import hashlib
import json
from meta_base_code.utils.func_get_uesr_event import get_user_event_from_mysql
import pymysql
import xlwt, datetime
import redis
# from pyhive import hive
from maintenance.func_send_email_with_file import send_file_email
from typing import Dict, List
# from elasticsearch_7 import Elasticsearch
# from elasticsearch_7.helpers import scan
from elasticsearch import Elasticsearch
from elasticsearch.helpers import scan
import sys
import time
from pyspark import SparkConf
from pyspark.sql import SparkSession, DataFrame
from meta_base_code.utils.func_from_redis_get_portrait import *
import pandas as pd

# from pyspark.sql.functions import lit
# import pytispark.pytispark as pti


def con_sql(sql):
    """Run ``sql`` against the ``doris_prod`` MySQL database and return all rows.

    Args:
        sql: SQL text handed unchanged to ``cursor.execute``. Nothing is
            parameterized or escaped here, so callers must not build it from
            untrusted input.

    Returns:
        Whatever ``cursor.fetchall()`` returns for the statement.

    Raises:
        Any exception raised by ``pymysql`` while connecting, executing or
        fetching. The connection is closed before the exception propagates.
    """
    # Fetch data from a database table.
    # NOTE(review): the credentials are hard-coded in source; consider moving
    # them to configuration or a secret store.

    # db = pymysql.connect(host='172.16.40.158', port=4000, user='st_user', passwd='aqpuBLYzEV7tML5RPsN1pntUzFy',
    #                      db='jerry_prod')
    db = pymysql.connect(host='172.16.30.136', port=3306, user='doris', passwd='o5gbA27hXHHm',
                         db='doris_prod')
    try:
        # The cursor is released by the ``with`` block and the connection by
        # ``finally``, so neither leaks when execute()/fetchall() raises.
        with db.cursor() as cursor:
            cursor.execute(sql)
            return cursor.fetchall()
    finally:
        db.close()


# Module-level dict created empty here; nothing in the visible part of this
# file reads or writes it.
exists_es_dic = {}
# Elasticsearch client configured with two nodes, both on port 9200.
# NOTE(review): `es` is not referenced again in the visible part of this file;
# confirm whether the client is still needed.
es = Elasticsearch([
    {
        'host': '172.16.31.17',
        'port': 9200,
    }, {
        'host': '172.16.31.11',
        'port': 9200,
    }])

# Wall-clock start of the job; the visible part of this file never reads it back.
startTime = time.time()
# Build the Spark configuration used by the session created further down.
sparkConf = SparkConf()
sparkConf.set("spark.sql.crossJoin.enabled", True)
sparkConf.set("spark.debug.maxToStringFields", "100")
sparkConf.set("spark.tispark.plan.allow_index_double_read", False)
sparkConf.set("spark.tispark.plan.allow_index_read", True)
sparkConf.set("spark.hive.mapred.supports.subdirectories", True)
sparkConf.set("spark.hadoop.mapreduce.input.fileinputformat.input.dir.recursive", True)
sparkConf.set("spark.serializer", "org.apache.spark.serializer.KryoSerializer")
sparkConf.set("mapreduce.output.fileoutputformat.compress", False)
sparkConf.set("mapreduce.map.output.compress", False)
# JDBC URIs stored under custom "prod.*" keys. They are not read back anywhere
# in the visible part of this file.
# NOTE(review): every URI below embeds a plaintext database password; consider
# loading them from configuration or a secret store instead of source control.
sparkConf.set("prod.gold.jdbcuri",
              "jdbc:mysql://172.16.30.136/doris_prod?user=doris&password=o5gbA27hXHHm&rewriteBatchedStatements=true")
sparkConf.set("prod.mimas.jdbcuri",
              "jdbc:mysql://172.16.30.138/mimas_prod?user=mimas&password=GJL3UJe1Ck9ggL6aKnZCq4cRvM&rewriteBatchedStatements=true")
sparkConf.set("prod.gaia.jdbcuri",
              "jdbc:mysql://172.16.30.143/zhengxing?user=work&password=BJQaT9VzDcuPBqkd&rewriteBatchedStatements=true")
sparkConf.set("prod.tidb.jdbcuri",
              "jdbc:mysql://172.16.40.158:4000/eagle?user=st_user&password=aqpuBLYzEV7tML5RPsN1pntUzFy&rewriteBatchedStatements=true")
# sparkConf.set("prod.jerry.jdbcuri",
#               "jdbc:mysql://172.16.40.158:4000/jerry_prod?user=st_user&password=aqpuBLYzEV7tML5RPsN1pntUzFy&rewriteBatchedStatements=true")
# NOTE(review): the same key is set twice in a row, so only the second value
# (172.16.40.170:4000) is kept. Port 4000 is the port used by the TiDB JDBC URI
# above, whereas the session builder below passes port 2379 for
# "spark.tispark.pd.addresses" - confirm which address/port is intended here.
sparkConf.set("prod.tispark.pd.addresses", "172.16.40.158:2379")
sparkConf.set("prod.tispark.pd.addresses", "172.16.40.170:4000")
# sparkConf.set("prod.tidb.database", "jerry_prod")
sparkConf.setAppName("new_user_project_protratit")

# Create (or reuse) the Spark session from sparkConf, with the TiSpark SQL
# extension class, the TiSpark PD address and Hive support enabled.
spark = (SparkSession.builder.config(conf=sparkConf).config("spark.sql.extensions", "org.apache.spark.sql.TiExtensions")
         .config("spark.tispark.pd.addresses", "172.16.40.170:2379").enableHiveSupport().getOrCreate())

# Load the UDF jars from HDFS and register three temporary SQL functions:
# json_map, is_json and arrayMerge.
# NOTE(review): none of these functions is called by the SQL in the visible
# part of this file; confirm they are still required.
spark.sql("ADD JAR hdfs:///user/hive/share/lib/udf/brickhouse-0.7.1-SNAPSHOT.jar")
spark.sql("ADD JAR hdfs:///user/hive/share/lib/udf/hive-udf-1.0-SNAPSHOT.jar")
spark.sql("CREATE TEMPORARY FUNCTION json_map AS 'brickhouse.udf.json.JsonMapUDF'")
spark.sql("CREATE TEMPORARY FUNCTION is_json AS 'com.gmei.hive.common.udf.UDFJsonFormatCheck'")
spark.sql("CREATE TEMPORARY FUNCTION arrayMerge AS 'com.gmei.hive.common.udf.UDFArryMerge'")

# Module-level lists created empty; nothing in the visible part of this file
# appends to or reads from them.
task_list = []
tractate_list = []


# Each iteration analyses one calendar day located `t` days before today.
# With task_days = 76 the range is just [75], i.e. the single day 75 days ago.
task_days = 76
for t in range(75, task_days):
    day_num = 0 - t
    # Read the clock exactly once per iteration and derive every date string
    # from it, so all of them describe the same day even if the job happens
    # to run across midnight.
    now = (datetime.datetime.now() + datetime.timedelta(days=day_num))
    last_30_day_str = (now + datetime.timedelta(days=-30)).strftime("%Y%m%d")
    # Previously computed from a second datetime.now() call, which could
    # disagree with `now` by a day when the two calls straddled midnight.
    tomorrow_str = (now + datetime.timedelta(days=1)).strftime("%Y%m%d")
    today_timestamp = int(now.timestamp())
    today_str = now.strftime("%Y%m%d")
    today_str_format = now.strftime("%Y-%m-%d")
    yesterday_str = (now + datetime.timedelta(days=-1)).strftime("%Y%m%d")
    yesterday_str_format = (now + datetime.timedelta(days=-1)).strftime("%Y-%m-%d")
    one_week_age_str = (now + datetime.timedelta(days=-7)).strftime("%Y%m%d")
    # Select the device ids that are active on `today_str` with active_type in
    # (1, 2) (presumably the new-user activation types - confirm), excluding
    # devices flagged is_abnormal_device = 'true' for the same day. The
    # exclusion is an anti-join: LEFT JOIN against the flagged devices and keep
    # only rows where no match was found.
    # Only {today_str} appears in the template; the other keyword arguments
    # passed to format() are unused.
    new_urser_device_id_sql = r"""
    select t2.device_id as device_id from 
    (select device_id from online.ml_device_day_active_status where partition_date = '{today_str}' and active_type in (1,2)
    ) t2
    LEFT JOIN
    (
    select distinct device_id
    from ML.ML_D_CT_DV_DEVICECLEAN_DIMEN_D
    where PARTITION_DAY = '{today_str}'
    AND is_abnormal_device = 'true'
    )dev
    on t2.device_id=dev.device_id
WHERE dev.device_id is null and t2.device_id is not null
    """.format(today_str=today_str, yesterday_str_format=yesterday_str_format, today_str_format=today_str_format,
               tomorrow_str=tomorrow_str)
    new_urser_device_id_df = spark.sql(new_urser_device_id_sql)
    # Expose the result to the exposure queries under the name device_id_view.
    new_urser_device_id_df.createOrReplaceTempView("device_id_view")

    # Low-exposure group: for each device in device_id_view, count the distinct
    # cards precisely exposed on the home page "精选" tab on `today_str`, and keep
    # the devices whose count is at most 8.
    # The partition filter is now a quoted string literal, matching the sibling
    # high-exposure query on the same table and the other partition filters in
    # this file; it was previously interpolated as a bare number.
    # NOTE(review): the variable name says "lt_than_8" but the HAVING clause is
    # `<= 8`, and the sibling query keeps `>= 8`, so a device with exactly 8
    # cards lands in both groups - confirm the intended thresholds.
    exposure_sql_lt_than_8 = """
        SELECT              
                    cl_id,
                    count(distinct card_id) as session_pv0
            FROM
                    (select device_id from device_id_view) dev left join 
                     (SELECT partition_date,
                      cl_id,
                      case when card_content_type in ('qa','answer') then 'qa'
                           when card_content_type in ('special_pool') then 'special' else card_content_type end as card_content_type,
                      CASE when transaction_type in ('fmctr','samecity_fmctr') then array('fmctr','合计')
                           when transaction_type in ('high_quality_fmctr') then array('high_quality_fmctr','合计')
                           WHEN (transaction_type like '%ctr' and transaction_type not in ('high_quality_ctr','high_quality_fmctr','fmctr','samecity_fmctr') ) THEN array('ctr预估','合计')
                           when transaction_type in ('high_quality_ctr') then array('high_quality_ctr','合计')
                           WHEN transaction_type like '%cvr' THEN array('cvr预估','合计')
                           WHEN transaction_type in ('-1','smr') THEN array('smr','合计')
                           when transaction_type in ('pgc','hotspot') then array('热点卡片')
                           when transaction_type in ('newdata') then array('保量卡片')
                           when transaction_type in ('hotspot_feed') then array('hotspot_feed','合计')
                           when transaction_type in ('aistragegy') then array('新用户AI帖优先','合计')
                           when transaction_type in ('excestragegy') then array('新用户精华帖优先','合计')
                           when transaction_type in ('FIXEDSTRATEGY') then array('新氧新用户策略一','合计')
                           when transaction_type in ('FIXEDSTRATEGY_VIDEO') then array('新氧新用户策略二','合计')
                           when transaction_type like 'deeplink%' then array('deeplink策略','合计')
                           end AS recommend_type,
                      card_id,
                      app_session_id
               from online.ml_community_precise_exposure_detail
               WHERE partition_date='{partition_day}'
               AND action in ('page_precise_exposure','home_choiceness_card_exposure') --7745版本action改为page_precise_exposure
               AND is_exposure = '1'  ----精准曝光
               AND page_name ='home'
               AND tab_name = '精选'
               AND (transaction_type in ('-1','smr','hotspot','pgc','newdata','hotspot_feed','aistragegy','excestragegy','FIXEDSTRATEGY','FIXEDSTRATEGY_VIDEO')
                    or transaction_type like '%ctr' or transaction_type like '%cvr' or transaction_type like 'deeplink%')
               AND card_content_type in ('qa','diary','user_post','answer','special_pool')
               group by partition_date,
                      case when card_content_type in ('qa','answer') then 'qa'
                           when card_content_type in ('special_pool') then 'special' else card_content_type end,
                      cl_id,
                      CASE when transaction_type in ('fmctr','samecity_fmctr') then array('fmctr','合计')
                           when transaction_type in ('high_quality_fmctr') then array('high_quality_fmctr','合计')
                           WHEN (transaction_type like '%ctr' and transaction_type not in ('high_quality_ctr','high_quality_fmctr','fmctr','samecity_fmctr')) THEN array('ctr预估','合计')
                           when transaction_type in ('high_quality_ctr') then array('high_quality_ctr','合计')
                           WHEN transaction_type like '%cvr' THEN array('cvr预估','合计')
                           WHEN transaction_type in ('-1','smr') THEN array('smr','合计')
                           when transaction_type in ('pgc','hotspot') then array('热点卡片')
                           when transaction_type in ('newdata') then array('保量卡片')
                           when transaction_type in ('hotspot_feed') then array('hotspot_feed','合计')
                           when transaction_type in ('aistragegy') then array('新用户AI帖优先','合计')
                           when transaction_type in ('excestragegy') then array('新用户精华帖优先','合计')
                           when transaction_type in ('FIXEDSTRATEGY') then array('新氧新用户策略一','合计')
                           when transaction_type in ('FIXEDSTRATEGY_VIDEO') then array('新氧新用户策略二','合计')
                           when transaction_type like 'deeplink%' then array('deeplink策略','合计') end,
                      card_id,
                      app_session_id
            )a on a.cl_id = dev.device_id
            LATERAL VIEW explode (a.recommend_type) v as recommend_type
            group by cl_id having  session_pv0 <= 8
    """.format(partition_day=today_str)

    # High-exposure group: same query shape as exposure_sql_lt_than_8 - for each
    # device in device_id_view, count the distinct cards precisely exposed on the
    # home page "精选" tab on `today_str` - but keeping the devices whose count is
    # at least 8.
    # NOTE(review): the variable name says "gte_than_16" while the HAVING clause
    # is `>= 8`; together with the `<= 8` filter of exposure_sql_lt_than_8 a
    # device with exactly 8 cards falls into both groups - confirm the intended
    # threshold.
    exposure_sql_gte_than_16 = """
           SELECT              
                       cl_id,
                       count(distinct card_id) as session_pv0
               FROM
               (select device_id from device_id_view) dev left join 
                        (SELECT partition_date,
                         cl_id,
                         case when card_content_type in ('qa','answer') then 'qa'
                              when card_content_type in ('special_pool') then 'special' else card_content_type end as card_content_type,
                         CASE when transaction_type in ('fmctr','samecity_fmctr') then array('fmctr','合计')
                              when transaction_type in ('high_quality_fmctr') then array('high_quality_fmctr','合计')
                              WHEN (transaction_type like '%ctr' and transaction_type not in ('high_quality_ctr','high_quality_fmctr','fmctr','samecity_fmctr') ) THEN array('ctr预估','合计')
                              when transaction_type in ('high_quality_ctr') then array('high_quality_ctr','合计')
                              WHEN transaction_type like '%cvr' THEN array('cvr预估','合计')
                              WHEN transaction_type in ('-1','smr') THEN array('smr','合计')
                              when transaction_type in ('pgc','hotspot') then array('热点卡片')
                              when transaction_type in ('newdata') then array('保量卡片')
                              when transaction_type in ('hotspot_feed') then array('hotspot_feed','合计')
                              when transaction_type in ('aistragegy') then array('新用户AI帖优先','合计')
                              when transaction_type in ('excestragegy') then array('新用户精华帖优先','合计')
                              when transaction_type in ('FIXEDSTRATEGY') then array('新氧新用户策略一','合计')
                              when transaction_type in ('FIXEDSTRATEGY_VIDEO') then array('新氧新用户策略二','合计')
                              when transaction_type like 'deeplink%' then array('deeplink策略','合计')
                              end AS recommend_type,
                         card_id,
                         app_session_id
                  from online.ml_community_precise_exposure_detail
                  WHERE partition_date= '{partition_day}'
                  AND action in ('page_precise_exposure','home_choiceness_card_exposure') --7745版本action改为page_precise_exposure
                  AND is_exposure = '1'  ----精准曝光
                  AND page_name ='home'
                  AND tab_name = '精选'
                  AND (transaction_type in ('-1','smr','hotspot','pgc','newdata','hotspot_feed','aistragegy','excestragegy','FIXEDSTRATEGY','FIXEDSTRATEGY_VIDEO')
                       or transaction_type like '%ctr' or transaction_type like '%cvr' or transaction_type like 'deeplink%')
                  AND card_content_type in ('qa','diary','user_post','answer','special_pool')
                  group by partition_date,
                         case when card_content_type in ('qa','answer') then 'qa'
                              when card_content_type in ('special_pool') then 'special' else card_content_type end,
                         cl_id,
                         CASE when transaction_type in ('fmctr','samecity_fmctr') then array('fmctr','合计')
                              when transaction_type in ('high_quality_fmctr') then array('high_quality_fmctr','合计')
                              WHEN (transaction_type like '%ctr' and transaction_type not in ('high_quality_ctr','high_quality_fmctr','fmctr','samecity_fmctr')) THEN array('ctr预估','合计')
                              when transaction_type in ('high_quality_ctr') then array('high_quality_ctr','合计')
                              WHEN transaction_type like '%cvr' THEN array('cvr预估','合计')
                              WHEN transaction_type in ('-1','smr') THEN array('smr','合计')
                              when transaction_type in ('pgc','hotspot') then array('热点卡片')
                              when transaction_type in ('newdata') then array('保量卡片')
                              when transaction_type in ('hotspot_feed') then array('hotspot_feed','合计')
                              when transaction_type in ('aistragegy') then array('新用户AI帖优先','合计')
                              when transaction_type in ('excestragegy') then array('新用户精华帖优先','合计')
                              when transaction_type in ('FIXEDSTRATEGY') then array('新氧新用户策略一','合计')
                              when transaction_type in ('FIXEDSTRATEGY_VIDEO') then array('新氧新用户策略二','合计')
                              when transaction_type like 'deeplink%' then array('deeplink策略','合计') end,
                         card_id,
                         app_session_id
               )a on a.cl_id = dev.device_id
               LATERAL VIEW explode (a.recommend_type) v as recommend_type
               group by cl_id having  session_pv0 >= 8
       """.format(partition_day=today_str)

    print(new_urser_device_id_sql)

    # spark.sql() only defines the DataFrames; the rows are pulled to the
    # driver by the .collect() calls in the loop below.
    exposure_sql_lt_than_8_df = spark.sql(exposure_sql_lt_than_8)
    exposure_sql_gte_than_16_df = spark.sql(exposure_sql_gte_than_16)

    # res_dict and portrait_dict are not used by the code below; they are kept
    # as module-level names in case code outside this section relies on them.
    res_dict = {}
    portrait_dict = {
        "first_demands": {},
        "second_demands": {},
        "first_solutions": {},
        "second_solutions": {},
        "first_positions": {},
        "second_positions": {},
        "projects": {},
        'anecdote_tags': {}
    }
    no_portrait_device_id_list = []
    print("-------------------------------")
    count_not_has_portratit = 0

    # (log label, DataFrame) pairs, processed in the original order: the
    # high-exposure group first, then the low-exposure group.
    exposure_groups = (
        ("event_dict_16", exposure_sql_gte_than_16_df),
        ("event_dict_8", exposure_sql_lt_than_8_df),
    )
    # Text destined for log.txt, in write order.
    report_parts = []
    for group_label, group_df in exposure_groups:
        sql_res = group_df.collect()
        # project -> {event name -> occurrences}
        event_dict = {}
        # event name -> {project -> occurrences}
        event_dict_reverse = {}
        # Index of the last processed row, so users processed equals
        # count_user_count + 1. Starting at -1 keeps that true for an empty
        # result, which previously raised NameError (first group) or silently
        # reused the first group's value (second group).
        count_user_count = -1
        for count_user_count, res in enumerate(sql_res):
            # Number of event rows of this user that carried project data.
            temp_count = 0
            try:
                for event_cn, projects in get_user_event_from_mysql(res.cl_id, today_timestamp):
                    # Check for missing project data BEFORE splitting. The
                    # low-exposure loop used to split first, so a None value
                    # raised and discarded the rest of that user's events.
                    if not projects:
                        continue
                    temp_count += 1
                    for project in projects.split(","):
                        per_project = event_dict.setdefault(project, {})
                        per_project[event_cn] = per_project.get(event_cn, 0) + 1

                        per_event = event_dict_reverse.setdefault(event_cn, {})
                        per_event[project] = per_event.get(project, 0) + 1
            except Exception as e:
                # Best effort: report the failure and move on to the next
                # user. Counts gathered for this user before the failure are
                # kept.
                print("error ", e)

            # NOTE(review): the original incremented temp_count
            # unconditionally right before this check, so this branch could
            # never run. Counting contributing event rows is the presumed
            # intent - confirm.
            if not temp_count:
                count_not_has_portratit += 1
                no_portrait_device_id_list.append(res.cl_id)

        print(group_label, today_str, count_user_count, event_dict_reverse)
        report_parts.append(str(event_dict_reverse))
        report_parts.append(str(count_user_count))

    # Write the file once both groups are done; the `with` block closes the
    # handle even if the write fails. The content is unchanged: the four
    # values are concatenated without separators.
    # NOTE(review): mode "w" truncates log.txt on every day iteration, so if
    # the day range is widened only the last day's output survives.
    with open("log.txt", "w", encoding='utf-8') as f:
        f.write("".join(report_parts))