# -*- coding:UTF-8 -*-
# @Time  : 2020/9/4 17:07
# @File  : search_meigou_ctr.py
# @email : litao@igengmei.com
# @author : litao

import hashlib
import json

import pymysql
import xlwt, datetime
import redis
# from pyhive import hive
from maintenance.func_send_email_with_file import send_file_email
from typing import Dict, List
from elasticsearch_7 import Elasticsearch
from elasticsearch_7.helpers import scan
import sys
import time
from pyspark import SparkConf
from pyspark.sql import SparkSession, DataFrame


# from pyspark.sql.functions import lit
# import pytispark.pytispark as pti


def con_sql(sql):
    """Execute ``sql`` on the ``doris_olap`` MySQL database and return every row.

    Args:
        sql: Complete SQL statement text. It is executed as-is, without any
            parameter binding, so callers must not build it from untrusted input.

    Returns:
        Whatever ``cursor.fetchall()`` yields for the executed statement.

    Raises:
        Any exception raised by ``pymysql`` while connecting or executing is
        propagated unchanged; the connection is closed first.
    """
    # NOTE(review): credentials are hard-coded in source; consider moving them
    # to configuration or environment variables.
    db = pymysql.connect(host='172.16.50.175', port=3306, user='doris', passwd='o5gbA27hXHHm',
                         db='doris_olap')
    try:
        # The cursor context manager closes the cursor; the ``finally`` below
        # guarantees the connection is released even when the query fails.
        with db.cursor() as cursor:
            cursor.execute(sql)
            return cursor.fetchall()
    finally:
        db.close()


# Timestamp taken before Spark is configured (not read in the visible code).
startTime = time.time()

# Spark / Hadoop options and project-specific JDBC URIs, applied in the order
# listed. Values are passed to SparkConf.set() exactly as written here.
_SPARK_CONF_ENTRIES = [
    ("spark.sql.crossJoin.enabled", True),
    ("spark.debug.maxToStringFields", "100"),
    ("spark.tispark.plan.allow_index_double_read", False),
    ("spark.tispark.plan.allow_index_read", True),
    ("spark.hive.mapred.supports.subdirectories", True),
    ("spark.hadoop.mapreduce.input.fileinputformat.input.dir.recursive", True),
    ("spark.serializer", "org.apache.spark.serializer.KryoSerializer"),
    ("mapreduce.output.fileoutputformat.compress", False),
    ("mapreduce.map.output.compress", False),
    ("prod.gold.jdbcuri",
     "jdbc:mysql://172.16.30.136/doris_prod?user=doris&password=o5gbA27hXHHm&rewriteBatchedStatements=true"),
    ("prod.mimas.jdbcuri",
     "jdbc:mysql://172.16.30.138/mimas_prod?user=mimas&password=GJL3UJe1Ck9ggL6aKnZCq4cRvM&rewriteBatchedStatements=true"),
    ("prod.gaia.jdbcuri",
     "jdbc:mysql://172.16.30.143/zhengxing?user=work&password=BJQaT9VzDcuPBqkd&rewriteBatchedStatements=true"),
    ("prod.tidb.jdbcuri",
     "jdbc:mysql://172.16.40.158:4000/eagle?user=st_user&password=aqpuBLYzEV7tML5RPsN1pntUzFy&rewriteBatchedStatements=true"),
    # ("prod.jerry.jdbcuri",
    #  "jdbc:mysql://172.16.40.158:4000/jerry_prod?user=st_user&password=aqpuBLYzEV7tML5RPsN1pntUzFy&rewriteBatchedStatements=true"),
    # NOTE(review): the same key is set twice in a row, so only the second
    # value remains in the conf -- confirm which address is intended.
    ("prod.tispark.pd.addresses", "172.16.40.158:2379"),
    ("prod.tispark.pd.addresses", "172.16.40.170:4000"),
    # ("prod.tidb.database", "jerry_prod"),
]

sparkConf = SparkConf()
for _conf_key, _conf_value in _SPARK_CONF_ENTRIES:
    sparkConf.set(_conf_key, _conf_value)
sparkConf.setAppName("search_diary_ctr")

# Build the Hive-enabled session with the TiSpark extension on top of sparkConf.
_session_builder = SparkSession.builder.config(conf=sparkConf)
_session_builder = _session_builder.config("spark.sql.extensions", "org.apache.spark.sql.TiExtensions")
_session_builder = _session_builder.config("spark.tispark.pd.addresses", "172.16.40.170:2379")
_session_builder = _session_builder.appName("search_diary_ctr")
spark = _session_builder.enableHiveSupport().getOrCreate()

# Register the UDF jars first, then the temporary functions that live in them.
_SESSION_SETUP_STATEMENTS = (
    "ADD JAR hdfs:///user/hive/share/lib/udf/brickhouse-0.7.1-SNAPSHOT.jar",
    "ADD JAR hdfs:///user/hive/share/lib/udf/hive-udf-1.0-SNAPSHOT.jar",
    "CREATE TEMPORARY FUNCTION json_map AS 'brickhouse.udf.json.JsonMapUDF'",
    "CREATE TEMPORARY FUNCTION is_json AS 'com.gmei.hive.common.udf.UDFJsonFormatCheck'",
    "CREATE TEMPORARY FUNCTION arrayMerge AS 'com.gmei.hive.common.udf.UDFArryMerge'",
)
for _setup_statement in _SESSION_SETUP_STATEMENTS:
    spark.sql(_setup_statement)

task_list = []
task_days = 90

# Per-day aggregation query; ``{today_str}`` is the yyyymmdd partition to read.
# Built once here because the text is identical for every day of the loop.
# NOTE(review): qa_exp_pv uses count(...) while diary_exp_pv, diary_click_pv and
# qa_click_pv all use count(distinct ...) -- confirm whether that is intended.
_SEARCH_CTR_SQL_TEMPLATE = r"""
     SELECT
    t1.partition_date as day_id
    ,device_os_type
    ,active_type
    ,channel
	,sum(diary_click_pv) as diary_click_pv,sum(diary_exp_pv) as diary_exp_pv,sum(qa_click_pv) as qa_click_pv,sum(qa_exp_pv) as qa_exp_pv
FROM
(  --dau
    SELECT mas.partition_date,t2.active_type,t2.device_os_type,t2.channel,device_id
    FROM
    (
        SELECT
        concat_ws('-',substr(partition_date,1,4),substr(partition_date,5,2),substr(partition_date,7,2)) as partition_date
        ,m.device_id
        ,array(device_os_type ,'合计') as device_os_type
        ,array(case WHEN active_type = '4'  THEN '老活'
              WHEN active_type  in ('1','2')  then '新增' END ,'合计') as active_type
        ,array(CASE WHEN is_ai_channel = 'true' THEN 'AI'  ELSE '其他' END , '合计') as channel
        FROM online.ml_device_day_active_status m
            LEFT JOIN
            (SELECT code,is_ai_channel,partition_day
             FROM DIM.DIM_AI_CHANNEL_ZP_NEW
             WHERE partition_day="{today_str}" ) tmp
            ON  m.partition_date=tmp.partition_day AND first_channel_source_type=code
        where partition_date ="{today_str}"
        AND active_type in ('1','2','4')
    ) mas
    LATERAL VIEW explode(mas.channel) t2 AS channel
    LATERAL VIEW explode(mas.device_os_type) t2 AS device_os_type
    LATERAL VIEW explode(mas.active_type) t2 AS active_type
)t1
left JOIN
(--搜索结果页卡片精准曝光
    SELECT device_id,concat_ws('-',substr(partition_day,1,4),substr(partition_day,5,2),substr(partition_day,7,2)) as partition_date
        ,count(distinct CASE WHEN page_code='search_result_diary' THEN array(card_id,app_session_id) END) as diary_exp_pv
        ,count(CASE WHEN page_code='search_result_question_answer' THEN array(card_id,app_session_id) END) as qa_exp_pv
    FROM ml.mid_ml_c_et_pe_preciseexposure_dimen_d
    WHERE partition_day ="{today_str}"
    and action in ('page_precise_exposure','home_choiceness_card_exposure')
    and is_exposure = '1'
    and page_code in ('search_result_diary','search_result_question_answer')
    AND card_content_type IN ('answer','diary','user_post','doctor_post','question','qa')
    group by partition_day,device_id
)t6
on t1.partition_date=t6.partition_date and t1.device_id=t6.device_id
LEFT JOIN
(--搜索结果页卡片点击
    SELECT concat_ws('-',substr(partition_date,1,4),substr(partition_date,5,2),substr(partition_date,7,2)) as partition_date,cl_id
        ,count(distinct CASE WHEN page_name='search_result_diary' THEN array(params['card_id'],app_session_id) END) as diary_click_pv
        ,count(distinct CASE WHEN page_name='search_result_question_answer' THEN array(params['card_id'],app_session_id) END) as qa_click_pv
    FROM online.bl_hdfs_maidian_updates
    WHERE partition_date ="{today_str}"
    AND action = 'on_click_card'
    AND params['card_content_type'] in ('answer','diary','question','qa')
    AND page_name in ('search_result_diary','search_result_question_answer')
    GROUP BY cl_id,partition_date
)t7
on t6.partition_date=t7.partition_date and t6.device_id=t7.cl_id

left join
( -- 去掉黑名单设备
    select distinct device_id
    from ML.ML_D_CT_DV_DEVICECLEAN_DIMEN_D
    where PARTITION_DAY = "{today_str}"
    AND is_abnormal_device = 'true'
)spam_pv
on t1.device_id =spam_pv.device_id
WHERE spam_pv.device_id IS NULL
group by t1.partition_date,device_os_type,active_type,channel
        """

# Upsert statement shared by both target tables. ``{table}`` is filled with a
# constant table name; the eight ``%s`` placeholders are bound by PyMySQL so
# string values are escaped and ``None`` is written as SQL NULL.
_REPLACE_CTR_SQL = (
    "replace into {table}("
    "partition_date,device_os_type,active_type,pid,click_num,exposure,search_ctr,channel) "
    "VALUES(%s,%s,%s,%s,%s,%s,%s,%s);"
)


def _safe_ctr(click_num, exposure):
    """Return ``click_num / exposure`` rounded to 5 places, or 0 if not computable."""
    try:
        return round(click_num / exposure, 5)
    except (TypeError, ArithmeticError):
        # TypeError: an operand is None (SQL NULL aggregate).
        # ArithmeticError: covers division by zero.
        return 0


# Recompute and upsert the CTR rows for each of the last ``task_days`` days,
# starting with today (t == 0) and walking backwards.
for t in range(0, task_days):
    day_num = 0 - t
    now = (datetime.datetime.now() + datetime.timedelta(days=day_num))
    # Only today_str is used by the query below; the other date strings are
    # kept as module-level names but are not referenced in the visible code.
    last_30_day_str = (now + datetime.timedelta(days=-30)).strftime("%Y%m%d")
    today_str = now.strftime("%Y%m%d")
    yesterday_str = (now + datetime.timedelta(days=-1)).strftime("%Y%m%d")
    one_week_age_str = (now + datetime.timedelta(days=-7)).strftime("%Y%m%d")

    sql_search_ctr = _SEARCH_CTR_SQL_TEMPLATE.format(today_str=today_str)

    print(sql_search_ctr)
    search_ctr_df = spark.sql(sql_search_ctr)
    # spam_pv_df.createOrReplaceTempView("dev_view")
    search_ctr_df.show(1)
    sql_res = search_ctr_df.collect()

    print("-------------------------------")

    if not sql_res:
        # Nothing to write for this day; do not open a connection at all.
        continue

    # One connection per day, opened only after the Spark query has finished
    # and always closed afterwards (previously one connection was opened per
    # row and never closed).
    db = pymysql.connect(host='172.16.50.175', port=3306, user='doris', passwd='o5gbA27hXHHm',
                         db='doris_olap')
    try:
        with db.cursor() as cursor:
            for row in sql_res:
                print(row)
                device_os_type = row.device_os_type
                active_type = row.active_type
                # Stored in yyyymmdd form (not the dashed day_id from the query).
                partition_date = today_str
                channel = row.channel
                # Same pid is written to both tables for a given dimension tuple.
                pid = hashlib.md5(
                    (partition_date + device_os_type + active_type + channel).encode("utf8")
                ).hexdigest()
                diary_click_num = row.diary_click_pv
                diary_exposure = row.diary_exp_pv
                qa_click_num = row.qa_click_pv
                qa_exposure = row.qa_exp_pv

                diary_ctr = _safe_ctr(diary_click_num, diary_exposure)
                qa_ctr = _safe_ctr(qa_click_num, qa_exposure)

                # mogrify() renders the exact statement that will be sent, with
                # every value escaped by the driver.
                instert_sql_diary = cursor.mogrify(
                    _REPLACE_CTR_SQL.format(table="search_diary_ctr"),
                    (partition_date, device_os_type, active_type, pid,
                     diary_click_num, diary_exposure, diary_ctr, channel),
                )
                instert_sql_qa = cursor.mogrify(
                    _REPLACE_CTR_SQL.format(table="search_answer_ctr"),
                    (partition_date, device_os_type, active_type, pid,
                     qa_click_num, qa_exposure, qa_ctr, channel),
                )
                print(instert_sql_diary)
                print(instert_sql_qa)
                # cursor.execute("set names 'UTF8'")
                cursor.execute(instert_sql_diary)
                res = cursor.execute(instert_sql_qa)
                db.commit()
                print(res)
    finally:
        db.close()