# -*- coding:UTF-8 -*-
# @Time  : 2020/8/21 16:43
# @File  : search_strategy_d.py
# @email : litao@igengmei.com
# @author : litao

import hashlib
import json

import pymysql
import xlwt, datetime
import redis
# from pyhive import hive
from maintenance.func_send_email_with_file import send_file_email
from typing import Dict, List
from elasticsearch_7 import Elasticsearch
from elasticsearch_7.helpers import scan
import sys
import time
from pyspark import SparkConf
from pyspark.sql import SparkSession, DataFrame
from pyspark.sql.functions import lit
import pytispark.pytispark as pti

db = pymysql.connect(host='172.16.40.158', port=4000, user='st_user', passwd='aqpuBLYzEV7tML5RPsN1pntUzFy',
                     db='jerry_prod')
cursor = db.cursor()

startTime = time.time()
sparkConf = SparkConf()
sparkConf.set("spark.sql.crossJoin.enabled", True)
sparkConf.set("spark.debug.maxToStringFields", "100")
sparkConf.set("spark.tispark.plan.allow_index_double_read", False)
sparkConf.set("spark.tispark.plan.allow_index_read", True)
sparkConf.set("spark.hive.mapred.supports.subdirectories", True)
sparkConf.set("spark.hadoop.mapreduce.input.fileinputformat.input.dir.recursive", True)
sparkConf.set("spark.serializer", "org.apache.spark.serializer.KryoSerializer")
sparkConf.set("mapreduce.output.fileoutputformat.compress", False)
sparkConf.set("mapreduce.map.output.compress", False)
sparkConf.set("prod.gold.jdbcuri",
              "jdbc:mysql://172.16.30.136/doris_prod?user=doris&password=o5gbA27hXHHm&rewriteBatchedStatements=true")
sparkConf.set("prod.mimas.jdbcuri",
              "jdbc:mysql://172.16.30.138/mimas_prod?user=mimas&password=GJL3UJe1Ck9ggL6aKnZCq4cRvM&rewriteBatchedStatements=true")
sparkConf.set("prod.gaia.jdbcuri",
              "jdbc:mysql://172.16.30.143/zhengxing?user=work&password=BJQaT9VzDcuPBqkd&rewriteBatchedStatements=true")
sparkConf.set("prod.tidb.jdbcuri",
              "jdbc:mysql://172.16.40.158:4000/eagle?user=st_user&password=aqpuBLYzEV7tML5RPsN1pntUzFy&rewriteBatchedStatements=true")
sparkConf.set("prod.jerry.jdbcuri",
              "jdbc:mysql://172.16.40.158:4000/jerry_prod?user=st_user&password=aqpuBLYzEV7tML5RPsN1pntUzFy&rewriteBatchedStatements=true")
sparkConf.set("prod.tispark.pd.addresses", "172.16.40.158:2379")
sparkConf.set("prod.tispark.pd.addresses", "172.16.40.170:4000")
sparkConf.set("prod.tidb.database", "jerry_prod")

spark = (SparkSession.builder.config(conf=sparkConf).config("spark.sql.extensions", "org.apache.spark.sql.TiExtensions")
         .config("spark.tispark.pd.addresses", "172.16.40.170:2379").appName(
    "LR PYSPARK TEST").enableHiveSupport().getOrCreate())

spark.sql("ADD JAR hdfs:///user/hive/share/lib/udf/brickhouse-0.7.1-SNAPSHOT.jar")
spark.sql("ADD JAR hdfs:///user/hive/share/lib/udf/hive-udf-1.0-SNAPSHOT.jar")
spark.sql("CREATE TEMPORARY FUNCTION json_map AS 'brickhouse.udf.json.JsonMapUDF'")
spark.sql("CREATE TEMPORARY FUNCTION is_json AS 'com.gmei.hive.common.udf.UDFJsonFormatCheck'")
spark.sql("CREATE TEMPORARY FUNCTION arrayMerge AS 'com.gmei.hive.common.udf.UDFArryMerge'")

task_list = []
task_days = 1

for t in range(0, task_days):
    day_num = 0 - t
    now = (datetime.datetime.now() + datetime.timedelta(days=day_num))
    last_30_day_str = (now + datetime.timedelta(days=-30)).strftime("%Y%m%d")
    today_str = now.strftime("%Y%m%d")
    yesterday_str = (now + datetime.timedelta(days=-1)).strftime("%Y%m%d")
    one_week_age_str = (now + datetime.timedelta(days=-90)).strftime("%Y%m%d")
    sql = """
SELECT
    partition_date
    ,device_os_type,active_type,channel
    ,dau
    ,all_search_click_uv_724
    ,all_search_click_pv_724 
    ,all_search_uv
    ,all_search_pv
    ,if(dau <> 0 ,cast((all_search_uv/dau) as decimal(18,2)) , 0) as completed_user_rate
    ,if(all_search_uv <> 0 ,concat(cast((all_search_pv/all_search_uv) as decimal(18,4)),'') , 0) as per_user_search_count
    ,if(search_home_pv <> 0 ,cast((all_search_pv/search_home_pv) as decimal(18,4)) , 0) as search_success_rate
    ,if(sug_out_pv_726 <> 0 ,cast((sug_search_pv_726/sug_out_pv_726) as decimal(18,4)) , 0)  as sug_uesd_rate
    ,NVL(referrer_search_hexin_pv,0) as referrer_search_hexin_pv
    ,NVL(referrer_search_welfare_pv,0) as referrer_search_welfare_pv
    ,if(all_search_uv <> 0 ,concat(cast((referrer_search_hexin_pv/dau) as decimal(18,2)),'') , 0)   as search_core_pv_div_dau
    ,if(all_search_uv <> 0 ,concat(cast((referrer_search_neirong_pv/dau) as decimal(18,2)),'') , 0) as search_neirong_pv_div_dau
    ,if(referrer_search_hexin_pv <> 0 ,cast((search_hexin_two_pv/dau)*100 as decimal(18,4)) , 0)  as search_core_two_div_dau
    ,if(referrer_search_neirong_pv <> 0 ,cast((search_neirong_two_pv/dau)*100 as decimal(18,4)) , 0) as search_neirong_two_div_dau
    ,if(referrer_search_neirong_uv_1000 <> 0 ,concat(cast((referrer_search_neirong_pagestay/dau) as decimal(18,4)),'') , 0) as search_neirong_per_duration
    ,NVL(referrer_search_neirong_pv,0) as referrer_search_neirong_pv
    ,NVL(search_hexin_two_pv,0)  as search_hexin_two_pv
    ,NVL(search_neirong_two_pv,0) as search_neirong_two_pv
    ,NVL(referrer_search_neirong_pagestay,0) as referrer_search_neirong_pagestay
FROM
(
    SELECT
        coalesce(t1.partition_date,t2.partition_date,t3.partition_date,t4.partition_date,t5.partition_date,t6.partition_date) as partition_date
        ,coalesce(t1.active_type,t2.active_type,t3.active_type,t4.active_type,t5.active_type,t6.active_type) as active_type
        ,coalesce(t1.device_os_type,t2.device_os_type,t3.device_os_type,t4.device_os_type,t5.device_os_type,t6.device_os_type) as device_os_type
        ,coalesce(t1.channel,t2.channel,t3.channel,t4.channel,t5.channel,t6.channel) as channel
        ,coalesce(t1.dau,0) as dau 
        ,coalesce(t3.all_search_uv,0) as all_search_uv                                          
        ,coalesce(t3.all_search_pv,0) as all_search_pv                                          
        ,coalesce(t4.search_home_uv,0) as search_home_uv                                       
        ,coalesce(t4.search_home_pv,0) as search_home_pv                                         
        ,coalesce(t3.sug_search_uv_726,0) as sug_search_uv_726                                   
        ,coalesce(t3.sug_search_pv_726,0) as sug_search_pv_726                                  
        ,coalesce(t2.all_search_click_uv_724,0) as all_search_click_uv_724                 
        ,coalesce(t2.all_search_click_pv_724,0) as all_search_click_pv_724                
        ,coalesce(t4.referrer_search_hexin_pv,0) as referrer_search_hexin_pv                
        ,coalesce(t4.referrer_search_welfare_pv,0) as referrer_search_welfare_pv           
        ,coalesce(t4.referrer_search_neirong_pv,0) as referrer_search_neirong_pv           
        ,coalesce(t5.search_hexin_two_pv,0) as search_hexin_two_pv                           
        ,coalesce(t5.search_neirong_two_pv,0) as search_neirong_two_pv                       
        ,coalesce(t4.referrer_search_neirong_uv_1000,0) as referrer_search_neirong_uv_1000   
        ,coalesce(t4.referrer_search_neirong_pagestay,0) as referrer_search_neirong_pagestay
        ,coalesce(t6.sug_out_uv_726,0) as sug_out_uv_726
        ,coalesce(t6.sug_out_pv_726,0) as sug_out_pv_726
    FROM
    (
        SELECT mas.partition_date,t2.active_type,t2.device_os_type,t2.channel,count(DISTINCT mas.device_id) as dau
        FROM
        (
            SELECT
            partition_date,m.device_id
            ,array(device_os_type ,'合计') as device_os_type
            ,array(case WHEN active_type = '4'  THEN '老活' 
                  WHEN active_type  in ('1','2')  then '新增' END ,'合计') as active_type 
            ,array(CASE WHEN is_ai_channel = 'true' THEN 'AI'  ELSE '其他' END , '合计') as channel
            FROM online.ml_device_day_active_status m
                LEFT JOIN 
                (SELECT code,is_ai_channel,partition_day
                 FROM DIM.DIM_AI_CHANNEL_ZP_NEW
                 WHERE partition_day>= '{start_date}' AND partition_day < '{end_date}'  ) tmp
                ON  m.partition_date=tmp.partition_day AND first_channel_source_type=code
            where partition_date >= '{start_date}'  
            AND partition_date < '{end_date}' 
            AND active_type in ('1','2','4')
        ) mas
        LATERAL VIEW explode(mas.channel) t2 AS channel
        LATERAL VIEW explode(mas.device_os_type) t2 AS device_os_type
        LATERAL VIEW explode(mas.active_type) t2 AS active_type
        GROUP BY mas.partition_date,t2.active_type,t2.device_os_type,t2.channel
    )t1 
    full JOIN
    (
        SELECT
            click.partition_date as partition_date
            ,active_type
            ,device_os_type
            ,channel
            ,count(click.cl_id) as all_search_click_pv_724
            ,count(distinct click.cl_id) as all_search_click_uv_724
        FROM
        (
            SELECT cl_id,partition_date,action,params['page_name'] as page_name,'' as input_type,app_version
            FROM online.bl_hdfs_maidian_updates
            WHERE partition_date >= '{start_date}'  
              AND partition_date < '{end_date}'  
              AND action = 'on_click_navbar_search'
              AND (int(split(app_version,'\\.')[0]) = 7 AND int(split(app_version,'\\.')[1]) >= 24)

            UNION all
            SELECT cl_id,partition_date,action,params['page_name'] as page_name,params['input_type'] as input_type,app_version
            FROM online.bl_hdfs_maidian_updates
            WHERE partition_date >= '{start_date}'  
              AND partition_date < '{end_date}'  
              AND action = 'do_search'
              AND params['input_type'] = '详情页默认词'

            union all 
            SELECT cl_id,partition_date,action,'home' as page_name,'首页-猜你喜欢' as input_type,app_version
            FROM online.bl_hdfs_maidian_updates
            WHERE partition_date >= '{start_date}'  
              AND partition_date < '{end_date}'  
              AND action = 'on_click_card'
              AND params['in_page_pos']='猜你喜欢'
              AND params['tab_name']='精选'
              AND params['card_type']='search_word'
              --AND page_name='home' android的page_name为空

            union all 
            SELECT cl_id,partition_date,action,params['page_name'] as page_name,'美购首页-大家都在搜' as input_type,app_version
            FROM online.bl_hdfs_maidian_updates
            WHERE partition_date >= '{start_date}'  
              AND partition_date < '{end_date}'  
              AND action = 'on_click_card'
              AND params['page_name']='welfare_home'
              AND params['card_type'] ='search_word'
              AND params['in_page_pos']='大家都在搜'

            union all 
            SELECT cl_id,partition_date,action,params['page_name'] as page_name,'高亮词' as input_type,app_version
            FROM online.bl_hdfs_maidian_updates
            WHERE partition_date >= '{start_date}'  
              AND partition_date < '{end_date}'  
              AND action = 'on_click_card'
              AND params['card_type'] ='highlight_word'
        )click
        JOIN
        (
            SELECT partition_date,device_id,t2.active_type,t2.channel,t2.device_os_type
            FROM
            (
                SELECT
                partition_date,m.device_id
                ,array(device_os_type ,'合计') as device_os_type
                ,array(case WHEN active_type = '4'  THEN '老活' 
                      WHEN active_type  in ('1','2')  then '新增' END ,'合计') as active_type 
                ,array(CASE WHEN is_ai_channel = 'true' THEN 'AI'  ELSE '其他' END , '合计') as channel
                FROM online.ml_device_day_active_status m
                    LEFT JOIN 
                    (SELECT code,is_ai_channel,partition_day
                     FROM DIM.DIM_AI_CHANNEL_ZP_NEW
                     WHERE partition_day>= '{start_date}' AND partition_day < '{end_date}'  ) tmp
                    ON  m.partition_date=tmp.partition_day AND first_channel_source_type=code
                where partition_date >= '{start_date}'  
                AND partition_date < '{end_date}' 
                AND active_type in ('1','2','4')
            ) mas
            LATERAL VIEW explode(mas.channel) t2 AS channel
            LATERAL VIEW explode(mas.device_os_type) t2 AS device_os_type
            LATERAL VIEW explode(mas.active_type) t2 AS active_type
        )dev_channel
          on dev_channel.device_id = click.cl_id
            AND dev_channel.partition_date = click.partition_date
        GROUP BY click.partition_date,active_type,device_os_type,channel
    )t2 
      on t2.partition_date = t1.partition_date
        AND t2.active_type = t1.active_type
        AND t2.device_os_type = t1.device_os_type
        AND t2.channel = t1.channel
    full JOIN
    (
        SELECT
            click.partition_date as partition_date
            ,active_type
            ,device_os_type
            ,channel
            ,count(click.cl_id) as all_search_pv 
            ,count(distinct click.cl_id) as all_search_uv
            ,count(case when int(split(app_version,'\\.')[0]) = 7 AND int(split(app_version,'\\.')[1]) >= 26 AND input_type in ('联想','聚合模块') then click.cl_id else null end) as sug_search_pv_726
            ,count(distinct case when int(split(app_version,'\\.')[0]) = 7 AND int(split(app_version,'\\.')[1]) >= 26 AND input_type in ('联想','聚合模块') then click.cl_id else null end) as sug_search_uv_726
        FROM
        (
            SELECT cl_id,partition_date,action,params['page_name'] as page_name,params['input_type'] as input_type,app_version
            FROM online.bl_hdfs_maidian_updates
            WHERE partition_date >= '{start_date}'  
              AND partition_date < '{end_date}'  
              AND action in ('do_search','search_result_click_search')

            union all 
            SELECT cl_id,partition_date,action,'search_home' as page_name,'' as input_type,app_version
            FROM online.bl_hdfs_maidian_updates
            WHERE partition_date >= '{start_date}'  
              AND partition_date < '{end_date}'  
              AND action = 'on_click_card'
              AND params['page_name']='search_home'

            union all 
            SELECT cl_id,partition_date,action,'home' as page_name,'首页-猜你喜欢' as input_type,app_version
            FROM online.bl_hdfs_maidian_updates
            WHERE partition_date >= '{start_date}'  
              AND partition_date < '{end_date}'  
              AND action = 'on_click_card'
              AND params['in_page_pos']='猜你喜欢'
              AND params['tab_name']='精选'
              AND params['card_type']='search_word'
              --AND page_name='home' android的page_name为空

            union all 
            SELECT cl_id,partition_date,action,params['page_name'] as page_name,'美购首页-大家都在搜' as input_type,app_version
            FROM online.bl_hdfs_maidian_updates
            WHERE partition_date >= '{start_date}'  
              AND partition_date < '{end_date}'  
              AND action = 'on_click_card'
              AND params['page_name']='welfare_home'
              AND params['card_type'] ='search_word'
              AND params['in_page_pos']='大家都在搜'

            union all 
            SELECT cl_id,partition_date,action,params['page_name'] as page_name,'高亮词' as input_type,app_version
            FROM online.bl_hdfs_maidian_updates
            WHERE partition_date >= '{start_date}'  
              AND partition_date < '{end_date}'  
              AND action = 'on_click_card'
              AND params['card_type'] ='highlight_word'
        )click
        JOIN
        (
            SELECT partition_date,device_id,t2.active_type,t2.channel,t2.device_os_type
            FROM
            (
                SELECT
                partition_date,m.device_id
                ,array(device_os_type ,'合计') as device_os_type
                ,array(case WHEN active_type = '4'  THEN '老活' 
                      WHEN active_type  in ('1','2')  then '新增' END ,'合计') as active_type 
                ,array(CASE WHEN is_ai_channel = 'true' THEN 'AI'  ELSE '其他' END , '合计') as channel
                FROM online.ml_device_day_active_status m
                    LEFT JOIN 
                    (SELECT code,is_ai_channel,partition_day
                     FROM DIM.DIM_AI_CHANNEL_ZP_NEW
                     WHERE partition_day>= '{start_date}' AND partition_day < '{end_date}'  ) tmp
                    ON  m.partition_date=tmp.partition_day AND first_channel_source_type=code
                where partition_date >= '{start_date}'  
                AND partition_date < '{end_date}' 
                AND active_type in ('1','2','4')
            ) mas
            LATERAL VIEW explode(mas.channel) t2 AS channel
            LATERAL VIEW explode(mas.device_os_type) t2 AS device_os_type
            LATERAL VIEW explode(mas.active_type) t2 AS active_type 
        )dev_channel
          on dev_channel.device_id = click.cl_id
            AND dev_channel.partition_date = click.partition_date
        GROUP BY click.partition_date,active_type,device_os_type,channel
    )t3 
      on t3.partition_date = t1.partition_date
        AND t3.active_type = t1.active_type
        AND t3.device_os_type = t1.device_os_type
        AND t3.channel = t1.channel
    full JOIN
    (
        SELECT
            page.partition_date as partition_date
            ,active_type
            ,device_os_type
            ,channel
            ,count(case when page_name in ('search_home','search_home_more','search_home_welfare','search_home_diary','search_home_wiki','search_home_post','search_home_hospital','search_home_doctor') then page.cl_id else NULL end) as search_home_pv
            ,count(distinct case when page_name in ('search_home','search_home_more','search_home_welfare','search_home_diary','search_home_wiki','search_home_post','search_home_hospital','search_home_doctor') then page.cl_id else NULL end) as search_home_uv
            ,count(CASE when referrer in  ('search_result_diary','search_result_doctor','search_result_hospital','search_result_more'
                        ,'search_result_more_infomation','search_result_more_user','search_result_post','search_result_welfare'
                        ,'search_result_wiki','search_result_question_answer') and page_name in ('welfare_detail','organization_detail','expert_detail') THEN page.cl_id else NULL END) as referrer_search_hexin_pv
            ,count(CASE when referrer in  ('search_result_diary','search_result_doctor','search_result_hospital','search_result_more'
                        ,'search_result_more_infomation','search_result_more_user','search_result_post','search_result_welfare'
                        ,'search_result_wiki','search_result_question_answer') and page_name in ('welfare_detail') THEN page.cl_id else NULL END) as referrer_search_welfare_pv
            ,count(CASE when referrer in  ('search_result_diary','search_result_doctor','search_result_hospital','search_result_more'
                        ,'search_result_more_infomation','search_result_more_user','search_result_post','search_result_welfare'
                        ,'search_result_wiki','search_result_question_answer') and page_name in ('diary_detail','topic_detail','post_detail','user_post_detail','doctor_post_detail','question_detail','answer_detail'
                        ,'question_answer_detail','article_detail') THEN page.cl_id else NULL END) as referrer_search_neirong_pv
            ,count(DISTINCT CASE WHEN referrer in  ('search_result_diary','search_result_doctor','search_result_hospital','search_result_more'
                        ,'search_result_more_infomation','search_result_more_user','search_result_post','search_result_welfare'
                        ,'search_result_wiki','search_result_question_answer') and page_name in ('diary_detail','topic_detail','post_detail','user_post_detail','doctor_post_detail','question_detail','answer_detail'
                        ,'question_answer_detail','article_detail') and page_stay >= '0' and page_stay < '1000' THEN page.cl_id else NULL END) as referrer_search_neirong_uv_1000
            ,sum(CASE WHEN referrer in  ('search_result_diary','search_result_doctor','search_result_hospital','search_result_more'
                        ,'search_result_more_infomation','search_result_more_user','search_result_post','search_result_welfare'
                        ,'search_result_wiki','search_result_question_answer') and page_name in ('diary_detail','topic_detail','post_detail','user_post_detail','doctor_post_detail','question_detail','answer_detail'
                        ,'question_answer_detail','article_detail') and page_stay >= '0' and page_stay < '1000' THEN page.page_stay else NULL END) as  referrer_search_neirong_pagestay
        FROM
        (
            SELECT cl_id,partition_date,page_name,params['referrer'] as referrer,page_stay
            FROM online.bl_hdfs_maidian_updates
            WHERE partition_date >= '{start_date}'  
              AND partition_date < '{end_date}'  
              AND action='page_view'
              AND page_name in ('search_home','search_home_more','search_home_welfare','search_home_diary','search_home_wiki','search_home_post','search_home_hospital','search_home_doctor'
                        ,'diary_detail','topic_detail','post_detail','user_post_detail','doctor_post_detail','question_detail','answer_detail'
                        ,'question_answer_detail','article_detail','welfare_detail','organization_detail','expert_detail','level_one_plan_detail')
        )page  
        JOIN
        (
            SELECT partition_date,device_id,t2.active_type,t2.channel,t2.device_os_type
            FROM
            (
                SELECT
                partition_date,m.device_id
                ,array(device_os_type ,'合计') as device_os_type
                ,array(case WHEN active_type = '4'  THEN '老活' 
                      WHEN active_type  in ('1','2')  then '新增' END ,'合计') as active_type 
                ,array(CASE WHEN is_ai_channel = 'true' THEN 'AI'  ELSE '其他' END , '合计') as channel
                FROM online.ml_device_day_active_status m
                    LEFT JOIN 
                    (SELECT code,is_ai_channel,partition_day
                     FROM DIM.DIM_AI_CHANNEL_ZP_NEW
                     WHERE partition_day>= '{start_date}' AND partition_day < '{end_date}'  ) tmp
                    ON  m.partition_date=tmp.partition_day AND first_channel_source_type=code
                where partition_date >= '{start_date}'  
                AND partition_date < '{end_date}' 
                AND active_type in ('1','2','4')
            ) mas
            LATERAL VIEW explode(mas.channel) t2 AS channel
            LATERAL VIEW explode(mas.device_os_type) t2 AS device_os_type
            LATERAL VIEW explode(mas.active_type) t2 AS active_type
        )dev_channel
          on dev_channel.device_id = page.cl_id
            AND dev_channel.partition_date = page.partition_date
        GROUP BY page.partition_date,active_type,device_os_type,channel
    )t4 
      on t4.partition_date = t1.partition_date
        AND t4.active_type = t1.active_type
        AND t4.device_os_type = t1.device_os_type
        AND t4.channel = t1.channel
    full JOIN
    ( 
        SELECT
            card_click.partition_date as partition_date
            ,active_type
            ,device_os_type
            ,channel
            ,count(case when type = 'hexin' then card_click.cl_id else null end) as search_hexin_two_pv
            ,count(case when type = 'neirong' then card_click.cl_id else null end) as search_neirong_two_pv
        FROM
        (
            SELECT cl_id,partition_date,'hexin' as type
            FROM online.bl_hdfs_maidian_updates 
            WHERE partition_date >= '{start_date}'  
              AND partition_date < '{end_date}'  
              AND (referrer in ('search_result_diary','search_result_doctor','search_result_hospital','search_result_more'
                        ,'search_result_more_infomation','search_result_more_user','search_result_post','search_result_welfare'
                        ,'search_result_wiki','search_result_question_answer') 
                    or (params['referrer_link'] like '%[%' and json_split(params['referrer_link'])[size(json_split(params['referrer_link']))-1] in ('search_result_diary','search_result_doctor','search_result_hospital','search_result_more'
                        ,'search_result_more_infomation','search_result_more_user','search_result_post','search_result_welfare','search_result_wiki','search_result_question_answer')))
              AND ((action in ('welfare_multiattribute_click_add','welfare_multiattribute_click_buy') AND page_name = 'welfare_detail') 
                    or action = 'welfare_detail_click_message'
                    or (action = 'on_click_button' AND page_name = 'welfare_detail' AND params['button_name'] in ('question_tag','appointment'))
                    or (action = 'welfare_detail_click_curearea_contact' AND params['connect_type'] in ('phone','onlineconsult'))) 

            UNION ALL
            SELECT cl_id,partition_date,'hexin' as type
            FROM online.bl_hdfs_maidian_updates
            WHERE partition_date >= '{start_date}' and partition_date < '{end_date}'    --医院主页、医生主页点击咨询和浮层提问
            AND action = 'on_click_button'  
            AND page_name in ('organization_detail','expert_detail')
            AND (params['button_name'] in ('online_consult','phone_consult','put_question','discount_pay') OR params['button_name'] like "%？%")   
            AND params['referrer_link'] like '%[%' 
            and (referrer in ('search_result_diary','search_result_doctor','search_result_hospital','search_result_more'
                        ,'search_result_more_infomation','search_result_more_user','search_result_post','search_result_welfare'
                        ,'search_result_wiki','search_result_question_answer') 
                    or (params['referrer_link'] like '%[%' and json_split(params['referrer_link'])[size(json_split(params['referrer_link']))-1] in ('search_result_diary','search_result_doctor','search_result_hospital','search_result_more'
                        ,'search_result_more_infomation','search_result_more_user','search_result_post','search_result_welfare','search_result_wiki','search_result_question_answer')))
            
            UNION ALL
            SELECT cl_id,partition_date,'neirong' as type
            FROM online.bl_hdfs_maidian_updates
            WHERE partition_date>='{start_date}' AND partition_date<'{end_date}'
            AND page_name IN ('diary_detail','topic_detail','post_detail','user_post_detail','doctor_post_detail','question_detail','answer_detail','question_answer_detail','article_detail')
            AND (referrer in ('search_result_diary','search_result_doctor','search_result_hospital','search_result_more'
                        ,'search_result_more_infomation','search_result_more_user','search_result_post','search_result_welfare'
                        ,'search_result_wiki','search_result_question_answer') 
                    or (params['referrer_link'] like '%[%' and json_split(params['referrer_link'])[size(json_split(params['referrer_link']))-1] in ('search_result_diary','search_result_doctor','search_result_hospital','search_result_more'
                        ,'search_result_more_infomation','search_result_more_user','search_result_post','search_result_welfare','search_result_wiki','search_result_question_answer')))
            AND (action in ('on_click_navbar_search','do_search') 
                  or (action='on_click_card' and params['card_type']='highlight_word') 
                  or (action='on_click_card' and params['card_content_type'] in ('service','qa','diary','user_post','answer'))
                  or (action='on_click_button' and params['button_name']='unfold' AND page_name in ('diary_detail','topic_detail','post_detail','user_post_detail','doctor_post_detail'))
                  or (action='on_click_button' AND params['button_name']='more_recommendations'))
        )card_click 
        JOIN
        (
            SELECT partition_date,device_id,t2.active_type,t2.channel,t2.device_os_type
            FROM
            (
                SELECT
                partition_date,m.device_id
                ,array(device_os_type ,'合计') as device_os_type
                ,array(case WHEN active_type = '4'  THEN '老活' 
                      WHEN active_type  in ('1','2')  then '新增' END ,'合计') as active_type 
                ,array(CASE WHEN is_ai_channel = 'true' THEN 'AI'  ELSE '其他' END , '合计') as channel
                FROM online.ml_device_day_active_status m
                    LEFT JOIN 
                    (SELECT code,is_ai_channel,partition_day
                     FROM DIM.DIM_AI_CHANNEL_ZP_NEW
                     WHERE partition_day>= '{start_date}' AND partition_day < '{end_date}'  ) tmp
                    ON  m.partition_date=tmp.partition_day AND first_channel_source_type=code
                where partition_date >= '{start_date}'  
                AND partition_date < '{end_date}' 
                AND active_type in ('1','2','4')
            ) mas
            LATERAL VIEW explode(mas.channel) t2 AS channel
            LATERAL VIEW explode(mas.device_os_type) t2 AS device_os_type
            LATERAL VIEW explode(mas.active_type) t2 AS active_type
        )dev_channel
          on dev_channel.device_id = card_click.cl_id
            AND dev_channel.partition_date = card_click.partition_date
        GROUP BY card_click.partition_date,active_type,device_os_type,channel
    )t5 
      on t5.partition_date = t1.partition_date
        AND t5.active_type = t1.active_type
        AND t5.device_os_type = t1.device_os_type
        AND t5.channel = t1.channel

    full JOIN
    (
        SELECT
            click.partition_date as partition_date
            ,active_type
            ,device_os_type
            ,channel
            ,count(click.cl_id) as sug_out_pv_726
            ,count(distinct click.cl_id) as sug_out_uv_726
        FROM
        (
            SELECT cl_id,partition_date,action,params['page_name'] as page_name
            FROM online.bl_hdfs_maidian_updates
            where partition_date >= '{start_date}'  
            AND partition_date < '{end_date}' 
              AND action = 'searchsug_view'
              AND (int(split(app_version,'\\.')[0]) = 7 AND int(split(app_version,'\\.')[1]) >= 26)
        )click
        JOIN
        (
            SELECT partition_date,device_id,t2.active_type,t2.channel,t2.device_os_type
            FROM
            (
                SELECT
                partition_date,m.device_id
                ,array(device_os_type ,'合计') as device_os_type
                ,array(case WHEN active_type = '4'  THEN '老活' 
                      WHEN active_type  in ('1','2')  then '新增' END ,'合计') as active_type 
                ,array(CASE WHEN is_ai_channel = 'true' THEN 'AI'  ELSE '其他' END , '合计') as channel
                FROM online.ml_device_day_active_status m
                    LEFT JOIN 
                    (SELECT code,is_ai_channel,partition_day
                     FROM DIM.DIM_AI_CHANNEL_ZP_NEW
                     WHERE partition_day>= '{start_date}' AND partition_day < '{end_date}'  ) tmp
                    ON  m.partition_date=tmp.partition_day AND first_channel_source_type=code
                where partition_date >= '{start_date}'  
                AND partition_date < '{end_date}' 
                AND active_type in ('1','2','4')
            ) mas
            LATERAL VIEW explode(mas.channel) t2 AS channel
            LATERAL VIEW explode(mas.device_os_type) t2 AS device_os_type
            LATERAL VIEW explode(mas.active_type) t2 AS active_type
        )dev_channel
          on dev_channel.device_id = click.cl_id
            AND dev_channel.partition_date = click.partition_date
        GROUP BY click.partition_date,active_type,device_os_type,channel
    )t6 
      on t6.partition_date = t1.partition_date
        AND t6.active_type = t1.active_type
        AND t6.device_os_type = t1.device_os_type
        AND t6.channel = t1.channel
)t 

""".format(start_date=one_week_age_str, end_date=today_str, )
    device_df = spark.sql(sql)

    device_df.show(1, False)
    sql_res = device_df.collect()
    for res in sql_res:
        print(res)
        partition_date = res.partition_date
        device_os_type = res.device_os_type
        active_type = res.active_type
        channel = res.channel
        dau = res.dau
        all_search_click_uv_724 = res.all_search_click_uv_724
        all_search_click_pv_724 = res.all_search_click_pv_724
        all_search_uv = res.all_search_uv
        all_search_pv = res.all_search_pv
        completed_user_rate = res.completed_user_rate
        per_user_search_count = res.per_user_search_count
        search_success_rate = res.search_success_rate
        sug_uesd_rate = res.sug_uesd_rate
        referrer_search_hexin_pv = res.referrer_search_hexin_pv
        referrer_search_welfare_pv = res.referrer_search_welfare_pv
        search_core_pv_div_dau = res.search_core_pv_div_dau
        search_neirong_pv_div_dau = res.search_neirong_pv_div_dau
        search_core_two_div_dau = res.search_core_two_div_dau
        search_neirong_two_div_dau = res.search_neirong_two_div_dau
        search_neirong_per_duration = res.search_neirong_per_duration
        referrer_search_neirong_pv = res.referrer_search_neirong_pv
        search_hexin_two_pv = res.search_hexin_two_pv
        pid = hashlib.md5((partition_date + device_os_type + active_type + channel).encode("utf8")).hexdigest()
        instert_sql = """replace into core_indicators_monitoring(
            partition_date,device_os_type,active_type,channel,dau,all_search_click_uv_724,all_search_click_pv_724,all_search_uv,all_search_pv,completed_user_rate,per_user_search_count,
            search_success_rate,sug_uesd_rate,referrer_search_hexin_pv,referrer_search_welfare_pv,search_core_pv_div_dau,search_core_two_div_dau,search_neirong_two_div_dau,search_neirong_per_duration,referrer_search_neirong_pv,pid,search_neirong_pv_div_dau,search_hexin_two_pv
            ) VALUES('{partition_date}','{device_os_type}','{active_type}','{channel}',{dau},{all_search_click_uv_724},{all_search_click_pv_724},{all_search_uv},{all_search_pv},{completed_user_rate},{per_user_search_count},
            {search_success_rate},{sug_uesd_rate},{referrer_search_hexin_pv},{referrer_search_welfare_pv},{search_core_pv_div_dau},{search_core_two_div_dau},{search_neirong_two_div_dau},{search_neirong_per_duration},{referrer_search_neirong_pv},'{pid}',{search_neirong_pv_div_dau},{search_hexin_two_pv});""".format(
            partition_date=partition_date, device_os_type=device_os_type,
            active_type=active_type, channel=channel, dau=dau,
            all_search_click_uv_724=all_search_click_uv_724,all_search_click_pv_724=all_search_click_pv_724,all_search_uv=all_search_uv,all_search_pv=all_search_pv,completed_user_rate=completed_user_rate,per_user_search_count=per_user_search_count,
            search_success_rate=search_success_rate,sug_uesd_rate=sug_uesd_rate,referrer_search_hexin_pv=referrer_search_hexin_pv,
            referrer_search_welfare_pv=referrer_search_welfare_pv,search_core_pv_div_dau=search_core_pv_div_dau,
            search_core_two_div_dau=search_core_two_div_dau,search_neirong_two_div_dau=search_neirong_two_div_dau,
            search_neirong_per_duration=search_neirong_per_duration,referrer_search_neirong_pv=referrer_search_neirong_pv,
            pid=pid,search_neirong_pv_div_dau=search_neirong_pv_div_dau,search_hexin_two_pv=search_hexin_two_pv

        )
        print(instert_sql)
        # cursor.execute("set names 'UTF8'")
        res = cursor.execute(instert_sql)
        db.commit()
        print(res)
db.close()

