Commit 3768b7a9 authored by litaolemo's avatar litaolemo

update

parent da0bb45b
# -*- coding:UTF-8 -*-
# @Time : 2020/8/21 16:43
# @File : search_strategy_d.py
# @email : litao@igengmei.com
# @author : litao
import hashlib
import json
import pymysql
import xlwt, datetime
import redis
# from pyhive import hive
from maintenance.func_send_email_with_file import send_file_email
from typing import Dict, List
from elasticsearch_7 import Elasticsearch
from elasticsearch_7.helpers import scan
import sys
import time
from pyspark import SparkConf
from pyspark.sql import SparkSession, DataFrame
from pyspark.sql.functions import lit
import pytispark.pytispark as pti
# Module-level MySQL-protocol connection to the `jerry_prod` schema (same
# host/port/user as the "prod.jerry.jdbcuri" Spark setting further down).
# NOTE(review): the credentials are hard-coded in source; consider loading
# them from configuration or the environment instead.
# NOTE(review): in the visible part of this script `db` and `cursor` are only
# referenced by commented-out code, and the connection is never closed.
_jerry_conn_kwargs = {
    "host": '172.16.40.158',
    "port": 4000,
    "user": 'st_user',
    "passwd": 'aqpuBLYzEV7tML5RPsN1pntUzFy',
    "db": 'jerry_prod',
}
db = pymysql.connect(**_jerry_conn_kwargs)
cursor = db.cursor()
# Wall-clock timestamp taken when the job starts.
startTime = time.time()

# (key, value) pairs applied to the SparkConf in exactly this order.
# NOTE(review): "prod.tispark.pd.addresses" appears twice; the later entry
# presumably overrides the earlier one, and its port (4000) differs from the
# 2379 used by the other PD address settings -- confirm which is intended.
# NOTE(review): the JDBC URIs embed plaintext credentials.
_spark_settings = [
    ("spark.sql.crossJoin.enabled", True),
    ("spark.debug.maxToStringFields", "100"),
    ("spark.tispark.plan.allow_index_double_read", False),
    ("spark.tispark.plan.allow_index_read", True),
    ("spark.hive.mapred.supports.subdirectories", True),
    ("spark.hadoop.mapreduce.input.fileinputformat.input.dir.recursive", True),
    ("spark.serializer", "org.apache.spark.serializer.KryoSerializer"),
    ("mapreduce.output.fileoutputformat.compress", False),
    ("mapreduce.map.output.compress", False),
    ("prod.gold.jdbcuri",
     "jdbc:mysql://172.16.30.136/doris_prod?user=doris&password=o5gbA27hXHHm&rewriteBatchedStatements=true"),
    ("prod.mimas.jdbcuri",
     "jdbc:mysql://172.16.30.138/mimas_prod?user=mimas&password=GJL3UJe1Ck9ggL6aKnZCq4cRvM&rewriteBatchedStatements=true"),
    ("prod.gaia.jdbcuri",
     "jdbc:mysql://172.16.30.143/zhengxing?user=work&password=BJQaT9VzDcuPBqkd&rewriteBatchedStatements=true"),
    ("prod.tidb.jdbcuri",
     "jdbc:mysql://172.16.40.158:4000/eagle?user=st_user&password=aqpuBLYzEV7tML5RPsN1pntUzFy&rewriteBatchedStatements=true"),
    ("prod.jerry.jdbcuri",
     "jdbc:mysql://172.16.40.158:4000/jerry_prod?user=st_user&password=aqpuBLYzEV7tML5RPsN1pntUzFy&rewriteBatchedStatements=true"),
    ("prod.tispark.pd.addresses", "172.16.40.158:2379"),
    ("prod.tispark.pd.addresses", "172.16.40.170:4000"),
    ("prod.tidb.database", "jerry_prod"),
]

sparkConf = SparkConf()
for _conf_key, _conf_value in _spark_settings:
    sparkConf.set(_conf_key, _conf_value)

# Build (or reuse) the Hive-enabled session with the TiSpark SQL extension.
_session_builder = SparkSession.builder.config(conf=sparkConf)
_session_builder = _session_builder.config("spark.sql.extensions", "org.apache.spark.sql.TiExtensions")
_session_builder = _session_builder.config("spark.tispark.pd.addresses", "172.16.40.170:2379")
spark = _session_builder.appName("LR PYSPARK TEST").enableHiveSupport().getOrCreate()

# Load the UDF jars, then register the temporary functions they provide.
# NOTE(review): the report query in this script also calls json_split, which
# is not registered here -- presumably it is a permanent function; confirm.
for _setup_statement in (
    "ADD JAR hdfs:///user/hive/share/lib/udf/brickhouse-0.7.1-SNAPSHOT.jar",
    "ADD JAR hdfs:///user/hive/share/lib/udf/hive-udf-1.0-SNAPSHOT.jar",
    "CREATE TEMPORARY FUNCTION json_map AS 'brickhouse.udf.json.JsonMapUDF'",
    "CREATE TEMPORARY FUNCTION is_json AS 'com.gmei.hive.common.udf.UDFJsonFormatCheck'",
    "CREATE TEMPORARY FUNCTION arrayMerge AS 'com.gmei.hive.common.udf.UDFArryMerge'",
):
    spark.sql(_setup_statement)
# Not referenced anywhere in the visible part of this script.
task_list = []
# Number of consecutive days to process, counting backwards from today.
task_days = 1
# NOTE(review): indentation was lost in this copy of the file; the statements
# from here down to the result printing presumably form the loop body.
for t in range(0, task_days):
# Offset in days of the run being processed (0 = today, -1 = yesterday, ...).
day_num = 0 - t
# Reference timestamp for this iteration (local time, shifted by day_num).
now = (datetime.datetime.now() + datetime.timedelta(days=day_num))
# Date keys rendered as YYYYMMDD strings; the query compares them as strings
# against partition_date / partition_day.
last_30_day_str = (now + datetime.timedelta(days=-30)).strftime("%Y%m%d")
today_str = now.strftime("%Y%m%d")
# yesterday_str and one_week_age_str are computed but not referenced in the
# visible part of this script.
yesterday_str = (now + datetime.timedelta(days=-1)).strftime("%Y%m%d")
one_week_age_str = (now + datetime.timedelta(days=-7)).strftime("%Y%m%d")
# The SQL template that follows is filled in via the {start_date} and
# {end_date} placeholders. It FULL JOINs six per-day aggregates (t1..t6) on
# partition_date / active_type / device_os_type / channel:
#   t1 - distinct active devices (dau)
#   t2 - search-entry click pv/uv
#   t3 - search pv/uv, plus sug-driven searches for app version >= 7.26
#   t4 - page_view counts for search-home pages and detail pages reached
#        from a search_result_* referrer
#   t5 - follow-up actions ('hexin' / 'neirong') after a search-result referrer
#   t6 - searchsug_view pv/uv for app version >= 7.26
# Every dimension is exploded together with a '合计' (total) bucket, so each
# device is counted both in its own group and in the totals.
sql = """
SELECT
partition_date
,device_os_type,active_type,channel
,dau
,all_search_click_uv_724
,all_search_click_pv_724
,all_search_uv
,all_search_pv
,if(dau <> 0 ,concat(cast((all_search_uv/dau)*100 as decimal(18,2)),'%') , '-')
,if(all_search_uv <> 0 ,concat(cast((all_search_pv/all_search_uv) as decimal(18,2)),'') , '-')
,if(search_home_pv <> 0 ,concat(cast((all_search_pv/search_home_pv)*100 as decimal(18,2)),'%') , '-')
,if(sug_out_pv_726 <> 0 ,concat(cast((sug_search_pv_726/sug_out_pv_726)*100 as decimal(18,2)),'%') , '-')
,NVL(referrer_search_hexin_pv,0)
,NVL(referrer_search_welfare_pv,0)
,if(all_search_uv <> 0 ,concat(cast((referrer_search_hexin_pv/dau) as decimal(18,2)),'') , '-')
,if(all_search_uv <> 0 ,concat(cast((referrer_search_neirong_pv/dau) as decimal(18,2)),'') , '-')
,if(referrer_search_hexin_pv <> 0 ,concat(cast((search_hexin_two_pv/dau)*100 as decimal(18,2)),'%') , '-')
,if(referrer_search_neirong_pv <> 0 ,concat(cast((search_neirong_two_pv/dau)*100 as decimal(18,2)),'%') , '-')
,if(referrer_search_neirong_uv_1000 <> 0 ,concat(cast((referrer_search_neirong_pagestay/dau) as decimal(18,2)),'') , '-')
,NVL(referrer_search_neirong_pv,0)
,NVL(search_hexin_two_pv,0)
,NVL(search_neirong_two_pv,0)
,NVL(referrer_search_neirong_pagestay,0)
FROM
(
SELECT
coalesce(t1.partition_date,t2.partition_date,t3.partition_date,t4.partition_date,t5.partition_date,t6.partition_date) as partition_date
,coalesce(t1.active_type,t2.active_type,t3.active_type,t4.active_type,t5.active_type,t6.active_type) as active_type
,coalesce(t1.device_os_type,t2.device_os_type,t3.device_os_type,t4.device_os_type,t5.device_os_type,t6.device_os_type) as device_os_type
,coalesce(t1.channel,t2.channel,t3.channel,t4.channel,t5.channel,t6.channel) as channel
,coalesce(t1.dau,0) as dau
,coalesce(t3.all_search_uv,0) as all_search_uv
,coalesce(t3.all_search_pv,0) as all_search_pv
,coalesce(t4.search_home_uv,0) as search_home_uv
,coalesce(t4.search_home_pv,0) as search_home_pv
,coalesce(t3.sug_search_uv_726,0) as sug_search_uv_726
,coalesce(t3.sug_search_pv_726,0) as sug_search_pv_726
,coalesce(t2.all_search_click_uv_724,0) as all_search_click_uv_724
,coalesce(t2.all_search_click_pv_724,0) as all_search_click_pv_724
,coalesce(t4.referrer_search_hexin_pv,0) as referrer_search_hexin_pv
,coalesce(t4.referrer_search_welfare_pv,0) as referrer_search_welfare_pv
,coalesce(t4.referrer_search_neirong_pv,0) as referrer_search_neirong_pv
,coalesce(t5.search_hexin_two_pv,0) as search_hexin_two_pv
,coalesce(t5.search_neirong_two_pv,0) as search_neirong_two_pv
,coalesce(t4.referrer_search_neirong_uv_1000,0) as referrer_search_neirong_uv_1000
,coalesce(t4.referrer_search_neirong_pagestay,0) as referrer_search_neirong_pagestay
,coalesce(t6.sug_out_uv_726,0) as sug_out_uv_726
,coalesce(t6.sug_out_pv_726,0) as sug_out_pv_726
FROM
(
SELECT mas.partition_date,t2.active_type,t2.device_os_type,t2.channel,count(DISTINCT mas.device_id) as dau
FROM
(
SELECT
partition_date,m.device_id
,array(device_os_type ,'合计') as device_os_type
,array(case WHEN active_type = '4' THEN '老活'
WHEN active_type in ('1','2') then '新增' END ,'合计') as active_type
,array(CASE WHEN is_ai_channel = 'true' THEN 'AI' ELSE '其他' END , '合计') as channel
FROM online.ml_device_day_active_status m
LEFT JOIN
(SELECT code,is_ai_channel,partition_day
FROM DIM.DIM_AI_CHANNEL_ZP_NEW
WHERE partition_day>= '{start_date}' AND partition_day < '{end_date}' ) tmp
ON m.partition_date=tmp.partition_day AND first_channel_source_type=code
where partition_date >= '{start_date}'
AND partition_date < '{end_date}'
AND active_type in ('1','2','4')
) mas
LATERAL VIEW explode(mas.channel) t2 AS channel
LATERAL VIEW explode(mas.device_os_type) t2 AS device_os_type
LATERAL VIEW explode(mas.active_type) t2 AS active_type
GROUP BY mas.partition_date,t2.active_type,t2.device_os_type,t2.channel
)t1
full JOIN
(
SELECT
click.partition_date as partition_date
,active_type
,device_os_type
,channel
,count(click.cl_id) as all_search_click_pv_724
,count(distinct click.cl_id) as all_search_click_uv_724
FROM
(
SELECT cl_id,partition_date,action,params['page_name'] as page_name,'' as input_type,app_version
FROM online.bl_hdfs_maidian_updates
WHERE partition_date >= '{start_date}'
AND partition_date < '{end_date}'
AND action = 'on_click_navbar_search'
AND (int(split(app_version,'\\.')[0]) = 7 AND int(split(app_version,'\\.')[1]) >= 24)
UNION all
SELECT cl_id,partition_date,action,params['page_name'] as page_name,params['input_type'] as input_type,app_version
FROM online.bl_hdfs_maidian_updates
WHERE partition_date >= '{start_date}'
AND partition_date < '{end_date}'
AND action = 'do_search'
AND params['input_type'] = '详情页默认词'
union all
SELECT cl_id,partition_date,action,'home' as page_name,'首页-猜你喜欢' as input_type,app_version
FROM online.bl_hdfs_maidian_updates
WHERE partition_date >= '{start_date}'
AND partition_date < '{end_date}'
AND action = 'on_click_card'
AND params['in_page_pos']='猜你喜欢'
AND params['tab_name']='精选'
AND params['card_type']='search_word'
--AND page_name='home' android的page_name为空
union all
SELECT cl_id,partition_date,action,params['page_name'] as page_name,'美购首页-大家都在搜' as input_type,app_version
FROM online.bl_hdfs_maidian_updates
WHERE partition_date >= '{start_date}'
AND partition_date < '{end_date}'
AND action = 'on_click_card'
AND params['page_name']='welfare_home'
AND params['card_type'] ='search_word'
AND params['in_page_pos']='大家都在搜'
union all
SELECT cl_id,partition_date,action,params['page_name'] as page_name,'高亮词' as input_type,app_version
FROM online.bl_hdfs_maidian_updates
WHERE partition_date >= '{start_date}'
AND partition_date < '{end_date}'
AND action = 'on_click_card'
AND params['card_type'] ='highlight_word'
)click
JOIN
(
SELECT partition_date,device_id,t2.active_type,t2.channel,t2.device_os_type
FROM
(
SELECT
partition_date,m.device_id
,array(device_os_type ,'合计') as device_os_type
,array(case WHEN active_type = '4' THEN '老活'
WHEN active_type in ('1','2') then '新增' END ,'合计') as active_type
,array(CASE WHEN is_ai_channel = 'true' THEN 'AI' ELSE '其他' END , '合计') as channel
FROM online.ml_device_day_active_status m
LEFT JOIN
(SELECT code,is_ai_channel,partition_day
FROM DIM.DIM_AI_CHANNEL_ZP_NEW
WHERE partition_day>= '{start_date}' AND partition_day < '{end_date}' ) tmp
ON m.partition_date=tmp.partition_day AND first_channel_source_type=code
where partition_date >= '{start_date}'
AND partition_date < '{end_date}'
AND active_type in ('1','2','4')
) mas
LATERAL VIEW explode(mas.channel) t2 AS channel
LATERAL VIEW explode(mas.device_os_type) t2 AS device_os_type
LATERAL VIEW explode(mas.active_type) t2 AS active_type
)dev_channel
on dev_channel.device_id = click.cl_id
AND dev_channel.partition_date = click.partition_date
GROUP BY click.partition_date,active_type,device_os_type,channel
)t2
on t2.partition_date = t1.partition_date
AND t2.active_type = t1.active_type
AND t2.device_os_type = t1.device_os_type
AND t2.channel = t1.channel
full JOIN
(
SELECT
click.partition_date as partition_date
,active_type
,device_os_type
,channel
,count(click.cl_id) as all_search_pv
,count(distinct click.cl_id) as all_search_uv
,count(case when int(split(app_version,'\\.')[0]) = 7 AND int(split(app_version,'\\.')[1]) >= 26 AND input_type in ('联想','聚合模块') then click.cl_id else null end) as sug_search_pv_726
,count(distinct case when int(split(app_version,'\\.')[0]) = 7 AND int(split(app_version,'\\.')[1]) >= 26 AND input_type in ('联想','聚合模块') then click.cl_id else null end) as sug_search_uv_726
FROM
(
SELECT cl_id,partition_date,action,params['page_name'] as page_name,params['input_type'] as input_type,app_version
FROM online.bl_hdfs_maidian_updates
WHERE partition_date >= '{start_date}'
AND partition_date < '{end_date}'
AND action in ('do_search','search_result_click_search')
union all
SELECT cl_id,partition_date,action,'search_home' as page_name,'' as input_type,app_version
FROM online.bl_hdfs_maidian_updates
WHERE partition_date >= '{start_date}'
AND partition_date < '{end_date}'
AND action = 'on_click_card'
AND params['page_name']='search_home'
union all
SELECT cl_id,partition_date,action,'home' as page_name,'首页-猜你喜欢' as input_type,app_version
FROM online.bl_hdfs_maidian_updates
WHERE partition_date >= '{start_date}'
AND partition_date < '{end_date}'
AND action = 'on_click_card'
AND params['in_page_pos']='猜你喜欢'
AND params['tab_name']='精选'
AND params['card_type']='search_word'
--AND page_name='home' android的page_name为空
union all
SELECT cl_id,partition_date,action,params['page_name'] as page_name,'美购首页-大家都在搜' as input_type,app_version
FROM online.bl_hdfs_maidian_updates
WHERE partition_date >= '{start_date}'
AND partition_date < '{end_date}'
AND action = 'on_click_card'
AND params['page_name']='welfare_home'
AND params['card_type'] ='search_word'
AND params['in_page_pos']='大家都在搜'
union all
SELECT cl_id,partition_date,action,params['page_name'] as page_name,'高亮词' as input_type,app_version
FROM online.bl_hdfs_maidian_updates
WHERE partition_date >= '{start_date}'
AND partition_date < '{end_date}'
AND action = 'on_click_card'
AND params['card_type'] ='highlight_word'
)click
JOIN
(
SELECT partition_date,device_id,t2.active_type,t2.channel,t2.device_os_type
FROM
(
SELECT
partition_date,m.device_id
,array(device_os_type ,'合计') as device_os_type
,array(case WHEN active_type = '4' THEN '老活'
WHEN active_type in ('1','2') then '新增' END ,'合计') as active_type
,array(CASE WHEN is_ai_channel = 'true' THEN 'AI' ELSE '其他' END , '合计') as channel
FROM online.ml_device_day_active_status m
LEFT JOIN
(SELECT code,is_ai_channel,partition_day
FROM DIM.DIM_AI_CHANNEL_ZP_NEW
WHERE partition_day>= '{start_date}' AND partition_day < '{end_date}' ) tmp
ON m.partition_date=tmp.partition_day AND first_channel_source_type=code
where partition_date >= '{start_date}'
AND partition_date < '{end_date}'
AND active_type in ('1','2','4')
) mas
LATERAL VIEW explode(mas.channel) t2 AS channel
LATERAL VIEW explode(mas.device_os_type) t2 AS device_os_type
LATERAL VIEW explode(mas.active_type) t2 AS active_type
)dev_channel
on dev_channel.device_id = click.cl_id
AND dev_channel.partition_date = click.partition_date
GROUP BY click.partition_date,active_type,device_os_type,channel
)t3
on t3.partition_date = t1.partition_date
AND t3.active_type = t1.active_type
AND t3.device_os_type = t1.device_os_type
AND t3.channel = t1.channel
full JOIN
(
SELECT
page.partition_date as partition_date
,active_type
,device_os_type
,channel
,count(case when page_name in ('search_home','search_home_more','search_home_welfare','search_home_diary','search_home_wiki','search_home_post','search_home_hospital','search_home_doctor') then page.cl_id else NULL end) as search_home_pv
,count(distinct case when page_name in ('search_home','search_home_more','search_home_welfare','search_home_diary','search_home_wiki','search_home_post','search_home_hospital','search_home_doctor') then page.cl_id else NULL end) as search_home_uv
,count(CASE when referrer in ('search_result_diary','search_result_doctor','search_result_hospital','search_result_more'
,'search_result_more_infomation','search_result_more_user','search_result_post','search_result_welfare'
,'search_result_wiki','search_result_question_answer') and page_name in ('welfare_detail','organization_detail','expert_detail') THEN page.cl_id else NULL END) as referrer_search_hexin_pv
,count(CASE when referrer in ('search_result_diary','search_result_doctor','search_result_hospital','search_result_more'
,'search_result_more_infomation','search_result_more_user','search_result_post','search_result_welfare'
,'search_result_wiki','search_result_question_answer') and page_name in ('welfare_detail') THEN page.cl_id else NULL END) as referrer_search_welfare_pv
,count(CASE when referrer in ('search_result_diary','search_result_doctor','search_result_hospital','search_result_more'
,'search_result_more_infomation','search_result_more_user','search_result_post','search_result_welfare'
,'search_result_wiki','search_result_question_answer') and page_name in ('diary_detail','topic_detail','post_detail','user_post_detail','doctor_post_detail','question_detail','answer_detail'
,'question_answer_detail','article_detail') THEN page.cl_id else NULL END) as referrer_search_neirong_pv
,count(DISTINCT CASE WHEN referrer in ('search_result_diary','search_result_doctor','search_result_hospital','search_result_more'
,'search_result_more_infomation','search_result_more_user','search_result_post','search_result_welfare'
,'search_result_wiki','search_result_question_answer') and page_name in ('diary_detail','topic_detail','post_detail','user_post_detail','doctor_post_detail','question_detail','answer_detail'
,'question_answer_detail','article_detail') and page_stay >= '0' and page_stay < '1000' THEN page.cl_id else NULL END) as referrer_search_neirong_uv_1000
,sum(CASE WHEN referrer in ('search_result_diary','search_result_doctor','search_result_hospital','search_result_more'
,'search_result_more_infomation','search_result_more_user','search_result_post','search_result_welfare'
,'search_result_wiki','search_result_question_answer') and page_name in ('diary_detail','topic_detail','post_detail','user_post_detail','doctor_post_detail','question_detail','answer_detail'
,'question_answer_detail','article_detail') and page_stay >= '0' and page_stay < '1000' THEN page.page_stay else NULL END) as referrer_search_neirong_pagestay
FROM
(
SELECT cl_id,partition_date,page_name,params['referrer'] as referrer,page_stay
FROM online.bl_hdfs_maidian_updates
WHERE partition_date >= '{start_date}'
AND partition_date < '{end_date}'
AND action='page_view'
AND page_name in ('search_home','search_home_more','search_home_welfare','search_home_diary','search_home_wiki','search_home_post','search_home_hospital','search_home_doctor'
,'diary_detail','topic_detail','post_detail','user_post_detail','doctor_post_detail','question_detail','answer_detail'
,'question_answer_detail','article_detail','welfare_detail','organization_detail','expert_detail','level_one_plan_detail')
)page
JOIN
(
SELECT partition_date,device_id,t2.active_type,t2.channel,t2.device_os_type
FROM
(
SELECT
partition_date,m.device_id
,array(device_os_type ,'合计') as device_os_type
,array(case WHEN active_type = '4' THEN '老活'
WHEN active_type in ('1','2') then '新增' END ,'合计') as active_type
,array(CASE WHEN is_ai_channel = 'true' THEN 'AI' ELSE '其他' END , '合计') as channel
FROM online.ml_device_day_active_status m
LEFT JOIN
(SELECT code,is_ai_channel,partition_day
FROM DIM.DIM_AI_CHANNEL_ZP_NEW
WHERE partition_day>= '{start_date}' AND partition_day < '{end_date}' ) tmp
ON m.partition_date=tmp.partition_day AND first_channel_source_type=code
where partition_date >= '{start_date}'
AND partition_date < '{end_date}'
AND active_type in ('1','2','4')
) mas
LATERAL VIEW explode(mas.channel) t2 AS channel
LATERAL VIEW explode(mas.device_os_type) t2 AS device_os_type
LATERAL VIEW explode(mas.active_type) t2 AS active_type
)dev_channel
on dev_channel.device_id = page.cl_id
AND dev_channel.partition_date = page.partition_date
GROUP BY page.partition_date,active_type,device_os_type,channel
)t4
on t4.partition_date = t1.partition_date
AND t4.active_type = t1.active_type
AND t4.device_os_type = t1.device_os_type
AND t4.channel = t1.channel
full JOIN
(
SELECT
card_click.partition_date as partition_date
,active_type
,device_os_type
,channel
,count(case when type = 'hexin' then card_click.cl_id else null end) as search_hexin_two_pv
,count(case when type = 'neirong' then card_click.cl_id else null end) as search_neirong_two_pv
FROM
(
SELECT cl_id,partition_date,'hexin' as type
FROM online.bl_hdfs_maidian_updates
WHERE partition_date >= '{start_date}'
AND partition_date < '{end_date}'
AND (referrer in ('search_result_diary','search_result_doctor','search_result_hospital','search_result_more'
,'search_result_more_infomation','search_result_more_user','search_result_post','search_result_welfare'
,'search_result_wiki','search_result_question_answer')
or (params['referrer_link'] like '%[%' and json_split(params['referrer_link'])[size(json_split(params['referrer_link']))-1] in ('search_result_diary','search_result_doctor','search_result_hospital','search_result_more'
,'search_result_more_infomation','search_result_more_user','search_result_post','search_result_welfare','search_result_wiki','search_result_question_answer')))
AND ((action in ('welfare_multiattribute_click_add','welfare_multiattribute_click_buy') AND page_name = 'welfare_detail')
or action = 'welfare_detail_click_message'
or (action = 'on_click_button' AND page_name = 'welfare_detail' AND params['button_name'] in ('question_tag','appointment'))
or (action = 'welfare_detail_click_curearea_contact' AND params['connect_type'] in ('phone','onlineconsult')))
UNION ALL
SELECT cl_id,partition_date,'hexin' as type
FROM online.bl_hdfs_maidian_updates
WHERE partition_date >= '{start_date}' and partition_date < '{end_date}' --医院主页、医生主页点击咨询和浮层提问
AND action = 'on_click_button'
AND page_name in ('organization_detail','expert_detail')
AND (params['button_name'] in ('online_consult','phone_consult','put_question','discount_pay') OR params['button_name'] like "%%")
AND params['referrer_link'] like '%[%'
and (referrer in ('search_result_diary','search_result_doctor','search_result_hospital','search_result_more'
,'search_result_more_infomation','search_result_more_user','search_result_post','search_result_welfare'
,'search_result_wiki','search_result_question_answer')
or (params['referrer_link'] like '%[%' and json_split(params['referrer_link'])[size(json_split(params['referrer_link']))-1] in ('search_result_diary','search_result_doctor','search_result_hospital','search_result_more'
,'search_result_more_infomation','search_result_more_user','search_result_post','search_result_welfare','search_result_wiki','search_result_question_answer')))
UNION ALL
SELECT cl_id,partition_date,'neirong' as type
FROM online.bl_hdfs_maidian_updates
WHERE partition_date>='{start_date}' AND partition_date<'{end_date}'
AND page_name IN ('diary_detail','topic_detail','post_detail','user_post_detail','doctor_post_detail','question_detail','answer_detail','question_answer_detail','article_detail')
AND (referrer in ('search_result_diary','search_result_doctor','search_result_hospital','search_result_more'
,'search_result_more_infomation','search_result_more_user','search_result_post','search_result_welfare'
,'search_result_wiki','search_result_question_answer')
or (params['referrer_link'] like '%[%' and json_split(params['referrer_link'])[size(json_split(params['referrer_link']))-1] in ('search_result_diary','search_result_doctor','search_result_hospital','search_result_more'
,'search_result_more_infomation','search_result_more_user','search_result_post','search_result_welfare','search_result_wiki','search_result_question_answer')))
AND (action in ('on_click_navbar_search','do_search')
or (action='on_click_card' and params['card_type']='highlight_word')
or (action='on_click_card' and params['card_content_type'] in ('service','qa','diary','user_post','answer'))
or (action='on_click_button' and params['button_name']='unfold' AND page_name in ('diary_detail','topic_detail','post_detail','user_post_detail','doctor_post_detail'))
or (action='on_click_button' AND params['button_name']='more_recommendations'))
)card_click
JOIN
(
SELECT partition_date,device_id,t2.active_type,t2.channel,t2.device_os_type
FROM
(
SELECT
partition_date,m.device_id
,array(device_os_type ,'合计') as device_os_type
,array(case WHEN active_type = '4' THEN '老活'
WHEN active_type in ('1','2') then '新增' END ,'合计') as active_type
,array(CASE WHEN is_ai_channel = 'true' THEN 'AI' ELSE '其他' END , '合计') as channel
FROM online.ml_device_day_active_status m
LEFT JOIN
(SELECT code,is_ai_channel,partition_day
FROM DIM.DIM_AI_CHANNEL_ZP_NEW
WHERE partition_day>= '{start_date}' AND partition_day < '{end_date}' ) tmp
ON m.partition_date=tmp.partition_day AND first_channel_source_type=code
where partition_date >= '{start_date}'
AND partition_date < '{end_date}'
AND active_type in ('1','2','4')
) mas
LATERAL VIEW explode(mas.channel) t2 AS channel
LATERAL VIEW explode(mas.device_os_type) t2 AS device_os_type
LATERAL VIEW explode(mas.active_type) t2 AS active_type
)dev_channel
on dev_channel.device_id = card_click.cl_id
AND dev_channel.partition_date = card_click.partition_date
GROUP BY card_click.partition_date,active_type,device_os_type,channel
)t5
on t5.partition_date = t1.partition_date
AND t5.active_type = t1.active_type
AND t5.device_os_type = t1.device_os_type
AND t5.channel = t1.channel
full JOIN
(
SELECT
click.partition_date as partition_date
,active_type
,device_os_type
,channel
,count(click.cl_id) as sug_out_pv_726
,count(distinct click.cl_id) as sug_out_uv_726
FROM
(
SELECT cl_id,partition_date,action,params['page_name'] as page_name
FROM online.bl_hdfs_maidian_updates
where partition_date >= '{start_date}'
AND partition_date < '{end_date}'
AND action = 'searchsug_view'
AND (int(split(app_version,'\\.')[0]) = 7 AND int(split(app_version,'\\.')[1]) >= 26)
)click
JOIN
(
SELECT partition_date,device_id,t2.active_type,t2.channel,t2.device_os_type
FROM
(
SELECT
partition_date,m.device_id
,array(device_os_type ,'合计') as device_os_type
,array(case WHEN active_type = '4' THEN '老活'
WHEN active_type in ('1','2') then '新增' END ,'合计') as active_type
,array(CASE WHEN is_ai_channel = 'true' THEN 'AI' ELSE '其他' END , '合计') as channel
FROM online.ml_device_day_active_status m
LEFT JOIN
(SELECT code,is_ai_channel,partition_day
FROM DIM.DIM_AI_CHANNEL_ZP_NEW
WHERE partition_day>= '{start_date}' AND partition_day < '{end_date}' ) tmp
ON m.partition_date=tmp.partition_day AND first_channel_source_type=code
where partition_date >= '{start_date}'
AND partition_date < '{end_date}'
AND active_type in ('1','2','4')
) mas
LATERAL VIEW explode(mas.channel) t2 AS channel
LATERAL VIEW explode(mas.device_os_type) t2 AS device_os_type
LATERAL VIEW explode(mas.active_type) t2 AS active_type
)dev_channel
on dev_channel.device_id = click.cl_id
AND dev_channel.partition_date = click.partition_date
GROUP BY click.partition_date,active_type,device_os_type,channel
)t6
on t6.partition_date = t1.partition_date
AND t6.active_type = t1.active_type
AND t6.device_os_type = t1.device_os_type
AND t6.channel = t1.channel
)t
""".format(start_date=today_str, end_date=last_30_day_str, )
device_df = spark.sql(sql)
device_df.show(1, False)
sql_res = device_df.collect()
for res in sql_res:
print(res)
# device_type = res.device_type
# active_type = res.active_type
# channel_type = res.channel_type
# core_pv_division_uv = res.core_pv_division_uv
# pv_division_uv = res.pv_division_uv
# pid = hashlib.md5(
# (today_str + device_type + active_type + channel_type).encode("utf8")).hexdigest()
# instert_sql = """replace into search_strategy_d(
# day_id,device_type,active_type,channel_type,core_pv_division_uv,pv_division_uv,pid
# ) VALUES('{day_id}','{device_type}','{active_type}','{channel_type}',{core_pv_division_uv},{pv_division_uv},'{pid}');""".format(
# day_id=today_str, device_type=device_type,
# active_type=active_type, channel_type=channel_type, core_pv_division_uv=core_pv_division_uv,
# pv_division_uv=pv_division_uv, pid=pid
#
# )
# print(instert_sql)
# # cursor.execute("set names 'UTF8'")
# res = cursor.execute(instert_sql)
# db.commit()
# print(res)
# db.close()
......@@ -63,7 +63,7 @@ spark.sql("CREATE TEMPORARY FUNCTION is_json AS 'com.gmei.hive.common.udf.UDFJso
spark.sql("CREATE TEMPORARY FUNCTION arrayMerge AS 'com.gmei.hive.common.udf.UDFArryMerge'")
task_list = []
task_days = 90
task_days = 1
for t in range(0, task_days):
day_num = 0 - t
......
---插入每天数据
SELECT mas.partition_date day_id
,device_os_type
,active_type
,grey_type
,case when t1.page_name in ('post_detail','user_post_detail','doctor_post_detail') then '帖子详情页'
when t1.page_name in ('diary_detail') then '日记本详情页'
when t1.page_name in ('topic_detail') then '日记帖详情页'
when t1.page_name in ('question_answer_detail') then '问答详情页' end as page_name
,nvl(sum(t1.pv),0) as content_pv
,nvl(count(distinct t1.cl_id),0) as content_uv
,nvl(sum(t4.pv),0) as wel_exp_pv
,nvl(sum(t5.pv),0) as content_exp_pv
,nvl(sum(t2.pv),0) as wel_click_pv
,nvl(sum(t3.pv),0) as content_click_pv
,nvl(sum(t6.pv),0) as slide_wel_click_pv
,nvl(sum(t7.pv),0) as self_wel_click_pv
FROM
(
SELECT
partition_date,device_id,device_os_type
,case WHEN active_type = '4' THEN '老活跃设备'
WHEN active_type in ('1','2') then '新增设备' END as active_type
,CASE WHEN substr(convup(setencryption(device_id,'sha-1'),16,10),-1) in ('0','1','2','3','4') THEN '灰度' ELSE '非灰' END AS grey_type
FROM online.ml_device_day_active_status
where partition_date=regexp_replace(DATE_SUB(current_date,1) ,'-','')
AND active_type in ('1','2','4')
AND first_channel_source_type not in ('yqxiu1','yqxiu2','yqxiu3','yqxiu4','yqxiu5','mxyc1','mxyc2','mxyc3'
,'wanpu','jinshan','jx','maimai','zhuoyi','huatian','suopingjingling','mocha','mizhe','meika','lamabang'
,'js-az1','js-az2','js-az3','js-az4','js-az5','jfq-az1','jfq-az2','jfq-az3','jfq-az4','jfq-az5','toufang1'
,'toufang2','toufang3','toufang4','toufang5','toufang6','TF-toufang1','TF-toufang2','TF-toufang3','TF-toufang4'
,'TF-toufang5','tf-toufang1','tf-toufang2','tf-toufang3','tf-toufang4','tf-toufang5','benzhan','promotion_aso100'
,'promotion_qianka','promotion_xiaoyu','promotion_dianru','promotion_malioaso','promotion_malioaso-shequ'
,'promotion_shike','promotion_julang_jl03','promotion_zuimei')
AND first_channel_source_type not like 'promotion\_jf\_%'
)mas
join
(
SELECT partition_date,cl_id,page_name,count(cl_id) as pv
FROM online.bl_hdfs_maidian_updates
WHERE partition_date=regexp_replace(DATE_SUB(current_date,1) ,'-','')
AND page_name in ('diary_detail','topic_detail','post_detail','user_post_detail','doctor_post_detail','question_answer_detail')
AND action='page_view'
AND int(split(app_version,'\\.')[1]) >= 30
group by partition_date,cl_id,page_name
)t1
on mas.partition_date=t1.partition_date
and mas.device_id=t1.cl_id
left join
(--你可能喜欢美购卡片点击
SELECT partition_date,cl_id,page_name,count(1) as pv
FROM
(
SELECT partition_date,cl_id,page_name,business_id,params['card_id'],app_session_id
FROM online.bl_hdfs_maidian_updates
WHERE partition_date=regexp_replace(DATE_SUB(current_date,1) ,'-','')
AND action='on_click_card'
and params['card_content_type']='service'
and params['in_page_pos']='bottom'
and params['tab_name'] is null--与横划区域区分
and ((params['position'] is not null and cl_type='android')
or (params['card_but_pos'] is null and cl_type='ios')) --与关联卡片区分
AND page_name in ('diary_detail','topic_detail','post_detail','user_post_detail','doctor_post_detail','question_answer_detail')
AND int(split(app_version,'\\.')[1]) >= 30
group by partition_date,cl_id,page_name,business_id,params['card_id'],app_session_id
)a
group by partition_date,cl_id,page_name
)t2
on t1.partition_date=t2.partition_date
and t1.cl_id=t2.cl_id
and t1.page_name=t2.page_name
left join
(--你可能喜欢内容卡片点击
SELECT partition_date,cl_id,page_name,count(1) as pv
FROM
(
SELECT partition_date,cl_id,page_name,business_id,params['card_id'],app_session_id
FROM online.bl_hdfs_maidian_updates
WHERE partition_date=regexp_replace(DATE_SUB(current_date,1) ,'-','')
AND action='on_click_card'
and params['card_content_type']<>'service'
and params['in_page_pos']='bottom'
and params['tab_name'] is null
AND page_name in ('diary_detail','topic_detail','post_detail','user_post_detail','doctor_post_detail','question_answer_detail')
AND int(split(app_version,'\\.')[1]) >= 30
group by partition_date,cl_id,page_name,business_id,params['card_id'],app_session_id
)a
group by partition_date,cl_id,page_name
)t3
on t1.partition_date=t3.partition_date
and t1.cl_id=t3.cl_id
and t1.page_name=t3.page_name
left join
(--你可能喜欢美购卡片曝光
SELECT partition_date,cl_id,page_name,count(1) as pv
FROM
(
select partition_date,cl_id,page_name,business_id,card_id,app_session_id
from online.ml_community_precise_exposure_detail
where partition_date=regexp_replace(DATE_SUB(current_date,1) ,'-','')
and page_name in ('diary_detail','topic_detail','post_detail','user_post_detail','doctor_post_detail','question_answer_detail')
and card_content_type='service'
and action ='page_precise_exposure'
and params['tab_name']='feed_recommend'
and params['in_page_pos']='bottom'
and int(split(app_version,'\\.')[1]) >= 30
and is_exposure='1'
group by partition_date,cl_id,page_name,business_id,card_id,app_session_id
)a
group by partition_date,cl_id,page_name
)t4
on t1.partition_date=t4.partition_date
and t1.cl_id=t4.cl_id
and t1.page_name=t4.page_name
left join
(--你可能喜欢内容卡片曝光
SELECT partition_date,cl_id,page_name,count(1) as pv
FROM
(
select partition_date,cl_id,page_name,business_id,card_id,app_session_id
from online.ml_community_precise_exposure_detail
where partition_date=regexp_replace(DATE_SUB(current_date,1) ,'-','')
and page_name in ('diary_detail','topic_detail','post_detail','user_post_detail','doctor_post_detail','question_answer_detail')
and card_content_type in ('diary','user_post','answer')
and action ='page_precise_exposure'
and params['tab_name']='feed_recommend'
and params['in_page_pos']='bottom'
and int(split(app_version,'\\.')[1]) >= 30
and is_exposure='1'
group by partition_date,cl_id,page_name,business_id,card_id,app_session_id
)a
group by partition_date,cl_id,page_name
)t5
on t1.partition_date=t5.partition_date
and t1.cl_id=t5.cl_id
and t1.page_name=t5.page_name
left join
(--横划美购卡片点击
SELECT partition_date,cl_id,page_name,count(1) as pv
FROM
(
SELECT partition_date,cl_id,page_name,business_id,params['card_id'],app_session_id
FROM online.bl_hdfs_maidian_updates
WHERE partition_date=regexp_replace(DATE_SUB(current_date,1) ,'-','')
and page_name in ('diary_detail','topic_detail','post_detail','user_post_detail','doctor_post_detail','question_answer_detail')
and (( action='on_click_button' and params['button_name'] = 'more_recommendations')
or (action ='on_click_card' and params['in_page_pos']='recommand' and params['card_content_type']='service')
or (action ='on_click_card' and params['tab_name']='hot_recommendation' and params['card_content_type']='service'))
and int(split(app_version,'\\.')[1]) >= 30
group by partition_date,cl_id,page_name,business_id,params['card_id'],app_session_id
)a
group by partition_date,cl_id,page_name
)t6
on t1.partition_date=t6.partition_date
and t1.cl_id=t6.cl_id
and t1.page_name=t6.page_name
left join
(--关联卡片
SELECT partition_date,cl_id,page_name,count(1) as pv
FROM
(
SELECT partition_date,cl_id,page_name,business_id,params['card_id'],app_session_id
FROM online.bl_hdfs_maidian_updates
WHERE partition_date=regexp_replace(DATE_SUB(current_date,1) ,'-','')
AND (get_json_object(params['extra_param'], '$.type')='交互栏'
or get_json_object(params['extra_param'], '$.jump_from')='msg_link'
or params['in_page_pos']='top'
or (params['in_page_pos']='bottom' and params['position'] is null and cl_type='android')
or (params['in_page_pos']='bottom' and params['card_but_pos'] is not null and cl_type='ios'))
AND action='on_click_card'
and params['card_content_type']='service'
AND page_name in ('diary_detail','topic_detail')
AND params['position'] is null
and int(split(app_version,'\\.')[1]) >= 30
group by partition_date,cl_id,page_name,business_id,params['card_id'],app_session_id
)a
group by partition_date,cl_id,page_name
)t7
on t1.partition_date=t7.partition_date
and t1.cl_id=t7.cl_id
and t1.page_name=t7.page_name
LEFT JOIN
(
select distinct device_id
from ml.ml_d_ct_dv_devicespam_d --去除机构刷单设备,即作弊设备(浏览和曝光事件去除)
WHERE partition_day=regexp_replace(DATE_SUB(current_date,1) ,'-','')
union all
select distinct device_id
from dim.dim_device_user_staff --去除内网用户
)spam_pv
on spam_pv.device_id=mas.device_id
LEFT JOIN
(
SELECT partition_date,device_id
FROM
(--找出user_id当天活跃的第一个设备id
SELECT user_id,partition_date,
if(size(device_list) > 0, device_list [ 0 ], '') AS device_id
FROM online.ml_user_updates
WHERE partition_date=regexp_replace(DATE_SUB(current_date,1) ,'-','')
)t1
JOIN
( --医生账号
SELECT distinct user_id
FROM online.tl_hdfs_doctor_view
WHERE partition_date = regexp_replace(DATE_SUB(current_date,1) ,'-','')
--马甲账号/模特用户
UNION ALL
SELECT user_id
FROM ml.ml_c_ct_ui_user_dimen_d
WHERE partition_day = regexp_replace(DATE_SUB(current_date,1) ,'-','')
AND (is_puppet = 'true' or is_classifyuser = 'true')
UNION ALL
--公司内网覆盖用户
select distinct user_id
from dim.dim_device_user_staff
UNION ALL
--登陆过医生设备
SELECT distinct t1.user_id
FROM
(
SELECT user_id, v.device_id as device_id
FROM online.ml_user_history_detail
LATERAL VIEW EXPLODE(device_history_list) v AS device_id
WHERE partition_date = regexp_replace(DATE_SUB(current_date,1) ,'-','')
) t1
JOIN
(
SELECT device_id
FROM online.ml_device_history_detail
WHERE partition_date = regexp_replace(DATE_SUB(current_date,1) ,'-','')
AND is_login_doctor = '1'
) t2
ON t1.device_id = t2.device_id
)t2
on t1.user_id=t2.user_id
group by partition_date,device_id
)dev
on mas.partition_date=dev.partition_date and mas.device_id=dev.device_id
WHERE spam_pv.device_id IS NULL
and dev.device_id is null
group by mas.partition_date
,device_os_type,active_type,grey_type
,case when t1.page_name in ('post_detail','user_post_detail','doctor_post_detail') then '帖子详情页'
when t1.page_name in ('diary_detail') then '日记本详情页'
when t1.page_name in ('topic_detail') then '日记帖详情页'
when t1.page_name in ('question_answer_detail') then '问答详情页' end
order by 1,2,3,4,5;
-- Read-back query: list the stored content-detail-page CTR rows from 20200730
-- through yesterday under their report column names.
SELECT
    day_id              AS `日期`,
    device_os_type      AS `设备类型`,
    active_type         AS `活跃类型`,
    grey_type           AS `灰度`,
    page_name           AS `页面`,
    -- click PV over exposure PV as a percentage string; NVL falls back to 0 when the expression is NULL
    NVL(CONCAT(ROUND(wel_click_pv / wel_exp_pv * 100, 2), '%'), 0)
        AS `内容页你可能喜欢美购卡片点击PV/该类卡片曝光PV`,
    NVL(CONCAT(ROUND(content_click_pv / content_exp_pv * 100, 2), '%'), 0)
        AS `内容页你可能喜欢内容卡片点击PV/该类卡片曝光PV`,
    content_pv          AS `内容详情页pv`,
    content_uv          AS `内容详情页uv`,
    wel_exp_pv          AS `你可能喜欢美购卡片曝光pv`,
    content_exp_pv      AS `你可能喜欢内容卡片曝光pv`,
    wel_click_pv        AS `你可能喜欢美购卡片点击pv`,
    content_click_pv    AS `你可能喜欢内容卡片点击pv`,
    slide_wel_click_pv  AS `横划卡片点击pv`,
    self_wel_click_pv   AS `关联卡片点击pv`
FROM pm.tl_pm_contentpage_ctr
WHERE partition_day BETWEEN '20200730'
                        AND regexp_replace(DATE_SUB(current_date, 1), '-', '')
ORDER BY `日期` DESC, `设备类型`, `活跃类型`, `灰度`, `页面`
# Register the JDBC URI of the jerry_prod database under the "prod.jerry.jdbcuri" Spark conf key.
# NOTE(review): the URI embeds a plaintext user name and password in source control;
# consider loading it from configuration or the environment instead.
sparkConf.set("prod.jerry.jdbcuri", "jdbc:mysql://172.16.40.158:4000/jerry_prod?user=st_user&password=aqpuBLYzEV7tML5RPsN1pntUzFy&rewriteBatchedStatements=true")
-- MySQL-dialect DDL for the table that stores content-detail-page CTR rows, keyed by pid.
-- NOTE(review): the table name spells conent (sic); it is kept as-is because other
-- statements refer to the table by this exact name.
CREATE TABLE conent_detail_page_grayscale_ctr (
    -- Dimensions
    day_id              VARCHAR(100),
    device_os_type      VARCHAR(100),
    active_type         VARCHAR(100),
    grey_type           VARCHAR(100),
    page_name           VARCHAR(100),
    -- PV / UV counters
    content_pv          BIGINT(20),
    content_uv          BIGINT(20),
    wel_exp_pv          BIGINT(20),
    content_exp_pv      BIGINT(20),
    wel_click_pv        BIGINT(20),
    content_click_pv    BIGINT(20),
    slide_wel_click_pv  BIGINT(20),
    self_wel_click_pv   BIGINT(20),
    partition_day       VARCHAR(100),
    -- Click-through rates; presumably content / service card CTR overall and for the grey group - TODO confirm
    neirong_ctr         FLOAT,
    meigou_ctr          FLOAT,
    grey_neirong_ctr    FLOAT,
    grey_meigou_ctr     FLOAT,
    pid                 VARCHAR(100),
    PRIMARY KEY (pid)
) ENGINE=InnoDB DEFAULT CHARSET=utf8;
-- NOTE(review): unfinished scratch text, not a runnable statement. The word alert is
-- presumably a typo for ALTER, and the self-join below stops mid-condition (the trailing
-- page_name has no comparison). TODO: complete or remove.
alert
select * from conent_detail_page_grayscale_ctr as s1 left join conent_detail_page_grayscale_ctr as s2 on s1.device_os_type = s2.device_os_type and s1.active_type = s2.active_type and page_name
-- Version 725: the search-result page page_view event gained a query-word parameter.
-- Outer projection of the per-query report. For each query word / device OS / active type /
-- channel it exposes search PV and UV plus card-click PV for yesterday (t3, t4), the past
-- 7 days (t5, t6) and the past 30 days (t1, t2), and the click-PV-per-search-UV ratios
-- (a dash is shown when the search UV of that window is 0).
SELECT
    t1.query              AS `query词`,
    t1.device_os_type     AS `设备类型`,
    t1.active_type        AS `活跃类型`,
    t1.channel            AS `渠道类型`,
    NVL(t1.search_pv, 0)  AS `过去30天搜索pv`,
    NVL(t1.search_uv, 0)  AS `过去30天搜索uv`,
    -- Yesterday
    IF(NVL(t3.search_uv, 0) <> 0,
       CONCAT(CAST((NVL(t4.hexin_card_click_pv, 0) / NVL(t3.search_uv, 0)) AS DECIMAL(18,2)), ''),
       '-')               AS `昨天-核心卡片点击PV/搜索UV`,
    IF(NVL(t3.search_uv, 0) <> 0,
       CONCAT(CAST((NVL(t4.neirong_card_click_pv, 0) / NVL(t3.search_uv, 0)) AS DECIMAL(18,2)), ''),
       '-')               AS `昨天-内容卡片点击PV/搜索UV`,
    -- Past 7 days
    IF(NVL(t5.search_uv, 0) <> 0,
       CONCAT(CAST((NVL(t6.hexin_card_click_pv, 0) / NVL(t5.search_uv, 0)) AS DECIMAL(18,2)), ''),
       '-')               AS `过去7天-核心卡片点击PV/搜索UV`,
    IF(NVL(t5.search_uv, 0) <> 0,
       CONCAT(CAST((NVL(t6.neirong_card_click_pv, 0) / NVL(t5.search_uv, 0)) AS DECIMAL(18,2)), ''),
       '-')               AS `过去7天-内容卡片点击PV/搜索UV`,
    -- Past 30 days
    IF(NVL(t1.search_uv, 0) <> 0,
       CONCAT(CAST((NVL(t2.hexin_card_click_pv, 0) / NVL(t1.search_uv, 0)) AS DECIMAL(18,2)), ''),
       '-')               AS `过去30天-核心卡片点击PV/搜索UV`,
    IF(NVL(t1.search_uv, 0) <> 0,
       CONCAT(CAST((NVL(t2.neirong_card_click_pv, 0) / NVL(t1.search_uv, 0)) AS DECIMAL(18,2)), ''),
       '-')               AS `过去30天-内容卡片点击PV/搜索UV`,
    NVL(t2.hexin_card_click_pv, 0)    AS `过去30天搜索结果核心卡片点击pv`,
    NVL(t2.neirong_card_click_pv, 0)  AS `过去30天搜索结果内容卡片点击pv`,
    NVL(t3.search_pv, 0)              AS `昨天搜索pv`,
    NVL(t3.search_uv, 0)              AS `昨天搜索uv`,
    NVL(t4.hexin_card_click_pv, 0)    AS `昨天搜索结果核心卡片点击pv`,
    NVL(t4.neirong_card_click_pv, 0)  AS `昨天搜索结果内容卡片点击pv`,
    NVL(t5.search_pv, 0)              AS `过去7天搜索pv`,
    NVL(t5.search_uv, 0)              AS `过去7天搜索uv`,
    NVL(t6.hexin_card_click_pv, 0)    AS `过去7天搜索结果核心卡片点击pv`,
    NVL(t6.neirong_card_click_pv, 0)  AS `过去7天搜索结果内容卡片点击pv`
FROM
( -- Total search volume over the past 30 days (through yesterday)
    SELECT query, active_type, device_os_type, channel, search_pv, search_uv
    FROM
    (
        SELECT query, active_type, device_os_type, channel,
               COUNT(t1.cl_id)          AS search_pv,
               COUNT(DISTINCT t1.cl_id) AS search_uv
        FROM
        (
            -- Search events: do_search and search_result_click_search
            SELECT partition_date,
                   params['query'] AS query,
                   cl_id
            FROM online.bl_hdfs_maidian_updates
            WHERE partition_date >= REGEXP_REPLACE(date_sub(current_date(), 30), '-', '')
              AND partition_date < regexp_replace(current_date, '-', '')
              AND action IN ('do_search','search_result_click_search')
            UNION ALL
            -- Card clicks whose page_name parameter is search_home
            SELECT partition_date, params['query'] AS query, cl_id
            FROM online.bl_hdfs_maidian_updates
            WHERE partition_date >= REGEXP_REPLACE(date_sub(current_date(), 30), '-', '')
              AND partition_date < regexp_replace(current_date, '-', '')
              AND action = 'on_click_card'
              AND params['page_name'] = 'search_home'
            UNION ALL
            -- search_word card clicks filtered by in_page_pos and tab_name; card_name is the query.
            -- A filter on page_name = home is intentionally left out: page_name is empty on Android.
            SELECT partition_date,
                   params['card_name'] AS query,
                   cl_id
            FROM online.bl_hdfs_maidian_updates
            WHERE partition_date >= REGEXP_REPLACE(date_sub(current_date(), 30), '-', '')
              AND partition_date < regexp_replace(current_date, '-', '')
              AND action = 'on_click_card'
              AND params['in_page_pos'] = '猜你喜欢'
              AND params['tab_name'] = '精选'
              AND params['card_type'] = 'search_word'
            UNION ALL
            -- search_word card clicks on the welfare_home page
            SELECT partition_date,
                   params['card_name'] AS query,
                   cl_id
            FROM online.bl_hdfs_maidian_updates
            WHERE partition_date >= REGEXP_REPLACE(date_sub(current_date(), 30), '-', '')
              AND partition_date < regexp_replace(current_date, '-', '')
              AND action = 'on_click_card'
              AND page_name = 'welfare_home'
              AND params['card_type'] = 'search_word'
              AND params['in_page_pos'] = '大家都在搜'
            UNION ALL
            -- highlight_word card clicks, only where the second dot-separated field of app_version is >= 27
            SELECT partition_date,
                   params['card_name'] AS query,
                   cl_id
            FROM online.bl_hdfs_maidian_updates
            WHERE partition_date >= REGEXP_REPLACE(date_sub(current_date(), 30), '-', '')
              AND partition_date < regexp_replace(current_date, '-', '')
              AND int(split(app_version,'\\.')[1]) >= 27
              AND action = 'on_click_card'
              AND params['card_type'] = 'highlight_word'
        ) t1
        JOIN
        (
            -- Device dimensions per day. Each dimension is wrapped in an array together with the
            -- total label, so the explodes emit both the specific value and a roll-up row.
            SELECT partition_date, device_id, t2.active_type, t2.channel, t2.device_os_type
            FROM
            (
                SELECT
                    partition_date, m.device_id,
                    array(device_os_type, '合计') AS device_os_type,
                    array(CASE WHEN active_type = '4' THEN '老活'
                               WHEN active_type IN ('1','2') THEN '新增' END, '合计') AS active_type,
                    array(CASE WHEN is_ai_channel = 'true' THEN 'AI' ELSE '其他' END, '合计') AS channel
                FROM online.ml_device_day_active_status m
                LEFT JOIN
                (
                    SELECT code, is_ai_channel, partition_day
                    FROM DIM.DIM_AI_CHANNEL_ZP_NEW
                    WHERE partition_day >= REGEXP_REPLACE(date_sub(current_date(), 30), '-', '')
                      AND partition_day < regexp_replace(current_date, '-', '')
                ) tmp
                ON m.partition_date = tmp.partition_day AND first_channel_source_type = code
                WHERE partition_date >= REGEXP_REPLACE(date_sub(current_date(), 30), '-', '')
                  AND partition_date < regexp_replace(current_date, '-', '')
                  AND active_type IN ('1','2','4')
            ) mas
            LATERAL VIEW explode(mas.channel) t2 AS channel
            LATERAL VIEW explode(mas.device_os_type) t2 AS device_os_type
            LATERAL VIEW explode(mas.active_type) t2 AS active_type
        ) t2
        ON t1.cl_id = t2.device_id AND t1.partition_date = t2.partition_date
        GROUP BY query, active_type, device_os_type, channel
    ) t
) t1
LEFT JOIN
-- Card-click PV on the search-result pages over the past 30 days (through yesterday)
(
    SELECT t1.query, active_type, device_os_type, channel,
           SUM(hexin)   AS hexin_card_click_pv,
           SUM(neirong) AS neirong_card_click_pv
    FROM
    (
        -- One row per day / device / query, merging core-card and content-card click counts.
        SELECT NVL(t2.partition_date, t3.partition_date) AS partition_date,
               NVL(t2.cl_id, t3.cl_id) AS cl_id,
               NVL(t2.query, t3.query) AS query,
               -- NVL takes two arguments: a side missing from the FULL JOIN counts as 0
               NVL(t2.pv, 0) AS hexin,
               NVL(t3.pv, 0) AS neirong
        FROM
        ( -- Core card clicks
            SELECT partition_date,
                   params['query'] AS query,
                   cl_id,
                   COUNT(1) AS pv
            FROM online.bl_hdfs_maidian_updates
            WHERE partition_date >= REGEXP_REPLACE(date_sub(current_date(), 30), '-', '')
              AND partition_date < regexp_replace(current_date, '-', '')
              AND ((action IN ('search_result_click_recommend_item','search_result_welfare_click_item','search_result_hospital_click_item','search_result_doctor_click_item','on_click_doctor_card', 'on_click_hospital_card')
                    AND page_name IN ('search_result_more','search_result_welfare','search_result_hospital','search_result_doctor'))
                OR (action = 'goto_welfare_detail' AND params['from'] = 'search_result_welfare_recommend')
                OR (action = 'on_click_card' AND params['card_content_type'] IN ('service','hospital','doctor') AND page_name IN ('search_result_more','search_result_welfare','search_result_hospital','search_result_doctor'))
                OR (action = 'on_click_button' AND params['button_name'] = 'check_plan' AND page_name = 'search_result_more'))
            GROUP BY partition_date,
                     params['query'],
                     cl_id
        ) t2
        FULL JOIN
        ( -- Content card clicks
            SELECT partition_date,
                   params['query'] AS query,
                   cl_id,
                   COUNT(1) AS pv
            FROM online.bl_hdfs_maidian_updates
            WHERE partition_date >= REGEXP_REPLACE(date_sub(current_date(), 30), '-', '')
              AND partition_date < regexp_replace(current_date, '-', '')
              AND ((action IN ('on_click_topic_card','on_click_diary_card','search_result_click_infomation_item')
                    AND page_name IN ('search_result_more','search_result_diary','search_result_post'))
                OR (action = 'on_click_card' AND params['card_content_type'] IN ('answer','diary') AND page_name IN ('search_result_more','search_result_diary','search_result_question_answer')))
            GROUP BY partition_date,
                     params['query'],
                     cl_id
        ) t3
        ON t3.partition_date = t2.partition_date
           AND t3.query = t2.query
           AND t3.cl_id = t2.cl_id
    ) t1
    JOIN
    (
        -- Device dimensions per day. Each dimension is wrapped in an array together with the
        -- total label, so the explodes emit both the specific value and a roll-up row.
        SELECT partition_date, device_id, t2.active_type, t2.channel, t2.device_os_type
        FROM
        (
            SELECT
                partition_date, m.device_id,
                array(device_os_type, '合计') AS device_os_type,
                array(CASE WHEN active_type = '4' THEN '老活'
                           WHEN active_type IN ('1','2') THEN '新增' END, '合计') AS active_type,
                array(CASE WHEN is_ai_channel = 'true' THEN 'AI' ELSE '其他' END, '合计') AS channel
            FROM online.ml_device_day_active_status m
            LEFT JOIN
            (
                SELECT code, is_ai_channel, partition_day
                FROM DIM.DIM_AI_CHANNEL_ZP_NEW
                WHERE partition_day >= REGEXP_REPLACE(date_sub(current_date(), 30), '-', '')
                  AND partition_day < regexp_replace(current_date, '-', '')
            ) tmp
            ON m.partition_date = tmp.partition_day AND first_channel_source_type = code
            WHERE partition_date >= REGEXP_REPLACE(date_sub(current_date(), 30), '-', '')
              AND partition_date < regexp_replace(current_date, '-', '')
              AND active_type IN ('1','2','4')
        ) mas
        LATERAL VIEW explode(mas.channel) t2 AS channel
        LATERAL VIEW explode(mas.device_os_type) t2 AS device_os_type
        LATERAL VIEW explode(mas.active_type) t2 AS active_type
    ) dev
    ON t1.cl_id = dev.device_id AND t1.partition_date = dev.partition_date
    GROUP BY t1.query, active_type, device_os_type, channel
) t2
ON t1.query = t2.query AND t1.active_type = t2.active_type AND t1.device_os_type = t2.device_os_type AND t1.channel = t2.channel
LEFT JOIN
( -- Total search volume for yesterday
    SELECT query, active_type, device_os_type, channel, search_pv, search_uv
    FROM
    (
        SELECT query, active_type, device_os_type, channel,
               COUNT(t1.cl_id)          AS search_pv,
               COUNT(DISTINCT t1.cl_id) AS search_uv
        FROM
        (
            -- Search events: do_search and search_result_click_search
            SELECT partition_date,
                   params['query'] AS query,
                   cl_id
            FROM online.bl_hdfs_maidian_updates
            WHERE partition_date >= REGEXP_REPLACE(date_sub(current_date(), 1), '-', '')
              AND partition_date < regexp_replace(current_date, '-', '')
              AND action IN ('do_search','search_result_click_search')
            UNION ALL
            -- Card clicks whose page_name parameter is search_home
            SELECT partition_date, params['query'] AS query, cl_id
            FROM online.bl_hdfs_maidian_updates
            WHERE partition_date >= REGEXP_REPLACE(date_sub(current_date(), 1), '-', '')
              AND partition_date < regexp_replace(current_date, '-', '')
              AND action = 'on_click_card'
              AND params['page_name'] = 'search_home'
            UNION ALL
            -- search_word card clicks filtered by in_page_pos and tab_name; card_name is the query.
            -- A filter on page_name = home is intentionally left out: page_name is empty on Android.
            SELECT partition_date,
                   params['card_name'] AS query,
                   cl_id
            FROM online.bl_hdfs_maidian_updates
            WHERE partition_date >= REGEXP_REPLACE(date_sub(current_date(), 1), '-', '')
              AND partition_date < regexp_replace(current_date, '-', '')
              AND action = 'on_click_card'
              AND params['in_page_pos'] = '猜你喜欢'
              AND params['tab_name'] = '精选'
              AND params['card_type'] = 'search_word'
            UNION ALL
            -- search_word card clicks on the welfare_home page
            SELECT partition_date,
                   params['card_name'] AS query,
                   cl_id
            FROM online.bl_hdfs_maidian_updates
            WHERE partition_date >= REGEXP_REPLACE(date_sub(current_date(), 1), '-', '')
              AND partition_date < regexp_replace(current_date, '-', '')
              AND action = 'on_click_card'
              AND page_name = 'welfare_home'
              AND params['card_type'] = 'search_word'
              AND params['in_page_pos'] = '大家都在搜'
            UNION ALL
            -- highlight_word card clicks, only where the second dot-separated field of app_version is >= 27
            SELECT partition_date,
                   params['card_name'] AS query,
                   cl_id
            FROM online.bl_hdfs_maidian_updates
            WHERE partition_date >= REGEXP_REPLACE(date_sub(current_date(), 1), '-', '')
              AND partition_date < regexp_replace(current_date, '-', '')
              AND int(split(app_version,'\\.')[1]) >= 27
              AND action = 'on_click_card'
              AND params['card_type'] = 'highlight_word'
        ) t1
        JOIN
        (
            -- Device dimensions per day. Each dimension is wrapped in an array together with the
            -- total label, so the explodes emit both the specific value and a roll-up row.
            SELECT partition_date, device_id, t2.active_type, t2.channel, t2.device_os_type
            FROM
            (
                SELECT
                    partition_date, m.device_id,
                    array(device_os_type, '合计') AS device_os_type,
                    array(CASE WHEN active_type = '4' THEN '老活'
                               WHEN active_type IN ('1','2') THEN '新增' END, '合计') AS active_type,
                    array(CASE WHEN is_ai_channel = 'true' THEN 'AI' ELSE '其他' END, '合计') AS channel
                FROM online.ml_device_day_active_status m
                LEFT JOIN
                (
                    SELECT code, is_ai_channel, partition_day
                    FROM DIM.DIM_AI_CHANNEL_ZP_NEW
                    WHERE partition_day >= REGEXP_REPLACE(date_sub(current_date(), 1), '-', '')
                      AND partition_day < regexp_replace(current_date, '-', '')
                ) tmp
                ON m.partition_date = tmp.partition_day AND first_channel_source_type = code
                WHERE partition_date >= REGEXP_REPLACE(date_sub(current_date(), 1), '-', '')
                  AND partition_date < regexp_replace(current_date, '-', '')
                  AND active_type IN ('1','2','4')
            ) mas
            LATERAL VIEW explode(mas.channel) t2 AS channel
            LATERAL VIEW explode(mas.device_os_type) t2 AS device_os_type
            LATERAL VIEW explode(mas.active_type) t2 AS active_type
        ) t2
        ON t1.cl_id = t2.device_id AND t1.partition_date = t2.partition_date
        GROUP BY query, active_type, device_os_type, channel
    ) t
) t3
ON t1.query = t3.query AND t1.active_type = t3.active_type AND t1.device_os_type = t3.device_os_type AND t1.channel = t3.channel
LEFT JOIN
( -- Card-click PV on the search-result pages for yesterday
    SELECT t1.query, active_type, device_os_type, channel,
           SUM(hexin)   AS hexin_card_click_pv,
           SUM(neirong) AS neirong_card_click_pv
    FROM
    (
        -- One row per day / device / query, merging core-card and content-card click counts.
        SELECT NVL(t2.partition_date, t3.partition_date) AS partition_date,
               NVL(t2.cl_id, t3.cl_id) AS cl_id,
               NVL(t2.query, t3.query) AS query,
               -- NVL takes two arguments: a side missing from the FULL JOIN counts as 0
               NVL(t2.pv, 0) AS hexin,
               NVL(t3.pv, 0) AS neirong
        FROM
        ( -- Core card clicks
            SELECT partition_date,
                   params['query'] AS query,
                   cl_id,
                   COUNT(1) AS pv
            FROM online.bl_hdfs_maidian_updates
            WHERE partition_date >= REGEXP_REPLACE(date_sub(current_date(), 1), '-', '')
              AND partition_date < regexp_replace(current_date, '-', '')
              AND ((action IN ('search_result_click_recommend_item','search_result_welfare_click_item','search_result_hospital_click_item','search_result_doctor_click_item','on_click_doctor_card', 'on_click_hospital_card')
                    AND page_name IN ('search_result_more','search_result_welfare','search_result_hospital','search_result_doctor'))
                OR (action = 'goto_welfare_detail' AND params['from'] = 'search_result_welfare_recommend')
                OR (action = 'on_click_card' AND params['card_content_type'] IN ('service','hospital','doctor') AND page_name IN ('search_result_more','search_result_welfare','search_result_hospital','search_result_doctor'))
                OR (action = 'on_click_button' AND params['button_name'] = 'check_plan' AND page_name = 'search_result_more'))
            GROUP BY partition_date,
                     params['query'],
                     cl_id
        ) t2
        FULL JOIN
        ( -- Content card clicks
            SELECT partition_date,
                   params['query'] AS query,
                   cl_id,
                   COUNT(1) AS pv
            FROM online.bl_hdfs_maidian_updates
            WHERE partition_date >= REGEXP_REPLACE(date_sub(current_date(), 1), '-', '')
              AND partition_date < regexp_replace(current_date, '-', '')
              AND ((action IN ('on_click_topic_card','on_click_diary_card','search_result_click_infomation_item')
                    AND page_name IN ('search_result_more','search_result_diary','search_result_post'))
                OR (action = 'on_click_card' AND params['card_content_type'] IN ('answer','diary') AND page_name IN ('search_result_more','search_result_diary','search_result_question_answer')))
            GROUP BY partition_date,
                     params['query'],
                     cl_id
        ) t3
        ON t3.partition_date = t2.partition_date
           AND t3.query = t2.query
           AND t3.cl_id = t2.cl_id
    ) t1
    JOIN
    (
        -- Device dimensions per day. Each dimension is wrapped in an array together with the
        -- total label, so the explodes emit both the specific value and a roll-up row.
        SELECT partition_date, device_id, t2.active_type, t2.channel, t2.device_os_type
        FROM
        (
            SELECT
                partition_date, m.device_id,
                array(device_os_type, '合计') AS device_os_type,
                array(CASE WHEN active_type = '4' THEN '老活'
                           WHEN active_type IN ('1','2') THEN '新增' END, '合计') AS active_type,
                array(CASE WHEN is_ai_channel = 'true' THEN 'AI' ELSE '其他' END, '合计') AS channel
            FROM online.ml_device_day_active_status m
            LEFT JOIN
            (
                SELECT code, is_ai_channel, partition_day
                FROM DIM.DIM_AI_CHANNEL_ZP_NEW
                WHERE partition_day >= REGEXP_REPLACE(date_sub(current_date(), 1), '-', '')
                  AND partition_day < regexp_replace(current_date, '-', '')
            ) tmp
            ON m.partition_date = tmp.partition_day AND first_channel_source_type = code
            WHERE partition_date >= REGEXP_REPLACE(date_sub(current_date(), 1), '-', '')
              AND partition_date < regexp_replace(current_date, '-', '')
              AND active_type IN ('1','2','4')
        ) mas
        LATERAL VIEW explode(mas.channel) t2 AS channel
        LATERAL VIEW explode(mas.device_os_type) t2 AS device_os_type
        LATERAL VIEW explode(mas.active_type) t2 AS active_type
    ) dev
    ON t1.cl_id = dev.device_id AND t1.partition_date = dev.partition_date
    GROUP BY t1.query, active_type, device_os_type, channel
) t4
ON t1.query = t4.query AND t1.active_type = t4.active_type AND t1.device_os_type = t4.device_os_type AND t1.channel = t4.channel
LEFT JOIN
( -- Total search volume over the past week (through yesterday)
    SELECT query, active_type, device_os_type, channel, search_pv, search_uv
    FROM
    (
        SELECT query, active_type, device_os_type, channel,
               COUNT(t1.cl_id)          AS search_pv,
               COUNT(DISTINCT t1.cl_id) AS search_uv
        FROM
        (
            -- Search events: do_search and search_result_click_search
            SELECT partition_date,
                   params['query'] AS query,
                   cl_id
            FROM online.bl_hdfs_maidian_updates
            WHERE partition_date >= REGEXP_REPLACE(date_sub(current_date(), 7), '-', '')
              AND partition_date < regexp_replace(current_date, '-', '')
              AND action IN ('do_search','search_result_click_search')
            UNION ALL
            -- Card clicks whose page_name parameter is search_home
            SELECT partition_date, params['query'] AS query, cl_id
            FROM online.bl_hdfs_maidian_updates
            WHERE partition_date >= REGEXP_REPLACE(date_sub(current_date(), 7), '-', '')
              AND partition_date < regexp_replace(current_date, '-', '')
              AND action = 'on_click_card'
              AND params['page_name'] = 'search_home'
            UNION ALL
            -- search_word card clicks filtered by in_page_pos and tab_name; card_name is the query.
            -- A filter on page_name = home is intentionally left out: page_name is empty on Android.
            SELECT partition_date,
                   params['card_name'] AS query,
                   cl_id
            FROM online.bl_hdfs_maidian_updates
            WHERE partition_date >= REGEXP_REPLACE(date_sub(current_date(), 7), '-', '')
              AND partition_date < regexp_replace(current_date, '-', '')
              AND action = 'on_click_card'
              AND params['in_page_pos'] = '猜你喜欢'
              AND params['tab_name'] = '精选'
              AND params['card_type'] = 'search_word'
            UNION ALL
            -- search_word card clicks on the welfare_home page
            SELECT partition_date,
                   params['card_name'] AS query,
                   cl_id
            FROM online.bl_hdfs_maidian_updates
            WHERE partition_date >= REGEXP_REPLACE(date_sub(current_date(), 7), '-', '')
              AND partition_date < regexp_replace(current_date, '-', '')
              AND action = 'on_click_card'
              AND page_name = 'welfare_home'
              AND params['card_type'] = 'search_word'
              AND params['in_page_pos'] = '大家都在搜'
            UNION ALL
            -- highlight_word card clicks, only where the second dot-separated field of app_version is >= 27
            SELECT partition_date,
                   params['card_name'] AS query,
                   cl_id
            FROM online.bl_hdfs_maidian_updates
            WHERE partition_date >= REGEXP_REPLACE(date_sub(current_date(), 7), '-', '')
              AND partition_date < regexp_replace(current_date, '-', '')
              AND int(split(app_version,'\\.')[1]) >= 27
              AND action = 'on_click_card'
              AND params['card_type'] = 'highlight_word'
        ) t1
        JOIN
        (
            -- Device dimensions per day. Each dimension is wrapped in an array together with the
            -- total label, so the explodes emit both the specific value and a roll-up row.
            SELECT partition_date, device_id, t2.active_type, t2.channel, t2.device_os_type
            FROM
            (
                SELECT
                    partition_date, m.device_id,
                    array(device_os_type, '合计') AS device_os_type,
                    array(CASE WHEN active_type = '4' THEN '老活'
                               WHEN active_type IN ('1','2') THEN '新增' END, '合计') AS active_type,
                    array(CASE WHEN is_ai_channel = 'true' THEN 'AI' ELSE '其他' END, '合计') AS channel
                FROM online.ml_device_day_active_status m
                LEFT JOIN
                (
                    SELECT code, is_ai_channel, partition_day
                    FROM DIM.DIM_AI_CHANNEL_ZP_NEW
                    WHERE partition_day >= REGEXP_REPLACE(date_sub(current_date(), 7), '-', '')
                      AND partition_day < regexp_replace(current_date, '-', '')
                ) tmp
                ON m.partition_date = tmp.partition_day AND first_channel_source_type = code
                WHERE partition_date >= REGEXP_REPLACE(date_sub(current_date(), 7), '-', '')
                  AND partition_date < regexp_replace(current_date, '-', '')
                  AND active_type IN ('1','2','4')
            ) mas
            LATERAL VIEW explode(mas.channel) t2 AS channel
            LATERAL VIEW explode(mas.device_os_type) t2 AS device_os_type
            LATERAL VIEW explode(mas.active_type) t2 AS active_type
        ) t2
        ON t1.cl_id = t2.device_id AND t1.partition_date = t2.partition_date
        GROUP BY query, active_type, device_os_type, channel
    ) t
) t5
ON t1.query = t5.query AND t1.active_type = t5.active_type AND t1.device_os_type = t5.device_os_type AND t1.channel = t5.channel
LEFT JOIN
( -- Card-click PV on the search-result pages over the past week (through yesterday)
    SELECT t1.query, active_type, device_os_type, channel,
           SUM(hexin)   AS hexin_card_click_pv,
           SUM(neirong) AS neirong_card_click_pv
    FROM
    (
        -- One row per day / device / query, merging core-card and content-card click counts.
        SELECT NVL(t2.partition_date, t3.partition_date) AS partition_date,
               NVL(t2.cl_id, t3.cl_id) AS cl_id,
               NVL(t2.query, t3.query) AS query,
               -- NVL takes two arguments: a side missing from the FULL JOIN counts as 0
               NVL(t2.pv, 0) AS hexin,
               NVL(t3.pv, 0) AS neirong
        FROM
        ( -- Core card clicks
            SELECT partition_date,
                   params['query'] AS query,
                   cl_id,
                   COUNT(1) AS pv
            FROM online.bl_hdfs_maidian_updates
            WHERE partition_date >= REGEXP_REPLACE(date_sub(current_date(), 7), '-', '')
              AND partition_date < regexp_replace(current_date, '-', '')
              AND ((action IN ('search_result_click_recommend_item','search_result_welfare_click_item','search_result_hospital_click_item','search_result_doctor_click_item','on_click_doctor_card', 'on_click_hospital_card')
                    AND page_name IN ('search_result_more','search_result_welfare','search_result_hospital','search_result_doctor'))
                OR (action = 'goto_welfare_detail' AND params['from'] = 'search_result_welfare_recommend')
                OR (action = 'on_click_card' AND params['card_content_type'] IN ('service','hospital','doctor') AND page_name IN ('search_result_more','search_result_welfare','search_result_hospital','search_result_doctor'))
                OR (action = 'on_click_button' AND params['button_name'] = 'check_plan' AND page_name = 'search_result_more'))
            GROUP BY partition_date,
                     params['query'],
                     cl_id
        ) t2
        FULL JOIN
        ( -- Content card clicks
            SELECT partition_date,
                   params['query'] AS query,
                   cl_id,
                   COUNT(1) AS pv
            FROM online.bl_hdfs_maidian_updates
            WHERE partition_date >= REGEXP_REPLACE(date_sub(current_date(), 7), '-', '')
              AND partition_date < regexp_replace(current_date, '-', '')
              AND ((action IN ('on_click_topic_card','on_click_diary_card','search_result_click_infomation_item')
                    AND page_name IN ('search_result_more','search_result_diary','search_result_post'))
                OR (action = 'on_click_card' AND params['card_content_type'] IN ('answer','diary') AND page_name IN ('search_result_more','search_result_diary','search_result_question_answer')))
            GROUP BY partition_date,
                     params['query'],
                     cl_id
        ) t3
        ON t3.partition_date = t2.partition_date
           AND t3.query = t2.query
           AND t3.cl_id = t2.cl_id
    ) t1
    JOIN
    (
        -- Device dimensions per day. Each dimension is wrapped in an array together with the
        -- total label, so the explodes emit both the specific value and a roll-up row.
        SELECT partition_date, device_id, t2.active_type, t2.channel, t2.device_os_type
        FROM
        (
            SELECT
                partition_date, m.device_id,
                array(device_os_type, '合计') AS device_os_type,
                array(CASE WHEN active_type = '4' THEN '老活'
                           WHEN active_type IN ('1','2') THEN '新增' END, '合计') AS active_type,
                array(CASE WHEN is_ai_channel = 'true' THEN 'AI' ELSE '其他' END, '合计') AS channel
            FROM online.ml_device_day_active_status m
            LEFT JOIN
            (
                SELECT code, is_ai_channel, partition_day
                FROM DIM.DIM_AI_CHANNEL_ZP_NEW
                WHERE partition_day >= REGEXP_REPLACE(date_sub(current_date(), 7), '-', '')
                  AND partition_day < regexp_replace(current_date, '-', '')
            ) tmp
            ON m.partition_date = tmp.partition_day AND first_channel_source_type = code
            WHERE partition_date >= REGEXP_REPLACE(date_sub(current_date(), 7), '-', '')
              AND partition_date < regexp_replace(current_date, '-', '')
              AND active_type IN ('1','2','4')
        ) mas
        LATERAL VIEW explode(mas.channel) t2 AS channel
        LATERAL VIEW explode(mas.device_os_type) t2 AS device_os_type
        LATERAL VIEW explode(mas.active_type) t2 AS active_type
    ) dev
    ON t1.cl_id = dev.device_id AND t1.partition_date = dev.partition_date
    GROUP BY t1.query, active_type, device_os_type, channel
) t6
ON t1.query = t6.query AND t1.active_type = t6.active_type AND t1.device_os_type = t6.device_os_type AND t1.channel = t6.channel
-- Sort the report by 30-day search PV, highest first, then by query word.
order by `过去30天搜索pv` desc,`query词`;
SELECT t1.query as query
,t1.device_os_type as device_type
,t1.active_type as active_type
,t1.channel as channel_type
,NVL(t1.search_pv,0) as 30_pv
,NVL(t1.search_uv,0) as 30_uv
,if(NVL(t3.search_uv,0) <> 0 ,concat(cast((NVL(t4.hexin_card_click_pv,0)/NVL(t3.search_uv,0)) as decimal(18,2)),'') , '-') as 1_core_pv_division_uv
,if(NVL(t3.search_uv,0) <> 0 ,concat(cast((NVL(t4.neirong_card_click_pv,0)/NVL(t3.search_uv,0)) as decimal(18,2)),'') , '-') as 1_pv_division_uv
,if(NVL(t5.search_uv,0) <> 0 ,concat(cast((NVL(t6.hexin_card_click_pv,0)/NVL(t5.search_uv,0)) as decimal(18,2)),'') , '-') as 7_core_pv_division_uv
,if(NVL(t5.search_uv,0) <> 0 ,concat(cast((NVL(t6.neirong_card_click_pv,0)/NVL(t5.search_uv,0)) as decimal(18,2)),'') , '-') as 7_pv_division_uv
,if(NVL(t1.search_uv,0) <> 0 ,concat(cast((NVL(t2.hexin_card_click_pv,0)/NVL(t1.search_uv,0)) as decimal(18,2)),'') , '-') as 30_core_pv_division_uv
,if(NVL(t1.search_uv,0) <> 0 ,concat(cast((NVL(t2.neirong_card_click_pv,0)/NVL(t1.search_uv,0)) as decimal(18,2)),'') , '-') as 30_pv_division_uv
,NVL(t2.hexin_card_click_pv,0) as 30_search_core_pv
,NVL(t2.neirong_card_click_pv,0) as 30_search_pv
,NVL(t3.search_pv,0) as 1_pv
,NVL(t3.search_uv,0) as 1_uv
,NVL(t4.hexin_card_click_pv,0) as 1_search_core_pv
,NVL(t4.neirong_card_click_pv,0) as 1_search_pv
,NVL(t5.search_pv,0) as 7_pv
,NVL(t5.search_uv,0) as 7_uv
,NVL(t6.hexin_card_click_pv,0) as 7_search_core_pv
,NVL(t6.neirong_card_click_pv,0) as 7_search_pv
FROM
(
SELECT query,active_type,device_os_type,channel,search_pv,search_uv
FROM
(
SELECT query,active_type,device_os_type,channel
,count(t1.cl_id) as search_pv
,count(distinct t1.cl_id) as search_uv
FROM
(
SELECT partition_date
,params['query'] as query
,cl_id
FROM online.bl_hdfs_maidian_updates
WHERE partition_date >= {last_30_day_str}
AND partition_date < {today_str}
AND action in ('do_search','search_result_click_search')
UNION ALL
SELECT partition_date,params['query'] as query,cl_id
FROM online.bl_hdfs_maidian_updates
WHERE partition_date >= {last_30_day_str}
AND partition_date < {today_str}
AND action = 'on_click_card'
AND params['page_name']='search_home'
UNION ALL
SELECT partition_date
,params['card_name'] as query
,cl_id
FROM online.bl_hdfs_maidian_updates
WHERE partition_date >= {last_30_day_str}
AND partition_date < {today_str}
AND action = 'on_click_card'
AND params['in_page_pos']='猜你喜欢'
AND params['tab_name']='精选'
AND params['card_type']='search_word'
UNION ALL
SELECT partition_date
,params['card_name'] as query
,cl_id
FROM online.bl_hdfs_maidian_updates
WHERE partition_date >= {last_30_day_str}
AND partition_date < {today_str}
AND action = 'on_click_card'
AND page_name='welfare_home'
AND params['card_type'] ='search_word'
AND params['in_page_pos']='大家都在搜'
UNION ALL
SELECT partition_date
,params['card_name'] as query
,cl_id
FROM online.bl_hdfs_maidian_updates
WHERE partition_date >= {last_30_day_str}
AND partition_date < {today_str}
AND int(split(app_version,'\\.')[1]) >= 27
AND action='on_click_card'
AND params['card_type']='highlight_word'
)t1
JOIN
(
SELECT partition_date,device_id,t2.active_type,t2.channel,t2.device_os_type
FROM
(
SELECT
partition_date,m.device_id
,array(device_os_type ,'合计') as device_os_type
,array(case WHEN active_type = '4' THEN '老活'
WHEN active_type in ('1','2') then '新增' END ,'合计') as active_type
,array(CASE WHEN is_ai_channel = 'true' THEN 'AI' ELSE '其他' END , '合计') as channel
FROM online.ml_device_day_active_status m
LEFT JOIN
(SELECT code,is_ai_channel,partition_day
FROM DIM.DIM_AI_CHANNEL_ZP_NEW
WHERE partition_day>= {last_30_day_str}
AND partition_day < {today_str}) tmp
ON m.partition_date=tmp.partition_day AND first_channel_source_type=code
WHERE partition_date >= {last_30_day_str}
AND partition_date < {today_str}
AND active_type in ('1','2','4')
) mas
LATERAL VIEW explode(mas.channel) t2 AS channel
LATERAL VIEW explode(mas.device_os_type) t2 AS device_os_type
LATERAL VIEW explode(mas.active_type) t2 AS active_type
)t2
on t1.cl_id=t2.device_id AND t1.partition_date = t2.partition_date
GROUP BY query,active_type,device_os_type,channel
)t
)t1
LEFT JOIN
(
SELECT t1.query,active_type,device_os_type,channel
,sum(hexin) as hexin_card_click_pv
,sum(neirong) as neirong_card_click_pv
FROM
(
SELECT NVL(t2.partition_date,t3.partition_date) as partition_date
,NVL(t2.cl_id,t3.cl_id) as cl_id
,NVL(t2.query,t3.query) as query
,NVL(t2.pv,0) as hexin
,NVL(t3.pv,0) as neirong
FROM
(
SELECT partition_date
,params['query'] as query
,cl_id
,count(1) as pv
FROM online.bl_hdfs_maidian_updates
WHERE partition_date >= {last_30_day_str}
AND partition_date < {today_str}
AND ((action in ('search_result_click_recommend_item','search_result_welfare_click_item','search_result_hospital_click_item','search_result_doctor_click_item','on_click_doctor_card', 'on_click_hospital_card')
AND page_name in ('search_result_more','search_result_welfare','search_result_hospital','search_result_doctor'))
or (action = 'goto_welfare_detail' AND params [ 'from' ] = 'search_result_welfare_recommend')
or (action = 'on_click_card' AND params['card_content_type'] in ('service','hospital','doctor') AND page_name in ('search_result_more','search_result_welfare','search_result_hospital','search_result_doctor'))
or (action = 'on_click_button' AND params['button_name'] = 'check_plan' AND page_name = 'search_result_more'))
GROUP BY partition_date
,params['query']
,cl_id
)t2
FULL JOIN
(
SELECT partition_date
,params['query'] as query
,cl_id
,count(1) as pv
FROM online.bl_hdfs_maidian_updates
WHERE partition_date >= {last_30_day_str}
AND partition_date < {today_str}
AND ((action in ('on_click_topic_card','on_click_diary_card','search_result_click_infomation_item')
AND page_name in ('search_result_more','search_result_diary','search_result_post'))
or (action = 'on_click_card' AND params['card_content_type'] in ('answer','diary') AND page_name in ('search_result_more','search_result_diary','search_result_question_answer')))
GROUP BY partition_date
,params['query']
,cl_id
)t3
on t3.partition_date=t2.partition_date
AND t3.query=t2.query
AND t3.cl_id=t2.cl_id
)t1
JOIN
(
SELECT partition_date,device_id,t2.active_type,t2.channel,t2.device_os_type
FROM
(
SELECT
partition_date,m.device_id
,array(device_os_type ,'合计') as device_os_type
,array(case WHEN active_type = '4' THEN '老活'
WHEN active_type in ('1','2') then '新增' END ,'合计') as active_type
,array(CASE WHEN is_ai_channel = 'true' THEN 'AI' ELSE '其他' END , '合计') as channel
FROM online.ml_device_day_active_status m
LEFT JOIN
(SELECT code,is_ai_channel,partition_day
FROM DIM.DIM_AI_CHANNEL_ZP_NEW
WHERE partition_day>= {last_30_day_str}
AND partition_day < {today_str}) tmp
ON m.partition_date=tmp.partition_day AND first_channel_source_type=code
WHERE partition_date >= {last_30_day_str}
AND partition_date < {today_str}
AND active_type in ('1','2','4')
) mas
LATERAL VIEW explode(mas.channel) t2 AS channel
LATERAL VIEW explode(mas.device_os_type) t2 AS device_os_type
LATERAL VIEW explode(mas.active_type) t2 AS active_type
)dev
on t1.cl_id=dev.device_id and t1.partition_date = dev.partition_date
GROUP BY t1.query,active_type,device_os_type,channel
)t2
on t1.query=t2.query and t1.active_type=t2.active_type and t1.device_os_type = t2.device_os_type AND t1.channel = t2.channel
LEFT JOIN
(
SELECT query,active_type,device_os_type,channel,search_pv,search_uv
FROM
(
SELECT query,active_type,device_os_type,channel
,count(t1.cl_id) as search_pv
,count(distinct t1.cl_id) as search_uv
FROM
(
SELECT partition_date
,params['query'] as query
,cl_id
FROM online.bl_hdfs_maidian_updates
WHERE partition_date >= {yesterday_str}
AND partition_date < {today_str}
AND action in ('do_search','search_result_click_search')
UNION ALL
SELECT partition_date,params['query'] as query,cl_id
FROM online.bl_hdfs_maidian_updates
WHERE partition_date >= {yesterday_str}
AND partition_date < {today_str}
AND action = 'on_click_card'
AND params['page_name']='search_home'
UNION ALL
SELECT partition_date
,params['card_name'] as query
,cl_id
FROM online.bl_hdfs_maidian_updates
WHERE partition_date >= {yesterday_str}
AND partition_date < {today_str}
AND action = 'on_click_card'
AND params['in_page_pos']='猜你喜欢'
AND params['tab_name']='精选'
AND params['card_type']='search_word'
UNION ALL
SELECT partition_date
,params['card_name'] as query
,cl_id
FROM online.bl_hdfs_maidian_updates
WHERE partition_date >= {yesterday_str}
AND partition_date < {today_str}
AND action = 'on_click_card'
AND page_name='welfare_home'
AND params['card_type'] ='search_word'
AND params['in_page_pos']='大家都在搜'
UNION ALL
SELECT partition_date
,params['card_name'] as query
,cl_id
FROM online.bl_hdfs_maidian_updates
WHERE partition_date >= {yesterday_str}
AND partition_date < {today_str}
AND int(split(app_version,'\\.')[1]) >= 27
AND action='on_click_card'
AND params['card_type']='highlight_word'
)t1
JOIN
(
SELECT partition_date,device_id,t2.active_type,t2.channel,t2.device_os_type
FROM
(
SELECT
partition_date,m.device_id
,array(device_os_type ,'合计') as device_os_type
,array(case WHEN active_type = '4' THEN '老活'
WHEN active_type in ('1','2') then '新增' END ,'合计') as active_type
,array(CASE WHEN is_ai_channel = 'true' THEN 'AI' ELSE '其他' END , '合计') as channel
FROM online.ml_device_day_active_status m
LEFT JOIN
(SELECT code,is_ai_channel,partition_day
FROM DIM.DIM_AI_CHANNEL_ZP_NEW
WHERE partition_day>= {yesterday_str}
AND partition_day < {today_str}) tmp
ON m.partition_date=tmp.partition_day AND first_channel_source_type=code
WHERE partition_date >= {yesterday_str}
AND partition_date < {today_str}
AND active_type in ('1','2','4')
) mas
LATERAL VIEW explode(mas.channel) t2 AS channel
LATERAL VIEW explode(mas.device_os_type) t2 AS device_os_type
LATERAL VIEW explode(mas.active_type) t2 AS active_type
)t2
on t1.cl_id=t2.device_id AND t1.partition_date = t2.partition_date
GROUP BY query,active_type,device_os_type,channel
)t
)t3
on t1.query=t3.query and t1.active_type=t3.active_type and t1.device_os_type = t3.device_os_type AND t1.channel = t3.channel
LEFT JOIN
(
SELECT t1.query,active_type,device_os_type,channel
,sum(hexin) as hexin_card_click_pv
,sum(neirong) as neirong_card_click_pv
FROM
(
SELECT NVL(t2.partition_date,t3.partition_date) as partition_date
,NVL(t2.cl_id,t3.cl_id) as cl_id
,NVL(t2.query,t3.query) as query
,NVL(t2.pv,0) as hexin
,NVL(t3.pv,0) as neirong
FROM
(
SELECT partition_date
,params['query'] as query
,cl_id
,count(1) as pv
FROM online.bl_hdfs_maidian_updates
WHERE partition_date >= {yesterday_str}
AND partition_date < {today_str}
AND ((action in ('search_result_click_recommend_item','search_result_welfare_click_item','search_result_hospital_click_item','search_result_doctor_click_item','on_click_doctor_card', 'on_click_hospital_card')
AND page_name in ('search_result_more','search_result_welfare','search_result_hospital','search_result_doctor'))
or (action = 'goto_welfare_detail' AND params [ 'from' ] = 'search_result_welfare_recommend')
or (action = 'on_click_card' AND params['card_content_type'] in ('service','hospital','doctor') AND page_name in ('search_result_more','search_result_welfare','search_result_hospital','search_result_doctor'))
or (action = 'on_click_button' AND params['button_name'] = 'check_plan' AND page_name = 'search_result_more'))
GROUP BY partition_date
,params['query']
,cl_id
)t2
FULL JOIN
(
SELECT partition_date
,params['query'] as query
,cl_id
,count(1) as pv
FROM online.bl_hdfs_maidian_updates
WHERE partition_date >= {yesterday_str}
AND partition_date < {today_str}
AND ((action in ('on_click_topic_card','on_click_diary_card','search_result_click_infomation_item')
AND page_name in ('search_result_more','search_result_diary','search_result_post'))
or (action = 'on_click_card' AND params['card_content_type'] in ('answer','diary') AND page_name in ('search_result_more','search_result_diary','search_result_question_answer')))
GROUP BY partition_date
,params['query']
,cl_id
)t3
on t3.partition_date=t2.partition_date
AND t3.query=t2.query
AND t3.cl_id=t2.cl_id
)t1
JOIN
(
SELECT partition_date,device_id,t2.active_type,t2.channel,t2.device_os_type
FROM
(
SELECT
partition_date,m.device_id
,array(device_os_type ,'合计') as device_os_type
,array(case WHEN active_type = '4' THEN '老活'
WHEN active_type in ('1','2') then '新增' END ,'合计') as active_type
,array(CASE WHEN is_ai_channel = 'true' THEN 'AI' ELSE '其他' END , '合计') as channel
FROM online.ml_device_day_active_status m
LEFT JOIN
(SELECT code,is_ai_channel,partition_day
FROM DIM.DIM_AI_CHANNEL_ZP_NEW
WHERE partition_day>= {yesterday_str}
AND partition_day < {today_str}) tmp
ON m.partition_date=tmp.partition_day AND first_channel_source_type=code
WHERE partition_date >= {yesterday_str}
AND partition_date < {today_str}
AND active_type in ('1','2','4')
) mas
LATERAL VIEW explode(mas.channel) t2 AS channel
LATERAL VIEW explode(mas.device_os_type) t2 AS device_os_type
LATERAL VIEW explode(mas.active_type) t2 AS active_type
)dev
on t1.cl_id=dev.device_id and t1.partition_date = dev.partition_date
GROUP BY t1.query,active_type,device_os_type,channel
)t4
on t1.query=t4.query and t1.active_type=t4.active_type and t1.device_os_type = t4.device_os_type AND t1.channel = t4.channel
LEFT JOIN
(
SELECT query,active_type,device_os_type,channel,search_pv,search_uv
FROM
(
SELECT query,active_type,device_os_type,channel
,count(t1.cl_id) as search_pv
,count(distinct t1.cl_id) as search_uv
FROM
(
SELECT partition_date
,params['query'] as query
,cl_id
FROM online.bl_hdfs_maidian_updates
WHERE partition_date >= {one_week_age_str}
AND partition_date < {today_str}
AND action in ('do_search','search_result_click_search')
UNION ALL
SELECT partition_date,params['query'] as query,cl_id
FROM online.bl_hdfs_maidian_updates
WHERE partition_date >= {one_week_age_str}
AND partition_date < {today_str}
AND action = 'on_click_card'
AND params['page_name']='search_home'
UNION ALL
SELECT partition_date
,params['card_name'] as query
,cl_id
FROM online.bl_hdfs_maidian_updates
WHERE partition_date >= {one_week_age_str}
AND partition_date < {today_str}
AND action = 'on_click_card'
AND params['in_page_pos']='猜你喜欢'
AND params['tab_name']='精选'
AND params['card_type']='search_word'
UNION ALL
SELECT partition_date
,params['card_name'] as query
,cl_id
FROM online.bl_hdfs_maidian_updates
WHERE partition_date >= {one_week_age_str}
AND partition_date < {today_str}
AND action = 'on_click_card'
AND page_name='welfare_home'
AND params['card_type'] ='search_word'
AND params['in_page_pos']='大家都在搜'
UNION ALL
SELECT partition_date
,params['card_name'] as query
,cl_id
FROM online.bl_hdfs_maidian_updates
WHERE partition_date >= {one_week_age_str}
AND partition_date < {today_str}
AND int(split(app_version,'\\.')[1]) >= 27
AND action='on_click_card'
AND params['card_type']='highlight_word'
)t1
JOIN
(
SELECT partition_date,device_id,t2.active_type,t2.channel,t2.device_os_type
FROM
(
SELECT
partition_date,m.device_id
,array(device_os_type ,'合计') as device_os_type
,array(case WHEN active_type = '4' THEN '老活'
WHEN active_type in ('1','2') then '新增' END ,'合计') as active_type
,array(CASE WHEN is_ai_channel = 'true' THEN 'AI' ELSE '其他' END , '合计') as channel
FROM online.ml_device_day_active_status m
LEFT JOIN
(SELECT code,is_ai_channel,partition_day
FROM DIM.DIM_AI_CHANNEL_ZP_NEW
WHERE partition_day>= {one_week_age_str}
AND partition_day < {today_str}) tmp
ON m.partition_date=tmp.partition_day AND first_channel_source_type=code
WHERE partition_date >= {one_week_age_str}
AND partition_date < {today_str}
AND active_type in ('1','2','4')
) mas
LATERAL VIEW explode(mas.channel) t2 AS channel
LATERAL VIEW explode(mas.device_os_type) t2 AS device_os_type
LATERAL VIEW explode(mas.active_type) t2 AS active_type
)t2
on t1.cl_id=t2.device_id AND t1.partition_date = t2.partition_date
GROUP BY query,active_type,device_os_type,channel
)t
)t5
on t1.query=t5.query and t1.active_type=t5.active_type and t1.device_os_type = t5.device_os_type AND t1.channel = t5.channel
LEFT JOIN
(
SELECT t1.query,active_type,device_os_type,channel
,sum(hexin) as hexin_card_click_pv
,sum(neirong) as neirong_card_click_pv
FROM
(
SELECT NVL(t2.partition_date,t3.partition_date) as partition_date
,NVL(t2.cl_id,t3.cl_id) as cl_id
,NVL(t2.query,t3.query) as query
,NVL(t2.pv,0) as hexin
,NVL(t3.pv,0) as neirong
FROM
(
SELECT partition_date
,params['query'] as query
,cl_id
,count(1) as pv
FROM online.bl_hdfs_maidian_updates
WHERE partition_date >= {one_week_age_str}
AND partition_date < {today_str}
AND ((action in ('search_result_click_recommend_item','search_result_welfare_click_item','search_result_hospital_click_item','search_result_doctor_click_item','on_click_doctor_card', 'on_click_hospital_card')
AND page_name in ('search_result_more','search_result_welfare','search_result_hospital','search_result_doctor'))
or (action = 'goto_welfare_detail' AND params [ 'from' ] = 'search_result_welfare_recommend')
or (action = 'on_click_card' AND params['card_content_type'] in ('service','hospital','doctor') AND page_name in ('search_result_more','search_result_welfare','search_result_hospital','search_result_doctor'))
or (action = 'on_click_button' AND params['button_name'] = 'check_plan' AND page_name = 'search_result_more'))
GROUP BY partition_date
,params['query']
,cl_id
)t2
FULL JOIN
(
SELECT partition_date
,params['query'] as query
,cl_id
,count(1) as pv
FROM online.bl_hdfs_maidian_updates
WHERE partition_date >= {one_week_age_str}
AND partition_date < {today_str}
AND ((action in ('on_click_topic_card','on_click_diary_card','search_result_click_infomation_item')
AND page_name in ('search_result_more','search_result_diary','search_result_post'))
or (action = 'on_click_card' AND params['card_content_type'] in ('answer','diary') AND page_name in ('search_result_more','search_result_diary','search_result_question_answer')))
GROUP BY partition_date
,params['query']
,cl_id
)t3
on t3.partition_date=t2.partition_date
AND t3.query=t2.query
AND t3.cl_id=t2.cl_id
)t1
JOIN
(
SELECT partition_date,device_id,t2.active_type,t2.channel,t2.device_os_type
FROM
(
SELECT
partition_date,m.device_id
,array(device_os_type ,'合计') as device_os_type
,array(case WHEN active_type = '4' THEN '老活'
WHEN active_type in ('1','2') then '新增' END ,'合计') as active_type
,array(CASE WHEN is_ai_channel = 'true' THEN 'AI' ELSE '其他' END , '合计') as channel
FROM online.ml_device_day_active_status m
LEFT JOIN
(SELECT code,is_ai_channel,partition_day
FROM DIM.DIM_AI_CHANNEL_ZP_NEW
WHERE partition_day>= {one_week_age_str}
AND partition_day < {today_str}) tmp
ON m.partition_date=tmp.partition_day AND first_channel_source_type=code
WHERE partition_date >= {one_week_age_str}
AND partition_date < {today_str}
AND active_type in ('1','2','4')
) mas
LATERAL VIEW explode(mas.channel) t2 AS channel
LATERAL VIEW explode(mas.device_os_type) t2 AS device_os_type
LATERAL VIEW explode(mas.active_type) t2 AS active_type
)dev
on t1.cl_id=dev.device_id and t1.partition_date = dev.partition_date
GROUP BY t1.query,active_type,device_os_type,channel
)t6
on t1.query=t6.query and t1.active_type=t6.active_type and t1.device_os_type = t6.device_os_type AND t1.channel = t6.channel
order by 30_pv desc,query
\ No newline at end of file
-- Home-page card click PV / home-page card precise-exposure PV.
-- Returns every column of pm.tl_pm_recommend_strategy_d plus the click/exposure
-- ratio rounded to 5 decimals, restricted to android devices, active_type '新增',
-- recommend_type 'ctr预估' and diary cards, newest day first.
-- Both counters are cast to double before dividing so the ratio is a floating-point division.
select *,ROUND(cast(card_click as double) /cast(card_exposure as double),5) as card_click_divide_card_exposure
from pm.tl_pm_recommend_strategy_d where
device_os_type = 'android' and active_type = '新增' and recommend_type = 'ctr预估' and card_content_type = 'diary'
order by day_id desc
spark-shell --master yarn --total-executor-cores 1 --executor-memory 1g
-- Output column list of the per-query search report: search PV/UV, card-click PV and
-- click-PV-per-search-UV ratios, each for a 30-, 1- and 7-period window (per the column aliases).
-- NOTE(review): the FROM/JOIN part is not included in this snippet. Judging by the aliases,
-- t1/t2 presumably supply the 30_* search and click figures, t3/t4 the 1_* ones and
-- t5/t6 the 7_* ones -- confirm against the full query.
SELECT t1.query as query
,t1.device_os_type as device_type
,t1.active_type as active_type
,t1.channel as channel_type
,NVL(t1.search_pv,0) as 30_pv
,NVL(t1.search_uv,0) as 30_uv
-- Ratios: card-click PV divided by search UV, cast to decimal(18,2). concat(..., '') turns the
-- number into a string so that the placeholder '-' can be returned when the search UV is 0/NULL.
-- hexin_card_click_pv feeds the *_core_* columns, neirong_card_click_pv the remaining ones.
,if(NVL(t3.search_uv,0) <> 0 ,concat(cast((NVL(t4.hexin_card_click_pv,0)/NVL(t3.search_uv,0)) as decimal(18,2)),'') , '-') as 1_core_pv_division_uv
,if(NVL(t3.search_uv,0) <> 0 ,concat(cast((NVL(t4.neirong_card_click_pv,0)/NVL(t3.search_uv,0)) as decimal(18,2)),'') , '-') as 1_pv_division_uv
,if(NVL(t5.search_uv,0) <> 0 ,concat(cast((NVL(t6.hexin_card_click_pv,0)/NVL(t5.search_uv,0)) as decimal(18,2)),'') , '-') as 7_core_pv_division_uv
,if(NVL(t5.search_uv,0) <> 0 ,concat(cast((NVL(t6.neirong_card_click_pv,0)/NVL(t5.search_uv,0)) as decimal(18,2)),'') , '-') as 7_pv_division_uv
,if(NVL(t1.search_uv,0) <> 0 ,concat(cast((NVL(t2.hexin_card_click_pv,0)/NVL(t1.search_uv,0)) as decimal(18,2)),'') , '-') as 30_core_pv_division_uv
,if(NVL(t1.search_uv,0) <> 0 ,concat(cast((NVL(t2.neirong_card_click_pv,0)/NVL(t1.search_uv,0)) as decimal(18,2)),'') , '-') as 30_pv_division_uv
-- Raw counters behind the ratios; missing join partners are reported as 0.
,NVL(t2.hexin_card_click_pv,0) as 30_search_core_pv
,NVL(t2.neirong_card_click_pv,0) as 30_search_pv
,NVL(t3.search_pv,0) as 1_pv
,NVL(t3.search_uv,0) as 1_uv
,NVL(t4.hexin_card_click_pv,0) as 1_search_core_pv
,NVL(t4.neirong_card_click_pv,0) as 1_search_pv
,NVL(t5.search_pv,0) as 7_pv
,NVL(t5.search_uv,0) as 7_uv
,NVL(t6.hexin_card_click_pv,0) as 7_search_core_pv
,NVL(t6.neirong_card_click_pv,0) as 7_search_pv
-- NOTE(review): the bare `query` token below is not attached to any clause and looks like a leftover -- confirm.
query
-- Store the daily data.
-- Builds, for yesterday's partition (current_date - 1 formatted as yyyyMMdd), one row per
-- (day, device OS, active type, card content type, recommend type) with:
--   * precise-exposure and click counts of home-page '精选' tab cards,
--   * average stay time on the detail pages reached from those cards,
--   * counts of follow-up ("second hop") actions taken on those detail pages.
-- Devices found in the spam/staff lists and devices tied to doctor/puppet/staff accounts are excluded.
-- NOTE(review): no INSERT target is shown here; the column list matches what the ad-hoc queries
-- read from pm.tl_pm_recommend_strategy_d -- confirm where this result is written.
SELECT
t1.partition_date as day_id,
t1.device_os_type as device_os_type,
t1.active_type as active_type,
t2.card_content_type as card_content_type,
t2.recommend_type as recommend_type,
NVL(sum(t3.session_pv),0) as card_click,
NVL(sum(t2.session_pv),0) as card_exposure,
-- total stay time / distinct devices with a stay record / 60, rounded to 2 decimals
NVL(round(sum(page_stay)/count(distinct t4.cl_id)/60,2),0) as avg_page_stay,
NVL(sum(navbar_pv),0) as navbar_search,
NVL(sum(highlight_pv),0) as highlight_word,
NVL(sum(self_wel_pv),0) as self_welfare_card,
NVL(sum(recom_wel_pv),0)-NVL(sum(self_wel_pv),0) as recommend_welfare_card,-- must exclude the clicks on the associated service cards
NVL(sum(recom_content_pv),0) as recommend_content_card,
NULL as recommend_special_card,
NVL(sum(referral_pv),0) as transfer_card,
NVL(sum(video_pv),0) as video_consultation
FROM
(
-- t1: yesterday's active devices with their OS and active-type label
-- (active_type '4' -> '老活', '1'/'2' -> '新增'), minus the listed channel sources.
SELECT partition_date
,device_os_type
,CASE WHEN active_type = '4' THEN '老活'
WHEN active_type IN ('1','2') THEN '新增' END AS active_type
,device_id
FROM online.ml_device_day_active_status
WHERE partition_date=regexp_replace(DATE_SUB(current_date,1) ,'-','')
AND active_type IN ('1','2','4')
AND first_channel_source_type not IN ('yqxiu1','yqxiu2','yqxiu3','yqxiu4','yqxiu5','mxyc1','mxyc2','mxyc3'
,'wanpu','jinshan','jx','maimai','zhuoyi','huatian','suopingjingling','mocha','mizhe','meika','lamabang'
,'js-az1','js-az2','js-az3','js-az4','js-az5','jfq-az1','jfq-az2','jfq-az3','jfq-az4','jfq-az5','toufang1'
,'toufang2','toufang3','toufang4','toufang5','toufang6','TF-toufang1','TF-toufang2','TF-toufang3','TF-toufang4'
,'TF-toufang5','tf-toufang1','tf-toufang2','tf-toufang3','tf-toufang4','tf-toufang5','benzhan','promotion_aso100'
,'promotion_qianka','promotion_xiaoyu','promotion_dianru','promotion_malioaso','promotion_malioaso-shequ'
,'promotion_shike','promotion_julang_jl03','promotion_zuimei')
AND first_channel_source_type not LIKE 'promotion\_jf\_%'
)t1
JOIN
(-- t2: precise exposure, de-duplicated by card id and session id
SELECT partition_date,
card_content_type,
cl_id,
recommend_type,
card_id,
count(distinct app_session_id) as session_pv
FROM
(
-- 'answer' is folded into 'qa'; transaction_type is mapped to a recommend_type label.
-- 'fmctr' is tested first because it would otherwise also match the '%ctr' pattern.
SELECT partition_date,
cl_id,
case when card_content_type in ('qa','answer') then 'qa' else card_content_type end as card_content_type,
CASE when transaction_type in ('fmctr') then 'fmctr'
WHEN transaction_type like '%ctr' THEN 'ctr预估'
WHEN transaction_type like '%cvr' THEN 'cvr预估'
WHEN transaction_type in ('-1','smr') THEN 'smr'
when transaction_type in ('pgc','hotspot') then '热点卡片'
when transaction_type in ('newdata') then '保量卡片'
when transaction_type in ('hotspot_feed') then 'hotspot_feed'
END AS recommend_type,
card_id,
app_session_id
from online.ml_community_precise_exposure_detail
WHERE partition_date=regexp_replace(DATE_SUB(current_date,1) ,'-','')
AND action in ('page_precise_exposure','home_choiceness_card_exposure') -- as of version 7745 the action was renamed to page_precise_exposure
AND is_exposure = '1' -- precise exposure
AND page_name ='home'
AND tab_name = '精选'
AND (transaction_type in ('-1','smr','hotspot','pgc','newdata','hotspot_feed')
or transaction_type like '%ctr' or transaction_type like '%cvr')
AND card_content_type in ('qa','diary','user_post','answer')
group by partition_date,
case when card_content_type in ('qa','answer') then 'qa' else card_content_type end,
cl_id,
CASE when transaction_type in ('fmctr') then 'fmctr'
WHEN transaction_type like '%ctr' THEN 'ctr预估'
WHEN transaction_type like '%cvr' THEN 'cvr预估'
WHEN transaction_type in ('-1','smr') THEN 'smr'
when transaction_type in ('pgc','hotspot') then '热点卡片'
when transaction_type in ('newdata') then '保量卡片'
when transaction_type in ('hotspot_feed') then 'hotspot_feed' END,
card_id,
app_session_id
)a
group by partition_date,card_content_type,cl_id,recommend_type,card_id
)t2
on t1.device_id=t2.cl_id and t1.partition_date=t2.partition_date
LEFT JOIN
(-- t3: card clicks, de-duplicated by card id and session id (same labelling as t2, read from params)
SELECT partition_date,
card_content_type,
cl_id,
recommend_type,
card_id,
count(distinct app_session_id) as session_pv
FROM
(
SELECT partition_date,
cl_id,
case when params['card_content_type'] in ('qa','answer') then 'qa' else params['card_content_type'] end as card_content_type,
CASE when params['transaction_type'] in ('fmctr') then 'fmctr'
WHEN params['transaction_type'] like '%ctr' THEN 'ctr预估'
WHEN params['transaction_type'] like '%cvr' THEN 'cvr预估'
WHEN params['transaction_type'] in ('-1','smr') THEN 'smr'
when params['transaction_type'] in ('pgc','hotspot') then '热点卡片'
when params['transaction_type'] in ('newdata') then '保量卡片'
when params['transaction_type'] in ('hotspot_feed') then 'hotspot_feed'
END AS recommend_type,
params['card_id'] as card_id,
app_session_id
from online.bl_hdfs_maidian_updates
WHERE partition_date=regexp_replace(DATE_SUB(current_date,1) ,'-','')
AND action='on_click_card'
AND params['page_name'] ='home'
AND params['tab_name'] = '精选'
AND (params['transaction_type'] in ('-1','smr','hotspot','pgc','newdata','hotspot_feed')
or params['transaction_type'] like '%ctr' or params['transaction_type'] like '%cvr')
AND params['card_content_type'] in ('qa','diary','user_post','answer')
GROUP BY partition_date,
cl_id,
case when params['card_content_type'] in ('qa','answer') then 'qa' else params['card_content_type'] end,
CASE when params['transaction_type'] in ('fmctr') then 'fmctr'
WHEN params['transaction_type'] like '%ctr' THEN 'ctr预估'
WHEN params['transaction_type'] like '%cvr' THEN 'cvr预估'
WHEN params['transaction_type'] in ('-1','smr') THEN 'smr'
when params['transaction_type'] in ('pgc','hotspot') then '热点卡片'
when params['transaction_type'] in ('newdata') then '保量卡片'
when params['transaction_type'] in ('hotspot_feed') then 'hotspot_feed' END,
params['card_id'],
app_session_id
)a
group by partition_date,card_content_type,cl_id,recommend_type,card_id
)t3
on t2.partition_date=t3.partition_date
and t2.cl_id=t3.cl_id
and t2.card_id=t3.card_id
and t2.card_content_type=t3.card_content_type
and t2.recommend_type=t3.recommend_type
-- t4..t11 below are all joined to the click rows (t3) on day, device, card id (= business_id)
-- and content type (= mapped page_name), so their metrics only attach to cards that were clicked.
LEFT JOIN
(-- t4: page view duration; detail page names are mapped to 'diary' / 'user_post' / 'qa'
SELECT partition_date,cl_id,business_id,
case when page_name in ('diary_detail','topic_detail') then 'diary'
when page_name in ('post_detail','user_post_detail','doctor_post_detail') then 'user_post'
when page_name in ('question_detail','answer_detail','question_answer_detail') then 'qa' else null end as page_name,
sum(page_stay) as page_stay
FROM online.bl_hdfs_maidian_updates
WHERE partition_date=regexp_replace(DATE_SUB(current_date,1) ,'-','')
AND action='page_view'
AND page_name IN ('diary_detail','topic_detail','post_detail','user_post_detail','doctor_post_detail','question_detail','answer_detail','question_answer_detail')
AND referrer='home'
AND page_stay>=0 AND page_stay<1000
GROUP BY partition_date,cl_id,business_id,
case when page_name in ('diary_detail','topic_detail') then 'diary'
when page_name in ('post_detail','user_post_detail','doctor_post_detail') then 'user_post'
when page_name in ('question_detail','answer_detail','question_answer_detail') then 'qa' else null end
)t4
on t4.partition_date=t3.partition_date
and t4.cl_id=t3.cl_id
and t4.business_id=t3.card_id
and t4.page_name=t3.card_content_type
LEFT JOIN
(-- t5: search box (navbar search) clicks and searches issued from the detail pages
SELECT partition_date,cl_id,business_id,
case when page_name in ('diary_detail','topic_detail') then 'diary'
when page_name in ('post_detail','user_post_detail','doctor_post_detail') then 'user_post'
when page_name in ('question_detail','answer_detail','question_answer_detail') then 'qa' else null end as page_name,
count(1) as navbar_pv
FROM online.bl_hdfs_maidian_updates
WHERE partition_date=regexp_replace(DATE_SUB(current_date,1) ,'-','')
AND action in ('on_click_navbar_search','do_search')
AND page_name IN ('diary_detail','topic_detail','post_detail','user_post_detail','doctor_post_detail','question_detail','answer_detail','question_answer_detail')
-- Came from home: either the direct referrer is 'home', or referrer_link contains '[' and the
-- last element returned by json_split for it is 'home'.
AND (referrer='home' or
(params['referrer_link'] like '%[%' and
json_split(params['referrer_link'])[size(json_split(params['referrer_link']))-1]='home'))
group by partition_date,cl_id,business_id,
case when page_name in ('diary_detail','topic_detail') then 'diary'
when page_name in ('post_detail','user_post_detail','doctor_post_detail') then 'user_post'
when page_name in ('question_detail','answer_detail','question_answer_detail') then 'qa' else null end
)t5
on t5.partition_date=t3.partition_date
and t5.cl_id=t3.cl_id
and t5.business_id=t3.card_id
and t5.page_name=t3.card_content_type
LEFT JOIN
(-- t6: highlight-word clicks
SELECT partition_date,cl_id,business_id,
case when page_name in ('diary_detail','topic_detail') then 'diary'
when page_name in ('post_detail','user_post_detail','doctor_post_detail') then 'user_post'
when page_name in ('question_detail','answer_detail','question_answer_detail') then 'qa' else null end as page_name,
count(1) as highlight_pv
FROM online.bl_hdfs_maidian_updates
WHERE partition_date=regexp_replace(DATE_SUB(current_date,1) ,'-','')
AND action='on_click_card'
and params['card_type']='highlight_word'
AND page_name IN ('diary_detail','topic_detail','post_detail','user_post_detail','doctor_post_detail','question_detail','answer_detail','question_answer_detail')
AND (referrer='home' or
(params['referrer_link'] like '%[%' and
json_split(params['referrer_link'])[size(json_split(params['referrer_link']))-1]='home'))
group by partition_date,cl_id,business_id,
case when page_name in ('diary_detail','topic_detail') then 'diary'
when page_name in ('post_detail','user_post_detail','doctor_post_detail') then 'user_post'
when page_name in ('question_detail','answer_detail','question_answer_detail') then 'qa' else null end
)t6
on t6.partition_date=t3.partition_date
and t6.cl_id=t3.cl_id
and t6.business_id=t3.card_id
and t6.page_name=t3.card_content_type
LEFT JOIN
(-- t7: associated welfare (service) cards; counted as distinct sessions per device/page
SELECT partition_date,cl_id,business_id,page_name,count(distinct app_session_id) as self_wel_pv
FROM
(
SELECT partition_date,cl_id,business_id,app_session_id,params['card_id'] as card_id,
case when page_name in ('diary_detail','topic_detail') then 'diary'
when page_name in ('post_detail','user_post_detail','doctor_post_detail') then 'user_post'
when page_name in ('question_detail','answer_detail','question_answer_detail') then 'qa' else null end as page_name,
count(1) as pv
FROM online.bl_hdfs_maidian_updates
WHERE partition_date=regexp_replace(DATE_SUB(current_date,1) ,'-','')
-- The card position is identified differently per client type (android vs ios).
AND (get_json_object(params['extra_param'], '$.type')='交互栏'
or get_json_object(params['extra_param'], '$.jump_from')='msg_link'
or params['in_page_pos']='top'
or (params['in_page_pos']='bottom' and params['position'] is null and cl_type='android')
or (params['in_page_pos']='bottom' and params['card_but_pos'] is not null and cl_type='ios'))
AND action='on_click_card'
and params['card_content_type']='service'
-- Only diary/topic detail pages are considered here, so page_name always maps to 'diary'.
AND page_name IN ('diary_detail','topic_detail')
AND (referrer='home' or
(params['referrer_link'] like '%[%' and
json_split(params['referrer_link'])[size(json_split(params['referrer_link']))-1]='home'))
group by partition_date,cl_id,business_id,app_session_id,params['card_id'],
case when page_name in ('diary_detail','topic_detail') then 'diary'
when page_name in ('post_detail','user_post_detail','doctor_post_detail') then 'user_post'
when page_name in ('question_detail','answer_detail','question_answer_detail') then 'qa' else null end
)a
group by partition_date,cl_id,business_id,page_name
)t7
on t7.partition_date=t3.partition_date
and t7.cl_id=t3.cl_id
and t7.business_id=t3.card_id
and t7.page_name=t3.card_content_type
LEFT JOIN
(-- t8: recommended welfare cards (the welfare items consumed by the author must be excluded;
-- the outer SELECT does this by subtracting self_wel_pv)
SELECT partition_date,cl_id,business_id,page_name,count(distinct app_session_id) as recom_wel_pv
FROM
(
SELECT partition_date,cl_id,business_id,app_session_id,params['card_id'] as card_id,
case when page_name in ('diary_detail','topic_detail') then 'diary'
when page_name in ('post_detail','user_post_detail','doctor_post_detail') then 'user_post'
when page_name in ('question_detail','answer_detail','question_answer_detail') then 'qa' else null end as page_name,
count(1) as service_pv
FROM online.bl_hdfs_maidian_updates
WHERE partition_date=regexp_replace(DATE_SUB(current_date,1) ,'-','')
-- AND binds tighter than OR: three alternatives (service card click, 'unfold' button on
-- diary/post pages, 'more_recommendations' button).
AND (action='on_click_card'and params['card_content_type']='service'
or action='on_click_button' and params['button_name']='unfold' and page_name in ('diary_detail','topic_detail','post_detail','user_post_detail','doctor_post_detail')
or action='on_click_button' and params['button_name'] = 'more_recommendations')
AND page_name IN ('diary_detail','topic_detail','post_detail','user_post_detail','doctor_post_detail','question_detail','answer_detail','question_answer_detail')
AND (referrer='home' or
(params['referrer_link'] like '%[%' and
json_split(params['referrer_link'])[size(json_split(params['referrer_link']))-1]='home'))
group by partition_date,cl_id,business_id,app_session_id,params['card_id'],
case when page_name in ('diary_detail','topic_detail') then 'diary'
when page_name in ('post_detail','user_post_detail','doctor_post_detail') then 'user_post'
when page_name in ('question_detail','answer_detail','question_answer_detail') then 'qa' else null end
)a
group by partition_date,cl_id,business_id,page_name
)t8
on t8.partition_date=t3.partition_date
and t8.cl_id=t3.cl_id
and t8.business_id=t3.card_id
and t8.page_name=t3.card_content_type
LEFT JOIN
(-- t9: recommended content cards
SELECT partition_date,cl_id,business_id,page_name,count(distinct app_session_id) as recom_content_pv
FROM
(
SELECT partition_date,cl_id,business_id,app_session_id,params['card_id'] as card_id,
case when page_name in ('diary_detail','topic_detail') then 'diary'
when page_name in ('post_detail','user_post_detail','doctor_post_detail') then 'user_post'
when page_name in ('question_detail','answer_detail','question_answer_detail') then 'qa' else null end as page_name,
count(1) as service_pv
FROM online.bl_hdfs_maidian_updates
WHERE partition_date=regexp_replace(DATE_SUB(current_date,1) ,'-','')
AND action='on_click_card'
and params['card_content_type'] in ('qa','diary','user_post','answer')
AND page_name IN ('diary_detail','topic_detail','post_detail','user_post_detail','doctor_post_detail','question_detail','answer_detail','question_answer_detail')
AND (referrer='home' or
(params['referrer_link'] like '%[%' and
json_split(params['referrer_link'])[size(json_split(params['referrer_link']))-1]='home'))
group by partition_date,cl_id,business_id,app_session_id,params['card_id'],
case when page_name in ('diary_detail','topic_detail') then 'diary'
when page_name in ('post_detail','user_post_detail','doctor_post_detail') then 'user_post'
when page_name in ('question_detail','answer_detail','question_answer_detail') then 'qa' else null end
)a
group by partition_date,cl_id,business_id,page_name
)t9
on t9.partition_date=t3.partition_date
and t9.cl_id=t3.cl_id
and t9.business_id=t3.card_id
and t9.page_name=t3.card_content_type
LEFT JOIN
(-- t10: referral clicks / video consultation clicks; this subquery counts the 'video_interview' button
SELECT partition_date,cl_id,business_id,
case when page_name in ('diary_detail','topic_detail') then 'diary'
when page_name in ('post_detail','user_post_detail','doctor_post_detail') then 'user_post'
when page_name in ('question_detail','answer_detail','question_answer_detail') then 'qa' else null end as page_name,
count(1) as video_pv
FROM online.bl_hdfs_maidian_updates
WHERE partition_date=regexp_replace(DATE_SUB(current_date,1) ,'-','')
AND action='on_click_button'
and params['button_name']='video_interview'
AND page_name IN ('diary_detail','topic_detail','post_detail','user_post_detail','doctor_post_detail','question_detail','answer_detail','question_answer_detail')
AND (referrer='home' or
(params['referrer_link'] like '%[%' and
json_split(params['referrer_link'])[size(json_split(params['referrer_link']))-1]='home'))
group by partition_date,cl_id,business_id,
case when page_name in ('diary_detail','topic_detail') then 'diary'
when page_name in ('post_detail','user_post_detail','doctor_post_detail') then 'user_post'
when page_name in ('question_detail','answer_detail','question_answer_detail') then 'qa' else null end
)t10
on t10.partition_date=t3.partition_date
and t10.cl_id=t3.cl_id
and t10.business_id=t3.card_id
and t10.page_name=t3.card_content_type
LEFT JOIN
(-- t11: 'referral' button clicks (the original comment read "recommended content cards",
-- which does not match the filter below and was apparently copy-pasted)
SELECT partition_date,cl_id,business_id,
case when page_name in ('diary_detail','topic_detail') then 'diary'
when page_name in ('post_detail','user_post_detail','doctor_post_detail') then 'user_post'
when page_name in ('question_detail','answer_detail','question_answer_detail') then 'qa' else null end as page_name,
count(1) as referral_pv
FROM online.bl_hdfs_maidian_updates
WHERE partition_date=regexp_replace(DATE_SUB(current_date,1) ,'-','')
AND action='on_click_button'
and params['button_name']='referral'
AND page_name IN ('diary_detail','topic_detail','post_detail','user_post_detail','doctor_post_detail','question_detail','answer_detail','question_answer_detail')
AND (referrer='home' or
(params['referrer_link'] like '%[%' and
json_split(params['referrer_link'])[size(json_split(params['referrer_link']))-1]='home'))
group by partition_date,cl_id,business_id,
case when page_name in ('diary_detail','topic_detail') then 'diary'
when page_name in ('post_detail','user_post_detail','doctor_post_detail') then 'user_post'
when page_name in ('question_detail','answer_detail','question_answer_detail') then 'qa' else null end
)t11
on t11.partition_date=t3.partition_date
and t11.cl_id=t3.cl_id
and t11.business_id=t3.card_id
and t11.page_name=t3.card_content_type
-- spam_pv and dev are exclusion lists: they are LEFT JOINed and the final WHERE keeps only
-- rows without a match (anti-join).
LEFT JOIN
(
select distinct device_id
from ml.ml_d_ct_dv_devicespam_d -- remove institution order-brushing devices, i.e. cheating devices (removed from view and exposure events)
WHERE partition_day=regexp_replace(DATE_SUB(current_date,1) ,'-','')
union all
select distinct device_id
from dim.dim_device_user_staff -- remove intranet (staff) users
)spam_pv
on spam_pv.device_id=t2.cl_id
LEFT JOIN
(
SELECT partition_date,device_id
FROM
(-- find the first device id in the user's device list for that day ('' when the list is empty)
SELECT user_id,partition_date,
if(size(device_list) > 0, device_list [ 0 ], '') AS device_id
FROM online.ml_user_updates
WHERE partition_date=regexp_replace(DATE_SUB(current_date,1) ,'-','')
)t1
JOIN
( -- doctor accounts
SELECT distinct user_id
FROM online.tl_hdfs_doctor_view
WHERE partition_date = regexp_replace(DATE_SUB(current_date,1) ,'-','')
-- sock-puppet accounts / model users
UNION ALL
SELECT user_id
FROM ml.ml_c_ct_ui_user_dimen_d
WHERE partition_day = regexp_replace(DATE_SUB(current_date,1) ,'-','')
AND (is_puppet = 'true' or is_classifyuser = 'true')
UNION ALL
-- users covered by the company intranet
select distinct user_id
from dim.dim_device_user_staff
UNION ALL
-- users who have logged in on a doctor device
SELECT distinct t1.user_id
FROM
(
SELECT user_id, v.device_id as device_id
FROM online.ml_user_history_detail
LATERAL VIEW EXPLODE(device_history_list) v AS device_id
WHERE partition_date = regexp_replace(DATE_SUB(current_date,1) ,'-','')
) t1
JOIN
(
SELECT device_id
FROM online.ml_device_history_detail
WHERE partition_date = regexp_replace(DATE_SUB(current_date,1) ,'-','')
AND is_login_doctor = '1'
) t2
ON t1.device_id = t2.device_id
)t2
on t1.user_id=t2.user_id
-- the UNION ALL branches may repeat a user; grouping collapses the result to one row per device
group by partition_date,device_id
)dev
on t2.partition_date=dev.partition_date and t2.cl_id=dev.device_id
WHERE spam_pv.device_id IS NULL
and dev.device_id is null
GROUP BY t1.partition_date,t1.device_os_type,t1.active_type,t2.card_content_type,t2.recommend_type
order by day_id,device_os_type,active_type,card_content_type,recommend_type;
-- Home-page card recommendation report, covering data from January 2020 to date.
-- Reads pm.tl_pm_recommend_strategy_d and exposes every metric under its report
-- column header. The "effective second-jump PV" is the sum of the follow-up
-- click counters, with recommend_content_card weighted by 0.2. Each ratio is
-- rendered as a percentage string; NVL substitutes 0 when the ratio is NULL.
SELECT
    day_id             AS `日期`,
    device_os_type     AS `设备类型`,
    active_type        AS `活跃类型`,
    card_content_type  AS `卡片类型`,
    recommend_type     AS `推荐类型`,
    -- effective second-jump PV / card exposure PV, as a percentage
    NVL(
        CONCAT(
            ROUND(
                (navbar_search + highlight_word + self_welfare_card + recommend_welfare_card
                 + recommend_content_card * 0.2 + transfer_card + video_consultation)
                / card_exposure * 100,
                2),
            '%'),
        0) AS `来自首页推荐内容卡片的的有效二跳pv/首页卡片精准曝光PV`,
    -- card click PV / card exposure PV, as a percentage
    NVL(
        CONCAT(ROUND(card_click / card_exposure * 100, 2), '%'),
        0) AS `首页卡片点击PV/首页卡片精准曝光PV`,
    -- effective second-jump PV / card click PV, as a percentage
    NVL(
        CONCAT(
            ROUND(
                (navbar_search + highlight_word + self_welfare_card + recommend_welfare_card
                 + recommend_content_card * 0.2 + transfer_card + video_consultation)
                / card_click * 100,
                2),
            '%'),
        0) AS `来自首页推荐内容卡片的的有效二跳pv/首页卡片点击PV`,
    card_click         AS `首页卡片点击PV`,
    card_exposure      AS `首页卡片精准曝光PV`,
    (navbar_search + highlight_word + self_welfare_card + recommend_welfare_card
     + recommend_content_card * 0.2 + transfer_card + video_consultation) AS `有效二跳pv`,
    avg_page_stay           AS `来自I的单PV平均浏览时长`,
    navbar_search           AS `来自I的搜索框+搜索按钮点击PV`,
    highlight_word          AS `来自I的文内搜索点击PV`,
    self_welfare_card       AS `来自I的商品卡片点击PV`,
    recommend_welfare_card  AS `来自I的推荐商品+查看全部商品点击pv`,
    recommend_content_card  AS `来自I的推荐内容点击pv`,
    -- Disabled: the recommended special-topic click PV column is not configured.
    -- '未配置' as `来自I的推荐专题点击pv`,
    transfer_card           AS `来自I的转诊点击pv`,
    video_consultation      AS `来自I的视频面诊点击pv`
FROM pm.tl_pm_recommend_strategy_d
WHERE partition_day BETWEEN '20200627'
                        AND regexp_replace(DATE_SUB(current_date,1) ,'-','')
  AND day_id >= '20200101'
ORDER BY
    `日期` DESC,
    `设备类型`,
    `活跃类型`,
    `卡片类型`,
    `推荐类型`;
-- Ad-hoc check: all columns of pm.tl_pm_recommend_strategy_d plus the
-- click / exposure ratio (rounded to 4 decimals), restricted to rows with
-- device_os_type = 'android', active_type = '新增' and recommend_type = 'ctr预估',
-- newest day first. Both operands are cast to DOUBLE before dividing.
SELECT
    *,
    ROUND(CAST(card_click AS DOUBLE) / CAST(card_exposure AS DOUBLE), 4) AS card_click_divide_card_exposure
FROM pm.tl_pm_recommend_strategy_d
WHERE device_os_type = 'android'
  AND active_type = '新增'
  AND recommend_type = 'ctr预估'
ORDER BY day_id DESC;
-- MySQL-side table for the recommendation strategy metrics, keyed by pid.
-- NOTE(review): the column names match those read from
-- pm.tl_pm_recommend_strategy_d by the report query in this file, so this table
-- presumably mirrors it; the per-column notes follow that query's report
-- headers. Confirm against the code that writes to this table.
CREATE TABLE recommend_strategy_d (
    day_id                  VARCHAR(100),   -- report date
    device_os_type          VARCHAR(100),   -- device type
    active_type             VARCHAR(100),   -- activity type
    card_content_type       VARCHAR(100),   -- card type
    recommend_type          VARCHAR(100),   -- recommendation type
    card_click              BIGINT(20),     -- home-page card click PV
    card_exposure           BIGINT(20),     -- home-page card precise-exposure PV
    avg_page_stay           DOUBLE,         -- average browsing time per PV
    navbar_search           BIGINT(20),     -- search box + search button click PV
    highlight_word          BIGINT(20),     -- in-text search click PV
    self_welfare_card       BIGINT(20),     -- product card click PV
    recommend_welfare_card  BIGINT(20),     -- recommended products + "view all products" click PV
    recommend_content_card  BIGINT(20),     -- recommended content click PV
    recommend_special_card  BIGINT(20),     -- presumably recommended special-topic click PV (TODO confirm)
    transfer_card           BIGINT(20),     -- referral click PV
    video_consultation      BIGINT(20),     -- video consultation click PV
    partition_day           VARCHAR(100),
    pid                     VARCHAR(100),   -- row identifier; primary key
    PRIMARY KEY (pid)
) ENGINE=InnoDB DEFAULT CHARSET=utf8;
-- ALTER statements: extend recommend_strategy_d with three FLOAT ratio columns.
-- (This heading was previously a bare, misspelt word, which is not valid SQL.)
-- NOTE(review): these presumably store the three ratios computed in the report
-- query (second-jump/exposure, second-jump/click, click/exposure) — confirm
-- against the code that populates the table.
ALTER TABLE recommend_strategy_d ADD recommend_ctr FLOAT;
ALTER TABLE recommend_strategy_d ADD second_jump_ctr FLOAT;
ALTER TABLE recommend_strategy_d ADD click_ctr FLOAT;
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment