Commit d70555b8 authored by 赵建伟's avatar 赵建伟

update codes

parent ef1aedd3
No preview for this file type
......@@ -2,14 +2,29 @@
<project version="4">
<component name="ChangeListManager">
<list default="true" id="4ef7a995-babe-48ad-a058-4897dd73703a" name="Default Changelist" comment="">
<change afterPath="$PROJECT_DIR$/../../shell/hive" afterDir="false" />
<change beforePath="$PROJECT_DIR$/../../../pm/daily_content_data/en-cn.properties" beforeDir="false" afterPath="$PROJECT_DIR$/../../../pm/daily_content_data/en-cn.properties" afterDir="false" />
<change beforePath="$PROJECT_DIR$/../../../pm/daily_content_data/etl/create_daliy_content.sql" beforeDir="false" afterPath="$PROJECT_DIR$/../../../pm/daily_content_data/etl/create_daily_content_data.sql" afterDir="false" />
<change beforePath="$PROJECT_DIR$/../../../pm/daily_content_data/etl/daily_content_data_insert.sql" beforeDir="false" afterPath="$PROJECT_DIR$/../../../pm/daily_content_data/etl/daily_content_data.sql" afterDir="false" />
<change beforePath="$PROJECT_DIR$/../../../pm/daily_content_data/job/step2.job" beforeDir="false" afterPath="$PROJECT_DIR$/../../../pm/daily_content_data/job/step2.job" afterDir="false" />
<change beforePath="$PROJECT_DIR$/../../../pm/daily_content_data/job/step3.job" beforeDir="false" afterPath="$PROJECT_DIR$/../../../pm/daily_content_data/job/step3.job" afterDir="false" />
<change beforePath="$PROJECT_DIR$/../../../pm/daily_content_data/job/step4.job" beforeDir="false" />
<change beforePath="$PROJECT_DIR$/../../../pm/daily_content_data/report/daily_content_data.sql" beforeDir="false" afterPath="$PROJECT_DIR$/../../../pm/daily_content_data/report/select_daily_content_data.sql" afterDir="false" />
<change afterPath="$PROJECT_DIR$/../../../pm/daily_recommend_strategy/etl/daily_recommend_strategy.sql" afterDir="false" />
<change beforePath="$PROJECT_DIR$/../../../pm/daily_content_data/en-cn.properties" beforeDir="false" afterPath="$PROJECT_DIR$/../../../pm/daily_recommend_strategy/en-cn.properties" afterDir="false" />
<change beforePath="$PROJECT_DIR$/../../../pm/daily_content_data/etl/create_daily_content_data.sql" beforeDir="false" afterPath="$PROJECT_DIR$/../../../pm/daily_recommend_strategy/etl/create_daily_recommend_strategy.sql" afterDir="false" />
<change beforePath="$PROJECT_DIR$/../../../pm/daily_content_data/etl/daily_content_data.sql" beforeDir="false" />
<change beforePath="$PROJECT_DIR$/../../../pm/daily_content_data/job/daily_content_data.zip" beforeDir="false" />
<change beforePath="$PROJECT_DIR$/../../../pm/daily_content_data/job/step1_1.job" beforeDir="false" afterPath="$PROJECT_DIR$/../../../pm/daily_recommend_strategy/job/step1_1.job" afterDir="false" />
<change beforePath="$PROJECT_DIR$/../../../pm/daily_content_data/job/step1_10.job" beforeDir="false" />
<change beforePath="$PROJECT_DIR$/../../../pm/daily_content_data/job/step1_11.job" beforeDir="false" />
<change beforePath="$PROJECT_DIR$/../../../pm/daily_content_data/job/step1_12.job" beforeDir="false" />
<change beforePath="$PROJECT_DIR$/../../../pm/daily_content_data/job/step1_13.job" beforeDir="false" />
<change beforePath="$PROJECT_DIR$/../../../pm/daily_content_data/job/step1_2.job" beforeDir="false" afterPath="$PROJECT_DIR$/../../../pm/daily_recommend_strategy/job/step1_2.job" afterDir="false" />
<change beforePath="$PROJECT_DIR$/../../../pm/daily_content_data/job/step1_3.job" beforeDir="false" afterPath="$PROJECT_DIR$/../../../pm/daily_recommend_strategy/job/step1_3.job" afterDir="false" />
<change beforePath="$PROJECT_DIR$/../../../pm/daily_content_data/job/step1_4.job" beforeDir="false" afterPath="$PROJECT_DIR$/../../../pm/daily_recommend_strategy/job/step1_4.job" afterDir="false" />
<change beforePath="$PROJECT_DIR$/../../../pm/daily_content_data/job/step1_5.job" beforeDir="false" afterPath="$PROJECT_DIR$/../../../pm/daily_recommend_strategy/job/step1_5.job" afterDir="false" />
<change beforePath="$PROJECT_DIR$/../../../pm/daily_content_data/job/step1_6.job" beforeDir="false" afterPath="$PROJECT_DIR$/../../../pm/daily_recommend_strategy/job/step1_6.job" afterDir="false" />
<change beforePath="$PROJECT_DIR$/../../../pm/daily_content_data/job/step1_7.job" beforeDir="false" afterPath="$PROJECT_DIR$/../../../pm/daily_recommend_strategy/job/step1_7.job" afterDir="false" />
<change beforePath="$PROJECT_DIR$/../../../pm/daily_content_data/job/step1_8.job" beforeDir="false" afterPath="$PROJECT_DIR$/../../../pm/daily_recommend_strategy/job/step1_8.job" afterDir="false" />
<change beforePath="$PROJECT_DIR$/../../../pm/daily_content_data/job/step1_9.job" beforeDir="false" afterPath="$PROJECT_DIR$/../../../pm/daily_recommend_strategy/job/step1_9.job" afterDir="false" />
<change beforePath="$PROJECT_DIR$/../../../pm/daily_content_data/job/step2.job" beforeDir="false" afterPath="$PROJECT_DIR$/../../../pm/daily_recommend_strategy/job/step2.job" afterDir="false" />
<change beforePath="$PROJECT_DIR$/../../../pm/daily_content_data/job/step3.job" beforeDir="false" afterPath="$PROJECT_DIR$/../../../pm/daily_recommend_strategy/job/step3.job" afterDir="false" />
<change beforePath="$PROJECT_DIR$/../../../pm/daily_content_data/readme.txt" beforeDir="false" afterPath="$PROJECT_DIR$/../../../pm/daily_recommend_strategy/readme.txt" afterDir="false" />
<change beforePath="$PROJECT_DIR$/../../../pm/daily_content_data/report/select_daily_content_data.sql" beforeDir="false" afterPath="$PROJECT_DIR$/../../../pm/daily_recommend_strategy/report/select_daily_recommend_strategy.sql" afterDir="false" />
<change beforePath="$PROJECT_DIR$/../../../readme.txt" beforeDir="false" afterPath="$PROJECT_DIR$/../../../readme.txt" afterDir="false" />
</list>
<option name="SHOW_DIALOG" value="false" />
<option name="HIGHLIGHT_CONFLICTS" value="true" />
......@@ -64,6 +79,7 @@
<option name="showLibraryContents" value="true" />
</component>
<component name="PropertiesComponent">
<property name="ASKED_ADD_EXTERNAL_FILES" value="true" />
<property name="RequestMappingsPanelOrder0" value="0" />
<property name="RequestMappingsPanelOrder1" value="1" />
<property name="RequestMappingsPanelWidth0" value="75" />
......@@ -188,13 +204,16 @@
<workItem from="1591668349091" duration="1455000" />
<workItem from="1592214013610" duration="2379000" />
<workItem from="1592568026557" duration="772000" />
<workItem from="1593835574199" duration="1924000" />
<workItem from="1593835574199" duration="3629000" />
</task>
<servers />
</component>
<component name="TypeScriptGeneratedFilesManager">
<option name="version" value="1" />
</component>
<component name="VcsManagerConfiguration">
<option name="ADD_EXTERNAL_FILES_SILENTLY" value="true" />
</component>
<component name="WindowStateProjectService">
<state x="773" y="303" key="#com.intellij.ide.util.MemberChooser" timestamp="1593835959317">
<screen x="0" y="22" width="1920" height="1129" />
......
select_daily_content_data=内容日报-新
\ No newline at end of file
INSERT OVERWRITE TABLE pm.tl_pm_content_d PARTITION (PARTITION_DAY = #partition_day)
SELECT
T1.partition_date AS day_id
,T1.device_os_type AS device_os_type
,T1.active_type AS active_type
,T1.channel AS is_ai_channel
,COALESCE(T1.dau,0) AS dau
,COALESCE(T2.neirong_uv,0) AS content_uv
,COALESCE(T2.neirong_pv,0) AS content_pv
,COALESCE(ROUND(T2.neirong_uv/T1.dau,4),0) AS per_content_uv
,COALESCE(ROUND(T2.neirong_pv/T2.neirong_uv,4),0) AS per_content_pv
,COALESCE(CONCAT(ROUND(T4.retention_num1/T2.neirong_uv*100,4),'%'),0) AS retention_1
,COALESCE(CONCAT(ROUND(T4.retention_num7/T2.neirong_uv*100,4),'%'),0) AS retention_7
,COALESCE(CONCAT(ROUND(T4.retention_num30/T2.neirong_uv*100,4),'%'),0) AS retention_30
,COALESCE(T5.app_duration,0) AS avg_app_duration
,COALESCE(T3.neirong_stay,0) AS avg_content_stay
,COALESCE(T5.avg_opentimes,0) AS avg_open_times
,COALESCE(T9.search_stay,0) AS search_related_stay
,COALESCE(T9.welfare_stay,0) AS welfare_stay
,COALESCE(T9.question_stay,0) AS content_question_stay
,COALESCE(T9.ai_related_stay,0) AS ai_related_stay
,COALESCE(T9.diary_stay,0) AS content_diary_stay
,COALESCE(T9.home_stay,0) AS home_stay
,COALESCE(T9.conv_stay,0) AS conv_related_stay
,COALESCE(ROUND(T6.recommend_uv/T2.neirong_uv,4),0) AS recommend_rate
,COALESCE(ROUND(T6.recommend_pv/T6.recommend_uv,4),0) AS per_recommend_pv
,COALESCE(ROUND(T6.feeds_uv/T2.neirong_uv,4),0) AS feeds_rate
,COALESCE(ROUND(T6.feeds_pv/T6.feeds_uv,4),0) AS per_feeds_pv
,COALESCE(ROUND(T6.search_uv/T2.neirong_uv,4),0) AS search_rate
,COALESCE(ROUND(T6.search_pv/T6.search_uv,4),0) AS per_search_pv
,COALESCE(ROUND(T6.zone_uv/T2.neirong_uv,4),0) AS zone_rate
,COALESCE(ROUND(T6.zone_pv/T6.zone_uv,4),0) AS per_zone_pv
,COALESCE(ROUND(T6.content_uv/T2.neirong_uv,4),0) AS content_rate
,COALESCE(ROUND(T6.content_pv/T6.content_uv,4),0) AS per_from_content_pv
,COALESCE(ROUND(T6.blank_uv/T2.neirong_uv,4),0) AS blank_rate
,COALESCE(ROUND(T6.blank_pv/T6.blank_uv,4),0) AS per_blank_pv
,COALESCE(ROUND(T6.comment_uv/T2.neirong_uv,4),0) AS comment_rate
,COALESCE(ROUND(T6.comment_pv/T6.comment_uv,4),0) AS per_comment_pv
,COALESCE(ROUND(T6.org_uv/T2.neirong_uv,4),0) AS org_rate
,COALESCE(ROUND(T6.org_pv/T6.org_uv,4),0) AS per_org_pv
,COALESCE(ROUND(T6.category_uv/T2.neirong_uv,4),0) AS category_rate
,COALESCE(ROUND(T6.category_pv/T6.category_uv,4),0) AS per_category_pv
,COALESCE(ROUND(T6.my_diary_uv/T2.neirong_uv,4),0) AS my_diary_rate
,COALESCE(ROUND(T6.my_diary_pv/T6.my_diary_uv,4),0) AS per_my_diary_pv
,COALESCE(ROUND(T6.ai_uv/T2.neirong_uv,4),0) AS ai_rate
,COALESCE(ROUND(T6.ai_pv/T6.ai_uv,4),0) AS per_ai_pv
,null AS create_topic_num
,null AS create_reply_num
,COALESCE(T2.diary_uv,0) AS diary_uv
,COALESCE(T2.diary_pv,0) AS diary_pv
,COALESCE(ROUND(T2.diary_uv/T2.neirong_uv,4),0) AS diary_rate
,COALESCE(ROUND(T2.diary_pv/T2.diary_uv,4),0) AS per_diary_pv
,COALESCE(T3.diary_stay,0) AS diary_stay
,COALESCE(T2.post_uv,0) AS post_uv
,COALESCE(T2.post_pv,0) AS post_pv
,COALESCE(ROUND(T2.post_uv/T2.neirong_uv,4),0) AS post_rate
,COALESCE(ROUND(T2.post_pv/T2.post_uv,4),0) AS per_post_pv
,COALESCE(T3.post_stay,0) AS post_stay
,COALESCE(T2.question_uv,0) AS question_uv
,COALESCE(T2.question_pv,0) AS question_pv
,COALESCE(ROUND(T2.question_uv/T2.neirong_uv,4),0) AS question_rate
,COALESCE(ROUND(T2.question_pv/T2.question_uv,4),0) AS per_question_pv
,COALESCE(T3.question_stay,0) AS question_stay
,COALESCE(T2.question_answer_uv,0) AS question_answer_uv
,COALESCE(T2.question_answer_pv,0) AS question_answer_pv
,COALESCE(ROUND(T2.question_answer_uv/T2.neirong_uv,4),0) AS question_answer_rate
,COALESCE(ROUND(T2.question_answer_pv/T2.question_answer_uv,4),0) AS per_question_answer_pv
,COALESCE(T3.question_answer_stay,0) AS question_answer_stay
,COALESCE(T2.answer_uv,0) AS answer_uv
,COALESCE(T2.answer_pv,0) AS answer_pv
,COALESCE(ROUND(T2.answer_uv/T2.neirong_uv,4),0) AS answer_rate
,COALESCE(ROUND(T2.answer_pv/T2.answer_uv,4),0) AS per_answer_pv
,COALESCE(T3.answer_stay,0) AS answer_stay
,COALESCE(T2.video_uv,0) AS video_uv
,COALESCE(T2.video_pv,0) AS video_pv
,COALESCE(ROUND(T2.video_uv/T2.neirong_uv,4),0) AS video_rate
,COALESCE(ROUND(T2.video_pv/T2.video_uv,4),0) AS per_video_pv
,COALESCE(T3.video_stay,0) AS video_stay
,COALESCE(T2.wiki_uv,0) AS wiki_uv
,COALESCE(T2.wiki_pv,0) AS wiki_pv
,COALESCE(ROUND(T2.wiki_uv/T2.neirong_uv,4),0) AS wiki_rate
,COALESCE(ROUND(T2.wiki_pv/T2.wiki_uv,4),0) AS per_wiki_pv
,COALESCE(T3.wiki_stay,0) AS wiki_stay
,COALESCE(T2.article_uv,0) AS article_uv
,COALESCE(T2.article_pv,0) AS article_pv
,COALESCE(ROUND(T2.article_uv/T2.neirong_uv,4),0) AS article_rate
,COALESCE(ROUND(T2.article_pv/T2.article_uv,4),0) AS per_article_pv
,COALESCE(T3.article_stay,0) AS article_stay
FROM
(--基础维度/dau
SELECT partition_date,device_os_type,active_type,t2.channel,count(distinct device_id) AS dau
FROM
(
SELECT partition_date
,m.device_id, device_os_type
,case WHEN active_type = '4' THEN '老活跃设备'
WHEN active_type in ('1','2') then '新增设备' END as active_type
,array(CASE WHEN first_channel_source_type like '%xinyouxingkong%' or a.device_id is not NULL THEN '可疑'
WHEN (partition_date>='20190601' and tmp.col2 = 'AI')
or (partition_date < '20200301' AND partition_date>='20190601' and first_channel_source_type like 'promotion_toutiao_jy%')
or (partition_date>='20200601' and ((first_channel_source_type like 'promotion_toutiao_jy%') or (first_channel_source_type like 'dyand%') or (first_channel_source_type like 'douyin%')))
THEN 'AI' ELSE '其他' END , '合计') as channel
FROM online.ml_device_day_active_status m
LEFT JOIN
(SELECT col1,col2 --col1:子渠道,col2:是否属于AI,col3:标识
FROM pm.tl_pm_ydl
WHERE col3='0204_danlei_channel')tmp
on first_channel_source_type=tmp.col1
LEFT JOIN
(SELECT DISTINCT device_id
FROM al.al_pm_ct_dv_deviceappversionrollbackfrom20200101_d
WHERE partition_day = regexp_replace(DATE_SUB(current_date,2) ,'-',''))a
ON m.device_id = a.device_id
where partition_date>=regexp_replace(DATE_SUB(current_date,60) ,'-','')
AND partition_date<regexp_replace((current_date),'-','')
AND active_type in ('1','2','4')
and first_channel_source_type not in ('yqxiu1','yqxiu2','yqxiu3','yqxiu4','yqxiu5','mxyc1','mxyc2','mxyc3'
,'wanpu','jinshan','jx','maimai','zhuoyi','huatian','suopingjingling','mocha','mizhe','meika','lamabang'
,'js-az1','js-az2','js-az3','js-az4','js-az5','jfq-az1','jfq-az2','jfq-az3','jfq-az4','jfq-az5','toufang1'
,'toufang2','toufang3','toufang4','toufang5','toufang6','TF-toufang1','TF-toufang2','TF-toufang3','TF-toufang4'
,'TF-toufang5','tf-toufang1','tf-toufang2','tf-toufang3','tf-toufang4','tf-toufang5','benzhan','promotion_aso100'
,'promotion_qianka','promotion_xiaoyu','promotion_dianru','promotion_malioaso','promotion_malioaso-shequ'
,'promotion_shike','promotion_julang_jl03','promotion_zuimei','','unknown')
AND first_channel_source_type not like 'promotion\_jf\_%'
)t1
LATERAL VIEW explode(t1.channel) t2 AS channel
GROUP BY partition_date,device_os_type,active_type,t2.channel
)T1
LEFT JOIN
(--内容uv/pv
SELECT partition_date
,device_os_type
,active_type
,t4.channel
,count(cl_id) as neirong_pv
,count(distinct cl_id) as neirong_uv
,count(CASE WHEN page_name IN ('diary_detail','topic_detail') THEN cl_id END) AS diary_pv
,count(distinct CASE WHEN page_name IN ('diary_detail','topic_detail') THEN cl_id END) AS diary_uv
,count(CASE WHEN page_name IN ('post_detail','user_post_detail','doctor_post_detail') THEN cl_id END) AS post_pv
,count(distinct CASE WHEN page_name IN ('post_detail','user_post_detail','doctor_post_detail') THEN cl_id END) AS post_uv
,count(CASE WHEN page_name ='question_detail' THEN cl_id END) AS question_pv
,count(distinct CASE WHEN page_name ='question_detail' THEN cl_id END) AS question_uv
,count(CASE WHEN page_name ='question_answer_detail' THEN cl_id END) AS question_answer_pv
,count(distinct CASE WHEN page_name ='question_answer_detail' THEN cl_id END) AS question_answer_uv
,count(CASE WHEN page_name='answer_detail' THEN cl_id END) AS answer_pv
,count(distinct CASE WHEN page_name='answer_detail' THEN cl_id END) AS answer_uv
,count(CASE WHEN page_name='video_steep' THEN cl_id END) AS video_pv
,count(distinct CASE WHEN page_name='video_steep' THEN cl_id END) AS video_uv
,count(CASE WHEN page_name='article_detail' THEN cl_id END) AS article_pv
,count(distinct CASE WHEN page_name='article_detail' THEN cl_id END) AS article_uv
,count(CASE WHEN page_name IN ('wiki_detail','product_detail','wiki_brand','wiki_collect') THEN cl_id END) AS wiki_pv
,count(distinct CASE WHEN page_name IN ('wiki_detail','product_detail','wiki_brand','wiki_collect') THEN cl_id END) AS wiki_uv
FROM
(
SELECT t1.partition_date,device_os_type,active_type,channel,t2.cl_id,t2.page_name
FROM
(
SELECT
partition_date
,m.device_id, device_os_type
,case WHEN active_type = '4' THEN '老活跃设备'
WHEN active_type in ('1','2') then '新增设备' END as active_type
,array(CASE WHEN first_channel_source_type like '%xinyouxingkong%' or a.device_id is not NULL THEN '可疑'
WHEN (partition_date>='20190601' and tmp.col2 = 'AI')
or (partition_date < '20200301' AND partition_date>='20190601' and first_channel_source_type like 'promotion_toutiao_jy%')
or (partition_date>='20200601' and ((first_channel_source_type like 'promotion_toutiao_jy%') or (first_channel_source_type like 'dyand%') or (first_channel_source_type like 'douyin%')))
THEN 'AI' ELSE '其他' END , '合计') as channel
FROM online.ml_device_day_active_status m
LEFT JOIN
(SELECT col1,col2 --col1:子渠道,col2:是否属于AI,col3:标识
FROM pm.tl_pm_ydl
WHERE col3='0204_danlei_channel')tmp
on first_channel_source_type=tmp.col1
LEFT JOIN
(SELECT DISTINCT device_id
FROM al.al_pm_ct_dv_deviceappversionrollbackfrom20200101_d
WHERE partition_day = regexp_replace(DATE_SUB(current_date,2) ,'-',''))a
ON m.device_id = a.device_id
where partition_date>=regexp_replace(DATE_SUB(current_date,60) ,'-','')
AND partition_date<regexp_replace((current_date),'-','')
AND active_type in ('1','2','4')
and first_channel_source_type not in ('yqxiu1','yqxiu2','yqxiu3','yqxiu4','yqxiu5','mxyc1','mxyc2','mxyc3'
,'wanpu','jinshan','jx','maimai','zhuoyi','huatian','suopingjingling','mocha','mizhe','meika','lamabang'
,'js-az1','js-az2','js-az3','js-az4','js-az5','jfq-az1','jfq-az2','jfq-az3','jfq-az4','jfq-az5','toufang1'
,'toufang2','toufang3','toufang4','toufang5','toufang6','TF-toufang1','TF-toufang2','TF-toufang3','TF-toufang4'
,'TF-toufang5','tf-toufang1','tf-toufang2','tf-toufang3','tf-toufang4','tf-toufang5','benzhan','promotion_aso100'
,'promotion_qianka','promotion_xiaoyu','promotion_dianru','promotion_malioaso','promotion_malioaso-shequ'
,'promotion_shike','promotion_julang_jl03','promotion_zuimei','','unknown')
AND first_channel_source_type not like 'promotion\_jf\_%'
)t1
JOIN
(
SELECT partition_date,page_name,cl_id
FROM online.bl_hdfs_maidian_updates
WHERE partition_date >= regexp_replace(DATE_SUB(current_date,60) ,'-','')
AND partition_date < regexp_replace((current_date),'-','')
AND action = 'page_view'
AND page_name IN ('diary_detail','topic_detail','post_detail','user_post_detail','doctor_post_detail','question_detail','answer_detail','question_answer_detail',
'video_steep','article_detail','wiki_detail','product_detail','wiki_brand','wiki_collect')
)t2
ON t1.partition_date=t2.partition_date
AND t1.device_id=t2.cl_id
LEFT JOIN
( -- 去掉疑似机构刷量的PV和UV
select distinct device_id
from ml.ml_d_ct_dv_devicespam_d
where partition_day=regexp_replace(DATE_SUB(current_date,1) ,'-','')
)spam_pv
on t2.cl_id=spam_pv.device_id
WHERE spam_pv.device_id IS NULL
)t3
LATERAL VIEW explode(t3.channel) t4 AS channel
GROUP BY partition_date,device_os_type,active_type,t4.channel
)T2
ON T1.partition_date=T2.partition_date
AND T1.device_os_type=T2.device_os_type
AND T1.active_type=T2.active_type
AND T1.channel=T2.channel
LEFT JOIN
(--内容浏览时长
SELECT partition_date
,device_os_type
,active_type
,t4.channel
,round(sum(page_stay)/count(distinct cl_id)/60,4) as neirong_stay
,round(sum(CASE WHEN page_name IN ('diary_detail','topic_detail') THEN page_stay else 0 END)/
count(distinct CASE WHEN page_name IN ('diary_detail','topic_detail') THEN cl_id END)/60,4) AS diary_stay
,round(sum(CASE WHEN page_name IN ('post_detail','user_post_detail','doctor_post_detail') THEN page_stay else 0 END)/
count(distinct CASE WHEN page_name IN ('post_detail','user_post_detail','doctor_post_detail') THEN cl_id END)/60,4) AS post_stay
,round(sum(CASE WHEN page_name ='question_detail' THEN page_stay else 0 END)/
count(distinct CASE WHEN page_name ='question_detail' THEN cl_id END)/60,4) AS question_stay
,round(sum(CASE WHEN page_name ='question_answer_detail' THEN page_stay else 0 END)/
count(distinct CASE WHEN page_name ='question_answer_detail' THEN cl_id END)/60,4) AS question_answer_stay
,round(sum(CASE WHEN page_name ='answer_detail' THEN page_stay else 0 END)/
count(distinct CASE WHEN page_name ='answer_detail' THEN cl_id END)/60,4) AS answer_stay
,round(sum(CASE WHEN page_name ='video_steep' THEN page_stay else 0 END)/
count(distinct CASE WHEN page_name ='video_steep' THEN cl_id END)/60,4) AS video_stay
,round(sum(CASE WHEN page_name ='article_detail' THEN page_stay else 0 END)/
count(distinct CASE WHEN page_name ='article_detail' THEN cl_id END)/60,4) AS article_stay
,round(sum(CASE WHEN page_name IN ('wiki_detail','product_detail','wiki_brand','wiki_collect') THEN page_stay else 0 END)/
count(distinct CASE WHEN page_name IN ('wiki_detail','product_detail','wiki_brand','wiki_collect') THEN cl_id END)/60,4) AS wiki_stay
FROM
(
SELECT t1.partition_date,device_os_type,active_type,channel,t2.cl_id,t2.page_name,t2.page_stay
FROM
(
SELECT
partition_date
,m.device_id, device_os_type
,case WHEN active_type = '4' THEN '老活跃设备'
WHEN active_type in ('1','2') then '新增设备' END as active_type
,array(CASE WHEN first_channel_source_type like '%xinyouxingkong%' or a.device_id is not NULL THEN '可疑'
WHEN (partition_date>='20190601' and tmp.col2 = 'AI')
or (partition_date < '20200301' AND partition_date>='20190601' and first_channel_source_type like 'promotion_toutiao_jy%')
or (partition_date>='20200601' and ((first_channel_source_type like 'promotion_toutiao_jy%') or (first_channel_source_type like 'dyand%') or (first_channel_source_type like 'douyin%')))
THEN 'AI' ELSE '其他' END , '合计') as channel
FROM online.ml_device_day_active_status m
LEFT JOIN
(SELECT col1,col2 --col1:子渠道,col2:是否属于AI,col3:标识
FROM pm.tl_pm_ydl
WHERE col3='0204_danlei_channel')tmp
on first_channel_source_type=tmp.col1
LEFT JOIN
(SELECT DISTINCT device_id
FROM al.al_pm_ct_dv_deviceappversionrollbackfrom20200101_d
WHERE partition_day = regexp_replace(DATE_SUB(current_date,2) ,'-',''))a
ON m.device_id = a.device_id
where partition_date>=regexp_replace(DATE_SUB(current_date,60) ,'-','')
AND partition_date<regexp_replace((current_date),'-','')
AND active_type in ('1','2','4')
and first_channel_source_type not in ('yqxiu1','yqxiu2','yqxiu3','yqxiu4','yqxiu5','mxyc1','mxyc2','mxyc3'
,'wanpu','jinshan','jx','maimai','zhuoyi','huatian','suopingjingling','mocha','mizhe','meika','lamabang'
,'js-az1','js-az2','js-az3','js-az4','js-az5','jfq-az1','jfq-az2','jfq-az3','jfq-az4','jfq-az5','toufang1'
,'toufang2','toufang3','toufang4','toufang5','toufang6','TF-toufang1','TF-toufang2','TF-toufang3','TF-toufang4'
,'TF-toufang5','tf-toufang1','tf-toufang2','tf-toufang3','tf-toufang4','tf-toufang5','benzhan','promotion_aso100'
,'promotion_qianka','promotion_xiaoyu','promotion_dianru','promotion_malioaso','promotion_malioaso-shequ'
,'promotion_shike','promotion_julang_jl03','promotion_zuimei','','unknown')
AND first_channel_source_type not like 'promotion\_jf\_%'
)t1
JOIN
(
SELECT partition_date,page_name,cl_id,page_stay
FROM online.bl_hdfs_maidian_updates
WHERE partition_date >= regexp_replace(DATE_SUB(current_date,60) ,'-','')
AND partition_date < regexp_replace((current_date),'-','')
AND action = 'page_view'
AND page_name IN ('diary_detail','topic_detail','post_detail','user_post_detail','doctor_post_detail','question_detail','answer_detail','question_answer_detail',
'video_steep','article_detail','wiki_detail','product_detail','wiki_brand','wiki_collect')
AND page_stay>=0 AND page_stay<1000
)t2
ON t1.partition_date=t2.partition_date
AND t1.device_id=t2.cl_id
LEFT JOIN
( -- 去掉疑似机构刷量的PV和UV
select distinct device_id
from ml.ml_d_ct_dv_devicespam_d
where partition_day=regexp_replace(DATE_SUB(current_date,1) ,'-','')
)spam_pv
on t2.cl_id=spam_pv.device_id
WHERE spam_pv.device_id IS NULL
)t3
LATERAL VIEW explode(t3.channel) t4 AS channel
GROUP BY partition_date,device_os_type,active_type,t4.channel
)T3
ON T1.partition_date=T3.partition_date
AND T1.device_os_type=T3.device_os_type
AND T1.active_type=T3.active_type
AND T1.channel=T3.channel
LEFT JOIN
(--内容用户留存
SELECT regexp_replace(partition_date,'-','') AS partition_date
,device_os_type,active_type,t5.channel
,int(count(DISTINCT CASE WHEN date_add(partition_date,1) = retention_date THEN device_id END)) AS retention_num1
,int(count(DISTINCT CASE WHEN date_add(partition_date,6) = retention_date THEN device_id END)) AS retention_num7
,int(count(DISTINCT CASE WHEN date_add(partition_date,29) = retention_date THEN device_id END)) AS retention_num30
FROM
(
SELECT t1.partition_date,device_os_type,active_type,channel,t3.device_id,t3.partition_date as retention_date
FROM
(
SELECT
concat_ws('-',substr(partition_date,1,4),substr(partition_date,5,2),substr(partition_date,7,2)) as partition_date
,m.device_id, device_os_type
,case WHEN active_type = '4' THEN '老活跃设备'
WHEN active_type in ('1','2') then '新增设备' END as active_type
,array(CASE WHEN first_channel_source_type like '%xinyouxingkong%' or a.device_id is not NULL THEN '可疑'
WHEN (partition_date>='20190601' and tmp.col2 = 'AI')
or (partition_date < '20200301' AND partition_date>='20190601' and first_channel_source_type like 'promotion_toutiao_jy%')
or (partition_date>='20200601' and ((first_channel_source_type like 'promotion_toutiao_jy%') or (first_channel_source_type like 'dyand%') or (first_channel_source_type like 'douyin%')))
THEN 'AI' ELSE '其他' END , '合计') as channel
FROM online.ml_device_day_active_status m
LEFT JOIN
(SELECT col1,col2 --col1:子渠道,col2:是否属于AI,col3:标识
FROM pm.tl_pm_ydl
WHERE col3='0204_danlei_channel')tmp
on first_channel_source_type=tmp.col1
LEFT JOIN
(SELECT DISTINCT device_id
FROM al.al_pm_ct_dv_deviceappversionrollbackfrom20200101_d
WHERE partition_day = regexp_replace(DATE_SUB(current_date,2) ,'-',''))a
ON m.device_id = a.device_id
where partition_date>=regexp_replace(DATE_SUB(current_date,60) ,'-','')
AND partition_date<regexp_replace((current_date),'-','')
AND active_type in ('1','2','4')
and first_channel_source_type not in ('yqxiu1','yqxiu2','yqxiu3','yqxiu4','yqxiu5','mxyc1','mxyc2','mxyc3'
,'wanpu','jinshan','jx','maimai','zhuoyi','huatian','suopingjingling','mocha','mizhe','meika','lamabang'
,'js-az1','js-az2','js-az3','js-az4','js-az5','jfq-az1','jfq-az2','jfq-az3','jfq-az4','jfq-az5','toufang1'
,'toufang2','toufang3','toufang4','toufang5','toufang6','TF-toufang1','TF-toufang2','TF-toufang3','TF-toufang4'
,'TF-toufang5','tf-toufang1','tf-toufang2','tf-toufang3','tf-toufang4','tf-toufang5','benzhan','promotion_aso100'
,'promotion_qianka','promotion_xiaoyu','promotion_dianru','promotion_malioaso','promotion_malioaso-shequ'
,'promotion_shike','promotion_julang_jl03','promotion_zuimei','','unknown')
AND first_channel_source_type not like 'promotion\_jf\_%'
)t1
JOIN
(--内容用户
SELECT cl_id
,concat_ws('-',substr(partition_date,1,4),substr(partition_date,5,2),substr(partition_date,7,2)) AS partition_date
FROM online.bl_hdfs_maidian_updates
WHERE partition_date >= regexp_replace(DATE_SUB(current_date,60) ,'-','')
AND partition_date < regexp_replace((current_date),'-','')
AND action = 'page_view'
AND page_name IN ('diary_detail','topic_detail','post_detail','user_post_detail','doctor_post_detail','question_detail','answer_detail','question_answer_detail',
'video_steep','article_detail','wiki_detail','product_detail','wiki_brand','wiki_collect')
GROUP BY cl_id,concat_ws('-',substr(partition_date,1,4),substr(partition_date,5,2),substr(partition_date,7,2))
)t2
ON t1.partition_date=t2.partition_date
AND t1.device_id=t2.cl_id
LEFT JOIN
( -- 去掉疑似机构刷量的PV和UV
select distinct device_id
from ml.ml_d_ct_dv_devicespam_d
where partition_day=regexp_replace(DATE_SUB(current_date,1) ,'-','')
)spam_pv
on t2.cl_id=spam_pv.device_id
LEFT JOIN
(--活跃设备
SELECT device_id
,concat_ws('-',substr(partition_date,1,4),substr(partition_date,5,2),substr(partition_date,7,2)) AS partition_date
FROM online.ml_device_day_active_status
WHERE partition_date >=regexp_replace(DATE_SUB(current_date,60) ,'-','')
AND partition_date < regexp_replace((current_date),'-','')
)t3
ON t2.cl_id=t3.device_id
WHERE spam_pv.device_id IS NULL
)t4
LATERAL VIEW explode(t4.channel) t5 AS channel
GROUP BY regexp_replace(partition_date,'-',''),device_os_type,active_type,t5.channel
)T4
ON T1.partition_date=T4.partition_date
AND T1.device_os_type=T4.device_os_type
AND T1.active_type=T4.active_type
AND T1.channel=T4.channel
LEFT JOIN
(--内容用户单设备app时长(m)
SELECT partition_date
,device_os_type
,active_type
,t5.channel
,round(sum(use_duration)/count(distinct cl_id)/60,4) as app_duration
,round(avg(open_times),4) as avg_opentimes
FROM
(
SELECT t1.partition_date,device_os_type,active_type,channel,t2.cl_id,t3.use_duration,t3.open_times
FROM
(
SELECT
partition_date
,m.device_id, device_os_type
,case WHEN active_type = '4' THEN '老活跃设备'
WHEN active_type in ('1','2') then '新增设备' END as active_type
,array(CASE WHEN first_channel_source_type like '%xinyouxingkong%' or a.device_id is not NULL THEN '可疑'
WHEN (partition_date>='20190601' and tmp.col2 = 'AI')
or (partition_date < '20200301' AND partition_date>='20190601' and first_channel_source_type like 'promotion_toutiao_jy%')
or (partition_date>='20200601' and ((first_channel_source_type like 'promotion_toutiao_jy%') or (first_channel_source_type like 'dyand%') or (first_channel_source_type like 'douyin%')))
THEN 'AI' ELSE '其他' END , '合计') as channel
FROM online.ml_device_day_active_status m
LEFT JOIN
(SELECT col1,col2 --col1:子渠道,col2:是否属于AI,col3:标识
FROM pm.tl_pm_ydl
WHERE col3='0204_danlei_channel')tmp
on first_channel_source_type=tmp.col1
LEFT JOIN
(SELECT DISTINCT device_id
FROM al.al_pm_ct_dv_deviceappversionrollbackfrom20200101_d
WHERE partition_day = regexp_replace(DATE_SUB(current_date,2) ,'-',''))a
ON m.device_id = a.device_id
where partition_date>=regexp_replace(DATE_SUB(current_date,60) ,'-','')
AND partition_date<regexp_replace((current_date),'-','')
AND active_type in ('1','2','4')
and first_channel_source_type not in ('yqxiu1','yqxiu2','yqxiu3','yqxiu4','yqxiu5','mxyc1','mxyc2','mxyc3'
,'wanpu','jinshan','jx','maimai','zhuoyi','huatian','suopingjingling','mocha','mizhe','meika','lamabang'
,'js-az1','js-az2','js-az3','js-az4','js-az5','jfq-az1','jfq-az2','jfq-az3','jfq-az4','jfq-az5','toufang1'
,'toufang2','toufang3','toufang4','toufang5','toufang6','TF-toufang1','TF-toufang2','TF-toufang3','TF-toufang4'
,'TF-toufang5','tf-toufang1','tf-toufang2','tf-toufang3','tf-toufang4','tf-toufang5','benzhan','promotion_aso100'
,'promotion_qianka','promotion_xiaoyu','promotion_dianru','promotion_malioaso','promotion_malioaso-shequ'
,'promotion_shike','promotion_julang_jl03','promotion_zuimei','','unknown')
AND first_channel_source_type not like 'promotion\_jf\_%'
)t1
JOIN
(--内容用户
SELECT partition_date,cl_id
FROM online.bl_hdfs_maidian_updates
WHERE partition_date >= regexp_replace(DATE_SUB(current_date,60) ,'-','')
AND partition_date < regexp_replace((current_date),'-','')
AND action = 'page_view'
AND page_name IN ('diary_detail','topic_detail','post_detail','user_post_detail','doctor_post_detail','question_detail','answer_detail','question_answer_detail',
'video_steep','article_detail','wiki_detail','product_detail','wiki_brand','wiki_collect')
GROUP BY partition_date,cl_id
)t2
ON t1.partition_date=t2.partition_date
AND t1.device_id=t2.cl_id
LEFT JOIN
( -- 去掉疑似机构刷量的PV和UV
select distinct device_id
from ml.ml_d_ct_dv_devicespam_d
where partition_day=regexp_replace(DATE_SUB(current_date,1) ,'-','')
)spam_pv
on t2.cl_id=spam_pv.device_id
LEFT JOIN
(
SELECT partition_date,device_id,use_duration,open_times
FROM online.ml_device_updates
WHERE partition_date>=regexp_replace(DATE_SUB(current_date,60) ,'-','')
AND partition_date<regexp_replace((current_date),'-','')
AND use_duration > 0 and use_duration < 86400
)t3
on t2.partition_date=t3.partition_date
AND t2.cl_id=t3.device_id
WHERE spam_pv.device_id IS NULL
)t4
LATERAL VIEW explode(t4.channel) t5 AS channel
GROUP BY partition_date,device_os_type,active_type,t5.channel
)T5
ON T1.partition_date=T5.partition_date
AND T1.device_os_type=T5.device_os_type
AND T1.active_type=T5.active_type
AND T1.channel=T5.channel
LEFT JOIN
(--不同来源进入内容uv/pv
SELECT partition_date
,device_os_type
,active_type
,t4.channel
,count(CASE WHEN referrer='search' THEN cl_id END) AS search_pv
,count(distinct CASE WHEN referrer='search' THEN cl_id END) AS search_uv
,count(CASE WHEN referrer='zone_v3' THEN cl_id END) AS zone_pv
,count(distinct CASE WHEN referrer='zone_v3' THEN cl_id END) AS zone_uv
,count(CASE WHEN referrer='feeds' THEN cl_id END) AS feeds_pv
,count(distinct CASE WHEN referrer='feeds' THEN cl_id END) AS feeds_uv
,count(CASE WHEN referrer='recommend' THEN cl_id END) AS recommend_pv
,count(distinct CASE WHEN referrer='recommend' THEN cl_id END) AS recommend_uv
,count(CASE WHEN referrer='content' THEN cl_id END) as content_pv
,count(distinct CASE WHEN referrer='content' THEN cl_id END) as content_uv
,count(CASE WHEN referrer='blank' THEN cl_id END) as blank_pv
,count(distinct CASE WHEN referrer='blank' THEN cl_id END) as blank_uv
,count(CASE WHEN referrer='comment' THEN cl_id END) as comment_pv
,count(distinct CASE WHEN referrer='comment' THEN cl_id END) as comment_uv
,count(CASE WHEN referrer='org' THEN cl_id END) as org_pv
,count(distinct CASE WHEN referrer='org' THEN cl_id END) as org_uv
,count(CASE WHEN referrer='category' THEN cl_id END) as category_pv
,count(distinct CASE WHEN referrer='category' THEN cl_id END) as category_uv
,count(CASE WHEN referrer='my_diary' THEN cl_id END) as my_diary_pv
,count(distinct CASE WHEN referrer='my_diary' THEN cl_id END) as my_diary_uv
,count(CASE WHEN referrer='ai' THEN cl_id END) as ai_pv
,count(distinct CASE WHEN referrer='ai' THEN cl_id END) as ai_uv
FROM
(
SELECT t1.partition_date,device_os_type,active_type,channel,t2.cl_id,t2.referrer
FROM
(
SELECT
partition_date
,m.device_id, device_os_type
,case WHEN active_type = '4' THEN '老活跃设备'
WHEN active_type in ('1','2') then '新增设备' END as active_type
,array(CASE WHEN first_channel_source_type like '%xinyouxingkong%' or a.device_id is not NULL THEN '可疑'
WHEN (partition_date>='20190601' and tmp.col2 = 'AI')
or (partition_date < '20200301' AND partition_date>='20190601' and first_channel_source_type like 'promotion_toutiao_jy%')
or (partition_date>='20200601' and ((first_channel_source_type like 'promotion_toutiao_jy%') or (first_channel_source_type like 'dyand%') or (first_channel_source_type like 'douyin%')))
THEN 'AI' ELSE '其他' END , '合计') as channel
FROM online.ml_device_day_active_status m
LEFT JOIN
(SELECT col1,col2 --col1:子渠道,col2:是否属于AI,col3:标识
FROM pm.tl_pm_ydl
WHERE col3='0204_danlei_channel')tmp
on first_channel_source_type=tmp.col1
LEFT JOIN
(SELECT DISTINCT device_id
FROM al.al_pm_ct_dv_deviceappversionrollbackfrom20200101_d
WHERE partition_day = regexp_replace(DATE_SUB(current_date,2) ,'-',''))a
ON m.device_id = a.device_id
where partition_date>=regexp_replace(DATE_SUB(current_date,60) ,'-','')
AND partition_date<regexp_replace((current_date),'-','')
AND active_type in ('1','2','4')
and first_channel_source_type not in ('yqxiu1','yqxiu2','yqxiu3','yqxiu4','yqxiu5','mxyc1','mxyc2','mxyc3'
,'wanpu','jinshan','jx','maimai','zhuoyi','huatian','suopingjingling','mocha','mizhe','meika','lamabang'
,'js-az1','js-az2','js-az3','js-az4','js-az5','jfq-az1','jfq-az2','jfq-az3','jfq-az4','jfq-az5','toufang1'
,'toufang2','toufang3','toufang4','toufang5','toufang6','TF-toufang1','TF-toufang2','TF-toufang3','TF-toufang4'
,'TF-toufang5','tf-toufang1','tf-toufang2','tf-toufang3','tf-toufang4','tf-toufang5','benzhan','promotion_aso100'
,'promotion_qianka','promotion_xiaoyu','promotion_dianru','promotion_malioaso','promotion_malioaso-shequ'
,'promotion_shike','promotion_julang_jl03','promotion_zuimei','','unknown')
AND first_channel_source_type not like 'promotion\_jf\_%'
)t1
LEFT JOIN
( --不同页面进入内容
SELECT partition_date,cl_id
,case when referrer like 'search_result%' then 'search'
when referrer ='' then 'blank'
when referrer = 'zone_v3' then 'zone_v3'
when referrer='all_case_service_comment' then 'comment'
when referrer in ('organization_detail','expert_detail') then 'org'
when referrer = 'category' then 'category'
when referrer ='my_diary' then 'my_diary'
when referrer in ('face_detect_result','report_result') then 'ai'
when referrer in ('diary_detail','topic_detail','post_detail','user_post_detail','doctor_post_detail','question_detail','answer_detail','question_answer_detail') then 'content'
else null end as referrer
FROM online.bl_hdfs_maidian_updates
WHERE partition_date >= regexp_replace(DATE_SUB(current_date,60) ,'-','')
AND partition_date < regexp_replace((current_date),'-','')
and action='page_view'
AND page_name IN ('diary_detail','topic_detail','post_detail','user_post_detail','doctor_post_detail','question_detail','answer_detail','question_answer_detail',
'video_steep','article_detail','wiki_detail','product_detail','wiki_brand','wiki_collect')
UNION ALL
--首页feeds进入内容(首页非策略卡片点击)
SELECT partition_date,cl_id,'feeds' as referrer
FROM online.bl_hdfs_maidian_updates
WHERE partition_date >= regexp_replace(DATE_SUB(current_date,60) ,'-','')
AND partition_date < regexp_replace((current_date),'-','')
AND page_name = 'home'
AND action = 'on_click_card'
AND params['transaction_type'] not in ('-1','ctr','cvr','smr','newdata')
AND params['card_content_type'] IN ('diary','diary_topic','user_post','doctor_post','question','answer','qa','live','article')
UNION ALL
--首页feeds进入内容(首页非策略卡片点击) 7.8.0版本前的埋点
SELECT partition_date,cl_id,'feeds' as referrer
FROM online.bl_hdfs_maidian_updates
WHERE partition_date >=regexp_replace(DATE_SUB(current_date,60) ,'-','')
AND partition_date < regexp_replace((current_date),'-','')
AND page_name = 'home'
AND action in ( 'on_click_diary_card','on_click_answer_card','on_click_question_card','on_click_topic_card','on_click_live_card')
AND params['transaction_type'] not in ('-1','ctr','cvr','smr','newdata')
UNION ALL
--推荐进入内容(首页策略卡片点击),5月7日新增transaction_type类型
SELECT partition_date,cl_id,'recommend' as referrer
FROM online.bl_hdfs_maidian_updates
WHERE partition_date >= regexp_replace(DATE_SUB(current_date,60) ,'-','')
AND partition_date < regexp_replace((current_date),'-','')
AND page_name = 'home'
AND action = 'on_click_card'
AND params['transaction_type'] in ('-1','ctr','cvr','smr','newdata')
AND params['card_content_type'] IN ('diary','diary_topic','user_post','doctor_post','question','answer','qa','live','article')
UNION ALL
--推荐进入内容(首页策略卡片点击) 7.8.0版本前的埋点
SELECT partition_date,cl_id,'feeds' as referrer
FROM online.bl_hdfs_maidian_updates
WHERE partition_date >= regexp_replace(DATE_SUB(current_date,60) ,'-','')
AND partition_date < regexp_replace((current_date),'-','')
AND page_name = 'home'
AND action in ('on_click_diary_card','on_click_answer_card','on_click_question_card','on_click_topic_card','on_click_live_card')
AND params['transaction_type'] in ('-1','ctr','cvr','smr','newdata')
)t2
ON t1.partition_date=t2.partition_date
AND t1.device_id=t2.cl_id
LEFT JOIN
( -- 去掉疑似机构刷量的PV和UV
select distinct device_id
from ml.ml_d_ct_dv_devicespam_d
where partition_day=regexp_replace(DATE_SUB(current_date,1) ,'-','')
)spam_pv
on t2.cl_id=spam_pv.device_id
WHERE spam_pv.device_id IS NULL
)t3
LATERAL VIEW explode(t3.channel) t4 AS channel
GROUP BY partition_date,device_os_type,active_type,t4.channel
)T6
ON T1.partition_date=T6.partition_date
AND T1.device_os_type=T6.device_os_type
AND T1.active_type=T6.active_type
AND T1.channel=T6.channel
LEFT JOIN
(--真实发帖数
SELECT partition_date
,device_os_type
,active_type
,t7.channel
,count(distinct id) as num
FROM
(
SELECT t1.partition_date,device_os_type,active_type,channel,t3.id
FROM
(
SELECT
partition_date
,m.device_id, device_os_type
,case WHEN active_type = '4' THEN '老活跃设备'
WHEN active_type in ('1','2') then '新增设备' END as active_type
,array(CASE WHEN first_channel_source_type like '%xinyouxingkong%' or a.device_id is not NULL THEN '可疑'
WHEN (partition_date>='20190601' and tmp.col2 = 'AI')
or (partition_date < '20200301' AND partition_date>='20190601' and first_channel_source_type like 'promotion_toutiao_jy%')
or (partition_date>='20200601' and ((first_channel_source_type like 'promotion_toutiao_jy%') or (first_channel_source_type like 'dyand%') or (first_channel_source_type like 'douyin%')))
THEN 'AI' ELSE '其他' END , '合计') as channel
FROM online.ml_device_day_active_status m
LEFT JOIN
(SELECT col1,col2 --col1:子渠道,col2:是否属于AI,col3:标识
FROM pm.tl_pm_ydl
WHERE col3='0204_danlei_channel')tmp
on first_channel_source_type=tmp.col1
LEFT JOIN
(SELECT DISTINCT device_id
FROM al.al_pm_ct_dv_deviceappversionrollbackfrom20200101_d
WHERE partition_day = regexp_replace(DATE_SUB(current_date,2) ,'-',''))a
ON m.device_id = a.device_id
where partition_date>=regexp_replace(DATE_SUB(current_date,60) ,'-','')
AND partition_date<regexp_replace((current_date),'-','')
AND active_type in ('1','2','4')
and first_channel_source_type not in ('yqxiu1','yqxiu2','yqxiu3','yqxiu4','yqxiu5','mxyc1','mxyc2','mxyc3'
,'wanpu','jinshan','jx','maimai','zhuoyi','huatian','suopingjingling','mocha','mizhe','meika','lamabang'
,'js-az1','js-az2','js-az3','js-az4','js-az5','jfq-az1','jfq-az2','jfq-az3','jfq-az4','jfq-az5','toufang1'
,'toufang2','toufang3','toufang4','toufang5','toufang6','TF-toufang1','TF-toufang2','TF-toufang3','TF-toufang4'
,'TF-toufang5','tf-toufang1','tf-toufang2','tf-toufang3','tf-toufang4','tf-toufang5','benzhan','promotion_aso100'
,'promotion_qianka','promotion_xiaoyu','promotion_dianru','promotion_malioaso','promotion_malioaso-shequ'
,'promotion_shike','promotion_julang_jl03','promotion_zuimei','','unknown')
AND first_channel_source_type not like 'promotion\_jf\_%'
)t1
LEFT JOIN
(--找出user_id当天活跃的第一个设备id
SELECT user_id,partition_date,
if(size(device_list) > 0, device_list [ 0 ], '') AS device_id
FROM online.ml_user_updates
WHERE partition_date>=regexp_replace(DATE_SUB(current_date,60) ,'-','')
AND partition_date<regexp_replace((current_date),'-','')
)t2
ON t1.partition_date=t2.partition_date
AND t1.device_id=t2.device_id
LEFT JOIN
(--通过user_id,找到发帖情况
--新增帖子
SELECT user_id,id,regexp_replace(substr(create_time,1,10),'-','') as create_date
FROM online.tl_hdfs_api_tractate_view --发帖情况表
WHERE partition_date = regexp_replace(DATE_SUB(current_date,1) ,'-','')
AND is_online='true'
AND platform in ('1','7') --更美用户发的以及打卡的(去除hera后台,爬虫抓取的,kyc自动回复的)
AND regexp_replace(substr(create_time,1,10),'-','')>=regexp_replace(DATE_SUB(current_date,60) ,'-','')
AND regexp_replace(substr(create_time,1,10),'-','')< regexp_replace((current_date),'-','')
UNION ALL
--新增日记本
SELECT a.user_id,a.id,a.create_date
FROM
(
SELECT user_id,id,regexp_replace(substr(created_time,1,10),'-','') as create_date
FROM online.tl_hdfs_diary_view
WHERE partition_date = regexp_replace(DATE_SUB(current_date,1) ,'-','')
and regexp_replace(substr(created_time,1,10),'-','') >= regexp_replace(DATE_SUB(current_date,60) ,'-','')
and regexp_replace(substr(created_time,1,10),'-','') < regexp_replace((current_date),'-','')
)a
JOIN
( --取非空日记
SELECT diary_id
FROM online.tl_hdfs_problem_view
WHERE partition_date = regexp_replace(DATE_SUB(current_date,1) ,'-','')
AND is_spam = 'false'
)b
ON a.id = b.diary_id
UNION ALL
--新增日记贴
SELECT user_id,id,regexp_replace(substr(created_time,1,10),'-','') as create_date
FROM online.tl_hdfs_problem_view
WHERE partition_date = regexp_replace(DATE_SUB(current_date,1) ,'-','')
AND regexp_replace(substr(created_time, 1, 10), '-', '') >= regexp_replace(DATE_SUB(current_date,60) ,'-','')
AND regexp_replace(substr(created_time, 1, 10), '-', '') < regexp_replace((current_date),'-','')
AND is_spam = 'false'
AND diary_id is not null
UNION ALL
--新增问题数
SELECT user_id,id,regexp_replace(substr(create_time,1,10),'-','') as create_date
FROM online.tl_hdfs_question_view
WHERE partition_date = regexp_replace(DATE_SUB(current_date,1) ,'-','')
AND regexp_replace(substr(create_time, 1, 10), '-', '') >=regexp_replace(DATE_SUB(current_date,60) ,'-','')
AND regexp_replace(substr(create_time, 1, 10), '-', '') < regexp_replace((current_date),'-','')
AND is_spam = 'false'
AND platform= '99' --更美用户发的(去除hera后台,爬虫抓取的,kyc自动回复的)
UNION ALL
--新增回答数
SELECT user_id,id,regexp_replace(substr(create_time,1,10),'-','') as create_date
FROM online.tl_hdfs_answer_view
WHERE partition_date = regexp_replace(DATE_SUB(current_date,1) ,'-','')
AND regexp_replace(substr(create_time, 1, 10), '-', '') >= regexp_replace(DATE_SUB(current_date,60) ,'-','')
AND regexp_replace(substr(create_time, 1, 10), '-', '') < regexp_replace((current_date),'-','')
AND is_spam = 'false'
AND platform= '99' --更美用户发的(去除hera后台,爬虫抓取的,kyc自动回复的)
)t3
ON t2.partition_date = t3.create_date
AND t2.user_id = t3.user_id
JOIN --限制用户是在app进行的发帖
(
SELECT a.partition_date,user_id
FROM
(
SELECT partition_date,user_id,action
FROM online.bl_hdfs_operation_updates
WHERE partition_date>=regexp_replace(DATE_SUB(current_date,60) ,'-','')
AND partition_date< regexp_replace((current_date),'-','')
)a
JOIN
(
SELECT code
FROM dim.dim_community_action_type
WHERE communityuserbehavior_type_name = '发帖'
)type
ON a.action = code
GROUP BY a.partition_date,user_id
)t4
ON t3.user_id = t4.user_id
AND t3.create_date = t4.partition_date
LEFT JOIN
( --医生账号
SELECT distinct user_id
FROM online.tl_hdfs_doctor_view
WHERE partition_date = regexp_replace(DATE_SUB(current_date,1) ,'-','')
--马甲账号/模特用户
UNION ALL
SELECT user_id
FROM ml.ml_c_ct_ui_user_dimen_d
WHERE partition_day = regexp_replace(DATE_SUB(current_date,1) ,'-','')
AND (is_puppet = 'true' or is_classifyuser = 'true')
UNION ALL
--公司内网覆盖用户
select distinct user_id
from dim.dim_device_user_staff
UNION ALL
--登陆过医生设备
SELECT distinct t1.user_id
FROM
(
SELECT user_id, v.device_id as device_id
FROM online.ml_user_history_detail
LATERAL VIEW EXPLODE(device_history_list) v AS device_id
WHERE partition_date = regexp_replace(DATE_SUB(current_date,1) ,'-','')
) t1
JOIN
(
SELECT device_id
FROM online.ml_device_history_detail
WHERE partition_date = regexp_replace(DATE_SUB(current_date,1) ,'-','')
AND is_login_doctor = '1'
) t2
ON t1.device_id = t2.device_id
)t5
ON t3.user_id=t5.user_id
where (t5.user_id is null or t5.user_id = '')
)t6
LATERAL VIEW explode(t6.channel) t7 AS channel
GROUP BY partition_date,device_os_type,active_type,t7.channel
)T7
ON T1.partition_date=T7.partition_date
AND T1.device_os_type=T7.device_os_type
AND T1.active_type=T7.active_type
AND T1.channel=T7.channel
LEFT JOIN
(--真实评论数
SELECT partition_date
,device_os_type
,active_type
,t7.channel
,count(distinct id) as num
FROM
(
SELECT t1.partition_date,device_os_type,active_type,channel,t3.id,t3.type
FROM
(
SELECT
partition_date
,m.device_id, device_os_type
,case WHEN active_type = '4' THEN '老活跃设备'
WHEN active_type in ('1','2') then '新增设备' END as active_type
,array(CASE WHEN first_channel_source_type like '%xinyouxingkong%' or a.device_id is not NULL THEN '可疑'
WHEN (partition_date>='20190601' and tmp.col2 = 'AI')
or (partition_date < '20200301' AND partition_date>='20190601' and first_channel_source_type like 'promotion_toutiao_jy%')
or (partition_date>='20200601' and ((first_channel_source_type like 'promotion_toutiao_jy%') or (first_channel_source_type like 'dyand%') or (first_channel_source_type like 'douyin%')))
THEN 'AI' ELSE '其他' END , '合计') as channel
FROM online.ml_device_day_active_status m
LEFT JOIN
(SELECT col1,col2 --col1:子渠道,col2:是否属于AI,col3:标识
FROM pm.tl_pm_ydl
WHERE col3='0204_danlei_channel')tmp
on first_channel_source_type=tmp.col1
LEFT JOIN
(SELECT DISTINCT device_id
FROM al.al_pm_ct_dv_deviceappversionrollbackfrom20200101_d
WHERE partition_day = regexp_replace(DATE_SUB(current_date,2) ,'-',''))a
ON m.device_id = a.device_id
where partition_date>=regexp_replace(DATE_SUB(current_date,60) ,'-','')
AND partition_date<regexp_replace((current_date),'-','')
AND active_type in ('1','2','4')
and first_channel_source_type not in ('yqxiu1','yqxiu2','yqxiu3','yqxiu4','yqxiu5','mxyc1','mxyc2','mxyc3'
,'wanpu','jinshan','jx','maimai','zhuoyi','huatian','suopingjingling','mocha','mizhe','meika','lamabang'
,'js-az1','js-az2','js-az3','js-az4','js-az5','jfq-az1','jfq-az2','jfq-az3','jfq-az4','jfq-az5','toufang1'
,'toufang2','toufang3','toufang4','toufang5','toufang6','TF-toufang1','TF-toufang2','TF-toufang3','TF-toufang4'
,'TF-toufang5','tf-toufang1','tf-toufang2','tf-toufang3','tf-toufang4','tf-toufang5','benzhan','promotion_aso100'
,'promotion_qianka','promotion_xiaoyu','promotion_dianru','promotion_malioaso','promotion_malioaso-shequ'
,'promotion_shike','promotion_julang_jl03','promotion_zuimei','','unknown')
AND first_channel_source_type not like 'promotion\_jf\_%'
)t1
LEFT JOIN
(--找出user_id当天活跃的第一个设备id
SELECT user_id,partition_date,
if(size(device_list) > 0, device_list [ 0 ], '') AS device_id
FROM online.ml_user_updates
WHERE partition_date>=regexp_replace(DATE_SUB(current_date,60) ,'-','')
AND partition_date<regexp_replace((current_date),'-','')
)t2
ON t1.partition_date=t2.partition_date
AND t1.device_id=t2.device_id
LEFT JOIN
( --有评论过日记帖的设备,排除疑似广告
SELECT t1.user_id,reply_date,t1.id,'topic_reply' as type
FROM
(
SELECT user_id,regexp_replace(substr(reply_date,1,10),'-','') as reply_date,problem_id,id
FROM online.tl_hdfs_topicreply_view
WHERE partition_date = regexp_replace(DATE_SUB(current_date,1) ,'-','')
AND is_spam = 'false' --排除疑似广告
-- and diary_id is not null 这个表的diary_id有问题,需要join problem表来判断是不是属于日记
and regexp_replace(substr(reply_date,1,10),'-','') >= regexp_replace(DATE_SUB(current_date,60) ,'-','')
and regexp_replace(substr(reply_date,1,10),'-','') < regexp_replace((current_date),'-','')
)t1
JOIN
(
SELECT id,diary_id
FROM online.tl_hdfs_problem_view
WHERE partition_date = regexp_replace(DATE_SUB(current_date,1) ,'-','')
group by id,diary_id
)t2
on t2.id=t1.problem_id
--group by t1.user_id,reply_date
UNION ALL
--有评论过回答的设备,排除疑似广告
SELECT
t1.user_id,t1.reply_date,t1.id as id,'answer_reply' as type
FROM
(
SELECT user_id,regexp_replace(substr(create_time,1,10),'-','') as reply_date,answer_id,id
FROM online.tl_hdfs_answer_reply_view
WHERE partition_date = regexp_replace(DATE_SUB(current_date,1) ,'-','')
AND (is_fake is NULL or is_fake = 'false')
AND answer_id is not NULL
and regexp_replace(substr(create_time,1,10),'-','') >= regexp_replace(DATE_SUB(current_date,60) ,'-','')
and regexp_replace(substr(create_time,1,10),'-','') < regexp_replace((current_date),'-','')
)t1
JOIN
(
SELECT id,question_id
FROM online.tl_hdfs_answer_view
WHERE partition_date =regexp_replace(DATE_SUB(current_date,1) ,'-','')
group by id,question_id
)t2
ON t2.id = t1.answer_id
UNION ALL
--有评论过用户帖的设备
SELECT user_id,regexp_replace(substr(create_time,1,10),'-','') as reply_date,id,'tractate_reply' as type
FROM online.tl_hdfs_api_tractate_reply_view
WHERE partition_date =regexp_replace(DATE_SUB(current_date,1) ,'-','')
and regexp_replace(substr(create_time,1,10),'-','') >=regexp_replace(DATE_SUB(current_date,60) ,'-','')
and regexp_replace(substr(create_time,1,10),'-','') < regexp_replace((current_date),'-','')
)t3
ON t2.partition_date = t3.reply_date
AND t2.user_id = t3.user_id
JOIN --限制用户是在app进行的回复
(
SELECT a.partition_date,user_id
FROM
(
SELECT partition_date,user_id,action
FROM online.bl_hdfs_operation_updates
WHERE partition_date>=regexp_replace(DATE_SUB(current_date,60) ,'-','')
AND partition_date< regexp_replace((current_date),'-','')
)a
JOIN
(
SELECT code
FROM dim.dim_community_action_type
WHERE communityuserbehavior_type_name = '回帖'
)type
ON a.action = code
GROUP BY a.partition_date,user_id
)t4
ON t3.user_id = t4.user_id
AND t3.reply_date = t4.partition_date
LEFT JOIN
( --医生账号
SELECT distinct user_id
FROM online.tl_hdfs_doctor_view
WHERE partition_date = regexp_replace(DATE_SUB(current_date,1) ,'-','')
--马甲账号/模特用户
UNION ALL
SELECT user_id
FROM ml.ml_c_ct_ui_user_dimen_d
WHERE partition_day = regexp_replace(DATE_SUB(current_date,1) ,'-','')
AND (is_puppet = 'true' or is_classifyuser = 'true')
UNION ALL
--公司内网覆盖用户
select distinct user_id
from dim.dim_device_user_staff
UNION ALL
--登陆过医生设备
SELECT distinct t1.user_id
FROM
(
SELECT user_id, v.device_id as device_id
FROM online.ml_user_history_detail
LATERAL VIEW EXPLODE(device_history_list) v AS device_id
WHERE partition_date = regexp_replace(DATE_SUB(current_date,1) ,'-','')
) t1
JOIN
(
SELECT device_id
FROM online.ml_device_history_detail
WHERE partition_date = regexp_replace(DATE_SUB(current_date,1) ,'-','')
AND is_login_doctor = '1'
) t2
ON t1.device_id = t2.device_id
)t5
ON t3.user_id=t5.user_id
where (t5.user_id is null or t5.user_id = '')
)t6
LATERAL VIEW explode(t6.channel) t7 AS channel
GROUP BY partition_date,device_os_type,active_type,t7.channel
)T8
ON T1.partition_date=T8.partition_date
AND T1.device_os_type=T8.device_os_type
AND T1.active_type=T8.active_type
AND T1.channel=T8.channel
LEFT JOIN
(--部分页面的单设备页面浏览时长
SELECT partition_date
,device_os_type
,active_type
,t5.channel
,round(sum(CASE WHEN page_name like 'search%' THEN page_stay else 0 END)/count(distinct cl_id)/60,4) AS search_stay
,round(sum(CASE WHEN page_name = 'welfare_detail' THEN page_stay else 0 END)/count(distinct cl_id)/60,4) AS welfare_stay
,round(sum(CASE WHEN page_name = 'question_detail' THEN page_stay else 0 END)/count(distinct cl_id)/60,4) AS question_stay
,round(sum(CASE WHEN page_name in ('report_result','face_scan','face_detect_result','face_scan_loading','face_institute_report') THEN page_stay else 0 END)/count(distinct cl_id)/60,4) AS ai_related_stay
,round(sum(CASE WHEN page_name = 'diary_detail' THEN page_stay else 0 END)/count(distinct cl_id)/60,4) AS diary_stay
,round(sum(CASE WHEN page_name = 'home' THEN page_stay else 0 END)/count(distinct cl_id)/60,4) AS home_stay
,round(sum(CASE WHEN page_name in ('conversation_detail','consult_home') THEN page_stay else 0 END)/count(distinct cl_id)/60,4) AS conv_stay
FROM
(
SELECT t1.partition_date,device_os_type,active_type,channel,t2.cl_id,t3.page_name,t3.page_stay
FROM
(
SELECT
partition_date
,m.device_id, device_os_type
,case WHEN active_type = '4' THEN '老活跃设备'
WHEN active_type in ('1','2') then '新增设备' END as active_type
,array(CASE WHEN first_channel_source_type like '%xinyouxingkong%' or a.device_id is not NULL THEN '可疑'
WHEN (partition_date>='20190601' and tmp.col2 = 'AI')
or (partition_date < '20200301' AND partition_date>='20190601' and first_channel_source_type like 'promotion_toutiao_jy%')
or (partition_date>='20200601' and ((first_channel_source_type like 'promotion_toutiao_jy%') or (first_channel_source_type like 'dyand%') or (first_channel_source_type like 'douyin%')))
THEN 'AI' ELSE '其他' END , '合计') as channel
FROM online.ml_device_day_active_status m
LEFT JOIN
(SELECT col1,col2 --col1:子渠道,col2:是否属于AI,col3:标识
FROM pm.tl_pm_ydl
WHERE col3='0204_danlei_channel')tmp
on first_channel_source_type=tmp.col1
LEFT JOIN
(SELECT DISTINCT device_id
FROM al.al_pm_ct_dv_deviceappversionrollbackfrom20200101_d
WHERE partition_day = regexp_replace(DATE_SUB(current_date,2) ,'-',''))a
ON m.device_id = a.device_id
where partition_date>=regexp_replace(DATE_SUB(current_date,60) ,'-','')
AND partition_date<regexp_replace((current_date),'-','')
AND active_type in ('1','2','4')
and first_channel_source_type not in ('yqxiu1','yqxiu2','yqxiu3','yqxiu4','yqxiu5','mxyc1','mxyc2','mxyc3'
,'wanpu','jinshan','jx','maimai','zhuoyi','huatian','suopingjingling','mocha','mizhe','meika','lamabang'
,'js-az1','js-az2','js-az3','js-az4','js-az5','jfq-az1','jfq-az2','jfq-az3','jfq-az4','jfq-az5','toufang1'
,'toufang2','toufang3','toufang4','toufang5','toufang6','TF-toufang1','TF-toufang2','TF-toufang3','TF-toufang4'
,'TF-toufang5','tf-toufang1','tf-toufang2','tf-toufang3','tf-toufang4','tf-toufang5','benzhan','promotion_aso100'
,'promotion_qianka','promotion_xiaoyu','promotion_dianru','promotion_malioaso','promotion_malioaso-shequ'
,'promotion_shike','promotion_julang_jl03','promotion_zuimei','','unknown')
AND first_channel_source_type not like 'promotion\_jf\_%'
)t1
JOIN
(--内容用户
SELECT partition_date,cl_id
FROM online.bl_hdfs_maidian_updates
WHERE partition_date >= regexp_replace(DATE_SUB(current_date,60) ,'-','')
AND partition_date < regexp_replace((current_date),'-','')
AND action = 'page_view'
AND page_name IN ('diary_detail','topic_detail','post_detail','user_post_detail','doctor_post_detail','question_detail','answer_detail','question_answer_detail',
'video_steep','article_detail','wiki_detail','product_detail','wiki_brand','wiki_collect')
GROUP BY partition_date,cl_id
)t2
ON t1.partition_date=t2.partition_date
AND t1.device_id=t2.cl_id
LEFT JOIN
(--部分页面的停留时长
SELECT partition_date,cl_id,page_name,page_stay
FROM online.bl_hdfs_maidian_updates
WHERE partition_date >= regexp_replace(DATE_SUB(current_date,60) ,'-','')
AND partition_date < regexp_replace((current_date),'-','')
AND action = 'page_view'
AND (page_name like 'search%' or page_name IN ('welfare_detail','question_detail','report_result','face_scan'
,'face_detect_result','face_scan_loading','face_institute_report','diary_detail','home','conversation_detail','consult_home') )
AND page_name!='search_result'--android埋点会在上报search_result_more时重复上报search_result的埋点,导致page_stay重复计算
AND page_stay>=0 AND page_stay<1000
)t3
ON t2.partition_date=t3.partition_date
AND t2.cl_id=t3.cl_id
LEFT JOIN
( -- 去掉疑似机构刷量的PV和UV
select distinct device_id
from ml.ml_d_ct_dv_devicespam_d
where partition_day=regexp_replace(DATE_SUB(current_date,1) ,'-','')
)spam_pv
on t2.cl_id=spam_pv.device_id
WHERE spam_pv.device_id IS NULL
)t4
LATERAL VIEW explode(t4.channel) t5 AS channel
GROUP BY partition_date,device_os_type,active_type,t5.channel
)T9
ON T1.partition_date=T9.partition_date
AND T1.device_os_type=T9.device_os_type
AND T1.active_type=T9.active_type
AND T1.channel=T9.channel
ORDER BY day_id desc,device_os_type,active_type,is_ai_channel
#step1_10.job
type=command
command=sh /home/bi/bi-report/lib/shell/waitsuccess.sh hive online tl_hdfs_answer_view
\ No newline at end of file
#step1_11.job
type=command
command=sh /home/bi/bi-report/lib/shell/waitsuccess.sh hive online tl_hdfs_topicreply_view
\ No newline at end of file
#step1_12.job
type=command
command=sh /home/bi/bi-report/lib/shell/waitsuccess.sh hive online tl_hdfs_answer_reply_view
\ No newline at end of file
#step1_13.job
type=command
command=sh /home/bi/bi-report/lib/shell/waitsuccess.sh hive online tl_hdfs_api_tractate_reply_view
\ No newline at end of file
#step1_4.job
type=command
command=sh /home/bi/bi-report/lib/shell/waitsuccess.sh hive online ml_user_updates
\ No newline at end of file
#step1_7.job
type=command
command=sh /home/bi/bi-report/lib/shell/waitsuccess.sh hive online tl_hdfs_diary_view
\ No newline at end of file
--内容日报
SELECT
day_id AS `日期`
,device_os_type AS `设备类型`
,active_type AS `活跃类型`
,is_ai_channel AS `是否AI渠道`
,dau AS `DAU`
,content_uv AS `内容详情页UV`
,content_pv AS `内容详情页PV`
,per_content_uv AS `内容UV/DAU`
,per_content_pv AS `内容PV/内容UV`
,retention_1 AS `内容用户APP次留`
,retention_7 AS `内容用户APP7留`
,retention_30 AS `内容用户APP30留`
,avg_app_duration AS `内容用户单设备App时长(m)`
,avg_content_stay AS `内容用户单设备内容时长(m)`
,avg_open_times AS `内容用户单设备打开次数`
,search_related_stay AS `内容用户搜索相关页面单设备页面时长(m)`
,welfare_stay AS `内容用户美购详情页单设备页面时长(m)`
,content_question_stay AS `内容用户问题详情页单设备页面时长(m)`
,ai_related_stay AS `内容用户AI相关页面单设备页面时长(m)`
,content_diary_stay AS `内容用户日记详情页单设备页面时长(m)`
,home_stay AS `内容用户首页单设备页面时长(m)`
,conv_related_stay AS `内容用户咨询相关页面单设备页面时长(m)`
,recommend_rate AS `首页feeds推荐进入内容UV/内容UV`
,per_recommend_pv AS `首页feeds推荐进入内容PV/UV`
,feeds_rate AS `首页feeds非推荐进入内容UV/内容UV`
,per_feeds_pv AS `首页feeds非推荐进入内容PV/UV`
,search_rate AS `搜索进入内容UV/内容UV`
,per_search_pv AS `搜索进入内容PV/UV`
,zone_rate AS `内容聚合页进入内容UV/内容UV`
,per_zone_pv AS `内容聚合页进入内容PV/UV`
,content_rate AS `内容详情页推荐板块进入内容UV/内容UV`
,per_from_content_pv AS `内容详情页推荐板块进入内容PV/UV`
,blank_rate AS `无来源页面(大多数为push)进入内容UV/内容UV`
,per_blank_pv AS `无来源页面(大多数为push)进入内容PV/UV`
,comment_rate AS `评论列表页进入内容UV/内容UV`
,per_comment_pv AS `评论列表页进入内容PV/UV`
,org_rate AS `医生医院主页进入内容UV/内容UV`
,per_org_pv AS `医生医院主页进入内容PV/UV`
,category_rate AS `品类聚合页进入内容UV/内容UV`
,per_category_pv AS `品类聚合页进入内容PV/UV`
,my_diary_rate AS `我的日记页进入内容UV/内容UV`
,per_my_diary_pv AS `我的日记页进入内容PV/UV`
,ai_rate AS `AI报告页进入内容UV/内容UV`
,per_ai_pv AS `AI报告页进入内容PV/UV`
,diary_uv AS `日记UV`
,diary_pv AS `日记PV`
,diary_rate AS `日记UV/内容UV`
,per_diary_pv AS `日记PV/日记UV`
,diary_stay AS `日记单设备时长(m)`
,post_uv AS `帖子UV`
,post_pv AS `帖子PV`
,post_rate AS `帖子UV/内容UV`
,per_post_pv AS `帖子PV/帖子UV`
,post_stay AS `帖子单设备时长(m)`
,question_uv AS `问题UV`
,question_pv AS `问题PV`
,question_rate AS `问题UV/内容UV`
,per_question_pv AS `问题PV/问题UV`
,question_stay AS `问题单设备时长(m)`
,question_answer_uv AS `问答UV`
,question_answer_pv AS `问答PV`
,question_answer_rate AS `问答UV/内容UV`
,per_question_answer_pv AS `问答PV/问答UV`
,question_answer_stay AS `问答单设备时长(m)`
,answer_uv AS `回答UV`
,answer_pv AS `回答PV`
,answer_rate AS `回答UV/内容UV`
,per_answer_pv AS `回答PV/回答UV`
,answer_stay AS `回答单设备时长(m)`
,video_uv AS `视频UV`
,video_pv AS `视频PV`
,video_rate AS `视频UV/内容UV`
,per_video_pv AS `视频PV/视频UV`
,video_stay AS `视频单设备时长(m)`
,wiki_uv AS `百科UV`
,wiki_pv AS `百科PV`
,wiki_rate AS `百科UV/内容UV`
,per_wiki_pv AS `百科PV/百科UV`
,wiki_stay AS `百科单设备时长(m)`
,article_uv AS `专栏UV`
,article_pv AS `专栏PV`
,article_rate AS `专栏UV/内容UV`
,per_article_pv AS `专栏PV/专栏UV`
,article_stay AS `专栏单设备时长(m)`
FROM pm.tl_pm_content_d
where partition_day=regexp_replace(DATE_SUB(current_date,1) ,'-','');
daily_recommend_strategy=首页推荐策略日报
\ No newline at end of file
SET mapreduce.job.queuename=data;
SET mapreduce.map.memory.mb=8192;
SET mapreduce.map.java.opts=-Xmx8000m;
SET mapreduce.reduce.memory.mb=8192;
SET mapreduce.reduce.java.opts=-Xmx8000m;
set hive.auto.convert.join=true;
SET mapred.reduce.tasks=20;
SET role admin;
ADD JAR hdfs:///user/hive/share/lib/udf/hive-udf-1.0-SNAPSHOT.jar;
CREATE TEMPORARY FUNCTION convup AS 'com.gmei.hive.common.udf.UDFConvUpgrade';
INSERT OVERWRITE TABLE pm.tl_pm_recommend_strategy_d PARTITION (PARTITION_DAY = ${partition_day})
SELECT
t1.partition_date as day_id,
t1.device_os_type as device_os_type,
t1.active_type as active_type,
t2.card_content_type as card_content_type,
t2.recommend_type as recommend_type,
NVL(sum(t3.session_pv),0) as card_click,
NVL(sum(t2.session_pv),0) as card_exposure,
NVL(round(sum(page_stay)/count(distinct t4.cl_id)/60,2),0) as avg_page_stay,
NVL(sum(navbar_pv),0) as navbar_search,
NVL(sum(highlight_pv),0) as highlight_word,
NVL(sum(self_wel_pv),0) as self_welfare_card,
NVL(sum(recom_wel_pv),0)-NVL(sum(self_wel_pv),0) as recommend_welfare_card,--需要排除关联的商品卡片点击
NVL(sum(recom_content_pv),0) as recommend_content_card,
NULL as recommend_special_card,
NULL as transfer_card,
NULL as video_consultation
FROM
(
SELECT partition_date
,device_os_type
,CASE WHEN active_type = '4' THEN '老活'
WHEN active_type IN ('1','2') THEN '新增' END AS active_type
,device_id
FROM online.ml_device_day_active_status
WHERE partition_date>=regexp_replace(DATE_SUB(current_date,1) ,'-','')
AND partition_date<regexp_replace((current_date),'-','')
AND active_type IN ('1','2','4')
AND first_channel_source_type not IN ('yqxiu1','yqxiu2','yqxiu3','yqxiu4','yqxiu5','mxyc1','mxyc2','mxyc3'
,'wanpu','jinshan','jx','maimai','zhuoyi','huatian','suopingjingling','mocha','mizhe','meika','lamabang'
,'js-az1','js-az2','js-az3','js-az4','js-az5','jfq-az1','jfq-az2','jfq-az3','jfq-az4','jfq-az5','toufang1'
,'toufang2','toufang3','toufang4','toufang5','toufang6','TF-toufang1','TF-toufang2','TF-toufang3','TF-toufang4'
,'TF-toufang5','tf-toufang1','tf-toufang2','tf-toufang3','tf-toufang4','tf-toufang5','benzhan','promotion_aso100'
,'promotion_qianka','promotion_xiaoyu','promotion_dianru','promotion_malioaso','promotion_malioaso-shequ'
,'promotion_shike','promotion_julang_jl03','promotion_zuimei')
AND first_channel_source_type not LIKE 'promotion\_jf\_%'
)t1
JOIN
(--精准曝光,卡片id和session_id去重
SELECT partition_date,
card_content_type,
cl_id,
recommend_type,
card_id,
count(distinct app_session_id) as session_pv
FROM
(
SELECT partition_date,
cl_id,
case when card_content_type in ('qa','answer') then 'qa' else card_content_type end as card_content_type,
CASE WHEN transaction_type in ('ctr') THEN 'ctr预估'
WHEN transaction_type in ('cvr') THEN 'cvr预估'
WHEN transaction_type in ('-1','smr') THEN 'smr'
when transaction_type in ('pgc','hotspot') then '热点卡片'
when transaction_type in ('newdata') then '保量卡片'
END AS recommend_type,
card_id,
app_session_id
from online.ml_community_precise_exposure_detail
WHERE partition_date>=regexp_replace(DATE_SUB(current_date,1) ,'-','')
AND partition_date<regexp_replace((current_date),'-','')
AND action in ('page_precise_exposure','home_choiceness_card_exposure') --7745版本action改为page_precise_exposure
AND is_exposure = '1' ----精准曝光
AND page_name ='home'
AND tab_name = '精选'
AND transaction_type in ('-1','ctr','smr','cvr','hotspot','pgc','newdata')
AND card_content_type in ('qa','diary','user_post','answer')
group by partition_date,
case when card_content_type in ('qa','answer') then 'qa' else card_content_type end,
cl_id,
CASE WHEN transaction_type in ('ctr') THEN 'ctr预估'
WHEN transaction_type in ('cvr') THEN 'cvr预估'
WHEN transaction_type in ('-1','smr') THEN 'smr'
when transaction_type in ('pgc','hotspot') then '热点卡片'
when transaction_type in ('newdata') then '保量卡片' END,
card_id,
app_session_id
)a
group by partition_date,card_content_type,cl_id,recommend_type,card_id
)t2
on t1.device_id=t2.cl_id and t1.partition_date=t2.partition_date
LEFT JOIN
(--卡片,卡片id和session_id去重
SELECT partition_date,
card_content_type,
cl_id,
recommend_type,
card_id,
count(distinct app_session_id) as session_pv
FROM
(
SELECT partition_date,
cl_id,
case when params['card_content_type'] in ('qa','answer') then 'qa' else params['card_content_type'] end as card_content_type,
CASE WHEN params['transaction_type'] in ('ctr') THEN 'ctr预估'
WHEN params['transaction_type'] in ('cvr') THEN 'cvr预估'
WHEN params['transaction_type'] in ('-1','smr') THEN 'smr'
when params['transaction_type'] in ('pgc','hotspot') then '热点卡片'
when params['transaction_type'] in ('newdata') then '保量卡片'
END AS recommend_type,
params['card_id'] as card_id,
app_session_id
from online.bl_hdfs_maidian_updates
WHERE partition_date>=regexp_replace(DATE_SUB(current_date,1) ,'-','')
AND partition_date<regexp_replace((current_date),'-','')
AND action='on_click_card'
AND params['page_name'] ='home'
AND params['tab_name'] = '精选'
AND params['transaction_type'] in ('-1','ctr','smr','cvr','hotspot','pgc','newdata')
AND params['card_content_type'] in ('qa','diary','user_post','answer')
GROUP BY partition_date,
cl_id,
case when params['card_content_type'] in ('qa','answer') then 'qa' else params['card_content_type'] end,
CASE WHEN params['transaction_type'] in ('ctr') THEN 'ctr预估'
WHEN params['transaction_type'] in ('cvr') THEN 'cvr预估'
WHEN params['transaction_type'] in ('-1','smr') THEN 'smr'
when params['transaction_type'] in ('pgc','hotspot') then '热点卡片'
when params['transaction_type'] in ('newdata') then '保量卡片' END,
params['card_id'],
app_session_id
)a
group by partition_date,card_content_type,cl_id,recommend_type,card_id
)t3
on t2.partition_date=t3.partition_date
and t2.cl_id=t3.cl_id
and t2.card_id=t3.card_id
and t2.card_content_type=t3.card_content_type
and t2.recommend_type=t3.recommend_type
LEFT JOIN
(--页面浏览时长
SELECT partition_date,cl_id,business_id,
case when page_name in ('diary_detail','topic_detail') then 'diary'
when page_name in ('post_detail','user_post_detail','doctor_post_detail') then 'user_post'
when page_name in ('question_detail','answer_detail','question_answer_detail') then 'qa' else null end as page_name,
sum(page_stay) as page_stay
FROM online.bl_hdfs_maidian_updates
WHERE partition_date>=regexp_replace(DATE_SUB(current_date,1) ,'-','')
AND partition_date<regexp_replace((current_date),'-','')
AND action='page_view'
AND page_name IN ('diary_detail','topic_detail','post_detail','user_post_detail','doctor_post_detail','question_detail','answer_detail','question_answer_detail')
AND referrer='home'
AND page_stay>=0 AND page_stay<1000
GROUP BY partition_date,cl_id,business_id,
case when page_name in ('diary_detail','topic_detail') then 'diary'
when page_name in ('post_detail','user_post_detail','doctor_post_detail') then 'user_post'
when page_name in ('question_detail','answer_detail','question_answer_detail') then 'qa' else null end
)t4
on t4.partition_date=t3.partition_date
and t4.cl_id=t3.cl_id
and t4.business_id=t3.card_id
and t4.page_name=t3.card_content_type
LEFT JOIN
(--搜索框和点击行为
SELECT partition_date,cl_id,business_id,
case when page_name in ('diary_detail','topic_detail') then 'diary'
when page_name in ('post_detail','user_post_detail','doctor_post_detail') then 'user_post'
when page_name in ('question_detail','answer_detail','question_answer_detail') then 'qa' else null end as page_name,
count(1) as navbar_pv
FROM online.bl_hdfs_maidian_updates
WHERE partition_date>=regexp_replace(DATE_SUB(current_date,1) ,'-','')
AND partition_date<regexp_replace((current_date),'-','')
AND action in ('on_click_navbar_search','do_search')
AND page_name IN ('diary_detail','topic_detail','post_detail','user_post_detail','doctor_post_detail','question_detail','answer_detail','question_answer_detail')
AND (referrer='home' or
(params['referrer_link'] like '%[%' and
json_split(params['referrer_link'])[size(json_split(params['referrer_link']))-1]='home'))
group by partition_date,cl_id,business_id,
case when page_name in ('diary_detail','topic_detail') then 'diary'
when page_name in ('post_detail','user_post_detail','doctor_post_detail') then 'user_post'
when page_name in ('question_detail','answer_detail','question_answer_detail') then 'qa' else null end
)t5
on t5.partition_date=t3.partition_date
and t5.cl_id=t3.cl_id
and t5.business_id=t3.card_id
and t5.page_name=t3.card_content_type
LEFT JOIN
(--点击高亮词
SELECT partition_date,cl_id,business_id,
case when page_name in ('diary_detail','topic_detail') then 'diary'
when page_name in ('post_detail','user_post_detail','doctor_post_detail') then 'user_post'
when page_name in ('question_detail','answer_detail','question_answer_detail') then 'qa' else null end as page_name,
count(1) as highlight_pv
FROM online.bl_hdfs_maidian_updates
WHERE partition_date>=regexp_replace(DATE_SUB(current_date,1) ,'-','')
AND partition_date<regexp_replace((current_date),'-','')
AND action='on_click_card'
and params['card_type']='highlight_word'
AND page_name IN ('diary_detail','topic_detail','post_detail','user_post_detail','doctor_post_detail','question_detail','answer_detail','question_answer_detail')
AND (referrer='home' or
(params['referrer_link'] like '%[%' and
json_split(params['referrer_link'])[size(json_split(params['referrer_link']))-1]='home'))
group by partition_date,cl_id,business_id,
case when page_name in ('diary_detail','topic_detail') then 'diary'
when page_name in ('post_detail','user_post_detail','doctor_post_detail') then 'user_post'
when page_name in ('question_detail','answer_detail','question_answer_detail') then 'qa' else null end
)t6
on t6.partition_date=t3.partition_date
and t6.cl_id=t3.cl_id
and t6.business_id=t3.card_id
and t6.page_name=t3.card_content_type
LEFT JOIN
(--关联的美购卡片
SELECT partition_date,cl_id,business_id,page_name,count(distinct app_session_id) as self_wel_pv
FROM
(
SELECT partition_date,cl_id,business_id,app_session_id,params['card_id'] as card_id,
case when page_name in ('diary_detail','topic_detail') then 'diary'
when page_name in ('post_detail','user_post_detail','doctor_post_detail') then 'user_post'
when page_name in ('question_detail','answer_detail','question_answer_detail') then 'qa' else null end as page_name,
count(1) as pv
FROM online.bl_hdfs_maidian_updates
WHERE partition_date>=regexp_replace(DATE_SUB(current_date,1) ,'-','')
AND partition_date<regexp_replace((current_date),'-','')
AND (get_json_object(params['extra_param'], '$.type')='交互栏'
or get_json_object(params['extra_param'], '$.jump_from')='msg_link'
or params['in_page_pos']='top' or params['in_page_pos']='bottom')
AND action='on_click_card'
and params['card_content_type']='service'
AND page_name IN ('diary_detail','topic_detail')
AND (referrer='home' or
(params['referrer_link'] like '%[%' and
json_split(params['referrer_link'])[size(json_split(params['referrer_link']))-1]='home'))
group by partition_date,cl_id,business_id,app_session_id,params['card_id'],
case when page_name in ('diary_detail','topic_detail') then 'diary'
when page_name in ('post_detail','user_post_detail','doctor_post_detail') then 'user_post'
when page_name in ('question_detail','answer_detail','question_answer_detail') then 'qa' else null end
)a
group by partition_date,cl_id,business_id,page_name
)t7
on t7.partition_date=t3.partition_date
and t7.cl_id=t3.cl_id
and t7.business_id=t3.card_id
and t7.page_name=t3.card_content_type
LEFT JOIN
(--推荐的美购卡片(需要排除作者消费的美购)
SELECT partition_date,cl_id,business_id,page_name,count(distinct app_session_id) as recom_wel_pv
FROM
(
SELECT partition_date,cl_id,business_id,app_session_id,params['card_id'] as card_id,
case when page_name in ('diary_detail','topic_detail') then 'diary'
when page_name in ('post_detail','user_post_detail','doctor_post_detail') then 'user_post'
when page_name in ('question_detail','answer_detail','question_answer_detail') then 'qa' else null end as page_name,
count(1) as service_pv
FROM online.bl_hdfs_maidian_updates
WHERE partition_date>=regexp_replace(DATE_SUB(current_date,1) ,'-','')
AND partition_date<regexp_replace((current_date),'-','')
AND (action='on_click_card'and params['card_content_type']='service'
or action='on_click_button' and params['button_name']='unfold')
AND page_name IN ('diary_detail','topic_detail','post_detail','user_post_detail','doctor_post_detail','question_detail','answer_detail','question_answer_detail')
AND (referrer='home' or
(params['referrer_link'] like '%[%' and
json_split(params['referrer_link'])[size(json_split(params['referrer_link']))-1]='home'))
group by partition_date,cl_id,business_id,app_session_id,params['card_id'],
case when page_name in ('diary_detail','topic_detail') then 'diary'
when page_name in ('post_detail','user_post_detail','doctor_post_detail') then 'user_post'
when page_name in ('question_detail','answer_detail','question_answer_detail') then 'qa' else null end
)a
group by partition_date,cl_id,business_id,page_name
)t8
on t8.partition_date=t3.partition_date
and t8.cl_id=t3.cl_id
and t8.business_id=t3.card_id
and t8.page_name=t3.card_content_type
LEFT JOIN
(--推荐的内容卡片
SELECT partition_date,cl_id,business_id,page_name,count(distinct app_session_id) as recom_content_pv
FROM
(
SELECT partition_date,cl_id,business_id,app_session_id,params['card_id'] as card_id,
case when page_name in ('diary_detail','topic_detail') then 'diary'
when page_name in ('post_detail','user_post_detail','doctor_post_detail') then 'user_post'
when page_name in ('question_detail','answer_detail','question_answer_detail') then 'qa' else null end as page_name,
count(1) as service_pv
FROM online.bl_hdfs_maidian_updates
WHERE partition_date>=regexp_replace(DATE_SUB(current_date,1) ,'-','')
AND partition_date<regexp_replace((current_date),'-','')
AND action='on_click_card'
and params['card_content_type'] in ('qa','diary','user_post','answer')
AND page_name IN ('diary_detail','topic_detail','post_detail','user_post_detail','doctor_post_detail','question_detail','answer_detail','question_answer_detail')
AND (referrer='home' or
(params['referrer_link'] like '%[%' and
json_split(params['referrer_link'])[size(json_split(params['referrer_link']))-1]='home'))
group by partition_date,cl_id,business_id,app_session_id,params['card_id'],
case when page_name in ('diary_detail','topic_detail') then 'diary'
when page_name in ('post_detail','user_post_detail','doctor_post_detail') then 'user_post'
when page_name in ('question_detail','answer_detail','question_answer_detail') then 'qa' else null end
)a
group by partition_date,cl_id,business_id,page_name
)t9
on t9.partition_date=t3.partition_date
and t9.cl_id=t3.cl_id
and t9.business_id=t3.card_id
and t9.page_name=t3.card_content_type
LEFT JOIN
(
select distinct device_id
from ml.ml_d_ct_dv_devicespam_d --去除机构刷单设备,即作弊设备(浏览和曝光事件去除)
WHERE partition_day=regexp_replace(DATE_SUB(current_date,1) ,'-','')
union all
select distinct device_id
from dim.dim_device_user_staff --去除内网用户
)spam_pv
on spam_pv.device_id=t2.cl_id
LEFT JOIN
(
SELECT partition_date,device_id
FROM
(--找出user_id当天活跃的第一个设备id
SELECT user_id,partition_date,
if(size(device_list) > 0, device_list [ 0 ], '') AS device_id
FROM online.ml_user_updates
WHERE partition_date>=regexp_replace(DATE_SUB(current_date,1) ,'-','')
AND partition_date<regexp_replace((current_date),'-','')
)t1
JOIN
( --医生账号
SELECT distinct user_id
FROM online.tl_hdfs_doctor_view
WHERE partition_date = regexp_replace(DATE_SUB(current_date,1) ,'-','')
--马甲账号/模特用户
UNION ALL
SELECT user_id
FROM ml.ml_c_ct_ui_user_dimen_d
WHERE partition_day = regexp_replace(DATE_SUB(current_date,1) ,'-','')
AND (is_puppet = 'true' or is_classifyuser = 'true')
UNION ALL
--公司内网覆盖用户
select distinct user_id
from dim.dim_device_user_staff
UNION ALL
--登陆过医生设备
SELECT distinct t1.user_id
FROM
(
SELECT user_id, v.device_id as device_id
FROM online.ml_user_history_detail
LATERAL VIEW EXPLODE(device_history_list) v AS device_id
WHERE partition_date = regexp_replace(DATE_SUB(current_date,1) ,'-','')
) t1
JOIN
(
SELECT device_id
FROM online.ml_device_history_detail
WHERE partition_date = regexp_replace(DATE_SUB(current_date,1) ,'-','')
AND is_login_doctor = '1'
) t2
ON t1.device_id = t2.device_id
)t2
on t1.user_id=t2.user_id
group by partition_date,device_id
)dev
on t2.partition_date=dev.partition_date and t2.cl_id=dev.device_id
WHERE spam_pv.device_id IS NULL
and dev.device_id is null
GROUP BY t1.partition_date,t1.device_os_type,t1.active_type,t2.card_content_type,t2.recommend_type
order by day_id,device_os_type,active_type,card_content_type,recommend_type;
\ No newline at end of file
#step1_2.job
type=command
command=sh /home/bi/bi-report/lib/shell/waitsuccess.sh hive online bl_hdfs_maidian_updates
\ No newline at end of file
command=sh /home/bi/bi-report/lib/shell/waitsuccess.sh hive online ml_community_precise_exposure_detail
\ No newline at end of file
#step1_3.job
type=command
command=sh /home/bi/bi-report/lib/shell/waitsuccess.sh hive online ml_device_updates
\ No newline at end of file
command=sh /home/bi/bi-report/lib/shell/waitsuccess.sh hive online bl_hdfs_maidian_updates
\ No newline at end of file
#step1_4.job
type=command
command=sh /home/bi/bi-report/lib/shell/waitsuccess.sh hive ml ml_d_ct_dv_devicespam_d
\ No newline at end of file
#step1_5.job
type=command
command=sh /home/bi/bi-report/lib/shell/waitsuccess.sh hive online bl_hdfs_operation_updates
\ No newline at end of file
command=sh /home/bi/bi-report/lib/shell/waitsuccess.sh hive online ml_user_updates
\ No newline at end of file
#step1_6.job
type=command
command=sh /home/bi/bi-report/lib/shell/waitsuccess.sh hive online tl_hdfs_api_tractate_view
\ No newline at end of file
command=sh /home/bi/bi-report/lib/shell/waitsuccess.sh hive online tl_hdfs_doctor_view
\ No newline at end of file
#step1_7.job
type=command
command=sh /home/bi/bi-report/lib/shell/waitsuccess.sh hive ml ml_c_ct_ui_user_dimen_d
\ No newline at end of file
#step1_8.job
type=command
command=sh /home/bi/bi-report/lib/shell/waitsuccess.sh hive online tl_hdfs_problem_view
\ No newline at end of file
command=sh /home/bi/bi-report/lib/shell/waitsuccess.sh hive online ml_user_history_detail
\ No newline at end of file
#step1_9.job
type=command
command=sh /home/bi/bi-report/lib/shell/waitsuccess.sh hive online tl_hdfs_question_view
\ No newline at end of file
command=sh /home/bi/bi-report/lib/shell/waitsuccess.sh hive online ml_device_history_detail
\ No newline at end of file
#step2.job
type=command
dependencies=step1_1,step1_2,step1_3,step1_4,step1_5,step1_6,step1_7,step1_8,step1_9,step1_10,step1_11,step1_12,step1_13
command=/home/bi/bi-report/lib/shell/hive daily_content_data
\ No newline at end of file
dependencies=step1_1,step1_2,step1_3,step1_4,step1_5,step1_6,step1_7,step1_8,step1_9
command=/home/bi/bi-report/lib/shell/hive daily_recommend_strategy
\ No newline at end of file
#step3.job
type=command
dependencies=step2
command=curl -X GET http://localhost:8553/api/report/email/daily_content_data/zhaojianwei@igengmei.com/jianweizhao@yeah.net
\ No newline at end of file
command=curl -X GET http://localhost:8553/api/report/email/daily_recommend_strategy/zhaojianwei@igengmei.com/jianweizhao@yeah.net
\ No newline at end of file
SELECT
day_id as `日期`,
device_os_type as `设备类型`,
active_type as `活跃类型`,
card_content_type as `卡片类型`,
recommend_type as `推荐类型`,
NVL(CONCAT(ROUND((navbar_search+highlight_word+self_welfare_card+recommend_welfare_card+recommend_content_card/2)/card_exposure*100,2),'%'),0) as `来自首页推荐内容卡片的的有效二跳pv/首页卡片精准曝光PV`,
NVL(CONCAT(ROUND(card_click/card_exposure*100,2),'%'),0) as `首页卡片点击PV/首页卡片精准曝光PV`,
NVL(CONCAT(ROUND((navbar_search+highlight_word+self_welfare_card+recommend_welfare_card+recommend_content_card/2)/card_click*100,2),'%'),0) as `来自首页推荐内容卡片的的有效二跳pv/首页卡片点击PV`,
card_click as `首页卡片点击PV`,
card_exposure as `首页卡片精准曝光PV`,
(navbar_search+highlight_word+self_welfare_card+recommend_welfare_card+recommend_content_card/2)as `有效二跳pv`,
avg_page_stay as `来自I的单PV平均浏览时长`,
navbar_search as `来自I的搜索框+搜索按钮点击PV`,
highlight_word as `来自I的文内搜索点击PV`,
self_welfare_card as `来自I的商品卡片点击PV`,
recommend_welfare_card as `来自I的推荐商品+查看全部商品点击pv`,
recommend_content_card as `来自I的推荐内容点击pv`,
'未配置' as `来自I的推荐专题点击pv`,
'未上线' as `来自I的转诊点击pv`,
'未上线' as `来自I的视频面诊点击pv`
FROM pm.tl_pm_recommend_strategy_d
WHERE partition_day>='20200627' and partition_day<=regexp_replace(DATE_SUB(current_date,1) ,'-','')
order by `日期` desc ,`设备类型`,`活跃类型`,`卡片类型`,`推荐类型`;
\ No newline at end of file
......@@ -34,3 +34,10 @@ BI report project init.
4.优化邮件内容,wps打开去掉 样式格式化【metabase中包含内容简略视图,而且有metabaselogo】 邮件内容格式化 python实现 ok
5.一个附件多个sheet 或者 一封邮件 多个附件【metabase默认支持指定多个问题、多个附件】 python发送邮件实现,一个文件多个sheet实现起来相对比较难 发送邮件时需要指定附件名称(带账期) ok
6.excel文件内容格式化 优先级比较低 【指定每个字段的最大长度】 使用python实现,尝试 ok
规范约定:
1.job文件的命名
必须以job1_01的方式命名,以便在azkaban中可以有序查看
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment