Commit 1ddddfe6 authored by 魏艺敏's avatar 魏艺敏

Merge branch 'hanyingyue' into 'master'

Hanyingyue

See merge request !112
parents 57b8f8a9 e8cfc11c
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="ChangeListManager">
<list default="true" id="19033445-2f8f-4eb0-b725-3c6ceb4feec5" name="Default Changelist" comment="" />
<option name="SHOW_DIALOG" value="false" />
<option name="HIGHLIGHT_CONFLICTS" value="true" />
<option name="HIGHLIGHT_NON_ACTIVE_CHANGELIST" value="false" />
<option name="LAST_RESOLUTION" value="IGNORE" />
</component>
<component name="Git.Settings">
<option name="RECENT_GIT_ROOT_PATH" value="$PROJECT_DIR$" />
</component>
<component name="ProjectId" id="1j8ToKg5KFGxoUT3YN9ayP8O6dp" />
<component name="ProjectLevelVcsManager" settingsEditedManually="true" />
<component name="ProjectViewState">
<option name="hideEmptyMiddlePackages" value="true" />
<option name="showLibraryContents" value="true" />
</component>
<component name="PropertiesComponent">
<property name="RunOnceActivity.OpenProjectViewOnStart" value="true" />
</component>
<component name="SpellCheckerSettings" RuntimeDictionaries="0" Folders="0" CustomDictionaries="0" DefaultDictionary="application-level" UseSingleDictionary="true" transferred="true" />
<component name="TaskManager">
<task active="true" id="Default" summary="Default task">
<changelist id="19033445-2f8f-4eb0-b725-3c6ceb4feec5" name="Default Changelist" comment="" />
<created>1603184895427</created>
<option name="number" value="Default" />
<option name="presentableId" value="Default" />
<updated>1603184895427</updated>
</task>
<servers />
</component>
</project>
\ No newline at end of file
core_daily=平台产品核心业务数据
home_daily=首页内容核心数据
search_daily=搜索内容核心数据
ai_daily=AI工具核心数据
#step1_1.job
type=command
command=sh /home/bi/bi-report/lib/shell/waitsuccess.sh hive online bl_hdfs_maidian_updates
\ No newline at end of file
#step1_2.job
type=command
command=sh /home/bi/bi-report/lib/shell/waitsuccess.sh hive online ml_device_day_active_status
\ No newline at end of file
#step1_3.job
type=command
command=sh /home/bi/bi-report/lib/shell/waitsuccess.sh hive ml ML_D_CT_DV_DEVICECLEAN_DIMEN_D
#step1_4.job
type=command
command=sh /home/bi/bi-report/lib/shell/waitsuccess.sh hive dim DIM_AI_CHANNEL_ZP_NEW
#step1_5.job
type=command
command=sh /home/bi/bi-report/lib/shell/waitsuccess.sh hive online tl_hdfs_topicreply_view
#step1_6.job
type=command
command=sh /home/bi/bi-report/lib/shell/waitsuccess.sh hive online tl_hdfs_problem_view
#step1_7.job
type=command
command=sh /home/bi/bi-report/lib/shell/waitsuccess.sh hive online tl_hdfs_answer_reply_view
#step1_8.job
type=command
command=sh /home/bi/bi-report/lib/shell/waitsuccess.sh hive online tl_hdfs_api_tractate_reply_view
#step2.job
type=command
dependencies=step1_1,step1_2,step1_3,step1_4,step1_5,step1_6,step1_7,step1_8
command=curl -X GET http://localhost:8553/api/report/email/core_daily/hanyingyue@igengmei.com/weiyimin@igengmei.com
\ No newline at end of file
-- AI tool core data.
-- Daily report, one output row per partition_date, covering the first day of the month
-- that contains yesterday up to the latest loaded partition.
-- Metrics, each for iOS and android, restricted to newly added devices of the AI channel:
--   * next-day retention on the AI face features: devices in t3 (next day) / devices in t2 (same day)
--   * good clicks from AI result pages (t4) / devices that used the feature (t2)
-- Both ratios are multiplied by 100, rounded to 2 decimals and suffixed with a percent sign.
SELECT partition_date as `日期`
-- a retention that renders as exactly 0.0% is displayed as a dash instead
,if(ios_new_ai_retention='0.0%','-',ios_new_ai_retention) as `iOS新在AI功能次留`
,ios_new_gc_uv as `iOS新在AI分发页PVgood click/功能使用uv`
,if(andr_new_ai_retention='0.0%','-',andr_new_ai_retention) as `android新在AI功能次留`
,andr_new_gc_uv as `android新在AI分发页PVgood click/功能使用uv`
FROM
(
SELECT t1.partition_date
-- iOS retention: distinct t3 devices (feature used again one day later) over distinct t2 devices
,concat(round(count(distinct case when device_os_type ='ios' and active_type ='新增' and channel='ai' then t3.cl_id end)/count(distinct case when device_os_type ='ios' and active_type ='新增' and channel='ai' then t2.cl_id end)*100,2),'%') as ios_new_ai_retention
-- iOS good clicks: sum of t4.gc_num over distinct t2 devices
,concat(round(sum(case when device_os_type ='ios' and active_type ='新增' and channel='ai' then gc_num end)/count(distinct case when device_os_type ='ios' and active_type ='新增' and channel='ai' then t2.cl_id end)*100,2),'%') as ios_new_gc_uv
-- android retention, same definition as the iOS one
,concat(round(count(distinct case when device_os_type ='android' and active_type ='新增' and channel='ai' then t3.cl_id end)/count(distinct case when device_os_type ='android' and active_type ='新增' and channel='ai' then t2.cl_id end)*100,2),'%') as andr_new_ai_retention
-- android good clicks, same definition as the iOS one
,concat(round(sum(case when device_os_type ='android' and active_type ='新增' and channel='ai' then gc_num end)/count(distinct case when device_os_type ='android' and active_type ='新增' and channel='ai' then t2.cl_id end)*100,2),'%') as andr_new_gc_uv
FROM
(
-- t1: active devices per day with a new/returning label and a channel label.
-- partition_date is reformatted from yyyyMMdd to yyyy-MM-dd here and in every other derived table.
SELECT concat_ws('-',substr(partition_date,1,4),substr(partition_date,5,2),substr(partition_date,7,2)) as partition_date
,device_os_type,m.device_id
-- active_type 4 is labelled as returning active, 1 and 2 as newly added
,CASE WHEN active_type = '4' THEN '老活' when active_type in ('1','2') then '新增' END as active_type
-- channel is ai when the channel dimension flags the code as AI, otherwise the medical-aesthetics label
,case when is_ai_channel='true' then 'ai' else '医美' end as channel
FROM online.ml_device_day_active_status m
LEFT JOIN
-- channel dimension, matched per day on the first channel source code
(SELECT code,is_spam,is_ai_channel,partition_day
FROM DIM.DIM_AI_CHANNEL_ZP_NEW
WHERE partition_day >=REGEXP_REPLACE(trunc(date_sub(current_date(),1),'MM'),'-','')
and partition_day<=REGEXP_REPLACE(current_date(),'-',''))tmp
on first_channel_source_type=tmp.code and m.partition_date=tmp.partition_day
-- NOTE(review): the upper bound here is today, while the t4 branches stop at yesterday - confirm this is intended
WHERE partition_date >= REGEXP_REPLACE(trunc(date_sub(current_date(),1),'MM'),'-','')
and partition_date <= REGEXP_REPLACE(current_date(),'-','')
AND active_type IN ('1','2','4')
-- exclude a fixed list of channel source codes plus the empty string and unknown
-- (presumably promotion or low-quality traffic sources - confirm with the data owner)
AND first_channel_source_type not IN ('yqxiu1','yqxiu2','yqxiu3','yqxiu4','yqxiu5','mxyc1','mxyc2','mxyc3'
,'wanpu','jinshan','jx','maimai','zhuoyi','huatian','suopingjingling','mocha','mizhe','meika','lamabang'
,'js-az1','js-az2','js-az3','js-az4','js-az5','jfq-az1','jfq-az2','jfq-az3','jfq-az4','jfq-az5','toufang1'
,'toufang2','toufang3','toufang4','toufang5','toufang6','TF-toufang1','TF-toufang2','TF-toufang3','TF-toufang4'
,'TF-toufang5','tf-toufang1','tf-toufang2','tf-toufang3','tf-toufang4','tf-toufang5','benzhan','promotion_aso100'
,'promotion_qianka','promotion_xiaoyu','promotion_dianru','promotion_malioaso','promotion_malioaso-shequ'
,'promotion_shike','promotion_julang_jl03','promotion_zuimei','','unknown')
-- also exclude every code that starts with promotion_jf_ (underscores are escaped to match literally)
AND first_channel_source_type not LIKE 'promotion\_jf\_%'
)t1
LEFT JOIN
(
-- t2: per day and device, the number of distinct page transitions from face_scan
-- into report_result, face_detect_result or face_simulator (feature usage on that day).
SELECT partition_date,cl_id,count(distinct routine) as routine_num
FROM
(
SELECT concat_ws('-',substr(partition_date,1,4),substr(partition_date,5,2),substr(partition_date,7,2)) as partition_date,cl_id
,concat(pre_page_name,',',page_name) as routine
FROM
(
SELECT partition_date,
cl_id,
app_session_id,
page_name,
-- previous page inside the same day, device and session - unknown for the first event.
-- Only page views of the four listed pages are in scope, so pages visited in between are ignored.
lag(page_name, 1 , 'unknown') over(partition by partition_date,cl_id,app_session_id order by no) as pre_page_name
FROM
(
SELECT partition_date,
cl_id,
app_session_id,
time_str,
page_name,
-- sequence number of the page view inside the session, ordered by time_str
row_number() over (partition by partition_date,cl_id,app_session_id order by time_str asc) as no
FROM online.bl_hdfs_maidian_updates
WHERE partition_date >= REGEXP_REPLACE(trunc(date_sub(current_date(),1),'MM'),'-','')
and partition_date <= REGEXP_REPLACE(current_date(),'-','')
AND page_name in ('face_scan','report_result','face_detect_result','face_simulator')
AND action = 'page_view'
-- the GROUP BY only removes exact duplicate events before numbering
GROUP BY partition_date,
cl_id,
app_session_id,
time_str,
page_name
)a
)b
-- keep only transitions that start on face_scan and end on one of the three result pages
WHERE concat(pre_page_name,',',page_name) in ('face_scan,report_result','face_scan,face_detect_result','face_scan,face_simulator')
)c
group by partition_date,cl_id
)t2
ON t1.device_id = t2.cl_id AND t1.partition_date = t2.partition_date
LEFT JOIN
(
-- t3: same derivation as t2 - it is joined one calendar day later to detect next-day usage
SELECT partition_date,cl_id,count(distinct routine) as routine_num
FROM
(
SELECT concat_ws('-',substr(partition_date,1,4),substr(partition_date,5,2),substr(partition_date,7,2)) as partition_date,cl_id
,concat(pre_page_name,',',page_name) as routine
FROM
(
SELECT partition_date,
cl_id,
app_session_id,
page_name,
lag(page_name, 1 , 'unknown') over(partition by partition_date,cl_id,app_session_id order by no) as pre_page_name
FROM
(
SELECT partition_date,
cl_id,
app_session_id,
time_str,
page_name,
row_number() over (partition by partition_date,cl_id,app_session_id order by time_str asc) as no
FROM online.bl_hdfs_maidian_updates
WHERE partition_date >= REGEXP_REPLACE(trunc(date_sub(current_date(),1),'MM'),'-','')
and partition_date <= REGEXP_REPLACE(current_date(),'-','')
AND page_name in ('face_scan','report_result','face_detect_result','face_simulator')
AND action = 'page_view'
GROUP BY partition_date,
cl_id,
app_session_id,
time_str,
page_name
)a
)b
WHERE concat(pre_page_name,',',page_name) in ('face_scan,report_result','face_scan,face_detect_result','face_scan,face_simulator')
)c
group by partition_date,cl_id
)t3
-- the t3 row must be dated exactly one day after the t2 row of the same device
ON t2.cl_id = t3.cl_id AND date_add(t2.partition_date,1) = t3.partition_date
left join
(-- t4: good clicks (PV) from the AI distribution pages, a UNION ALL of five sources.
-- Each branch yields at most one row per day and device, so a device can match up to five rows.
-- Branch 1: good clicks on content reached from the AI result pages (mainly face score and simulated plastic surgery)
select concat_ws('-',substr(a.partition_date,1,4),substr(a.partition_date,5,2),substr(a.partition_date,7,2)) as partition_date,a.cl_id,count(distinct a.business_id) as gc_num
from
(
-- content_type stays NULL for the listed pages that have no mapping (question, video and article pages)
select *,case when page_name in ('diary_detail','topic_detail') THEN 'diary'
when page_name in ('post_detail','user_post_detail','doctor_post_detail') THEN 'post'
when page_name in ('answer_detail') THEN 'answer' end as content_type
FROM ONLINE.BL_HDFS_MAIDIAN_UPDATES
-- NOTE(review): no upper bound on the partition here, unlike the other t4 branches
WHERE PARTITION_DATE >= REGEXP_REPLACE(trunc(date_sub(current_date(),1),'MM'),'-','')
and page_name in ('diary_detail','topic_detail','post_detail','user_post_detail','doctor_post_detail','question_detail','answer_detail','question_answer_detail',
'video_steep','article_detail')
and (referrer in ('report_result','face_detect_result','float_tag_detail') or
(params['referrer_link'] like '%[%' and -- referrer_link condition added because referrer is missing on some pages
-- last element of the referrer_link array (json_split looks like a project UDF returning an array - confirm)
json_split(params['referrer_link'])[size(json_split(params['referrer_link']))-1] in ('report_result','face_detect_result','float_tag_detail')))
)a
left join
(-- comment tracking events are incomplete, so comments are supplemented from the business database
-- for diaries the diary-book id (diary_id) is used, because after clicking a card on the home page the user first lands on the diary card
SELECT diary_id as content_id,'diary' as type,user_id,create_date
FROM
(
SELECT id,problem_id,user_id,regexp_replace(substr(reply_date,1,10),'-','') as create_date
FROM online.tl_hdfs_topicreply_view
WHERE partition_date = regexp_replace(DATE_SUB(current_date,1) ,'-','')
AND is_spam = 'false' -- exclude suspected ads
and is_online='true'
and regexp_replace(substr(reply_date,1,10),'-','') >= REGEXP_REPLACE(trunc(date_sub(current_date(),1),'MM'),'-','')
group by id,problem_id,user_id,reply_date
)t1
JOIN
(
SELECT id,diary_id
FROM online.tl_hdfs_problem_view
WHERE partition_date = regexp_replace(DATE_SUB(current_date,1) ,'-','')
group by id,diary_id
)t2
on t2.id=t1.problem_id
group by diary_id,user_id,create_date
UNION ALL
-- users that commented on an answer, excluding suspected ads
SELECT answer_id as content_id,'answer' as type,user_id,regexp_replace(substr(create_time,1,10),'-','') as create_date
FROM online.tl_hdfs_answer_reply_view
WHERE partition_date =regexp_replace(DATE_SUB(current_date,1) ,'-','')
and (is_fake is NULL or is_fake = 'false')
AND answer_id is not NULL
and is_online='true'
and is_spam = 'false' -- exclude suspected ads
and regexp_replace(substr(create_time,1,10),'-','') >= REGEXP_REPLACE(trunc(date_sub(current_date(),1),'MM'),'-','')
group by answer_id,user_id,create_time
UNION ALL
-- users that commented on a user post
SELECT tractate_id as content_id,'post' as type,user_id,regexp_replace(substr(create_time,1,10),'-','') as create_date
FROM online.tl_hdfs_api_tractate_reply_view
WHERE partition_date =regexp_replace(DATE_SUB(current_date,1) ,'-','')
and regexp_replace(substr(create_time,1,10),'-','') >= REGEXP_REPLACE(trunc(date_sub(current_date(),1),'MM'),'-','')
and is_online='true'
group by tractate_id,user_id,create_time
)b
-- both sides are still in yyyyMMdd form at this point
on a.business_id=b.content_id and a.partition_date=b.create_date and a.user_id=b.user_id and a.content_type=b.type
WHERE ((action='page_view' and page_stay>=20)-- page stay of at least 20 (the original note says about 20s)
or action in ('on_click_navbar_search','do_search') -- second hop: click on the search box or the search button
or (action='on_click_card' and params['card_type']='highlight_word')-- second hop: highlighted word
or (action='on_click_card' and params['card_content_type']in ('service','qa','diary','user_post','answer'))-- second hop: card click
or (action='on_click_button' and params['button_name'] in ('video_interview','referral'))-- second hop: referral and video consultation buttons
or (action='on_click_favor' and params['motion']='do')-- add to favourites
or action='page_click_share'-- share
or (action='on_click_vote' and params['motion']='vote')-- like
or b.user_id is not null )-- commented on the content on the same day
group by a.partition_date,a.cl_id
union all
-- Branch 2: interactions on a service detail page (welfare_detail) opened from an AI result page (mainly the skin-test result page)
SELECT concat_ws('-',substr(partition_date,1,4),substr(partition_date,5,2),substr(partition_date,7,2)) as partition_date,cl_id,count(distinct business_id) as gc_num
FROM online.bl_hdfs_maidian_updates
WHERE partition_date >= REGEXP_REPLACE(trunc(date_sub(current_date(),1),'MM'),'-','')
and partition_date <= regexp_replace(DATE_SUB(current_date,1) ,'-','')
AND ((action in ('welfare_multiattribute_click_add','welfare_multiattribute_click_buy','on_click_navbar_cart') AND page_name = 'welfare_detail')-- add to cart
or action = 'welfare_detail_click_message'-- private message
or (action = 'on_click_button' AND page_name = 'welfare_detail' AND params['button_name'] in ('question_tag','appointment'))-- manual consultation and booking a consultation
or (action = 'welfare_detail_click_curearea_contact' AND params['connect_type'] in ('phone','onlineconsult'))-- phone or online consultation
or (action='welfare_detail_click_curearea' AND params['cure_type'] in ('doctor','organization'))-- doctor or hospital
or (action in ('page_click_share','welfare_detail_comment_click_diary_card','service_comment_click_tag')AND page_name='welfare_detail')-- share and review list page
or (action='welfare_detail_click_close_float_coupon'AND page_name='welfare_detail')-- floating coupon window
or (action='welfare_detail_click_coupon' AND params['coupon_type'] in ('precoupon','finalcoupon'))-- coupon
----or(action='on_click_button' AND params['button_name'] in ('sku_choose','sku_all','service_sku')AND page_name='welfare_detail'))-- service item click (this condition is disabled)
or (action in ('welfare_detail_click_comment','welfare_detail_click_all_user_case','on_click_diary_card','welfare_detail_click_seller_service_item'))-- diary, review and horizontally scrolled services
or (action ='on_click_button' AND params['page_name'] in ('welfare_detail') AND params['popup_name'] in ('consult_reserved','consult_call') AND params['trigger'] in ('stay_long','uncontact','view_step') AND params['button_name'] in ('message','call','reserved'))-- phone and private message popup
or (action='on_click_card' AND params['card_content_type']='service' AND params['tab_name']='推荐' AND page_name = 'welfare_detail')-- recommended service
or (action ='on_click_favor' AND page_name = 'welfare_detail' AND params['favor_type']='service' AND params['motion']='do'))-- add to favourites
and (referrer in ('report_result','face_detect_result','float_tag_detail') or
(params['referrer_link'] like '%[%' and -- referrer_link condition added because referrer is missing on some pages
json_split(params['referrer_link'])[size(json_split(params['referrer_link']))-1] in ('report_result','face_detect_result','float_tag_detail')))
group by partition_date,cl_id
union all
-- Branch 3: buttons on the result pages that lead to other AI features or to video consultation.
-- Counted per distinct time_str, i.e. per click event rather than per content id.
SELECT concat_ws('-',substr(partition_date,1,4),substr(partition_date,5,2),substr(partition_date,7,2)) as partition_date,cl_id,count(distinct time_str) as gc_num
FROM online.bl_hdfs_maidian_updates
WHERE partition_date >= REGEXP_REPLACE(trunc(date_sub(current_date(),1),'MM'),'-','')
and partition_date <= regexp_replace(DATE_SUB(current_date,1) ,'-','')
AND action = 'on_click_button'
and params['page_name'] in ('report_result','face_detect_result') -- skin-test result page: done, test again, face scan, video consultation
AND params['button_name'] in ('ai_function','video_interview','AI测肤质','视频面诊','AI测颜值')
group by cl_id,partition_date
union all
-- Branch 4: from skin test or face score, clicked professional Q and A or a discussion group through to the search or group page
select concat_ws('-',substr(partition_date,1,4),substr(partition_date,5,2),substr(partition_date,7,2)) as partition_date,cl_id,count(distinct business_id) as gc_num
from online.bl_hdfs_maidian_updates
where partition_date >= REGEXP_REPLACE(trunc(date_sub(current_date(),1),'MM'),'-','')
and partition_date <= regexp_replace(DATE_SUB(current_date,1) ,'-','')
and ((action='page_view' and page_stay>=20)
or (action='on_click_card' and params['card_type']='card'))
and page_name in ('search_result_question_answer','topic_aggregation')
-- NOTE(review): the referrer list omits float_tag_detail while the referrer_link list below includes it - confirm which is intended
and (referrer in ('report_result','face_detect_result') or
(params['referrer_link'] like '%[%' and -- referrer_link condition added because referrer is missing on some pages
json_split(params['referrer_link'])[size(json_split(params['referrer_link']))-1] in ('report_result','face_detect_result','float_tag_detail')))
group by partition_date,cl_id
union all
-- Branch 5: the three buttons under the plan on the simulated plastic surgery result page
select concat_ws('-',substr(partition_date,1,4),substr(partition_date,5,2),substr(partition_date,7,2)) as partition_date,cl_id,count(distinct time_str) as gc_num
from online.bl_hdfs_maidian_updates
where partition_date >= REGEXP_REPLACE(trunc(date_sub(current_date(),1),'MM'),'-','')
and partition_date <= regexp_replace(DATE_SUB(current_date,1) ,'-','')
and action='on_click_button'
and page_name = 'float_tag_detail'
and params['button_name'] in ('consult','bargain','commodity_ranking')
group by partition_date,cl_id
)t4
on t1.partition_date=t4.partition_date and t1.device_id=t4.cl_id
left join
( -- blacklisted devices, taken from the partition of yesterday
select distinct device_id
from ML.ML_D_CT_DV_DEVICECLEAN_DIMEN_D
where PARTITION_DAY = regexp_replace(DATE_SUB(current_date,1) ,'-','')
AND is_abnormal_device = 'true'
)spam_pv
on t1.device_id =spam_pv.device_id
-- anti-join: keep only devices that are not on the blacklist
WHERE spam_pv.device_id IS NULL
GROUP BY
t1.partition_date
)t
order by `日期`;
-- Home page content core data.
-- Daily report, one output row per partition_date, covering the first day of the month
-- that contains yesterday through yesterday.
-- For each combination of OS (iOS, android), channel (AI, medical aesthetics) and
-- device type (new, returning) two ratios are reported, rounded to 2 decimals:
--   * devices that clicked a home card / devices that viewed the home page
--   * distinct (device, content) good clicks / devices that clicked a home card
-- nvl turns a NULL ratio into 0.
SELECT partition_date as `日期`
,nvl(round(ios_ym_new_click_uv/ios_ym_new_sy_uv,2),0) as `iOS医美新用户首页卡片点击uv/ios医美新用户首页uv`
,nvl(round(ios_ym_new_good_click/ios_ym_new_card_click_uv,2),0) as `iOS医美新用户good click 卡片数uv/iOS医美新用户首页点击卡片数uv`
,nvl(round(ios_ym_old_click_uv/ios_ym_old_sy_uv,2),0) as `iOS医美老用户首页卡片点击uv/iOS医美老用户首页uv`
,nvl(round(ios_ym_old_good_click/ios_ym_old_card_click_uv,2),0) as `iOS医美老用户good click 卡片数uv/iOS医美老用户首页点击卡片数uv`
,nvl(round(ios_ai_new_click_uv/ios_ai_new_sy_uv,2),0) as `iOS AI新用户首页卡片点击uv/iOS AI新用户首页uv`
,nvl(round(ios_ai_new_good_click/ios_ai_new_card_click_uv,2),0) as `iOS AI新用户good click 卡片数uv/iOS AI新用户首页点击卡片数uv`
,nvl(round(ios_ai_old_click_uv/ios_ai_old_sy_uv,2),0) as `iOS AI老用户首页卡片点击uv/iOS AI老用户首页uv`
,nvl(round(ios_ai_old_good_click/ios_ai_old_card_click_uv,2),0) as `iOS AI老用户good click 卡片数uv/iOS AI老用户首页点击卡片数uv`
,nvl(round(andr_ai_new_click_uv/andr_ai_new_sy_uv,2),0) as `AI安卓新用户首页卡片点击uv/AI安卓新用户首页uv`
,nvl(round(andr_ai_new_good_click/andr_ai_new_card_click_uv,2),0) as `AI安卓新用户信息流good click 卡片数uv/AI安卓新用户首页点击卡片数uv`
,nvl(round(andr_ai_old_click_uv/andr_ai_old_sy_uv,2),0) as `AI安卓老用户首页卡片点击uv/AI安卓老用户首页uv`
,nvl(round(andr_ai_old_good_click/andr_ai_old_card_click_uv,2),0) as `AI安卓老用户信息流good click 卡片数uv/AI安卓老用户首页点击卡片数uv`
,nvl(round(andr_ym_new_click_uv/andr_ym_new_sy_uv,2),0) as `安卓医美新用户首页卡片点击uv/安卓医美新用户首页uv`
,nvl(round(andr_ym_new_good_click/andr_ym_new_card_click_uv,2),0) as `安卓医美新用户信息流good click 卡片数uv/安卓医美新用户首页点击卡片数uv`
,nvl(round(andr_ym_old_click_uv/andr_ym_old_sy_uv,2),0) as `安卓医美老用户首页卡片点击uv/安卓医美老用户首页uv`
,nvl(round(andr_ym_old_good_click/andr_ym_old_card_click_uv,2),0) as `安卓医美老用户信息流good click 卡片数uv/安卓医美老用户首页点击卡片数uv`
FROM
(
-- Every metric below is a count(distinct ...), so row duplication in the joins cannot inflate it.
-- NOTE(review): each *_card_click_uv uses the same expression as the matching *_click_uv
-- (distinct clicking devices) - confirm whether distinct clicked cards was intended.
SELECT t1.partition_date
,count(distinct case when t1.device_os_type='ios' and t1.channel='医美' and t1.device_type='新增' then t4.device_id end) as ios_ym_new_click_uv -- iOS, medical aesthetics, new devices: home card click UV
,count(distinct case when t1.device_os_type='ios' and t1.channel='医美' and t1.device_type='新增' then t3.device_id end) as ios_ym_new_sy_uv -- iOS, medical aesthetics, new devices: home page view UV
,count(distinct case when t1.device_os_type='ios' and t1.channel='医美' and t1.device_type='新增' then array(t5.cl_id,t5.business_id) end) as ios_ym_new_good_click -- iOS, medical aesthetics, new devices: good clicks
,count(distinct case when t1.device_os_type='ios' and t1.channel='医美' and t1.device_type='新增' then t4.device_id end) as ios_ym_new_card_click_uv -- iOS, medical aesthetics, new devices: home card click UV (denominator of the good-click ratio)
,count(distinct case when t1.device_os_type='ios' and t1.channel='医美' and t1.device_type='老活' then t4.device_id end) as ios_ym_old_click_uv -- iOS, medical aesthetics, returning devices: home card click UV
,count(distinct case when t1.device_os_type='ios' and t1.channel='医美' and t1.device_type='老活' then t3.device_id end) as ios_ym_old_sy_uv -- iOS, medical aesthetics, returning devices: home page view UV
,count(distinct case when t1.device_os_type='ios' and t1.channel='医美' and t1.device_type='老活' then array(t5.cl_id,t5.business_id) end) as ios_ym_old_good_click -- iOS, medical aesthetics, returning devices: good clicks
,count(distinct case when t1.device_os_type='ios' and t1.channel='医美' and t1.device_type='老活' then t4.device_id end) as ios_ym_old_card_click_uv -- iOS, medical aesthetics, returning devices: home card click UV (denominator of the good-click ratio)
,count(distinct case when t1.device_os_type='ios' and t1.channel='AI' and t1.device_type='新增' then t4.device_id end) as ios_ai_new_click_uv -- iOS, AI channel, new devices: home card click UV
,count(distinct case when t1.device_os_type='ios' and t1.channel='AI' and t1.device_type='新增' then t3.device_id end) as ios_ai_new_sy_uv -- iOS, AI channel, new devices: home page view UV
,count(distinct case when t1.device_os_type='ios' and t1.channel='AI' and t1.device_type='新增' then array(t5.cl_id,t5.business_id) end) as ios_ai_new_good_click -- iOS, AI channel, new devices: good clicks
,count(distinct case when t1.device_os_type='ios' and t1.channel='AI' and t1.device_type='新增' then t4.device_id end) as ios_ai_new_card_click_uv -- iOS, AI channel, new devices: home card click UV (denominator of the good-click ratio)
,count(distinct case when t1.device_os_type='ios' and t1.channel='AI' and t1.device_type='老活' then t4.device_id end) as ios_ai_old_click_uv -- iOS, AI channel, returning devices: home card click UV
,count(distinct case when t1.device_os_type='ios' and t1.channel='AI' and t1.device_type='老活' then t3.device_id end) as ios_ai_old_sy_uv -- iOS, AI channel, returning devices: home page view UV
,count(distinct case when t1.device_os_type='ios' and t1.channel='AI' and t1.device_type='老活' then array(t5.cl_id,t5.business_id) end) as ios_ai_old_good_click -- iOS, AI channel, returning devices: good clicks
,count(distinct case when t1.device_os_type='ios' and t1.channel='AI' and t1.device_type='老活' then t4.device_id end) as ios_ai_old_card_click_uv -- iOS, AI channel, returning devices: home card click UV (denominator of the good-click ratio)
,count(distinct case when t1.device_os_type='android' and t1.channel='AI' and t1.device_type='新增' then t4.device_id end) as andr_ai_new_click_uv -- android, AI channel, new devices: home card click UV
,count(distinct case when t1.device_os_type='android' and t1.channel='AI' and t1.device_type='新增' then t3.device_id end) as andr_ai_new_sy_uv -- android, AI channel, new devices: home page view UV
,count(distinct case when t1.device_os_type='android' and t1.channel='AI' and t1.device_type='新增' then array(t5.cl_id,t5.business_id) end) as andr_ai_new_good_click -- android, AI channel, new devices: good clicks
,count(distinct case when t1.device_os_type='android' and t1.channel='AI' and t1.device_type='新增' then t4.device_id end) as andr_ai_new_card_click_uv -- android, AI channel, new devices: home card click UV (denominator of the good-click ratio)
,count(distinct case when t1.device_os_type='android' and t1.channel='AI' and t1.device_type='老活' then t4.device_id end) as andr_ai_old_click_uv -- android, AI channel, returning devices: home card click UV
,count(distinct case when t1.device_os_type='android' and t1.channel='AI' and t1.device_type='老活' then t3.device_id end) as andr_ai_old_sy_uv -- android, AI channel, returning devices: home page view UV
,count(distinct case when t1.device_os_type='android' and t1.channel='AI' and t1.device_type='老活' then array(t5.cl_id,t5.business_id) end) as andr_ai_old_good_click -- android, AI channel, returning devices: good clicks
,count(distinct case when t1.device_os_type='android' and t1.channel='AI' and t1.device_type='老活' then t4.device_id end) as andr_ai_old_card_click_uv -- android, AI channel, returning devices: home card click UV (denominator of the good-click ratio)
,count(distinct case when t1.device_os_type='android' and t1.channel='医美' and t1.device_type='新增' then t4.device_id end) as andr_ym_new_click_uv -- android, medical aesthetics, new devices: home card click UV
,count(distinct case when t1.device_os_type='android' and t1.channel='医美' and t1.device_type='新增' then t3.device_id end) as andr_ym_new_sy_uv -- android, medical aesthetics, new devices: home page view UV
,count(distinct case when t1.device_os_type='android' and t1.channel='医美' and t1.device_type='新增' then array(t5.cl_id,t5.business_id) end) as andr_ym_new_good_click -- android, medical aesthetics, new devices: good clicks
,count(distinct case when t1.device_os_type='android' and t1.channel='医美' and t1.device_type='新增' then t4.device_id end) as andr_ym_new_card_click_uv -- android, medical aesthetics, new devices: home card click UV (denominator of the good-click ratio)
,count(distinct case when t1.device_os_type='android' and t1.channel='医美' and t1.device_type='老活' then t4.device_id end) as andr_ym_old_click_uv -- android, medical aesthetics, returning devices: home card click UV
,count(distinct case when t1.device_os_type='android' and t1.channel='医美' and t1.device_type='老活' then t3.device_id end) as andr_ym_old_sy_uv -- android, medical aesthetics, returning devices: home page view UV
,count(distinct case when t1.device_os_type='android' and t1.channel='医美' and t1.device_type='老活' then array(t5.cl_id,t5.business_id) end) as andr_ym_old_good_click -- android, medical aesthetics, returning devices: good clicks
,count(distinct case when t1.device_os_type='android' and t1.channel='医美' and t1.device_type='老活' then t4.device_id end) as andr_ym_old_card_click_uv -- android, medical aesthetics, returning devices: home card click UV (denominator of the good-click ratio)
FROM
( -- t1: active devices per day, split by channel and by new vs returning
-- partition_date is reformatted from yyyyMMdd to yyyy-MM-dd here and in t3, t4 and t5
SELECT concat_ws('-',substr(partition_date,1,4),substr(partition_date,5,2),substr(partition_date,7,2)) as partition_date
,device_os_type,device_id
-- active_type 4 is labelled as returning active, 1 and 2 as newly added
,CASE WHEN active_type = '4' THEN '老活' when active_type in ('1','2') then '新增' END as device_type
-- AI when the channel dimension flags the code as AI, otherwise the medical-aesthetics label
,CASE WHEN is_ai_channel = 'true' THEN 'AI' ELSE '医美' END as channel
FROM online.ml_device_day_active_status
LEFT JOIN
-- channel dimension, matched per day on the first channel source code
(SELECT code,is_ai_channel,partition_day
FROM DIM.DIM_AI_CHANNEL_ZP_NEW
WHERE partition_day>= REGEXP_REPLACE(trunc(date_sub(current_date(),1),'MM'),'-','')
AND partition_day <= regexp_replace(DATE_SUB(current_date,1) ,'-','')) tmp
ON partition_date=tmp.partition_day AND first_channel_source_type=code
WHERE partition_date >= REGEXP_REPLACE(trunc(date_sub(current_date(),1),'MM'),'-','')
and partition_date <= regexp_replace(DATE_SUB(current_date,1) ,'-','')
AND active_type IN ('1','2','4')
-- exclude a fixed list of channel source codes plus the empty string and unknown
-- (presumably promotion or low-quality traffic sources - confirm with the data owner)
AND first_channel_source_type not IN ('yqxiu1','yqxiu2','yqxiu3','yqxiu4','yqxiu5','mxyc1','mxyc2','mxyc3'
,'wanpu','jinshan','jx','maimai','zhuoyi','huatian','suopingjingling','mocha','mizhe','meika','lamabang'
,'js-az1','js-az2','js-az3','js-az4','js-az5','jfq-az1','jfq-az2','jfq-az3','jfq-az4','jfq-az5','toufang1'
,'toufang2','toufang3','toufang4','toufang5','toufang6','TF-toufang1','TF-toufang2','TF-toufang3','TF-toufang4'
,'TF-toufang5','tf-toufang1','tf-toufang2','tf-toufang3','tf-toufang4','tf-toufang5','benzhan','promotion_aso100'
,'promotion_qianka','promotion_xiaoyu','promotion_dianru','promotion_malioaso','promotion_malioaso-shequ'
,'promotion_shike','promotion_julang_jl03','promotion_zuimei','','unknown')
-- also exclude every code that starts with promotion_jf_ (underscores are escaped to match literally)
AND first_channel_source_type not LIKE 'promotion\_jf\_%'
)t1
LEFT JOIN
( -- t2: blacklisted devices, taken from the partition of yesterday
select distinct device_id
from ML.ML_D_CT_DV_DEVICECLEAN_DIMEN_D
where PARTITION_DAY =regexp_replace(DATE_SUB(current_date,1) ,'-','')
AND is_abnormal_device = 'true'
)t2
on t1.device_id=t2.device_id
LEFT JOIN
( -- t3: home page views, exactly one row per day and device.
-- Grouping only by the selected columns avoids emitting one duplicate row per card id,
-- which would multiply rows in the joins to t4 and t5 without changing any distinct count.
SELECT concat_ws('-',substr(partition_date,1,4),substr(partition_date,5,2),substr(partition_date,7,2)) as partition_date
,cl_id as device_id
FROM online.bl_hdfs_maidian_updates
where partition_date>=REGEXP_REPLACE(trunc(date_sub(current_date(),1),'MM'),'-','')
and partition_date<=regexp_replace(DATE_SUB(current_date,1) ,'-','')
AND action = 'page_view'
AND page_name='home'
group by concat_ws('-',substr(partition_date,1,4),substr(partition_date,5,2),substr(partition_date,7,2)),cl_id
)t3
on t3.partition_date=t1.partition_date and t3.device_id=t1.device_id
LEFT JOIN
( -- t4: home feed card clicks, one row per day, device and card
SELECT concat_ws('-',substr(partition_date,1,4),substr(partition_date,5,2),substr(partition_date,7,2)) as partition_date
,cl_id as device_id
,params['card_id'] as card_id
,count(distinct array(params['card_id'],app_session_id)) as click_pv
FROM online.bl_hdfs_maidian_updates
where partition_date>=REGEXP_REPLACE(trunc(date_sub(current_date(),1),'MM'),'-','')
and partition_date<=regexp_replace(DATE_SUB(current_date,1) ,'-','')
and action='on_click_card'
and page_name='home'
and params['card_type']='card'
group by partition_date
,cl_id
,params['card_id']
UNION ALL
-- click tracking events are sometimes lost, so detail page views referred from home are added as clicks
SELECT concat_ws('-',substr(partition_date,1,4),substr(partition_date,5,2),substr(partition_date,7,2)) as partition_date
,cl_id as device_id
,params['business_id'] as card_id
-- NOTE(review): click_pv still keys on params card_id although this branch identifies the card
-- by business_id - click_pv is not read by the outer query, so results are unaffected
,count(distinct array(params['card_id'],app_session_id)) as click_pv
FROM online.bl_hdfs_maidian_updates
where partition_date>=REGEXP_REPLACE(trunc(date_sub(current_date(),1),'MM'),'-','')
and partition_date<=regexp_replace(DATE_SUB(current_date,1) ,'-','')
and action='page_view'
and (referrer in ('home') or
(params['referrer_link'] like '%[%' and
-- last element of the referrer_link array (json_split looks like a project UDF returning an array - confirm)
json_split(params['referrer_link'])[size(json_split(params['referrer_link']))-1] in ('home')))
and page_name in ('diary_detail','topic_detail','post_detail','user_post_detail','doctor_post_detail','question_detail','answer_detail','question_answer_detail')
group by partition_date
,cl_id
,params['business_id']
)t4
-- clicks are attached to t3, so they only count for devices that also have a home page view that day
on t3.partition_date=t4.partition_date and t3.device_id=t4.device_id
LEFT JOIN
( -- t5: good-click logic for content, one row per day, device and content id
select concat_ws('-',substr(a.partition_date,1,4),substr(a.partition_date,5,2),substr(a.partition_date,7,2)) as partition_date
,a.cl_id,a.business_id
from
(
-- content_type stays NULL for the listed pages that have no mapping (question, video and article pages)
select *,case when page_name in ('diary_detail','topic_detail') THEN 'diary'
when page_name in ('post_detail','user_post_detail','doctor_post_detail') THEN 'post'
when page_name in ('answer_detail') THEN 'answer' end content_type
FROM ONLINE.BL_HDFS_MAIDIAN_UPDATES
-- NOTE(review): no upper bound on the partition here, unlike t1, t3 and t4
WHERE PARTITION_DATE >= REGEXP_REPLACE(trunc(date_sub(current_date(),1),'MM'),'-','')
and (referrer='home' or
(params['referrer_link'] like '%[%' and
json_split(params['referrer_link'])[size(json_split(params['referrer_link']))-1]='home'))
and page_name in ('diary_detail','topic_detail','post_detail','user_post_detail','doctor_post_detail','question_detail','answer_detail','question_answer_detail',
'video_steep','article_detail')
)a
left join
(-- comment tracking events are incomplete, so comments are supplemented from the business database
-- for diaries the diary-book id (diary_id) is used, because after clicking a card on the home page the user first lands on the diary card
SELECT diary_id as content_id,'diary' as type,user_id,create_date
FROM
(
SELECT id,problem_id,user_id,regexp_replace(substr(reply_date,1,10),'-','') as create_date
FROM online.tl_hdfs_topicreply_view
WHERE partition_date = regexp_replace(DATE_SUB(current_date,1) ,'-','')
AND is_spam = 'false' -- exclude suspected ads
and is_online='true'
and regexp_replace(substr(reply_date,1,10),'-','') >= REGEXP_REPLACE(trunc(date_sub(current_date(),1),'MM'),'-','')
group by id,problem_id,user_id,reply_date
)t1
JOIN
(
SELECT id,diary_id
FROM online.tl_hdfs_problem_view
WHERE partition_date = regexp_replace(DATE_SUB(current_date,1) ,'-','')
group by id,diary_id
)t2
on t2.id=t1.problem_id
group by diary_id,user_id,create_date
UNION ALL
-- users that commented on an answer, excluding suspected ads
SELECT answer_id as content_id,'answer' as type,user_id,regexp_replace(substr(create_time,1,10),'-','') as create_date
FROM online.tl_hdfs_answer_reply_view
WHERE partition_date =regexp_replace(DATE_SUB(current_date,1) ,'-','')
and (is_fake is NULL or is_fake = 'false')
AND answer_id is not NULL
and is_online='true'
and is_spam = 'false' -- exclude suspected ads
and regexp_replace(substr(create_time,1,10),'-','') >= REGEXP_REPLACE(trunc(date_sub(current_date(),1),'MM'),'-','')
group by answer_id,user_id,create_time
UNION ALL
-- users that commented on a user post
SELECT tractate_id as content_id,'post' as type,user_id,regexp_replace(substr(create_time,1,10),'-','') as create_date
FROM online.tl_hdfs_api_tractate_reply_view
WHERE partition_date =regexp_replace(DATE_SUB(current_date,1) ,'-','')
and regexp_replace(substr(create_time,1,10),'-','') >= REGEXP_REPLACE(trunc(date_sub(current_date(),1),'MM'),'-','')
and is_online='true'
group by tractate_id,user_id,create_time
)b
-- both sides are still in yyyyMMdd form at this point
on a.business_id=b.content_id and a.partition_date=b.create_date and a.user_id=b.user_id and a.content_type=b.type
WHERE ((action='page_view' and page_stay>=20)-- page stay of at least 20 (the original note says about 20s)
or action in ('on_click_navbar_search','do_search') -- second hop: click on the search box or the search button
or (action='on_click_card' and params['card_type']='highlight_word')-- second hop: highlighted word
or (action='on_click_card' and params['card_content_type']in ('service','qa','diary','user_post','answer'))-- second hop: card click
or (action='on_click_button' and params['button_name'] in ('video_interview','referral'))-- second hop: referral and video consultation buttons
or (action='on_click_favor' and params['motion']='do')-- add to favourites
or action='page_click_share'-- share
or (action='on_click_vote' and params['motion']='vote')-- like
or b.user_id is not null )-- commented on the content on the same day
group by a.partition_date,a.cl_id,a.business_id
)t5
-- a good click only counts for a card that was clicked from home by the same device on the same day
on t4.partition_date=t5.partition_date and t4.device_id=t5.cl_id and t4.card_id=t5.business_id
-- anti-join: keep only devices that are not on the blacklist
WHERE t2.device_id is null
group by t1.partition_date
)t
order by `日期`;
\ No newline at end of file
-- Search content core data (daily report).
-- Every ratio below is <good-click count> / <distinct clicking devices> for one device segment and
-- one search tab, rounded to 2 decimals; NVL replaces a NULL ratio with 0.
-- The "post" tab columns are emitted as the literal placeholder '-'.
SELECT
    t.partition_date AS `日期`
    -- returning devices (old_* aggregates)
    , NVL(ROUND(old_diary_gc_uv / old_diary_click_uv, 2), 0) AS `老用户在日记页卡good click 卡片数uv/日记页卡点击uv`
    , NVL(ROUND(old_qa_gc_uv / old_qa_click_uv, 2), 0) AS `老用户在问答页卡good click 卡片数uv/问答页卡点击uv`
    , '-' AS `老用户在帖子页卡good click 卡片数uv/帖子页卡点击uv`
    , NVL(ROUND(old_wiki_gc_uv / old_wiki_click_uv, 2), 0) AS `老用户在百科页卡good click 卡片数uv/百科页卡点击uv`
    -- iOS, medical-aesthetics channel, new devices
    , NVL(ROUND(ios_ym_new_diary_gc_uv / ios_ym_new_diary_click_uv, 2), 0) AS `ios医美新在日记页卡good click 卡片数uv/搜索日记页结果页点击设备数uv`
    , NVL(ROUND(ios_ym_new_qa_gc_uv / ios_ym_new_qa_click_uv, 2), 0) AS `ios医美新在问答页卡good click 卡片数uv/搜索问答页结果页点击设备数uv`
    -- disabled columns (iOS, medical-aesthetics channel, returning devices), kept verbatim:
    -- ,ios_ym_old_diary_gc_uv/ios_ym_old_diary_click_uv as `ios医美老在日记页卡good click 卡片数uv/搜索日记页结果页点击设备数uv`
    -- ,ios_ym_old_qa_gc_uv/ios_ym_old_qa_click_uv as `ios医美老在问答页卡good click 卡片数uv/搜索问答页结果页点击设备数uv`
    , '-' AS `ios医美新在帖子页卡good click 卡片数uv/搜索帖子页结果页点击设备数uv`
    , NVL(ROUND(ios_ym_new_wiki_gc_uv / ios_ym_new_wiki_click_uv, 2), 0) AS `ios医美新在百科页卡good click 卡片数uv/搜索百科页结果页点击设备数uv`
    -- Android, medical-aesthetics channel, new devices
    , NVL(ROUND(andr_ym_new_diary_gc_uv / andr_ym_new_diary_click_uv, 2), 0) AS `android医美新在日记页卡good click 卡片数uv/搜索日记页结果页点击设备数uv`
    , NVL(ROUND(andr_ym_new_qa_gc_uv / andr_ym_new_qa_click_uv, 2), 0) AS `android医美新在问答页卡good click 卡片数uv/搜索问答页结果页点击设备数uv`
    -- disabled columns (Android, medical-aesthetics channel, returning devices), kept verbatim:
    -- ,andr_ym_old_diary_gc_uv/andr_ym_old_diary_click_uv as `android医美老在日记页卡good click 卡片数uv/搜索日记页结果页点击设备数uv`
    -- ,andr_ym_old_qa_gc_uv/andr_ym_old_qa_click_uv as `android医美老在问答页卡good click 卡片数uv/搜索问答页结果页点击设备数uv`
    -- iOS, AI channel, new devices
    , NVL(ROUND(ios_ai_new_diary_gc_uv / ios_ai_new_diary_click_uv, 2), 0) AS `iosAI新在日记页卡good click 卡片数uv/搜索日记页结果页点击设备数uv`
    , NVL(ROUND(ios_ai_new_qa_gc_uv / ios_ai_new_qa_click_uv, 2), 0) AS `iosAI新在问答页卡good click 卡片数uv/搜索问答页结果页点击设备数uv`
    -- disabled columns (iOS, AI channel, returning devices), kept verbatim:
    -- ,ios_ai_old_diary_gc_uv/ios_ai_old_diary_click_uv as `iosAI老在日记页卡good click 卡片数uv/搜索日记页结果页点击设备数uv`
    -- ,ios_ai_old_qa_gc_uv/ios_ai_old_qa_click_uv as `iosAI老在问答页卡good click 卡片数uv/搜索问答页结果页点击设备数uv`
    -- Android, AI channel, new devices
    , NVL(ROUND(andr_ai_new_diary_gc_uv / andr_ai_new_diary_click_uv, 2), 0) AS `android AI新在日记页卡good click 卡片数uv/搜索日记页结果页点击设备数uv`
    , NVL(ROUND(andr_ai_new_qa_gc_uv / andr_ai_new_qa_click_uv, 2), 0) AS `android AI新在问答页卡good click 卡片数uv/搜索问答页结果页点击设备数uv`
    -- disabled columns (Android, AI channel, returning devices), kept verbatim:
    -- ,andr_ai_old_diary_gc_uv/andr_ai_old_diary_click_uv as `android AI老在日记页卡good click 卡片数uv/搜索日记页结果页点击设备数uv`
    -- ,andr_ai_old_qa_gc_uv/andr_ai_old_qa_click_uv as `android AI老在问答页卡good click 卡片数uv/搜索问答页结果页点击设备数uv`
FROM
(
    -- Per-day aggregates over the joined device / click / good-click rows.
    --   *_gc_uv    : COUNT of non-NULL cl_id coming from the good-click join of that content type
    --                (t4 = diary, t5 = answer/QA, t6 = post, t7 = wiki). Not DISTINCT: it counts joined rows.
    --   *_click_uv : COUNT(DISTINCT t3.cl_id) over devices whose click count on that tab is positive.
    -- Segment naming: ios/andr = device_os_type, ym = channel '医美', ai = channel 'AI',
    -- new = device_type '新增', old = device_type '老活'.
    SELECT
        t1.partition_date
        -- returning devices, all platforms and channels
        , COUNT(CASE WHEN device_type = '老活' THEN t4.cl_id END) AS old_diary_gc_uv
        , COUNT(DISTINCT CASE WHEN device_type = '老活' AND diary_click_pv > 0 THEN t3.cl_id END) AS old_diary_click_uv
        , COUNT(CASE WHEN device_type = '老活' THEN t5.cl_id END) AS old_qa_gc_uv
        , COUNT(DISTINCT CASE WHEN device_type = '老活' AND qa_click_pv > 0 THEN t3.cl_id END) AS old_qa_click_uv
        , COUNT(CASE WHEN device_type = '老活' THEN t6.cl_id END) AS old_post_gc_uv
        , COUNT(DISTINCT CASE WHEN device_type = '老活' AND post_click_pv > 0 THEN t3.cl_id END) AS old_post_click_uv
        , COUNT(CASE WHEN device_type = '老活' THEN t7.cl_id END) AS old_wiki_gc_uv
        , COUNT(DISTINCT CASE WHEN device_type = '老活' AND wiki_click_pv > 0 THEN t3.cl_id END) AS old_wiki_click_uv
        -- iOS / '医美' channel: diary and QA tabs
        , COUNT(CASE WHEN device_os_type = 'ios' AND channel = '医美' AND device_type = '新增' THEN t4.cl_id END) AS ios_ym_new_diary_gc_uv
        , COUNT(DISTINCT CASE WHEN device_os_type = 'ios' AND channel = '医美' AND device_type = '新增' AND diary_click_pv > 0 THEN t3.cl_id END) AS ios_ym_new_diary_click_uv
        , COUNT(CASE WHEN device_os_type = 'ios' AND channel = '医美' AND device_type = '新增' THEN t5.cl_id END) AS ios_ym_new_qa_gc_uv
        , COUNT(DISTINCT CASE WHEN device_os_type = 'ios' AND channel = '医美' AND device_type = '新增' AND qa_click_pv > 0 THEN t3.cl_id END) AS ios_ym_new_qa_click_uv
        , COUNT(CASE WHEN device_os_type = 'ios' AND channel = '医美' AND device_type = '老活' THEN t4.cl_id END) AS ios_ym_old_diary_gc_uv
        , COUNT(DISTINCT CASE WHEN device_os_type = 'ios' AND channel = '医美' AND device_type = '老活' AND diary_click_pv > 0 THEN t3.cl_id END) AS ios_ym_old_diary_click_uv
        , COUNT(CASE WHEN device_os_type = 'ios' AND channel = '医美' AND device_type = '老活' THEN t5.cl_id END) AS ios_ym_old_qa_gc_uv
        , COUNT(DISTINCT CASE WHEN device_os_type = 'ios' AND channel = '医美' AND device_type = '老活' AND qa_click_pv > 0 THEN t3.cl_id END) AS ios_ym_old_qa_click_uv
        -- Android / '医美' channel: diary and QA tabs
        , COUNT(CASE WHEN device_os_type = 'android' AND channel = '医美' AND device_type = '新增' THEN t4.cl_id END) AS andr_ym_new_diary_gc_uv
        , COUNT(DISTINCT CASE WHEN device_os_type = 'android' AND channel = '医美' AND device_type = '新增' AND diary_click_pv > 0 THEN t3.cl_id END) AS andr_ym_new_diary_click_uv
        , COUNT(CASE WHEN device_os_type = 'android' AND channel = '医美' AND device_type = '新增' THEN t5.cl_id END) AS andr_ym_new_qa_gc_uv
        , COUNT(DISTINCT CASE WHEN device_os_type = 'android' AND channel = '医美' AND device_type = '新增' AND qa_click_pv > 0 THEN t3.cl_id END) AS andr_ym_new_qa_click_uv
        , COUNT(CASE WHEN device_os_type = 'android' AND channel = '医美' AND device_type = '老活' THEN t4.cl_id END) AS andr_ym_old_diary_gc_uv
        , COUNT(DISTINCT CASE WHEN device_os_type = 'android' AND channel = '医美' AND device_type = '老活' AND diary_click_pv > 0 THEN t3.cl_id END) AS andr_ym_old_diary_click_uv
        , COUNT(CASE WHEN device_os_type = 'android' AND channel = '医美' AND device_type = '老活' THEN t5.cl_id END) AS andr_ym_old_qa_gc_uv
        , COUNT(DISTINCT CASE WHEN device_os_type = 'android' AND channel = '医美' AND device_type = '老活' AND qa_click_pv > 0 THEN t3.cl_id END) AS andr_ym_old_qa_click_uv
        -- iOS / 'AI' channel: diary and QA tabs
        , COUNT(CASE WHEN device_os_type = 'ios' AND channel = 'AI' AND device_type = '新增' THEN t4.cl_id END) AS ios_ai_new_diary_gc_uv
        , COUNT(DISTINCT CASE WHEN device_os_type = 'ios' AND channel = 'AI' AND device_type = '新增' AND diary_click_pv > 0 THEN t3.cl_id END) AS ios_ai_new_diary_click_uv
        , COUNT(CASE WHEN device_os_type = 'ios' AND channel = 'AI' AND device_type = '新增' THEN t5.cl_id END) AS ios_ai_new_qa_gc_uv
        , COUNT(DISTINCT CASE WHEN device_os_type = 'ios' AND channel = 'AI' AND device_type = '新增' AND qa_click_pv > 0 THEN t3.cl_id END) AS ios_ai_new_qa_click_uv
        , COUNT(CASE WHEN device_os_type = 'ios' AND channel = 'AI' AND device_type = '老活' THEN t4.cl_id END) AS ios_ai_old_diary_gc_uv
        , COUNT(DISTINCT CASE WHEN device_os_type = 'ios' AND channel = 'AI' AND device_type = '老活' AND diary_click_pv > 0 THEN t3.cl_id END) AS ios_ai_old_diary_click_uv
        , COUNT(CASE WHEN device_os_type = 'ios' AND channel = 'AI' AND device_type = '老活' THEN t5.cl_id END) AS ios_ai_old_qa_gc_uv
        , COUNT(DISTINCT CASE WHEN device_os_type = 'ios' AND channel = 'AI' AND device_type = '老活' AND qa_click_pv > 0 THEN t3.cl_id END) AS ios_ai_old_qa_click_uv
        -- Android / 'AI' channel: diary and QA tabs
        , COUNT(CASE WHEN device_os_type = 'android' AND channel = 'AI' AND device_type = '新增' THEN t4.cl_id END) AS andr_ai_new_diary_gc_uv
        , COUNT(DISTINCT CASE WHEN device_os_type = 'android' AND channel = 'AI' AND device_type = '新增' AND diary_click_pv > 0 THEN t3.cl_id END) AS andr_ai_new_diary_click_uv
        , COUNT(CASE WHEN device_os_type = 'android' AND channel = 'AI' AND device_type = '新增' THEN t5.cl_id END) AS andr_ai_new_qa_gc_uv
        , COUNT(DISTINCT CASE WHEN device_os_type = 'android' AND channel = 'AI' AND device_type = '新增' AND qa_click_pv > 0 THEN t3.cl_id END) AS andr_ai_new_qa_click_uv
        , COUNT(CASE WHEN device_os_type = 'android' AND channel = 'AI' AND device_type = '老活' THEN t4.cl_id END) AS andr_ai_old_diary_gc_uv
        , COUNT(DISTINCT CASE WHEN device_os_type = 'android' AND channel = 'AI' AND device_type = '老活' AND diary_click_pv > 0 THEN t3.cl_id END) AS andr_ai_old_diary_click_uv
        , COUNT(CASE WHEN device_os_type = 'android' AND channel = 'AI' AND device_type = '老活' THEN t5.cl_id END) AS andr_ai_old_qa_gc_uv
        , COUNT(DISTINCT CASE WHEN device_os_type = 'android' AND channel = 'AI' AND device_type = '老活' AND qa_click_pv > 0 THEN t3.cl_id END) AS andr_ai_old_qa_click_uv
        -- new devices only: post and wiki tabs, per platform and channel
        , COUNT(CASE WHEN device_os_type = 'ios' AND channel = '医美' AND device_type = '新增' THEN t6.cl_id END) AS ios_ym_new_post_gc_uv
        , COUNT(DISTINCT CASE WHEN device_os_type = 'ios' AND channel = '医美' AND device_type = '新增' AND post_click_pv > 0 THEN t3.cl_id END) AS ios_ym_new_post_click_uv
        , COUNT(CASE WHEN device_os_type = 'ios' AND channel = '医美' AND device_type = '新增' THEN t7.cl_id END) AS ios_ym_new_wiki_gc_uv
        , COUNT(DISTINCT CASE WHEN device_os_type = 'ios' AND channel = '医美' AND device_type = '新增' AND wiki_click_pv > 0 THEN t3.cl_id END) AS ios_ym_new_wiki_click_uv
        , COUNT(CASE WHEN device_os_type = 'ios' AND channel = 'AI' AND device_type = '新增' THEN t6.cl_id END) AS ios_ai_new_post_gc_uv
        , COUNT(DISTINCT CASE WHEN device_os_type = 'ios' AND channel = 'AI' AND device_type = '新增' AND post_click_pv > 0 THEN t3.cl_id END) AS ios_ai_new_post_click_uv
        , COUNT(CASE WHEN device_os_type = 'ios' AND channel = 'AI' AND device_type = '新增' THEN t7.cl_id END) AS ios_ai_new_wiki_gc_uv
        , COUNT(DISTINCT CASE WHEN device_os_type = 'ios' AND channel = 'AI' AND device_type = '新增' AND wiki_click_pv > 0 THEN t3.cl_id END) AS ios_ai_new_wiki_click_uv
        , COUNT(CASE WHEN device_os_type = 'android' AND channel = '医美' AND device_type = '新增' THEN t6.cl_id END) AS andr_ym_new_post_gc_uv
        , COUNT(DISTINCT CASE WHEN device_os_type = 'android' AND channel = '医美' AND device_type = '新增' AND post_click_pv > 0 THEN t3.cl_id END) AS andr_ym_new_post_click_uv
        , COUNT(CASE WHEN device_os_type = 'android' AND channel = '医美' AND device_type = '新增' THEN t7.cl_id END) AS andr_ym_new_wiki_gc_uv
        , COUNT(DISTINCT CASE WHEN device_os_type = 'android' AND channel = '医美' AND device_type = '新增' AND wiki_click_pv > 0 THEN t3.cl_id END) AS andr_ym_new_wiki_click_uv
        , COUNT(CASE WHEN device_os_type = 'android' AND channel = 'AI' AND device_type = '新增' THEN t6.cl_id END) AS andr_ai_new_post_gc_uv
        , COUNT(DISTINCT CASE WHEN device_os_type = 'android' AND channel = 'AI' AND device_type = '新增' AND post_click_pv > 0 THEN t3.cl_id END) AS andr_ai_new_post_click_uv
        , COUNT(CASE WHEN device_os_type = 'android' AND channel = 'AI' AND device_type = '新增' THEN t7.cl_id END) AS andr_ai_new_wiki_gc_uv
        , COUNT(DISTINCT CASE WHEN device_os_type = 'android' AND channel = 'AI' AND device_type = '新增' AND wiki_click_pv > 0 THEN t3.cl_id END) AS andr_ai_new_wiki_click_uv
    FROM
(
    -- Daily active devices for the current month up to yesterday, labelled by
    -- new vs. returning (device_type) and by channel ('AI' when the channel dimension flags it, else '医美').
    -- partition_date is reformatted from yyyymmdd to yyyy-mm-dd.
    SELECT
        concat_ws('-', substr(partition_date, 1, 4), substr(partition_date, 5, 2), substr(partition_date, 7, 2)) AS partition_date
        , device_os_type
        , device_id
        , CASE
              WHEN active_type = '4' THEN '老活'
              WHEN active_type IN ('1', '2') THEN '新增'
          END AS device_type
        , CASE WHEN is_ai_channel = 'true' THEN 'AI' ELSE '医美' END AS channel
    FROM online.ml_device_day_active_status
    LEFT JOIN
    (
        -- channel dimension rows for the same date window
        SELECT code, is_ai_channel, partition_day
        FROM DIM.DIM_AI_CHANNEL_ZP_NEW
        WHERE partition_day >= regexp_replace(trunc(date_sub(current_date(), 1), 'MM'), '-', '')
          AND partition_day <= regexp_replace(date_sub(current_date, 1), '-', '')
    ) ai_dim
        ON partition_date = ai_dim.partition_day
       AND first_channel_source_type = code
    WHERE partition_date >= regexp_replace(trunc(date_sub(current_date(), 1), 'MM'), '-', '')
      AND partition_date <= regexp_replace(date_sub(current_date, 1), '-', '')
      AND active_type IN ('1', '2', '4')
      -- excluded first-channel sources
      AND first_channel_source_type NOT IN (
            'yqxiu1', 'yqxiu2', 'yqxiu3', 'yqxiu4', 'yqxiu5', 'mxyc1', 'mxyc2', 'mxyc3',
            'wanpu', 'jinshan', 'jx', 'maimai', 'zhuoyi', 'huatian', 'suopingjingling', 'mocha', 'mizhe', 'meika', 'lamabang',
            'js-az1', 'js-az2', 'js-az3', 'js-az4', 'js-az5', 'jfq-az1', 'jfq-az2', 'jfq-az3', 'jfq-az4', 'jfq-az5', 'toufang1',
            'toufang2', 'toufang3', 'toufang4', 'toufang5', 'toufang6', 'TF-toufang1', 'TF-toufang2', 'TF-toufang3', 'TF-toufang4',
            'TF-toufang5', 'tf-toufang1', 'tf-toufang2', 'tf-toufang3', 'tf-toufang4', 'tf-toufang5', 'benzhan', 'promotion_aso100',
            'promotion_qianka', 'promotion_xiaoyu', 'promotion_dianru', 'promotion_malioaso', 'promotion_malioaso-shequ',
            'promotion_shike', 'promotion_julang_jl03', 'promotion_zuimei', '', 'unknown'
          )
      AND first_channel_source_type NOT LIKE 'promotion\_jf\_%'
) t1
LEFT JOIN
(
    -- Blacklist: devices flagged abnormal in yesterday's partition, one row per device_id.
    -- Joined so that the outer `WHERE t2.device_id is null` can drop them (anti-join).
    SELECT device_id
    FROM ML.ML_D_CT_DV_DEVICECLEAN_DIMEN_D
    WHERE PARTITION_DAY = regexp_replace(date_sub(current_date, 1), '-', '')
      AND is_abnormal_device = 'true'
    GROUP BY device_id
) t2
    ON t2.device_id = t1.device_id
LEFT JOIN
( -- Card clicks on the search result pages.
-- Yields one row per (partition_date, cl_id, card_id, content_type) carrying the summed
-- per-tab click counts of the two UNION ALL branches below (click events + detail-page views).
select partition_date,cl_id,card_id,content_type,sum(diary_click_pv) as diary_click_pv,sum(qa_click_pv) as qa_click_pv
,sum(post_click_pv) as post_click_pv,sum(wiki_click_pv) as wiki_click_pv
from
(
-- Branch 1: click events fired on the four search result tabs.
SELECT concat_ws('-',substr(partition_date,1,4),substr(partition_date,5,2),substr(partition_date,7,2)) as partition_date,cl_id
-- diary card clicks carry their id in params['diary_id']; every other action uses params['card_id']
,case when action='on_click_diary_card' then params['diary_id'] else params['card_id'] end as card_id
-- content_type is derived from the search tab (page_name) the click happened on
,case when page_name='search_result_diary' then 'diary'
when page_name='search_result_question_answer' then 'answer'
when page_name='search_result_post' then 'post'
when page_name='search_result_wiki' then 'wiki' end as content_type
-- NOTE(review): the counts wrap params['card_id'] in array(); presumably so that a NULL card_id
-- (e.g. on_click_diary_card events) still yields a non-NULL value to count -- confirm.
,count(distinct CASE WHEN page_name='search_result_diary' THEN array(params['card_id']) END) as diary_click_pv
,count(distinct CASE WHEN page_name='search_result_question_answer' THEN array(params['card_id']) END) as qa_click_pv
,count(distinct CASE WHEN page_name='search_result_post' THEN array(params['card_id']) END) as post_click_pv
,count(distinct CASE WHEN page_name='search_result_wiki' THEN array(params['card_id']) END) as wiki_click_pv
FROM online.bl_hdfs_maidian_updates
-- window: first day of yesterday's month through yesterday (yyyymmdd strings)
WHERE partition_date >= REGEXP_REPLACE(trunc(date_sub(current_date(),1),'MM'),'-','')
AND partition_date <=regexp_replace(DATE_SUB(current_date,1) ,'-','')
AND ((action = 'on_click_card' AND params['card_content_type'] in ('answer','diary','question','qa',
'wiki_collect','wiki_item','wiki_brand','wiki_material','wiki_drug','wiki_instrument'))
or action in ('on_click_diary_card','on_click_topic_card','search_result_wiki_click_recommend_wiki'))
AND page_name in ('search_result_diary','search_result_question_answer','search_result_post' ,'search_result_wiki')
-- the GROUP BY repeats the card_id / content_type CASE expressions of the SELECT list verbatim
GROUP BY cl_id,partition_date,case when action='on_click_diary_card' then params['diary_id'] else params['card_id'] end
,case when page_name='search_result_diary' then 'diary'
when page_name='search_result_question_answer' then 'answer'
when page_name='search_result_post' then 'post'
when page_name='search_result_wiki' then 'wiki' end
union all
-- Branch 2: click tracking events are sometimes lost, so detail-page views whose referrer is a
-- search result tab are added as a supplement.
SELECT concat_ws('-',substr(partition_date,1,4),substr(partition_date,5,2),substr(partition_date,7,2)) as partition_date,cl_id
,params['business_id'] as card_id
-- content_type here comes from the detail page viewed (plus the referrer for diary/post pages);
-- page names admitted by the WHERE but not covered by any branch get a NULL content_type
,case when page_name in ('diary_detail','post_detail','user_post_detail','doctor_post_detail')
and referrer='search_result_diary' then 'diary'
when page_name in ('question_detail','answer_detail','question_answer_detail') then 'answer'
when page_name in ('post_detail','user_post_detail','doctor_post_detail')
and referrer='search_result_post' then 'post'
when page_name in ('wiki_detail','wiki_collect','product_detail','wiki_brand') then 'wiki'
end as content_type
-- page views are counted as distinct time_str values
,count(distinct CASE WHEN page_name in ('diary_detail','post_detail','user_post_detail','doctor_post_detail') and referrer='search_result_diary' THEN time_str END) as diary_click_pv
,count(distinct CASE WHEN page_name in ('question_detail','answer_detail','question_answer_detail') THEN time_str end)as qa_click_pv
,count(distinct CASE WHEN page_name in ('post_detail','user_post_detail','doctor_post_detail') and referrer='search_result_post' THEN time_str END) as post_click_pv
,count(distinct CASE WHEN page_name in ('wiki_detail','wiki_collect','product_detail','wiki_brand') THEN time_str end)as wiki_click_pv
FROM online.bl_hdfs_maidian_updates
WHERE partition_date >= REGEXP_REPLACE(trunc(date_sub(current_date(),1),'MM'),'-','')
AND partition_date <=regexp_replace(DATE_SUB(current_date,1) ,'-','')
AND action='page_view'
AND referrer in ('search_result_diary','search_result_question_answer','search_result_post','search_result_wiki')
and page_name in ('diary_detail','topic_detail','post_detail','user_post_detail','doctor_post_detail','question_detail','answer_detail','question_answer_detail',
'video_steep','article_detail','wiki_detail','wiki_collect','product_detail','wiki_brand')
-- as in branch 1, the content_type CASE is repeated verbatim in the GROUP BY
GROUP BY cl_id,partition_date,params['business_id']
,case when page_name in ('diary_detail','post_detail','user_post_detail','doctor_post_detail')
and referrer='search_result_diary' then 'diary'
when page_name in ('question_detail','answer_detail','question_answer_detail') then 'answer'
when page_name in ('post_detail','user_post_detail','doctor_post_detail')
and referrer='search_result_post' then 'post'
when page_name in ('wiki_detail','wiki_collect','product_detail','wiki_brand') then 'wiki' end
)t
group by partition_date,cl_id,card_id,content_type
)t3
-- joined on day + device only, so each t1 device row fans out to one row per clicked card
on t3.partition_date=t1.partition_date and t3.cl_id=t1.device_id
LEFT JOIN
( -- Good clicks on diary content opened from the diary search tab.
  -- Output: exactly one row per (partition_date, cl_id, business_id), labelled content_type = 'diary'.
  -- A row qualifies when the device performed any of the engagement actions in the WHERE below,
  -- or when the business DB shows a comment by the same user on that content that day.
    select concat_ws('-',substr(a.partition_date,1,4),substr(a.partition_date,5,2),substr(a.partition_date,7,2)) as partition_date
        ,a.cl_id,a.business_id,'diary' as content_type
    from
    (
        -- Tracking events on diary/post detail pages whose referrer is the diary search tab, either
        -- directly or as the last element of params['referrer_link'] (json_split presumably parses
        -- that JSON array -- confirm against the UDF).
        -- a.content_type ('diary' / 'post') is only used to pick the matching comment source in the join.
        select *,case when page_name in ('diary_detail','topic_detail') THEN 'diary'
                      when page_name in ('post_detail','user_post_detail','doctor_post_detail') THEN 'post'
                 end content_type
        FROM ONLINE.BL_HDFS_MAIDIAN_UPDATES
        WHERE PARTITION_DATE >= REGEXP_REPLACE(trunc(date_sub(current_date(),1),'MM'),'-','')
        and (referrer in ('search_result_diary') or
            (params['referrer_link'] like '%[%' and
            json_split(params['referrer_link'])[size(json_split(params['referrer_link']))-1] in ('search_result_diary')))
        and page_name in ('diary_detail','topic_detail','post_detail','user_post_detail','doctor_post_detail')
    )a
    left join
    ( -- Comment tracking events are incomplete, so comments are taken from the business DB instead.
      -- For diaries the diary-book id (diary_id) is used, because after clicking a card on the
      -- home page the user first lands on the diary card.
        SELECT diary_id as content_id,'diary' as type,user_id,create_date
        FROM
        (
            -- diary replies created since the start of the month (yesterday's snapshot)
            SELECT id,problem_id,user_id,regexp_replace(substr(reply_date,1,10),'-','') as create_date
            FROM online.tl_hdfs_topicreply_view
            WHERE partition_date = regexp_replace(DATE_SUB(current_date,1) ,'-','')
            AND is_spam = 'false' -- exclude suspected ads
            and is_online='true'
            and regexp_replace(substr(reply_date,1,10),'-','') >= REGEXP_REPLACE(trunc(date_sub(current_date(),1),'MM'),'-','')
            group by id,problem_id,user_id,reply_date
        )t1
        JOIN
        (
            -- maps a replied-to problem id to its diary_id
            SELECT id,diary_id
            FROM online.tl_hdfs_problem_view
            WHERE partition_date = regexp_replace(DATE_SUB(current_date,1) ,'-','')
            group by id,diary_id
        )t2
        on t2.id=t1.problem_id
        group by diary_id,user_id,create_date
        UNION ALL
        -- users who commented on user posts
        SELECT tractate_id as content_id,'post' as type,user_id,regexp_replace(substr(create_time,1,10),'-','') as create_date
        FROM online.tl_hdfs_api_tractate_reply_view
        WHERE partition_date =regexp_replace(DATE_SUB(current_date,1) ,'-','')
        and regexp_replace(substr(create_time,1,10),'-','') >= REGEXP_REPLACE(trunc(date_sub(current_date(),1),'MM'),'-','')
        and is_online='true'
        group by tractate_id,user_id,create_time
    )b
    on a.business_id=b.content_id and a.partition_date=b.create_date and a.user_id=b.user_id and a.content_type=b.type
    WHERE ((action='page_view' and page_stay>=20) -- page view with page_stay >= 20 (presumably seconds)
        or action in ('on_click_navbar_search','do_search') -- second hop: search box / search button
        or (action='on_click_card' and params['card_type']='highlight_word') -- second hop: highlighted word
        or (action='on_click_card' and params['card_content_type']in ('service','qa','diary','user_post','answer')) -- second hop: card click
        or (action='on_click_button' and params['button_name'] in ('video_interview','referral')) -- second hop: referral / video consultation
        or (action='on_click_favor' and params['motion']='do') -- favorite
        or action='page_click_share' -- share
        or (action='on_click_vote' and params['motion']='vote') -- vote (like)
        or b.user_id is not null ) -- commented on the content that day
    -- Group only by the output key. Also grouping by a.content_type ('diary' vs 'post') could emit the
    -- same (day, device, business_id) twice under the single 'diary' label, which would duplicate the
    -- matching t3 row and inflate the non-distinct COUNT(t4.cl_id) numerators.
    group by a.partition_date,a.cl_id,a.business_id
)t4
on t4.partition_date=t3.partition_date and t4.cl_id=t3.cl_id and t4.business_id=t3.card_id and t4.content_type=t3.content_type
LEFT JOIN
(
    -- Good clicks on Q&A content opened from the Q&A search tab:
    -- one row per (partition_date, cl_id, business_id), labelled content_type = 'answer'.
    SELECT
        concat_ws('-', substr(ev.partition_date, 1, 4), substr(ev.partition_date, 5, 2), substr(ev.partition_date, 7, 2)) AS partition_date
        , ev.cl_id
        , ev.business_id
        , 'answer' AS content_type
    FROM
    (
        -- tracking events on question/answer detail pages reached from the Q&A search tab
        -- (direct referrer, or last element of params['referrer_link'])
        SELECT *
        FROM ONLINE.BL_HDFS_MAIDIAN_UPDATES
        WHERE PARTITION_DATE >= regexp_replace(trunc(date_sub(current_date(), 1), 'MM'), '-', '')
          AND (
                referrer IN ('search_result_question_answer')
                OR (
                    params['referrer_link'] LIKE '%[%'
                    AND json_split(params['referrer_link'])[size(json_split(params['referrer_link'])) - 1] IN ('search_result_question_answer')
                )
              )
          AND page_name IN ('question_detail', 'answer_detail', 'question_answer_detail')
    ) ev
    LEFT JOIN
    (
        -- Comment tracking events are incomplete, so comments come from the business DB:
        -- users who replied to an answer, with fake and suspected-ad replies excluded.
        SELECT
            answer_id AS content_id
            , 'answer' AS type
            , user_id
            , regexp_replace(substr(create_time, 1, 10), '-', '') AS create_date
        FROM online.tl_hdfs_answer_reply_view
        WHERE partition_date = regexp_replace(date_sub(current_date, 1), '-', '')
          AND (is_fake IS NULL OR is_fake = 'false')
          AND answer_id IS NOT NULL
          AND is_online = 'true'
          AND is_spam = 'false' -- exclude suspected ads
          AND regexp_replace(substr(create_time, 1, 10), '-', '') >= regexp_replace(trunc(date_sub(current_date(), 1), 'MM'), '-', '')
        GROUP BY answer_id, user_id, create_time
    ) rp
        ON ev.business_id = rp.content_id
       AND ev.partition_date = rp.create_date
       AND ev.user_id = rp.user_id
    WHERE (
            (action = 'page_view' AND page_stay >= 20) -- page view with page_stay >= 20
            OR action IN ('on_click_navbar_search', 'do_search') -- second hop: search box / search button
            OR (action = 'on_click_card' AND params['card_type'] = 'highlight_word') -- second hop: highlighted word
            OR (action = 'on_click_card' AND params['card_content_type'] IN ('service', 'qa', 'diary', 'user_post', 'answer')) -- second hop: card click
            OR (action = 'on_click_button' AND params['button_name'] IN ('video_interview', 'referral')) -- second hop: referral / video consultation
            OR (action = 'on_click_favor' AND params['motion'] = 'do') -- favorite
            OR action = 'page_click_share' -- share
            OR (action = 'on_click_vote' AND params['motion'] = 'vote') -- vote (like)
            OR rp.user_id IS NOT NULL -- commented on the content that day
          )
    GROUP BY ev.partition_date, ev.cl_id, ev.business_id
) t5
    ON t5.partition_date = t3.partition_date
   AND t5.cl_id = t3.cl_id
   AND t5.business_id = t3.card_id
   AND t5.content_type = t3.content_type
LEFT JOIN
(
    -- Good clicks on post content opened from the post search tab,
    -- labelled content_type = 'post'.
    SELECT
        concat_ws('-', substr(ev.partition_date, 1, 4), substr(ev.partition_date, 5, 2), substr(ev.partition_date, 7, 2)) AS partition_date
        , ev.cl_id
        , ev.business_id
        , 'post' AS content_type
    FROM
    (
        -- tracking events on post detail pages reached from the post search tab
        -- (direct referrer, or last element of params['referrer_link'])
        SELECT
            *
            , CASE WHEN page_name IN ('post_detail', 'user_post_detail', 'doctor_post_detail') THEN 'post' END AS content_type
        FROM ONLINE.BL_HDFS_MAIDIAN_UPDATES
        WHERE PARTITION_DATE >= regexp_replace(trunc(date_sub(current_date(), 1), 'MM'), '-', '')
          AND (
                referrer IN ('search_result_post')
                OR (
                    params['referrer_link'] LIKE '%[%'
                    AND json_split(params['referrer_link'])[size(json_split(params['referrer_link'])) - 1] IN ('search_result_post')
                )
              )
          AND page_name IN ('post_detail', 'user_post_detail', 'doctor_post_detail')
    ) ev
    LEFT JOIN
    (
        -- Comment tracking events are incomplete, so comments come from the business DB:
        -- users who commented on user posts.
        SELECT
            tractate_id AS content_id
            , 'post' AS type
            , user_id
            , regexp_replace(substr(create_time, 1, 10), '-', '') AS create_date
        FROM online.tl_hdfs_api_tractate_reply_view
        WHERE partition_date = regexp_replace(date_sub(current_date, 1), '-', '')
          AND regexp_replace(substr(create_time, 1, 10), '-', '') >= regexp_replace(trunc(date_sub(current_date(), 1), 'MM'), '-', '')
          AND is_online = 'true'
        GROUP BY tractate_id, user_id, create_time
    ) rp
        ON ev.business_id = rp.content_id
       AND ev.partition_date = rp.create_date
       AND ev.user_id = rp.user_id
       AND ev.content_type = rp.type
    WHERE (
            (action = 'page_view' AND page_stay >= 20) -- page view with page_stay >= 20
            OR action IN ('on_click_navbar_search', 'do_search') -- second hop: search box / search button
            OR (action = 'on_click_card' AND params['card_type'] = 'highlight_word') -- second hop: highlighted word
            OR (action = 'on_click_card' AND params['card_content_type'] IN ('service', 'qa', 'diary', 'user_post', 'answer')) -- second hop: card click
            OR (action = 'on_click_button' AND params['button_name'] IN ('video_interview', 'referral')) -- second hop: referral / video consultation
            OR (action = 'on_click_favor' AND params['motion'] = 'do') -- favorite
            OR action = 'page_click_share' -- share
            OR (action = 'on_click_vote' AND params['motion'] = 'vote') -- vote (like)
            OR rp.user_id IS NOT NULL -- commented on the content that day
          )
    GROUP BY ev.partition_date, ev.cl_id, ev.business_id, ev.content_type
) t6
    ON t6.partition_date = t3.partition_date
   AND t6.cl_id = t3.cl_id
   AND t6.business_id = t3.card_id
   AND t6.content_type = t3.content_type
LEFT JOIN
( -- Good clicks on wiki content opened from the wiki search tab.
  -- Output: exactly one row per (partition_date, cl_id, business_id), labelled content_type = 'wiki'.
    select concat_ws('-',substr(a.partition_date,1,4),substr(a.partition_date,5,2),substr(a.partition_date,7,2)) as partition_date
        ,a.cl_id,a.business_id,'wiki' as content_type
    from
    (
        -- tracking events on wiki detail pages reached from the wiki search tab
        -- (direct referrer, or last element of params['referrer_link'])
        select *,case when page_name in ('wiki_detail','wiki_collect','product_detail','wiki_brand') THEN 'wiki'
                 end content_type
        FROM ONLINE.BL_HDFS_MAIDIAN_UPDATES
        WHERE PARTITION_DATE >= REGEXP_REPLACE(trunc(date_sub(current_date(),1),'MM'),'-','')
        and (referrer in ('search_result_wiki') or
            (params['referrer_link'] like '%[%' and
            json_split(params['referrer_link'])[size(json_split(params['referrer_link']))-1] in ('search_result_wiki')))
        and page_name in ('wiki_detail','wiki_collect','product_detail','wiki_brand')
    )a
    WHERE ((action='page_view' and page_stay>=20) -- page view with page_stay >= 20 (presumably seconds)
        or action in ('on_click_navbar_search','do_search') -- second hop: search box / search button
        or (action='on_click_card' and params['card_type']='highlight_word') -- second hop: highlighted word
        or (action='on_click_card' and params['card_content_type']in ('service','qa','diary','user_post','answer')) -- second hop: card click
        or (action='on_click_button' and params['button_name'] in ('video_interview','referral')) -- second hop: referral / video consultation
        or (action='on_click_favor' and params['motion']='do') -- favorite
        or action='page_click_share' -- share
        or (action='on_click_vote' and params['motion']='vote') -- vote (like)
        -- wiki has no comment-based signal: `or b.user_id is not null` is intentionally disabled here
        -- NOTE(review): the next branch can never match, because the inner query only keeps
        -- page_name in ('wiki_detail','wiki_collect','product_detail','wiki_brand'); if views of the
        -- wiki compare page should count, 'select_compare' must also be admitted there -- confirm intent.
        or (action='page_view' and page_name='select_compare') -- view of the wiki "start compare" page
        )
    -- Collapse to one row per key, as the diary/answer/post good-click subqueries do. Without this,
    -- every qualifying event produced its own row, multiplying the matching t3 row and inflating
    -- the non-distinct COUNT(t7.cl_id) wiki numerators.
    group by a.partition_date,a.cl_id,a.business_id
)t7
on t7.partition_date=t3.partition_date and t7.cl_id=t3.cl_id and t7.business_id=t3.card_id and t7.content_type=t3.content_type
WHERE t2.device_id is null
group by t1.partition_date
)t
order by `日期`;
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment