-- NOTE(review): the lines in this section appear to be interleaved from two
-- separate subqueries (a search-result click aggregation keyed on cl_id and a
-- device/user exclusion list keyed on device_id). As written the text does not
-- parse as one statement - TODO confirm the line order against the original query.
group by partition_day,card_content_type,device_id
SELECT DISTINCT device_id
)t1
FROM ml.ml_d_ct_dv_devicespam_d -- exclude devices used for institutional order-brushing, i.e. cheating devices (removed from browse and exposure events)
WHERE partition_day='{partition_day}'
LEFT JOIN
(-- card clicks on the search results page
UNION ALL
SELECT cl_id,partition_date
SELECT DISTINCT device_id
,sum(CASE WHEN card_content_type='service' THEN click_pv END) as service_click_pv
FROM dim.dim_device_user_staff -- exclude intranet (internal staff) users
,sum(CASE WHEN card_content_type='neirong' THEN click_pv END) as neirong_click_pv
)spam_pv
FROM
on spam_pv.device_id=T.DEVICE_ID
(
LEFT JOIN
SELECT partition_date,cl_id,'service' as card_content_type,count(1) as click_pv
(
FROM online.bl_hdfs_maidian_updates
SELECT partition_date,device_id
WHERE partition_date >= '{partition_day}'
FROM
AND partition_date < '{end_date}'
(-- find the first device id on which the user_id was active that day
AND ((action in ('search_result_click_recommend_item','search_result_welfare_click_item')
SELECT user_id,partition_date,
AND page_name in ('search_result_more','search_result_welfare'))
-- first element of device_list, or an empty string when the list is empty
if(size(device_list) > 0, device_list [ 0 ], '') AS device_id
or (action = 'goto_welfare_detail' AND params ['from'] = 'search_result_welfare_recommend')
FROM online.ml_user_updates
or (action = 'on_click_card' AND params['card_content_type'] in ('service') AND page_name in ('search_result_more','search_result_welfare')))
WHERE partition_date>='{partition_day}' AND partition_date<'{end_date}'
GROUP BY partition_date,cl_id,'service'
)t1
JOIN
UNION ALL
( -- doctor accounts
SELECT partition_date,cl_id,'neirong' as card_content_type,count(1) as click_pv
SELECT distinct user_id
FROM online.bl_hdfs_maidian_updates
FROM online.tl_hdfs_doctor_view
WHERE partition_date >= '{partition_day}'
WHERE partition_date = '{partition_day}'
AND partition_date < '{end_date}'
AND ((action in ('on_click_topic_card','on_click_diary_card','search_result_click_infomation_item')
-- puppet (sock-puppet) accounts / model users
AND page_name in ('search_result_more','search_result_diary','search_result_post'))
UNION ALL
or (action = 'on_click_card' AND params['card_content_type'] in ('answer','diary') AND page_name in ('search_result_more','search_result_diary','search_result_question_answer')))
SELECT user_id
GROUP BY partition_date,cl_id,'neirong'
FROM ml.ml_c_ct_ui_user_dimen_d
)t2
WHERE partition_day = '{partition_day}'
-- one row per cl_id and partition_date, carrying the per-type click sums
GROUP BY cl_id,partition_date
AND (is_puppet = 'true' or is_classifyuser = 'true')
)t2
ON t1.partition_day=t2.partition_date AND t1.device_id=t2.cl_id