FROM ml.ml_d_ct_dv_devicespam_d --去除机构刷单设备,即作弊设备(浏览和曝光事件去除)
WHERE partition_day='{yesterday_str}'
UNION ALL
SELECT DISTINCT device_id
FROM dim.dim_device_user_staff
""".format(yesterday_str=yesterday_str)
# Run the device query assembled above and publish the result to later
# Spark SQL statements as the temp view "spam_pv".
print(sql_spam_pv_device_id)
spam_pv_df = spark.sql(sql_spam_pv_device_id)
# Mark the DataFrame for caching BEFORE the first action. cache() is lazy:
# calling it after show()/collect() (as the code used to) meant both actions
# recomputed the query from scratch and the cache was never used by them.
spam_pv_df.cache()
spam_pv_df.createOrReplaceTempView("spam_pv")
spam_pv_df.show(1)
# Debug dump: collect() pulls every row to the driver so each one can be printed.
sql_res = spam_pv_df.collect()
for res in sql_res:
    print(res)
# NOTE(review): the original indentation was lost, so it is assumed the
# separator is printed once after the rows rather than once per row - confirm.
print("-------------------------------")
# Per-(device, card) search click-through rate for the 'search_result_welfare'
# page on the partition day given by yesterday_str:
#   * T: summed CLICK_NUM for the two welfare click actions,
#   * C: number of exposure rows with CARD_TYPE = 'common_card',
#   * search_ctr = CLICK_NUM / EXPOSURE as decimal(18,5), or 0 when the card has
#     no matching exposure row (LEFT JOIN miss) or EXPOSURE is 0.
# The result is published as the temp view "dev_view".
#
# NOTE: this step reuses the names sql_spam_pv_device_id / spam_pv_df from the
# previous step; from here on they hold the CTR query and its result.
# NOTE(review): inside the SQL, the trailing comment on the EXPOSURE column
# carries the same "click count" label as CLICK_NUM, although the column is
# COUNT(T.CARD_ID) over the exposure table. The SQL text is left untouched
# here because it is printed verbatim below.
sql_spam_pv_device_id = """
select T.DEVICE_ID,T.CARD_ID,if(NVL(C.EXPOSURE,0) <> 0 ,cast((NVL(T.CLICK_NUM,0)/NVL(C.EXPOSURE,0)) as decimal(18,5)) , 0) as search_ctr from
(SELECT T.DEVICE_ID, --设备ID
T.CARD_ID, --卡片ID
SUM(T.CLICK_NUM) AS CLICK_NUM --点击次数
FROM ML.ML_C_ET_CK_CLICK_DIMEN_D T
WHERE T.PARTITION_DAY = '{partition_day}'
AND T.PAGE_CODE = 'search_result_welfare'
AND T.ACTION IN ('goto_welfare_detail','search_result_welfare_click_item')
GROUP BY T.DEVICE_ID,
T.CARD_ID) T
left join
(SELECT T.DEVICE_ID as DEVICE_ID, --设备ID
T.CARD_ID as CARD_ID, --卡片ID
COUNT(T.CARD_ID) AS EXPOSURE --点击次数
FROM ML.MID_ML_C_ET_PE_PRECISEEXPOSURE_DIMEN_D T
WHERE T.PARTITION_DAY = '{partition_day}'
AND T.PAGE_CODE = 'search_result_welfare'
AND T.CARD_TYPE = 'common_card'
GROUP BY T.DEVICE_ID,
T.CARD_ID) C on T.DEVICE_ID=C.DEVICE_ID and T.CARD_ID = C.CARD_ID
""".format(partition_day=yesterday_str)
print(sql_spam_pv_device_id)
spam_pv_df = spark.sql(sql_spam_pv_device_id)
# Mark the DataFrame for caching BEFORE the first action. cache() is lazy:
# calling it after show()/collect() (as the code used to) meant both actions
# re-ran the two aggregations and the join instead of reusing cached data.
spam_pv_df.cache()
spam_pv_df.createOrReplaceTempView("dev_view")
spam_pv_df.show(1)
# Debug dump: collect() pulls every row to the driver so each one can be printed.
sql_res = spam_pv_df.collect()
for res in sql_res:
    print(res)
# NOTE(review): the original indentation was lost, so it is assumed the
# separator is printed once after the rows rather than once per row - confirm.
print("-------------------------------")
sql_search_ctr=r"""
select D.ACTIVE_TYPE,D.DEVICE_OS_TYPE,sum(T.CLICK_NUM) as CLICK_NUM,sum(C.EXPOSURE) as EXPOSURE,if(NVL(sum(C.EXPOSURE),0) <> 0 ,cast((NVL(sum(T.CLICK_NUM),0)/NVL(sum(C.EXPOSURE),0)) as decimal(18,5)) , 0) as search_ctr from
select D.ACTIVE_TYPE,D.DEVICE_OS_TYPE,sum(T.CLICK_NUM) as CLICK_NUM,sum(C.EXPOSURE) as EXPOSURE from
(SELECT T.DEVICE_ID, --设备ID
T.CARD_ID, --卡片ID
SUM(T.CLICK_NUM) AS CLICK_NUM --点击次数
...
...
@@ -316,14 +202,14 @@ and (dev.device_id is null or dev.device_id='')