# FROM ml.ml_d_ct_dv_devicespam_d --去除机构刷单设备,即作弊设备(浏览和曝光事件去除)
FROM ml.ml_d_ct_dv_devicespam_d --去除机构刷单设备,即作弊设备(浏览和曝光事件去除)
# WHERE partition_day='{yesterday_str}'
WHERE partition_day='{yesterday_str}'
#
# UNION ALL
UNION ALL
# SELECT DISTINCT device_id
SELECT DISTINCT device_id
# FROM dim.dim_device_user_staff
FROM dim.dim_device_user_staff
#
# """.format(yesterday_str=yesterday_str)
""".format(yesterday_str=yesterday_str)
# Run the spam-device query and expose its result to later Spark SQL
# statements as the temp view "spam_pv".
print(sql_spam_pv_device_id)
spam_pv_df = spark.sql(sql_spam_pv_device_id)
# cache() is lazy: mark the DataFrame before the first action so that the
# show()/collect() below can populate the cache instead of the query being
# recomputed by every later use.
spam_pv_df.cache()
spam_pv_df.createOrReplaceTempView("spam_pv")
spam_pv_df.show(1)
# Pull the rows to the driver and echo them for debugging.
sql_res = spam_pv_df.collect()
for res in sql_res:
    print(res)
# NOTE(review): the separator is emitted twice in the original; kept as-is,
# confirm whether one of the two is a leftover.
print("-------------------------------")
print("-------------------------------")
...
@@ -189,7 +189,7 @@ for t in range(0, task_days):
...
@@ -189,7 +189,7 @@ for t in range(0, task_days):
""".format(partition_day=yesterday_str)
""".format(partition_day=yesterday_str)
# Run the second query once and expose its result to Spark SQL as "dev_view".
print(sql_spam_pv_device_id)
spam_pv_df = spark.sql(sql_spam_pv_device_id)
# cache() is lazy: mark the DataFrame before the first action so the
# show()/collect() below can fill the cache rather than recompute the query.
spam_pv_df.cache()
# Registered as "dev_view" only. The downstream query joins `spam_pv` and
# `dev_view` as two separate relations, so this result must not replace the
# "spam_pv" view that was registered earlier from a different query.
spam_pv_df.createOrReplaceTempView("dev_view")
spam_pv_df.show(1)
sql_res = spam_pv_df.collect()
...
@@ -197,6 +197,52 @@ for t in range(0, task_days):
...
@@ -197,6 +197,52 @@ for t in range(0, task_days):
print(res)
print(res)
print("-------------------------------")
print("-------------------------------")
sql_search_ctr=r"""
select D.ACTIVE_TYPE,D.DEVICE_OS_TYPE,sum(T.CLICK_NUM) as CLICK_NUM,sum(C.EXPOSURE) as EXPOSURE,if(NVL(sum(C.EXPOSURE),0) <> 0 ,cast((NVL(sum(T.CLICK_NUM),0)/NVL(sum(C.EXPOSURE),0)) as decimal(18,5)) , 0) as search_ctr from
(SELECT T.DEVICE_ID, --设备ID
T.CARD_ID, --卡片ID
SUM(T.CLICK_NUM) AS CLICK_NUM --点击次数
FROM ML.ML_C_ET_CK_CLICK_DIMEN_D T
WHERE T.PARTITION_DAY = '${partition_day}'
AND T.PAGE_CODE = 'search_result_welfare'
AND T.ACTION IN ('goto_welfare_detail','search_result_welfare_click_item')
GROUP BY T.DEVICE_ID,
T.CARD_ID) T
left join
(SELECT T.DEVICE_ID as DEVICE_ID, --设备ID
T.CARD_ID as CARD_ID, --卡片ID
COUNT(T.CARD_ID) AS EXPOSURE --点击次数
FROM ML.MID_ML_C_ET_PE_PRECISEEXPOSURE_DIMEN_D T
WHERE T.PARTITION_DAY = '${partition_day}'
AND T.PAGE_CODE = 'search_result_welfare'
AND T.CARD_TYPE = 'common_card'
GROUP BY T.DEVICE_ID,
T.CARD_ID) C on T.DEVICE_ID=C.DEVICE_ID and T.CARD_ID = C.CARD_ID LEFT JOIN
(
SELECT T.DEVICE_ID,
T.DEVICE_OS_TYPE,
T.ACTIVE_TYPE
FROM ML.ML_C_CT_DV_DEVICE_DIMEN_D T
WHERE T.PARTITION_DAY = '${partition_day}'
AND T.ACTIVE_TYPE IN ('1', '2', '4'))
D on T.DEVICE_ID = D.DEVICE_ID
LEFT JOIN spam_pv on spam_pv.device_id= T.DEVICE_ID
LEFT JOIN dev_view on dev_view.device_id= T.DEVICE_ID
WHERE (spam_pv.device_id IS NULL or spam_pv.device_id = '')