Commit 98c206ca authored by litaolemo

update

parent 047f8c17
......@@ -81,25 +81,6 @@ for t in range(1, task_days):
# Date stamps formatted as YYYYMMDD, all derived from `now`:
# the current day, the previous day, and the day seven days earlier.
today_str = now.strftime("%Y%m%d")
yesterday_str = (now - datetime.timedelta(days=1)).strftime("%Y%m%d")
one_week_age_str = (now - datetime.timedelta(days=7)).strftime("%Y%m%d")
# Build the list of device ids to exclude: devices in yesterday's partition
# of the device-spam table (cheating / order-brushing devices) plus devices
# in the staff dimension table (internal-network users).
# The staff branch used to select `dev.device_id`, but no relation in that
# branch is aliased `dev`, so the column is now referenced unqualified.
sql_spam_pv_device_id = """
SELECT DISTINCT device_id
FROM ml.ml_d_ct_dv_devicespam_d --去除机构刷单设备,即作弊设备(浏览和曝光事件去除)
WHERE partition_day={yesterday_str}
UNION ALL
SELECT DISTINCT device_id
FROM dim.dim_device_user_staff --去除内网用户
""".format(yesterday_str=yesterday_str)
print(sql_spam_pv_device_id)
spam_pv_df = spark.sql(sql_spam_pv_device_id)
# createOrReplaceTempView registers the view and returns None, so its
# result is not bound to a name.
spam_pv_df.createOrReplaceTempView("spam_pv")
spam_pv_df.show(1)
# Debug dump: collect() brings every row back to the driver.
sql_res = spam_pv_df.collect()
for res in sql_res:
    print(res)
# NOTE(review): separator assumed to sit after the loop, not inside it — confirm.
print("-------------------------------")
sql_dev_device_id = """
SELECT partition_date,device_id
FROM
......@@ -150,7 +131,7 @@ for t in range(1, task_days):
group by partition_date,device_id""".format(yesterday_str=yesterday_str, today_str=today_str)
# Run the device query, expose it to later SQL as the temp view "dev_view",
# and dump a sample for debugging.
print(sql_dev_device_id)
dev_df = spark.sql(sql_dev_device_id)
# createOrReplaceTempView returns None; binding its result and then calling
# .show()/.collect() on it raised AttributeError. Keep working with dev_df.
dev_df.createOrReplaceTempView("dev_view")
dev_df.show(1)
# Debug dump: collect() brings every row back to the driver.
sql_res = dev_df.collect()
for res in sql_res:
......@@ -158,6 +139,26 @@ for t in range(1, task_days):
print("-------------------------------")
# Build the list of device ids to exclude: devices in yesterday's partition
# of the device-spam table (cheating / order-brushing devices) plus devices
# in the staff dimension table (internal-network users).
# The staff branch referenced `dev_view.dim_device_user_staff`, but
# "dev_view" is a temp view, not a database, so that table cannot resolve.
# NOTE(review): restored to dim.dim_device_user_staff with an unqualified
# device_id column — confirm this is the intended staff table.
sql_spam_pv_device_id = """
SELECT DISTINCT device_id
FROM ml.ml_d_ct_dv_devicespam_d --去除机构刷单设备,即作弊设备(浏览和曝光事件去除)
WHERE partition_day={yesterday_str}
UNION ALL
SELECT DISTINCT device_id
FROM dim.dim_device_user_staff --去除内网用户
""".format(yesterday_str=yesterday_str)
print(sql_spam_pv_device_id)
spam_pv_df = spark.sql(sql_spam_pv_device_id)
# createOrReplaceTempView registers the view and returns None, so its
# result is not bound to a name.
spam_pv_df.createOrReplaceTempView("spam_pv")
spam_pv_df.show(1)
# Debug dump: collect() brings every row back to the driver.
sql_res = spam_pv_df.collect()
for res in sql_res:
    print(res)
# NOTE(review): separator assumed to sit after the loop, not inside it — confirm.
print("-------------------------------")
sql = r"""
SELECT t3.partition_date as partition_date
,t3.device_os_type as device_os_type
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment