Commit be777834 authored by litaolemo

update

parent 0c081a62
......@@ -81,123 +81,9 @@ for t in range(1, task_days):
# Date strings in YYYYMMDD form, all derived from `now` (defined outside this
# excerpt). They are substituted into the partition filters of the SQL below.
today_str = now.strftime("%Y%m%d")
# One day before `now`.
yesterday_str = (now + datetime.timedelta(days=-1)).strftime("%Y%m%d")
# Seven days before `now`.
one_week_age_str = (now + datetime.timedelta(days=-7)).strftime("%Y%m%d")
# Build the Spark SQL that yields distinct (partition_date, device_id) pairs.
#
# Subquery t1 reads online.ml_user_updates for yesterday <= partition_date < today
# and takes the first element of device_list per row ('' when the list is empty);
# per the embedded SQL comment this is the first device the user was active on
# that day.
#
# Subquery t2 is a UNION ALL of user_id sets, labelled by the embedded SQL
# comments as:
#   - doctor accounts (online.tl_hdfs_doctor_view),
#   - sockpuppet / model users (is_puppet or is_classifyuser = 'true'),
#   - users covered by the company intranet (dim.dim_device_user_staff),
#   - users whose device history contains a device flagged is_login_doctor = '1'.
#
# t1 is inner-joined to t2 on user_id, and the result is grouped by
# partition_date and device_id.
sql_dev_device_id = """
SELECT partition_date,device_id
FROM
(--找出user_id当天活跃的第一个设备id
SELECT user_id,partition_date,
if(size(device_list) > 0, device_list [ 0 ], '') AS device_id
FROM online.ml_user_updates
WHERE partition_date>='{yesterday_str}' AND partition_date<'{today_str}'
)t1
JOIN
( --医生账号
SELECT distinct user_id
FROM online.tl_hdfs_doctor_view
WHERE partition_date = '{yesterday_str}'
--马甲账号/模特用户
UNION ALL
SELECT user_id
FROM ml.ml_c_ct_ui_user_dimen_d
WHERE partition_day = '{yesterday_str}'
AND (is_puppet = 'true' or is_classifyuser = 'true')
UNION ALL
--公司内网覆盖用户
select distinct user_id
from dim.dim_device_user_staff
UNION ALL
--登陆过医生设备
SELECT distinct t1.user_id
FROM
(
SELECT user_id, v.device_id as device_id
FROM online.ml_user_history_detail
LATERAL VIEW EXPLODE(device_history_list) v AS device_id
WHERE partition_date = '{yesterday_str}'
)t1
JOIN
(
SELECT device_id
FROM online.ml_device_history_detail
WHERE partition_date = '{yesterday_str}'
AND is_login_doctor = '1'
)t2
ON t1.device_id = t2.device_id
)t2
on t1.user_id=t2.user_id
group by partition_date,device_id
""".format(yesterday_str=yesterday_str, today_str=today_str)
# Log the rendered SQL before running it.
print(sql_dev_device_id)
dev_df = spark.sql(sql_dev_device_id)
# Register the result under the temp-view name "dev_view".
# NOTE(review): createOrReplaceTempView is documented to return None, so
# dev_df_view is presumably always None and unused -- confirm.
dev_df_view = dev_df.createOrReplaceTempView("dev_view")
dev_df.cache()
dev_df.show(1)
# Fetch the result rows into sql_res and print them for debugging.
sql_res = dev_df.collect()
# NOTE(review): indentation is not visible in this excerpt, so it is unclear
# whether the separator print belongs inside or after the loop.
for res in sql_res:
print(res)
print("-------------------------------")
# Build the Spark SQL listing device_ids to exclude: devices in
# ml.ml_d_ct_dv_devicespam_d for yesterday's partition (per the embedded SQL
# comment: devices used by institutions for fake orders, i.e. cheating devices,
# removed from view and exposure events), plus every device_id in
# dim.dim_device_user_staff. The two halves are combined with UNION ALL, so a
# device present in both tables appears twice.
sql_spam_pv_device_id = """
SELECT DISTINCT device_id
FROM ml.ml_d_ct_dv_devicespam_d --去除机构刷单设备,即作弊设备(浏览和曝光事件去除)
WHERE partition_day='{yesterday_str}'
UNION ALL
SELECT DISTINCT device_id
FROM dim.dim_device_user_staff
""".format(yesterday_str=yesterday_str)
# Log the rendered SQL before running it.
print(sql_spam_pv_device_id)
spam_pv_df = spark.sql(sql_spam_pv_device_id)
# Register the result under the temp-view name "spam_pv".
spam_pv_df.createOrReplaceTempView("spam_pv")
spam_pv_df.show(1)
# Fetch the result rows into sql_res and print them for debugging.
sql_res = spam_pv_df.collect()
# NOTE(review): cache() is requested only after show() and collect() have
# already run -- confirm this ordering is intended.
spam_pv_df.cache()
# NOTE(review): indentation is not visible in this excerpt, so it is unclear
# whether the separator print belongs inside or after the loop.
for res in sql_res:
print(res)
print("-------------------------------")
# Build the Spark SQL computing a search click-through rate per
# (DEVICE_ID, CARD_ID) for yesterday's partition on page 'search_result_welfare':
#   - T: summed CLICK_NUM from ML.ML_C_ET_CK_CLICK_DIMEN_D for the actions
#     'goto_welfare_detail' and 'search_result_welfare_click_item';
#   - C: row count per card from ML.MID_ML_C_ET_PE_PRECISEEXPOSURE_DIMEN_D
#     restricted to CARD_TYPE = 'common_card', exposed as EXPOSURE.
# T is left-joined to C; search_ctr is CLICK_NUM / EXPOSURE cast to
# decimal(18,5), or 0 when EXPOSURE is missing or 0.
# NOTE(review): this reuses the names sql_spam_pv_device_id / spam_pv_df from
# the preceding spam-device query although the content is unrelated -- confirm.
# NOTE(review): the embedded SQL comment on the EXPOSURE column reads
# "click count"; it looks copied from the CLICK_NUM column -- confirm.
sql_spam_pv_device_id = """
select T.DEVICE_ID,T.CARD_ID,if(NVL(C.EXPOSURE,0) <> 0 ,cast((NVL(T.CLICK_NUM,0)/NVL(C.EXPOSURE,0)) as decimal(18,5)) , 0) as search_ctr from
(SELECT T.DEVICE_ID, --设备ID
T.CARD_ID, --卡片ID
SUM(T.CLICK_NUM) AS CLICK_NUM --点击次数
FROM ML.ML_C_ET_CK_CLICK_DIMEN_D T
WHERE T.PARTITION_DAY = '{partition_day}'
AND T.PAGE_CODE = 'search_result_welfare'
AND T.ACTION IN ('goto_welfare_detail','search_result_welfare_click_item')
GROUP BY T.DEVICE_ID,
T.CARD_ID) T
left join
(SELECT T.DEVICE_ID as DEVICE_ID, --设备ID
T.CARD_ID as CARD_ID, --卡片ID
COUNT(T.CARD_ID) AS EXPOSURE --点击次数
FROM ML.MID_ML_C_ET_PE_PRECISEEXPOSURE_DIMEN_D T
WHERE T.PARTITION_DAY = '{partition_day}'
AND T.PAGE_CODE = 'search_result_welfare'
AND T.CARD_TYPE = 'common_card'
GROUP BY T.DEVICE_ID,
T.CARD_ID) C on T.DEVICE_ID=C.DEVICE_ID and T.CARD_ID = C.CARD_ID
""".format(partition_day=yesterday_str)
# Log the rendered SQL before running it.
print(sql_spam_pv_device_id)
spam_pv_df = spark.sql(sql_spam_pv_device_id)
# NOTE(review): this registers the result as "dev_view", the same temp-view
# name used earlier for the dev-device query, so it replaces that view --
# confirm this is intended and not a copy-paste slip.
spam_pv_df.createOrReplaceTempView("dev_view")
spam_pv_df.show(1)
# Fetch the result rows into sql_res and print them for debugging.
sql_res = spam_pv_df.collect()
# NOTE(review): cache() is requested only after show() and collect() have
# already run -- confirm this ordering is intended.
spam_pv_df.cache()
# NOTE(review): indentation is not visible in this excerpt, so it is unclear
# whether the separator print belongs inside or after the loop.
for res in sql_res:
print(res)
print("-------------------------------")
sql_search_ctr = r"""
select D.ACTIVE_TYPE,D.DEVICE_OS_TYPE,sum(T.CLICK_NUM) as CLICK_NUM,sum(C.EXPOSURE) as EXPOSURE,if(NVL(sum(C.EXPOSURE),0) <> 0 ,cast((NVL(sum(T.CLICK_NUM),0)/NVL(sum(C.EXPOSURE),0)) as decimal(18,5)) , 0) as search_ctr from
select D.ACTIVE_TYPE,D.DEVICE_OS_TYPE,sum(T.CLICK_NUM) as CLICK_NUM,sum(C.EXPOSURE) as EXPOSURE from
(SELECT T.DEVICE_ID, --设备ID
T.CARD_ID, --卡片ID
SUM(T.CLICK_NUM) AS CLICK_NUM --点击次数
......@@ -316,14 +202,14 @@ and (dev.device_id is null or dev.device_id='')
db='jerry_prod')
cursor = db.cursor()
for res in sql_res:
# print(res)
if res.active_type:
if res.active_type in (1, 2):
res_dict["新增"][res.device_os_type]["click_num"] += res.click_num
res_dict["新增"][res.device_os_type]["exposure"] += res.exposure
print(res)
if res.ACTIVE_TYPE:
if res.ACTIVE_TYPE in (1, 2):
res_dict["新增"][res.DEVICE_OS_TYPE]["click_num"] += res.CLICK_NUM
res_dict["新增"][res.DEVICE_OS_TYPE]["exposure"] += res.EXPOSURE
else:
res_dict["老活"][res.device_os_type]["click_num"] += res.click_num
res_dict["老活"][res.device_os_type]["exposure"] += res.exposure
res_dict["老活"][res.DEVICE_OS_TYPE]["click_num"] += res.CLICK_NUM
res_dict["老活"][res.DEVICE_OS_TYPE]["exposure"] += res.EXPOSURE
for active_type in res_dict:
for device_os_type in res_dict[active_type]:
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment