Commit f31161e5 authored by litaolemo

update

parent d0cd2f1d
...@@ -83,46 +83,97 @@ for t in range(1, task_days): ...@@ -83,46 +83,97 @@ for t in range(1, task_days):
one_week_age_str = (now + datetime.timedelta(days=-7)).strftime("%Y%m%d") one_week_age_str = (now + datetime.timedelta(days=-7)).strftime("%Y%m%d")
sql_search_ctr = r""" sql_search_ctr = r"""
select D.ACTIVE_TYPE,D.DEVICE_OS_TYPE,sum(T.CLICK_NUM) as CLICK_NUM,sum(C.EXPOSURE) as EXPOSURE from SELECT
(SELECT T.DEVICE_ID, --设备ID exp.partition_date as partition_date
T.CARD_ID, --卡片ID ,active_type
COUNT(T.CARD_ID) AS EXPOSURE --点击次数 ,device_os_type
FROM ML.MID_ML_C_ET_PE_PRECISEEXPOSURE_DIMEN_D T ,sum(service_exp_pv) as service_exp_pv
WHERE T.PARTITION_DAY = '{partition_day}' ,sum(neirong_exp_pv) as neirong_exp_pv
AND T.PAGE_CODE = 'search_result_post' ,sum(service_click_pv) as service_click_pv
GROUP BY T.DEVICE_ID, ,sum(neirong_click_pv) as neirong_click_pv
T.CARD_ID) C FROM
(
left join SELECT t1.partition_day as partition_date,t1.device_id
,service_exp_pv,neirong_exp_pv,service_click_pv,neirong_click_pv
(SELECT PARAMS['card_id'] as CARD_ID,device_id as DEVICE_ID, COUNT(PARAMS['card_id']) AS CLICK_NUM FROM
FROM ML.ML_C_ET_CK_CLICK_DIMEN_D T (--搜索结果页卡片精准曝光
WHERE T.PARTITION_DAY = '{partition_day}' SELECT partition_day,
AND T.PAGE_CODE = 'search_result_post' device_id,
AND T.ACTION IN ('search_result_click_infomation_item','on_click_topic_card') count(CASE WHEN card_content_type='service' THEN 1 END) as service_exp_pv,
group by device_id,PARAMS['card_id']) T count(CASE WHEN card_content_type<>'service' THEN 1 END) as neirong_exp_pv
FROM
on C.DEVICE_ID = T.DEVICE_ID AND C.CARD_ID = T.CARD_ID (
LEFT JOIN SELECT device_id,partition_day,card_content_type
( FROM ml.mid_ml_c_et_pe_preciseexposure_dimen_d
SELECT T.DEVICE_ID, WHERE partition_day >= '${start_date}'
T.DEVICE_OS_TYPE, and partition_day < '${end_date}'
T.ACTIVE_TYPE and action in ('page_precise_exposure','home_choiceness_card_exposure')
FROM ML.ML_C_CT_DV_DEVICE_DIMEN_D T and is_exposure = '1'
WHERE T.PARTITION_DAY = '{partition_day}' and page_code in ('search_result_diary','search_result_doctor','search_result_hospital','search_result_more'
AND T.ACTIVE_TYPE IN ('1', '2', '4')) ,'search_result_more_infomation','search_result_more_user','search_result_post','search_result_welfare'
D on C.DEVICE_ID = D.DEVICE_ID ,'search_result_wiki','search_result_question_answer')
LEFT JOIN AND card_content_type IN ('user_post')
( )a
SELECT DISTINCT device_id group by partition_day,card_content_type,device_id
FROM ml.ml_d_ct_dv_devicespam_d --去除机构刷单设备,即作弊设备(浏览和曝光事件去除) )t1
WHERE partition_day='{partition_day}' LEFT JOIN
(--搜索结果页卡片点击
UNION ALL SELECT cl_id,partition_date
SELECT DISTINCT device_id ,sum(CASE WHEN card_content_type='service' THEN click_pv END) as service_click_pv
FROM dim.dim_device_user_staff --去除内网用户 ,sum(CASE WHEN card_content_type='neirong' THEN click_pv END) as neirong_click_pv
)spam_pv FROM
on T.DEVICE_ID=spam_pv.device_id (
SELECT partition_date,cl_id,'service' as card_content_type,count(1) as click_pv
FROM online.bl_hdfs_maidian_updates
WHERE partition_date >= '${start_date}'
AND partition_date < '${end_date}'
AND ((action in ('search_result_click_recommend_item','search_result_welfare_click_item')
AND page_name in ('search_result_more','search_result_welfare'))
or (action = 'goto_welfare_detail' AND params ['from'] = 'search_result_welfare_recommend')
or (action = 'on_click_card' AND params['card_content_type'] in ('service') AND page_name in ('search_result_more','search_result_welfare')))
GROUP BY partition_date,cl_id,'service'
UNION ALL
SELECT partition_date,cl_id,'neirong' as card_content_type,count(1) as click_pv
FROM online.bl_hdfs_maidian_updates
WHERE partition_date >= '${start_date}'
AND partition_date < '${end_date}'
AND ((action in ('on_click_topic_card','on_click_diary_card','search_result_click_infomation_item')
AND page_name in ('search_result_more','search_result_diary','search_result_post'))
or (action = 'on_click_card' AND params['card_content_type'] in ('answer','diary') AND page_name in ('search_result_more','search_result_diary','search_result_question_answer')))
GROUP BY partition_date,cl_id,'neirong'
)t2
GROUP BY cl_id,partition_date
)t2
ON t1.partition_day=t2.partition_date AND t1.device_id=t2.cl_id
)exp
JOIN
(
SELECT partition_date,device_id,t2.active_type,t2.channel,t2.device_os_type
FROM
(
SELECT
partition_date,m.device_id
,array(device_os_type ,'合计') as device_os_type
,array(case WHEN active_type = '4' THEN '老活'
WHEN active_type in ('1','2') then '新增' END ,'合计') as active_type
,array(CASE WHEN is_ai_channel = 'true' THEN 'AI' ELSE '其他' END , '合计') as channel
FROM online.ml_device_day_active_status m
LEFT JOIN
(SELECT code,is_ai_channel,partition_day
FROM DIM.DIM_AI_CHANNEL_ZP_NEW
WHERE partition_day>= '${start_date}' AND partition_day < '${end_date}' ) tmp
ON m.partition_date=tmp.partition_day AND first_channel_source_type=code
where partition_date >= '${start_date}'
AND partition_date < '${end_date}'
AND active_type in ('1','2','4')
) mas
LATERAL VIEW explode(mas.channel) t2 AS channel
LATERAL VIEW explode(mas.device_os_type) t2 AS device_os_type
LATERAL VIEW explode(mas.active_type) t2 AS active_type
)dev_channel
on dev_channel.device_id = exp.device_id
AND dev_channel.partition_date = exp.partition_date
LEFT JOIN LEFT JOIN
( (
SELECT partition_date,device_id SELECT partition_date,device_id
...@@ -173,12 +224,10 @@ LEFT JOIN ...@@ -173,12 +224,10 @@ LEFT JOIN
on t1.user_id=t2.user_id on t1.user_id=t2.user_id
group by partition_date,device_id group by partition_date,device_id
)dev )dev
on T.DEVICE_ID=dev.device_id on exp.device_id=dev.device_id
WHERE (spam_pv.device_id IS NULL or spam_pv.device_id = '') WHERE (exp.device_id IS NULL or exp.device_id = '')
and (dev.device_id is null or dev.device_id='') and (dev.device_id is null or dev.device_id='')
GROUP BY exp.partition_date,active_type,device_os_type
GROUP by D.DEVICE_OS_TYPE,
D.ACTIVE_TYPE
""".format(partition_day=yesterday_str, end_date=today_str) """.format(partition_day=yesterday_str, end_date=today_str)
print(sql_search_ctr) print(sql_search_ctr)
...@@ -186,53 +235,33 @@ and (dev.device_id is null or dev.device_id='') ...@@ -186,53 +235,33 @@ and (dev.device_id is null or dev.device_id='')
# spam_pv_df.createOrReplaceTempView("dev_view") # spam_pv_df.createOrReplaceTempView("dev_view")
search_ctr_df.show(1) search_ctr_df.show(1)
sql_res = search_ctr_df.collect() sql_res = search_ctr_df.collect()
res_dict = {
"新增": {
"ios": {"click_num": 0, "exposure": 0},
"android": {"click_num": 0, "exposure": 0}
},
"老活": {
"ios": {"click_num": 0, "exposure": 0},
"android": {"click_num": 0, "exposure": 0}
}
}
print("-------------------------------") print("-------------------------------")
for res in sql_res:
print(res) for active_type in sql_res:
if res.ACTIVE_TYPE: print(active_type)
if res.ACTIVE_TYPE in ('1', '2'): # for device_os_type in sql_res[active_type]:
if res.CLICK_NUM: # partition_date = yesterday_str
res_dict["新增"][res.DEVICE_OS_TYPE]["click_num"] += res.CLICK_NUM # pid = hashlib.md5((partition_date + device_os_type + active_type).encode("utf8")).hexdigest()
if res.EXPOSURE: # click_num = sql_res[active_type][device_os_type]["click_num"]
res_dict["新增"][res.DEVICE_OS_TYPE]["exposure"] += res.EXPOSURE # exposure = sql_res[active_type][device_os_type]["exposure"]
else: # try:
if res.CLICK_NUM: # search_ctr = round(click_num / exposure, 5)
res_dict["老活"][res.DEVICE_OS_TYPE]["click_num"] += res.CLICK_NUM # except:
if res.EXPOSURE: # search_ctr = 0
res_dict["老活"][res.DEVICE_OS_TYPE]["exposure"] += res.EXPOSURE # instert_sql = """replace into search_tractate_ctr(
for active_type in res_dict: # partition_date,device_os_type,active_type,pid,click_num,exposure,search_ctr) VALUES('{partition_date}','{device_os_type}','{active_type}','{pid}',{click_num},{exposure},{search_ctr});""".format(
for device_os_type in res_dict[active_type]: # partition_date=partition_date, device_os_type=device_os_type, active_type=active_type, pid=pid, click_num=click_num,
partition_date = yesterday_str # exposure=exposure, search_ctr=search_ctr
pid = hashlib.md5((partition_date + device_os_type + active_type).encode("utf8")).hexdigest() # )
click_num = res_dict[active_type][device_os_type]["click_num"] # print(instert_sql)
exposure = res_dict[active_type][device_os_type]["exposure"] # # cursor.execute("set names 'UTF8'")
try: # db = pymysql.connect(host='172.16.40.158', port=4000, user='st_user', passwd='aqpuBLYzEV7tML5RPsN1pntUzFy',
search_ctr = round(click_num / exposure, 5) # db='jerry_prod')
except: # cursor = db.cursor()
search_ctr = 0 # res = cursor.execute(instert_sql)
instert_sql = """replace into search_tractate_ctr( # db.commit()
partition_date,device_os_type,active_type,pid,click_num,exposure,search_ctr) VALUES('{partition_date}','{device_os_type}','{active_type}','{pid}',{click_num},{exposure},{search_ctr});""".format( # print(res)
partition_date=partition_date, device_os_type=device_os_type, active_type=active_type, pid=pid, click_num=click_num, # # cursor.executemany()
exposure=exposure, search_ctr=search_ctr # db.close()
)
print(instert_sql)
# cursor.execute("set names 'UTF8'")
db = pymysql.connect(host='172.16.40.158', port=4000, user='st_user', passwd='aqpuBLYzEV7tML5RPsN1pntUzFy',
db='jerry_prod')
cursor = db.cursor()
res = cursor.execute(instert_sql)
db.commit()
print(res)
# cursor.executemany()
db.close()
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment