Commit d35d3e3f authored by litaolemo

update

parent 58871406
...@@ -83,101 +83,79 @@ for t in range(0, task_days): ...@@ -83,101 +83,79 @@ for t in range(0, task_days):
one_week_age_str = (now + datetime.timedelta(days=-7)).strftime("%Y%m%d") one_week_age_str = (now + datetime.timedelta(days=-7)).strftime("%Y%m%d")
sql_search_ctr = r""" sql_search_ctr = r"""
SELECT SELECT
exp.partition_date as partition_date t1.partition_date as day_id
,active_type ,device_os_type
,device_os_type ,active_type
,sum(service_exp_pv) as service_exp_pv ,channel
,sum(neirong_exp_pv) as neirong_exp_pv ,diary_click_pv
,sum(service_click_pv) as service_click_pv ,diary_exp_pv
,sum(neirong_click_pv) as neirong_click_pv ,qa_click_pv
FROM ,qa_exp_pv
( FROM
SELECT t1.partition_day as partition_date,device_id ( --dau
,service_exp_pv,neirong_exp_pv,service_click_pv,neirong_click_pv SELECT mas.partition_date,t2.active_type,t2.device_os_type,t2.channel,device_id
FROM FROM
(--搜索结果页卡片精准曝光 (
SELECT partition_day, SELECT
device_id, concat_ws('-',substr(partition_date,1,4),substr(partition_date,5,2),substr(partition_date,7,2)) as partition_date
count(CASE WHEN card_content_type='service' THEN 1 END) as service_exp_pv, ,m.device_id
count(CASE WHEN card_content_type<>'service' THEN 1 END) as neirong_exp_pv ,array(device_os_type ,'合计') as device_os_type
FROM ,array(case WHEN active_type = '4' THEN '老活'
( WHEN active_type in ('1','2') then '新增' END ,'合计') as active_type
SELECT device_id,partition_day,card_content_type ,array(CASE WHEN is_ai_channel = 'true' THEN 'AI' ELSE '其他' END , '合计') as channel
FROM ml.mid_ml_c_et_pe_preciseexposure_dimen_d FROM online.ml_device_day_active_status m
WHERE partition_day >= '{partition_day}' LEFT JOIN
and partition_day < '{end_date}' (SELECT code,is_ai_channel,partition_day
and action in ('page_precise_exposure','home_choiceness_card_exposure') FROM DIM.DIM_AI_CHANNEL_ZP_NEW
and is_exposure = '1' WHERE partition_day="{today_str}" ) tmp
and page_code in ('search_result_diary','search_result_doctor','search_result_hospital','search_result_more' ON m.partition_date=tmp.partition_day AND first_channel_source_type=code
,'search_result_more_infomation','search_result_more_user','search_result_post','search_result_welfare' where partition_date ="{today_str}"
,'search_result_wiki','search_result_question_answer') AND active_type in ('1','2','4')
AND (card_content_type IN ('diary') or card_type = 'diary') ) mas
)a LATERAL VIEW explode(mas.channel) t2 AS channel
group by partition_day,card_content_type,device_id LATERAL VIEW explode(mas.device_os_type) t2 AS device_os_type
)t1 LATERAL VIEW explode(mas.active_type) t2 AS active_type
)t1
LEFT JOIN left JOIN
(--搜索结果页卡片点击 (--搜索结果页卡片精准曝光
SELECT cl_id,partition_date SELECT device_id,concat_ws('-',substr(partition_day,1,4),substr(partition_day,5,2),substr(partition_day,7,2)) as partition_date
,sum(CASE WHEN card_content_type='service' THEN click_pv END) as service_click_pv ,count(distinct CASE WHEN page_code='search_result_diary' THEN array(card_id,app_session_id) END) as diary_exp_pv
,sum(CASE WHEN card_content_type='neirong' THEN click_pv END) as neirong_click_pv ,count(CASE WHEN page_code='search_result_question_answer' THEN array(card_id,app_session_id) END) as qa_exp_pv
FROM FROM ml.mid_ml_c_et_pe_preciseexposure_dimen_d
( WHERE partition_day ="{today_str}"
SELECT partition_date,cl_id,'service' as card_content_type,count(1) as click_pv and action in ('page_precise_exposure','home_choiceness_card_exposure')
FROM online.bl_hdfs_maidian_updates and is_exposure = '1'
WHERE partition_date >= '{partition_day}' and page_code in ('search_result_diary','search_result_question_answer')
AND partition_date < '{end_date}' AND card_content_type IN ('answer','diary','user_post','doctor_post','question','qa')
AND ((action in ('search_result_click_recommend_item','search_result_welfare_click_item') group by partition_day,device_id
AND page_name in ('search_result_more','search_result_welfare')) )t6
or (action = 'goto_welfare_detail' AND params ['from'] = 'search_result_welfare_recommend') on t1.partition_date=t6.partition_date and t1.device_id=t6.device_id
or (action = 'on_click_card' AND params['card_content_type'] in ('service') AND page_name in ('search_result_more','search_result_welfare'))) LEFT JOIN
GROUP BY partition_date,cl_id,'service' (--搜索结果页卡片点击
SELECT concat_ws('-',substr(partition_date,1,4),substr(partition_date,5,2),substr(partition_date,7,2)) as partition_date,cl_id
UNION ALL ,count(distinct CASE WHEN page_name='search_result_diary' THEN array(params['card_id'],app_session_id) END) as diary_click_pv
SELECT partition_date,cl_id,'neirong' as card_content_type,count(1) as click_pv ,count(distinct CASE WHEN page_name='search_result_question_answer' THEN array(params['card_id'],app_session_id) END) as qa_click_pv
FROM online.bl_hdfs_maidian_updates FROM online.bl_hdfs_maidian_updates
WHERE partition_date >= '{partition_day}' WHERE partition_date ="{today_str}"
AND partition_date < '{end_date}' AND action = 'on_click_card'
AND ((action in ('on_click_topic_card','on_click_diary_card','search_result_click_infomation_item') AND params['card_content_type'] in ('answer','diary','question','qa')
AND page_name in ('search_result_more','search_result_diary','search_result_post')) AND page_name in ('search_result_diary','search_result_question_answer')
or (action = 'on_click_card' AND params['card_content_type'] in ('answer','diary') AND page_name in ('search_result_more','search_result_diary','search_result_question_answer'))) GROUP BY cl_id,partition_date
GROUP BY partition_date,cl_id,'neirong' )t7
)t2 on t6.partition_date=t7.partition_date and t6.device_id=t7.cl_id
GROUP BY cl_id,partition_date
)t2 left join
ON t1.partition_day=t2.partition_date AND t1.device_id=t2.cl_id ( -- 去掉黑名单设备
)exp select distinct device_id
from ML.ML_D_CT_DV_DEVICECLEAN_DIMEN_D
JOIN where PARTITION_DAY = "{today_str}"
( AND is_abnormal_device = 'true'
SELECT partition_date,device_id,t2.active_type,t2.channel,t2.device_os_type )spam_pv
FROM on t1.device_id =spam_pv.device_id
( WHERE spam_pv.device_id IS NULL
SELECT group by t1.partition_date,device_os_type,active_type,channel
partition_date,m.device_id """.format(today_str=today_str)
,array(device_os_type ,'合计') as device_os_type
,array(case WHEN active_type = '4' THEN '老活'
WHEN active_type in ('1','2') then '新增' END ,'合计') as active_type
,array(CASE WHEN is_ai_channel = 'true' THEN 'AI' ELSE '其他' END , '合计') as channel
FROM online.ml_device_day_active_status m
LEFT JOIN
(SELECT code,is_ai_channel,partition_day
FROM DIM.DIM_AI_CHANNEL_ZP_NEW
WHERE partition_day>= '{partition_day}' AND partition_day < '{end_date}' ) tmp
ON m.partition_date=tmp.partition_day AND first_channel_source_type=code
where partition_date >= '{partition_day}'
AND partition_date < '{end_date}'
AND active_type in ('1','2','4')
) mas
LATERAL VIEW explode(mas.channel) t2 AS channel
LATERAL VIEW explode(mas.device_os_type) t2 AS device_os_type
LATERAL VIEW explode(mas.active_type) t2 AS active_type
)dev_channel
on dev_channel.device_id = exp.device_id
AND dev_channel.partition_date = exp.partition_date
GROUP BY exp.partition_date,active_type,device_os_type
""".format(partition_day=yesterday_str, end_date=today_str)
print(sql_search_ctr) print(sql_search_ctr)
search_ctr_df = spark.sql(sql_search_ctr) search_ctr_df = spark.sql(sql_search_ctr)
...@@ -192,24 +170,41 @@ for t in range(0, task_days): ...@@ -192,24 +170,41 @@ for t in range(0, task_days):
device_os_type = res.device_os_type device_os_type = res.device_os_type
active_type = res.active_type active_type = res.active_type
partition_date = yesterday_str partition_date = yesterday_str
pid = hashlib.md5((partition_date + device_os_type + active_type).encode("utf8")).hexdigest() channel = res.channel
click_num = res.neirong_click_pv pid = hashlib.md5((partition_date + device_os_type + active_type + channel).encode("utf8")).hexdigest()
exposure = res.neirong_exp_pv diary_click_num = res.diary_click_pv
# Read the exposure/click counts from the result row as plain scalars.
# The rewritten query selects diary_exp_pv (neirong_exp_pv is no longer a
# column), and a trailing comma after each value would wrap it in a 1-tuple:
# the CTR division below would then raise TypeError (swallowed into 0) and
# the value would render as "(n,)" inside the generated REPLACE statement.
diary_exposure = res.diary_exp_pv
qa_click_num = res.qa_click_pv
qa_exposure = res.qa_exp_pv
try:
search_ctr = round(diary_click_num / diary_exposure, 5)
except:
search_ctr = 0
instert_sql_diary = """replace into search_diary_ctr(
partition_date,device_os_type,active_type,pid,click_num,exposure,search_ctr,channel) VALUES('{partition_date}','{device_os_type}','{active_type}','{pid}',{click_num},{exposure},{search_ctr},'{channel}');""".format(
partition_date=partition_date, device_os_type=device_os_type, active_type=active_type, pid=pid,channel=channel
,click_num=diary_click_num,
exposure=diary_exposure, search_ctr=search_ctr
)
try: try:
search_ctr = round(click_num / exposure, 5) search_ctr = round(qa_click_num / qa_exposure, 5)
except: except:
search_ctr = 0 search_ctr = 0
instert_sql = """replace into search_diary_ctr( instert_sql_qa = """replace into search_answer_ctr(
partition_date,device_os_type,active_type,pid,click_num,exposure,search_ctr) VALUES('{partition_date}','{device_os_type}','{active_type}','{pid}',{click_num},{exposure},{search_ctr});""".format( partition_date,device_os_type,active_type,pid,click_num,exposure,search_ctr,channel) VALUES('{partition_date}','{device_os_type}','{active_type}','{pid}',{click_num},{exposure},{search_ctr},'{channel}');""".format(
partition_date=partition_date, device_os_type=device_os_type, active_type=active_type, pid=pid, partition_date=partition_date, device_os_type=device_os_type, active_type=active_type, pid=pid,
click_num=click_num, channel=channel
exposure=exposure, search_ctr=search_ctr , click_num=qa_click_num,
exposure=qa_exposure, search_ctr=search_ctr
) )
print(instert_sql) print(instert_sql_diary)
print(instert_sql_qa)
# cursor.execute("set names 'UTF8'") # cursor.execute("set names 'UTF8'")
db = pymysql.connect(host='172.16.40.158', port=4000, user='st_user', passwd='aqpuBLYzEV7tML5RPsN1pntUzFy', db = pymysql.connect(host='172.16.40.158', port=4000, user='st_user', passwd='aqpuBLYzEV7tML5RPsN1pntUzFy',
db='jerry_prod') db='jerry_prod')
cursor = db.cursor() cursor = db.cursor()
res = cursor.execute(instert_sql) res = cursor.execute(instert_sql_diary)
res = cursor.execute(instert_sql_qa)
db.commit() db.commit()
print(res) print(res)
\ No newline at end of file
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment