Commit d35d3e3f authored by litaolemo

update

parent 58871406
...@@ -84,78 +84,22 @@ for t in range(0, task_days): ...@@ -84,78 +84,22 @@ for t in range(0, task_days):
sql_search_ctr = r""" sql_search_ctr = r"""
SELECT SELECT
exp.partition_date as partition_date t1.partition_date as day_id
,active_type
,device_os_type ,device_os_type
,sum(service_exp_pv) as service_exp_pv ,active_type
,sum(neirong_exp_pv) as neirong_exp_pv ,channel
,sum(service_click_pv) as service_click_pv ,diary_click_pv
,sum(neirong_click_pv) as neirong_click_pv ,diary_exp_pv
FROM ,qa_click_pv
( ,qa_exp_pv
SELECT t1.partition_day as partition_date,device_id FROM
,service_exp_pv,neirong_exp_pv,service_click_pv,neirong_click_pv ( --dau
FROM SELECT mas.partition_date,t2.active_type,t2.device_os_type,t2.channel,device_id
(--搜索结果页卡片精准曝光
SELECT partition_day,
device_id,
count(CASE WHEN card_content_type='service' THEN 1 END) as service_exp_pv,
count(CASE WHEN card_content_type<>'service' THEN 1 END) as neirong_exp_pv
FROM
(
SELECT device_id,partition_day,card_content_type
FROM ml.mid_ml_c_et_pe_preciseexposure_dimen_d
WHERE partition_day >= '{partition_day}'
and partition_day < '{end_date}'
and action in ('page_precise_exposure','home_choiceness_card_exposure')
and is_exposure = '1'
and page_code in ('search_result_diary','search_result_doctor','search_result_hospital','search_result_more'
,'search_result_more_infomation','search_result_more_user','search_result_post','search_result_welfare'
,'search_result_wiki','search_result_question_answer')
AND (card_content_type IN ('diary') or card_type = 'diary')
)a
group by partition_day,card_content_type,device_id
)t1
LEFT JOIN
(--搜索结果页卡片点击
SELECT cl_id,partition_date
,sum(CASE WHEN card_content_type='service' THEN click_pv END) as service_click_pv
,sum(CASE WHEN card_content_type='neirong' THEN click_pv END) as neirong_click_pv
FROM
(
SELECT partition_date,cl_id,'service' as card_content_type,count(1) as click_pv
FROM online.bl_hdfs_maidian_updates
WHERE partition_date >= '{partition_day}'
AND partition_date < '{end_date}'
AND ((action in ('search_result_click_recommend_item','search_result_welfare_click_item')
AND page_name in ('search_result_more','search_result_welfare'))
or (action = 'goto_welfare_detail' AND params ['from'] = 'search_result_welfare_recommend')
or (action = 'on_click_card' AND params['card_content_type'] in ('service') AND page_name in ('search_result_more','search_result_welfare')))
GROUP BY partition_date,cl_id,'service'
UNION ALL
SELECT partition_date,cl_id,'neirong' as card_content_type,count(1) as click_pv
FROM online.bl_hdfs_maidian_updates
WHERE partition_date >= '{partition_day}'
AND partition_date < '{end_date}'
AND ((action in ('on_click_topic_card','on_click_diary_card','search_result_click_infomation_item')
AND page_name in ('search_result_more','search_result_diary','search_result_post'))
or (action = 'on_click_card' AND params['card_content_type'] in ('answer','diary') AND page_name in ('search_result_more','search_result_diary','search_result_question_answer')))
GROUP BY partition_date,cl_id,'neirong'
)t2
GROUP BY cl_id,partition_date
)t2
ON t1.partition_day=t2.partition_date AND t1.device_id=t2.cl_id
)exp
JOIN
(
SELECT partition_date,device_id,t2.active_type,t2.channel,t2.device_os_type
FROM FROM
( (
SELECT SELECT
partition_date,m.device_id concat_ws('-',substr(partition_date,1,4),substr(partition_date,5,2),substr(partition_date,7,2)) as partition_date
,m.device_id
,array(device_os_type ,'合计') as device_os_type ,array(device_os_type ,'合计') as device_os_type
,array(case WHEN active_type = '4' THEN '老活' ,array(case WHEN active_type = '4' THEN '老活'
WHEN active_type in ('1','2') then '新增' END ,'合计') as active_type WHEN active_type in ('1','2') then '新增' END ,'合计') as active_type
...@@ -164,20 +108,54 @@ for t in range(0, task_days): ...@@ -164,20 +108,54 @@ for t in range(0, task_days):
LEFT JOIN LEFT JOIN
(SELECT code,is_ai_channel,partition_day (SELECT code,is_ai_channel,partition_day
FROM DIM.DIM_AI_CHANNEL_ZP_NEW FROM DIM.DIM_AI_CHANNEL_ZP_NEW
WHERE partition_day>= '{partition_day}' AND partition_day < '{end_date}' ) tmp WHERE partition_day="{today_str}" ) tmp
ON m.partition_date=tmp.partition_day AND first_channel_source_type=code ON m.partition_date=tmp.partition_day AND first_channel_source_type=code
where partition_date >= '{partition_day}' where partition_date ="{today_str}"
AND partition_date < '{end_date}'
AND active_type in ('1','2','4') AND active_type in ('1','2','4')
) mas ) mas
LATERAL VIEW explode(mas.channel) t2 AS channel LATERAL VIEW explode(mas.channel) t2 AS channel
LATERAL VIEW explode(mas.device_os_type) t2 AS device_os_type LATERAL VIEW explode(mas.device_os_type) t2 AS device_os_type
LATERAL VIEW explode(mas.active_type) t2 AS active_type LATERAL VIEW explode(mas.active_type) t2 AS active_type
)dev_channel )t1
on dev_channel.device_id = exp.device_id left JOIN
AND dev_channel.partition_date = exp.partition_date (--搜索结果页卡片精准曝光
GROUP BY exp.partition_date,active_type,device_os_type SELECT device_id,concat_ws('-',substr(partition_day,1,4),substr(partition_day,5,2),substr(partition_day,7,2)) as partition_date
""".format(partition_day=yesterday_str, end_date=today_str) ,count(distinct CASE WHEN page_code='search_result_diary' THEN array(card_id,app_session_id) END) as diary_exp_pv
,count(CASE WHEN page_code='search_result_question_answer' THEN array(card_id,app_session_id) END) as qa_exp_pv
FROM ml.mid_ml_c_et_pe_preciseexposure_dimen_d
WHERE partition_day ="{today_str}"
and action in ('page_precise_exposure','home_choiceness_card_exposure')
and is_exposure = '1'
and page_code in ('search_result_diary','search_result_question_answer')
AND card_content_type IN ('answer','diary','user_post','doctor_post','question','qa')
group by partition_day,device_id
)t6
on t1.partition_date=t6.partition_date and t1.device_id=t6.device_id
LEFT JOIN
(--搜索结果页卡片点击
SELECT concat_ws('-',substr(partition_date,1,4),substr(partition_date,5,2),substr(partition_date,7,2)) as partition_date,cl_id
,count(distinct CASE WHEN page_name='search_result_diary' THEN array(params['card_id'],app_session_id) END) as diary_click_pv
,count(distinct CASE WHEN page_name='search_result_question_answer' THEN array(params['card_id'],app_session_id) END) as qa_click_pv
FROM online.bl_hdfs_maidian_updates
WHERE partition_date ="{today_str}"
AND action = 'on_click_card'
AND params['card_content_type'] in ('answer','diary','question','qa')
AND page_name in ('search_result_diary','search_result_question_answer')
GROUP BY cl_id,partition_date
)t7
on t6.partition_date=t7.partition_date and t6.device_id=t7.cl_id
left join
( -- 去掉黑名单设备
select distinct device_id
from ML.ML_D_CT_DV_DEVICECLEAN_DIMEN_D
where PARTITION_DAY = "{today_str}"
AND is_abnormal_device = 'true'
)spam_pv
on t1.device_id =spam_pv.device_id
WHERE spam_pv.device_id IS NULL
group by t1.partition_date,device_os_type,active_type,channel
""".format(today_str=today_str)
print(sql_search_ctr) print(sql_search_ctr)
search_ctr_df = spark.sql(sql_search_ctr) search_ctr_df = spark.sql(sql_search_ctr)
...@@ -192,24 +170,41 @@ for t in range(0, task_days): ...@@ -192,24 +170,41 @@ for t in range(0, task_days):
device_os_type = res.device_os_type device_os_type = res.device_os_type
active_type = res.active_type active_type = res.active_type
partition_date = yesterday_str partition_date = yesterday_str
pid = hashlib.md5((partition_date + device_os_type + active_type).encode("utf8")).hexdigest() channel = res.channel
click_num = res.neirong_click_pv pid = hashlib.md5((partition_date + device_os_type + active_type + channel).encode("utf8")).hexdigest()
exposure = res.neirong_exp_pv diary_click_num = res.diary_click_pv
diary_exposure = res.neirong_exp_pv,
qa_click_num = res.qa_click_pv,
qa_exposure = res.qa_exp_pv,
try:
search_ctr = round(diary_click_num / diary_exposure, 5)
except:
search_ctr = 0
instert_sql_diary = """replace into search_diary_ctr(
partition_date,device_os_type,active_type,pid,click_num,exposure,search_ctr,channel) VALUES('{partition_date}','{device_os_type}','{active_type}','{pid}',{click_num},{exposure},{search_ctr},'{channel}');""".format(
partition_date=partition_date, device_os_type=device_os_type, active_type=active_type, pid=pid,channel=channel
,click_num=diary_click_num,
exposure=diary_exposure, search_ctr=search_ctr
)
try: try:
search_ctr = round(click_num / exposure, 5) search_ctr = round(qa_click_num / qa_exposure, 5)
except: except:
search_ctr = 0 search_ctr = 0
instert_sql = """replace into search_diary_ctr( instert_sql_qa = """replace into search_answer_ctr(
partition_date,device_os_type,active_type,pid,click_num,exposure,search_ctr) VALUES('{partition_date}','{device_os_type}','{active_type}','{pid}',{click_num},{exposure},{search_ctr});""".format( partition_date,device_os_type,active_type,pid,click_num,exposure,search_ctr,channel) VALUES('{partition_date}','{device_os_type}','{active_type}','{pid}',{click_num},{exposure},{search_ctr},'{channel}');""".format(
partition_date=partition_date, device_os_type=device_os_type, active_type=active_type, pid=pid, partition_date=partition_date, device_os_type=device_os_type, active_type=active_type, pid=pid,
click_num=click_num, channel=channel
exposure=exposure, search_ctr=search_ctr , click_num=qa_click_num,
exposure=qa_exposure, search_ctr=search_ctr
) )
print(instert_sql) print(instert_sql_diary)
print(instert_sql_qa)
# cursor.execute("set names 'UTF8'") # cursor.execute("set names 'UTF8'")
db = pymysql.connect(host='172.16.40.158', port=4000, user='st_user', passwd='aqpuBLYzEV7tML5RPsN1pntUzFy', db = pymysql.connect(host='172.16.40.158', port=4000, user='st_user', passwd='aqpuBLYzEV7tML5RPsN1pntUzFy',
db='jerry_prod') db='jerry_prod')
cursor = db.cursor() cursor = db.cursor()
res = cursor.execute(instert_sql) res = cursor.execute(instert_sql_diary)
res = cursor.execute(instert_sql_qa)
db.commit() db.commit()
print(res) print(res)
\ No newline at end of file
Markdown is supported
0% Try again or attach a new file
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment