Commit d35d3e3f authored by litaolemo's avatar litaolemo

update

parent 58871406
......@@ -83,101 +83,79 @@ for t in range(0, task_days):
one_week_age_str = (now + datetime.timedelta(days=-7)).strftime("%Y%m%d")
sql_search_ctr = r"""
SELECT
exp.partition_date as partition_date
,active_type
,device_os_type
,sum(service_exp_pv) as service_exp_pv
,sum(neirong_exp_pv) as neirong_exp_pv
,sum(service_click_pv) as service_click_pv
,sum(neirong_click_pv) as neirong_click_pv
FROM
(
SELECT t1.partition_day as partition_date,device_id
,service_exp_pv,neirong_exp_pv,service_click_pv,neirong_click_pv
FROM
(--搜索结果页卡片精准曝光
SELECT partition_day,
device_id,
count(CASE WHEN card_content_type='service' THEN 1 END) as service_exp_pv,
count(CASE WHEN card_content_type<>'service' THEN 1 END) as neirong_exp_pv
FROM
(
SELECT device_id,partition_day,card_content_type
FROM ml.mid_ml_c_et_pe_preciseexposure_dimen_d
WHERE partition_day >= '{partition_day}'
and partition_day < '{end_date}'
and action in ('page_precise_exposure','home_choiceness_card_exposure')
and is_exposure = '1'
and page_code in ('search_result_diary','search_result_doctor','search_result_hospital','search_result_more'
,'search_result_more_infomation','search_result_more_user','search_result_post','search_result_welfare'
,'search_result_wiki','search_result_question_answer')
AND (card_content_type IN ('diary') or card_type = 'diary')
)a
group by partition_day,card_content_type,device_id
)t1
LEFT JOIN
(--搜索结果页卡片点击
SELECT cl_id,partition_date
,sum(CASE WHEN card_content_type='service' THEN click_pv END) as service_click_pv
,sum(CASE WHEN card_content_type='neirong' THEN click_pv END) as neirong_click_pv
FROM
(
SELECT partition_date,cl_id,'service' as card_content_type,count(1) as click_pv
FROM online.bl_hdfs_maidian_updates
WHERE partition_date >= '{partition_day}'
AND partition_date < '{end_date}'
AND ((action in ('search_result_click_recommend_item','search_result_welfare_click_item')
AND page_name in ('search_result_more','search_result_welfare'))
or (action = 'goto_welfare_detail' AND params ['from'] = 'search_result_welfare_recommend')
or (action = 'on_click_card' AND params['card_content_type'] in ('service') AND page_name in ('search_result_more','search_result_welfare')))
GROUP BY partition_date,cl_id,'service'
UNION ALL
SELECT partition_date,cl_id,'neirong' as card_content_type,count(1) as click_pv
FROM online.bl_hdfs_maidian_updates
WHERE partition_date >= '{partition_day}'
AND partition_date < '{end_date}'
AND ((action in ('on_click_topic_card','on_click_diary_card','search_result_click_infomation_item')
AND page_name in ('search_result_more','search_result_diary','search_result_post'))
or (action = 'on_click_card' AND params['card_content_type'] in ('answer','diary') AND page_name in ('search_result_more','search_result_diary','search_result_question_answer')))
GROUP BY partition_date,cl_id,'neirong'
)t2
GROUP BY cl_id,partition_date
)t2
ON t1.partition_day=t2.partition_date AND t1.device_id=t2.cl_id
)exp
JOIN
(
SELECT partition_date,device_id,t2.active_type,t2.channel,t2.device_os_type
FROM
(
SELECT
partition_date,m.device_id
,array(device_os_type ,'合计') as device_os_type
,array(case WHEN active_type = '4' THEN '老活'
WHEN active_type in ('1','2') then '新增' END ,'合计') as active_type
,array(CASE WHEN is_ai_channel = 'true' THEN 'AI' ELSE '其他' END , '合计') as channel
FROM online.ml_device_day_active_status m
LEFT JOIN
(SELECT code,is_ai_channel,partition_day
FROM DIM.DIM_AI_CHANNEL_ZP_NEW
WHERE partition_day>= '{partition_day}' AND partition_day < '{end_date}' ) tmp
ON m.partition_date=tmp.partition_day AND first_channel_source_type=code
where partition_date >= '{partition_day}'
AND partition_date < '{end_date}'
AND active_type in ('1','2','4')
) mas
LATERAL VIEW explode(mas.channel) t2 AS channel
LATERAL VIEW explode(mas.device_os_type) t2 AS device_os_type
LATERAL VIEW explode(mas.active_type) t2 AS active_type
)dev_channel
on dev_channel.device_id = exp.device_id
AND dev_channel.partition_date = exp.partition_date
GROUP BY exp.partition_date,active_type,device_os_type
""".format(partition_day=yesterday_str, end_date=today_str)
SELECT
t1.partition_date as day_id
,device_os_type
,active_type
,channel
-- per-device counters must be aggregated: the query groups by day/os/active_type/channel
,sum(diary_click_pv) as diary_click_pv
,sum(diary_exp_pv) as diary_exp_pv
,sum(qa_click_pv) as qa_click_pv
,sum(qa_exp_pv) as qa_exp_pv
FROM
( --dau
SELECT mas.partition_date,t2.active_type,t2.device_os_type,t2.channel,device_id
FROM
(
SELECT
concat_ws('-',substr(partition_date,1,4),substr(partition_date,5,2),substr(partition_date,7,2)) as partition_date
,m.device_id
,array(device_os_type ,'合计') as device_os_type
,array(case WHEN active_type = '4' THEN '老活'
WHEN active_type in ('1','2') then '新增' END ,'合计') as active_type
,array(CASE WHEN is_ai_channel = 'true' THEN 'AI' ELSE '其他' END , '合计') as channel
FROM online.ml_device_day_active_status m
LEFT JOIN
(SELECT code,is_ai_channel,partition_day
FROM DIM.DIM_AI_CHANNEL_ZP_NEW
WHERE partition_day="{today_str}" ) tmp
ON m.partition_date=tmp.partition_day AND first_channel_source_type=code
where partition_date ="{today_str}"
AND active_type in ('1','2','4')
) mas
LATERAL VIEW explode(mas.channel) t2 AS channel
LATERAL VIEW explode(mas.device_os_type) t2 AS device_os_type
LATERAL VIEW explode(mas.active_type) t2 AS active_type
)t1
left JOIN
(--搜索结果页卡片精准曝光
SELECT device_id,concat_ws('-',substr(partition_day,1,4),substr(partition_day,5,2),substr(partition_day,7,2)) as partition_date
,count(distinct CASE WHEN page_code='search_result_diary' THEN array(card_id,app_session_id) END) as diary_exp_pv
,count(distinct CASE WHEN page_code='search_result_question_answer' THEN array(card_id,app_session_id) END) as qa_exp_pv
FROM ml.mid_ml_c_et_pe_preciseexposure_dimen_d
WHERE partition_day ="{today_str}"
and action in ('page_precise_exposure','home_choiceness_card_exposure')
and is_exposure = '1'
and page_code in ('search_result_diary','search_result_question_answer')
AND card_content_type IN ('answer','diary','user_post','doctor_post','question','qa')
group by partition_day,device_id
)t6
on t1.partition_date=t6.partition_date and t1.device_id=t6.device_id
LEFT JOIN
(--搜索结果页卡片点击
SELECT concat_ws('-',substr(partition_date,1,4),substr(partition_date,5,2),substr(partition_date,7,2)) as partition_date,cl_id
,count(distinct CASE WHEN page_name='search_result_diary' THEN array(params['card_id'],app_session_id) END) as diary_click_pv
,count(distinct CASE WHEN page_name='search_result_question_answer' THEN array(params['card_id'],app_session_id) END) as qa_click_pv
FROM online.bl_hdfs_maidian_updates
WHERE partition_date ="{today_str}"
AND action = 'on_click_card'
AND params['card_content_type'] in ('answer','diary','question','qa')
AND page_name in ('search_result_diary','search_result_question_answer')
GROUP BY cl_id,partition_date
)t7
on t6.partition_date=t7.partition_date and t6.device_id=t7.cl_id
left join
( -- 去掉黑名单设备
select distinct device_id
from ML.ML_D_CT_DV_DEVICECLEAN_DIMEN_D
where PARTITION_DAY = "{today_str}"
AND is_abnormal_device = 'true'
)spam_pv
on t1.device_id =spam_pv.device_id
WHERE spam_pv.device_id IS NULL
group by t1.partition_date,device_os_type,active_type,channel
""".format(today_str=today_str)
print(sql_search_ctr)
search_ctr_df = spark.sql(sql_search_ctr)
......@@ -192,24 +170,41 @@ for t in range(0, task_days):
device_os_type = res.device_os_type
active_type = res.active_type
partition_date = yesterday_str
pid = hashlib.md5((partition_date + device_os_type + active_type).encode("utf8")).hexdigest()
click_num = res.neirong_click_pv
exposure = res.neirong_exp_pv
channel = res.channel
pid = hashlib.md5((partition_date + device_os_type + active_type + channel).encode("utf8")).hexdigest()
# Pull the four counters for this row as plain scalars.
# No trailing commas: `x = value,` binds a 1-tuple, which makes the CTR
# division below raise TypeError (silently turned into 0 by the bare except)
# and renders as "(n,)" inside the generated REPLACE statement.
# `or 0` maps a NULL from the LEFT JOINs to 0 so the literal "None" never
# reaches the SQL text; a 0 exposure still falls into the except -> ctr 0.
diary_click_num = res.diary_click_pv or 0
# The reworked query exposes diary_exp_pv; neirong_exp_pv belonged to the old query.
diary_exposure = res.diary_exp_pv or 0
qa_click_num = res.qa_click_pv or 0
qa_exposure = res.qa_exp_pv or 0
try:
search_ctr = round(diary_click_num / diary_exposure, 5)
except:
search_ctr = 0
instert_sql_diary = """replace into search_diary_ctr(
partition_date,device_os_type,active_type,pid,click_num,exposure,search_ctr,channel) VALUES('{partition_date}','{device_os_type}','{active_type}','{pid}',{click_num},{exposure},{search_ctr},'{channel}');""".format(
partition_date=partition_date, device_os_type=device_os_type, active_type=active_type, pid=pid,channel=channel
,click_num=diary_click_num,
exposure=diary_exposure, search_ctr=search_ctr
)
try:
search_ctr = round(click_num / exposure, 5)
search_ctr = round(qa_click_num / qa_exposure, 5)
except:
search_ctr = 0
instert_sql = """replace into search_diary_ctr(
partition_date,device_os_type,active_type,pid,click_num,exposure,search_ctr) VALUES('{partition_date}','{device_os_type}','{active_type}','{pid}',{click_num},{exposure},{search_ctr});""".format(
instert_sql_qa = """replace into search_answer_ctr(
partition_date,device_os_type,active_type,pid,click_num,exposure,search_ctr,channel) VALUES('{partition_date}','{device_os_type}','{active_type}','{pid}',{click_num},{exposure},{search_ctr},'{channel}');""".format(
partition_date=partition_date, device_os_type=device_os_type, active_type=active_type, pid=pid,
click_num=click_num,
exposure=exposure, search_ctr=search_ctr
channel=channel
, click_num=qa_click_num,
exposure=qa_exposure, search_ctr=search_ctr
)
print(instert_sql)
print(instert_sql_diary)
print(instert_sql_qa)
# cursor.execute("set names 'UTF8'")
db = pymysql.connect(host='172.16.40.158', port=4000, user='st_user', passwd='aqpuBLYzEV7tML5RPsN1pntUzFy',
db='jerry_prod')
cursor = db.cursor()
res = cursor.execute(instert_sql)
res = cursor.execute(instert_sql_diary)
res = cursor.execute(instert_sql_qa)
db.commit()
print(res)
\ No newline at end of file
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment