Commit 11e0c09a authored by litaolemo's avatar litaolemo

update

parent ab758003
...@@ -73,7 +73,7 @@ spark.sql("CREATE TEMPORARY FUNCTION is_json AS 'com.gmei.hive.common.udf.UDFJso ...@@ -73,7 +73,7 @@ spark.sql("CREATE TEMPORARY FUNCTION is_json AS 'com.gmei.hive.common.udf.UDFJso
spark.sql("CREATE TEMPORARY FUNCTION arrayMerge AS 'com.gmei.hive.common.udf.UDFArryMerge'") spark.sql("CREATE TEMPORARY FUNCTION arrayMerge AS 'com.gmei.hive.common.udf.UDFArryMerge'")
task_list = [] task_list = []
task_days = 3 task_days = 60
for t in range(1, task_days): for t in range(1, task_days):
day_num = 0 - t day_num = 0 - t
now = (datetime.datetime.now() + datetime.timedelta(days=day_num)) now = (datetime.datetime.now() + datetime.timedelta(days=day_num))
...@@ -83,103 +83,100 @@ for t in range(1, task_days): ...@@ -83,103 +83,100 @@ for t in range(1, task_days):
one_week_age_str = (now + datetime.timedelta(days=-7)).strftime("%Y%m%d") one_week_age_str = (now + datetime.timedelta(days=-7)).strftime("%Y%m%d")
sql_search_ctr = r""" sql_search_ctr = r"""
select D.ACTIVE_TYPE,D.DEVICE_OS_TYPE,sum(T.CLICK_NUM) as CLICK_NUM,sum(C.EXPOSURE) as EXPOSURE from SELECT
(SELECT T.DEVICE_ID as DEVICE_ID, --设备ID exp.partition_date as partition_date
T.CARD_ID as CARD_ID, --卡片ID ,active_type
COUNT(T.CARD_ID) AS EXPOSURE --点击次数 ,device_os_type
FROM ML.MID_ML_C_ET_PE_PRECISEEXPOSURE_DIMEN_D T ,sum(service_exp_pv) as service_exp_pv
WHERE T.PARTITION_DAY = '{partition_day}' ,sum(neirong_exp_pv) as neirong_exp_pv
AND T.PAGE_CODE = 'search_result_question_answer' ,sum(service_click_pv) as service_click_pv
GROUP BY T.DEVICE_ID, ,sum(neirong_click_pv) as neirong_click_pv
T.CARD_ID) C FROM
left join
(SELECT T.DEVICE_ID, --设备ID
T.CARD_ID, --卡片ID
SUM(T.CLICK_NUM) AS CLICK_NUM --点击次数
FROM ML.ML_C_ET_CK_CLICK_DIMEN_D T
WHERE T.PARTITION_DAY = '{partition_day}'
AND T.PAGE_CODE = 'search_result_question_answer'
AND T.ACTION IN ('on_click_card')
GROUP BY T.DEVICE_ID,
T.CARD_ID) T
on C.DEVICE_ID=T.DEVICE_ID and C.CARD_ID = T.CARD_ID
LEFT JOIN
( (
SELECT T.DEVICE_ID, SELECT t1.partition_day as partition_date,device_id
T.DEVICE_OS_TYPE, ,service_exp_pv,neirong_exp_pv,service_click_pv,neirong_click_pv
T.ACTIVE_TYPE FROM
FROM ML.ML_C_CT_DV_DEVICE_DIMEN_D T (--搜索结果页卡片精准曝光
WHERE T.PARTITION_DAY = '{partition_day}' SELECT partition_day,
AND T.ACTIVE_TYPE IN ('1', '2', '4')) device_id,
D on C.DEVICE_ID = D.DEVICE_ID count(CASE WHEN card_content_type='service' THEN 1 END) as service_exp_pv,
LEFT JOIN count(CASE WHEN card_content_type<>'service' THEN 1 END) as neirong_exp_pv
(
SELECT DISTINCT device_id
FROM ml.ml_d_ct_dv_devicespam_d --去除机构刷单设备,即作弊设备(浏览和曝光事件去除)
WHERE partition_day='{partition_day}'
UNION ALL
SELECT DISTINCT device_id
FROM dim.dim_device_user_staff --去除内网用户
)spam_pv
on spam_pv.device_id=T.DEVICE_ID
LEFT JOIN
(
SELECT partition_date,device_id
FROM FROM
(--找出user_id当天活跃的第一个设备id (
SELECT user_id,partition_date, SELECT device_id,partition_day,card_content_type
if(size(device_list) > 0, device_list [ 0 ], '') AS device_id FROM ml.mid_ml_c_et_pe_preciseexposure_dimen_d
FROM online.ml_user_updates WHERE partition_day >= '{partition_day}'
WHERE partition_date>='{partition_day}' AND partition_date<'{end_date}' and partition_day < '{end_date}'
and action in ('page_precise_exposure','home_choiceness_card_exposure')
and is_exposure = '1'
and page_code in ('search_result_diary','search_result_doctor','search_result_hospital','search_result_more'
,'search_result_more_infomation','search_result_more_user','search_result_post','search_result_welfare'
,'search_result_wiki','search_result_question_answer')
AND card_content_type IN ('answer','qa')
)a
group by partition_day,card_content_type,device_id
)t1 )t1
JOIN
( --医生账号
SELECT distinct user_id
FROM online.tl_hdfs_doctor_view
WHERE partition_date = '{partition_day}'
--马甲账号/模特用户 LEFT JOIN
UNION ALL (--搜索结果页卡片点击
SELECT user_id SELECT cl_id,partition_date
FROM ml.ml_c_ct_ui_user_dimen_d ,sum(CASE WHEN card_content_type='service' THEN click_pv END) as service_click_pv
WHERE partition_day = '{partition_day}' ,sum(CASE WHEN card_content_type='neirong' THEN click_pv END) as neirong_click_pv
AND (is_puppet = 'true' or is_classifyuser = 'true') FROM
(
SELECT partition_date,cl_id,'service' as card_content_type,count(1) as click_pv
FROM online.bl_hdfs_maidian_updates
WHERE partition_date >= '{partition_day}'
AND partition_date < '{end_date}'
AND ((action in ('search_result_click_recommend_item','search_result_welfare_click_item')
AND page_name in ('search_result_more','search_result_welfare'))
or (action = 'goto_welfare_detail' AND params ['from'] = 'search_result_welfare_recommend')
or (action = 'on_click_card' AND params['card_content_type'] in ('service') AND page_name in ('search_result_more','search_result_welfare')))
GROUP BY partition_date,cl_id,'service'
UNION ALL UNION ALL
--公司内网覆盖用户 SELECT partition_date,cl_id,'neirong' as card_content_type,count(1) as click_pv
select distinct user_id FROM online.bl_hdfs_maidian_updates
from dim.dim_device_user_staff WHERE partition_date >= '{partition_day}'
AND partition_date < '{end_date}'
AND ((action in ('on_click_topic_card','on_click_diary_card','search_result_click_infomation_item')
AND page_name in ('search_result_more','search_result_diary','search_result_post'))
or (action = 'on_click_card' AND params['card_content_type'] in ('answer','diary') AND page_name in ('search_result_more','search_result_diary','search_result_question_answer')))
GROUP BY partition_date,cl_id,'neirong'
)t2
GROUP BY cl_id,partition_date
)t2
ON t1.partition_day=t2.partition_date AND t1.device_id=t2.cl_id
)exp
UNION ALL
--登陆过医生设备
SELECT distinct t1.user_id
FROM
(
SELECT user_id, v.device_id as device_id
FROM online.ml_user_history_detail
LATERAL VIEW EXPLODE(device_history_list) v AS device_id
WHERE partition_date = '{partition_day}'
)t1
JOIN JOIN
( (
SELECT device_id SELECT partition_date,device_id,t2.active_type,t2.channel,t2.device_os_type
FROM online.ml_device_history_detail FROM
WHERE partition_date = '{partition_day}' (
AND is_login_doctor = '1' SELECT
)t2 partition_date,m.device_id
ON t1.device_id = t2.device_id ,array(device_os_type ,'合计') as device_os_type
)t2 ,array(case WHEN active_type = '4' THEN '老活'
on t1.user_id=t2.user_id WHEN active_type in ('1','2') then '新增' END ,'合计') as active_type
group by partition_date,device_id ,array(CASE WHEN is_ai_channel = 'true' THEN 'AI' ELSE '其他' END , '合计') as channel
)dev FROM online.ml_device_day_active_status m
on T.DEVICE_ID=dev.device_id LEFT JOIN
WHERE (spam_pv.device_id IS NULL or spam_pv.device_id = '') (SELECT code,is_ai_channel,partition_day
and (dev.device_id is null or dev.device_id='') FROM DIM.DIM_AI_CHANNEL_ZP_NEW
WHERE partition_day>= '{partition_day}' AND partition_day < '{end_date}' ) tmp
GROUP by D.DEVICE_OS_TYPE, ON m.partition_date=tmp.partition_day AND first_channel_source_type=code
D.ACTIVE_TYPE where partition_date >= '{partition_day}'
AND partition_date < '{end_date}'
AND active_type in ('1','2','4')
) mas
LATERAL VIEW explode(mas.channel) t2 AS channel
LATERAL VIEW explode(mas.device_os_type) t2 AS device_os_type
LATERAL VIEW explode(mas.active_type) t2 AS active_type
)dev_channel
on dev_channel.device_id = exp.device_id
AND dev_channel.partition_date = exp.partition_date
GROUP BY exp.partition_date,active_type,device_os_type
""".format(partition_day=yesterday_str, end_date=today_str) """.format(partition_day=yesterday_str, end_date=today_str)
print(sql_search_ctr) print(sql_search_ctr)
...@@ -187,53 +184,32 @@ and (dev.device_id is null or dev.device_id='') ...@@ -187,53 +184,32 @@ and (dev.device_id is null or dev.device_id='')
# spam_pv_df.createOrReplaceTempView("dev_view") # spam_pv_df.createOrReplaceTempView("dev_view")
search_ctr_df.show(1) search_ctr_df.show(1)
sql_res = search_ctr_df.collect() sql_res = search_ctr_df.collect()
res_dict = {
"新增": {
"ios": {"click_num": 0, "exposure": 0},
"android": {"click_num": 0, "exposure": 0}
},
"老活": {
"ios": {"click_num": 0, "exposure": 0},
"android": {"click_num": 0, "exposure": 0}
}
}
print("-------------------------------") print("-------------------------------")
db = pymysql.connect(host='172.16.40.158', port=4000, user='st_user', passwd='aqpuBLYzEV7tML5RPsN1pntUzFy',
db='jerry_prod')
cursor = db.cursor()
for res in sql_res: for res in sql_res:
print(res) print(res)
if res.ACTIVE_TYPE: device_os_type = res.device_os_type
if res.ACTIVE_TYPE in ('1', '2'): active_type = res.active_type
if res.CLICK_NUM:
res_dict["新增"][res.DEVICE_OS_TYPE]["click_num"] += res.CLICK_NUM
if res.EXPOSURE:
res_dict["新增"][res.DEVICE_OS_TYPE]["exposure"] += res.EXPOSURE
else:
if res.CLICK_NUM:
res_dict["老活"][res.DEVICE_OS_TYPE]["click_num"] += res.CLICK_NUM
if res.EXPOSURE:
res_dict["老活"][res.DEVICE_OS_TYPE]["exposure"] += res.EXPOSURE
for active_type in res_dict:
for device_os_type in res_dict[active_type]:
partition_date = yesterday_str partition_date = yesterday_str
pid = hashlib.md5((partition_date + device_os_type + active_type).encode("utf8")).hexdigest() pid = hashlib.md5((partition_date + device_os_type + active_type).encode("utf8")).hexdigest()
click_num = res_dict[active_type][device_os_type]["click_num"] click_num = res.neirong_click_pv
exposure = res_dict[active_type][device_os_type]["exposure"] exposure = res.neirong_exp_pv
try: try:
search_ctr = round(click_num / exposure, 5) search_ctr = round(click_num / exposure, 5)
except: except:
search_ctr = 0 search_ctr = 0
instert_sql = """replace into search_answer_ctr( instert_sql = """replace into search_answer_ctr(
partition_date,device_os_type,active_type,pid,click_num,exposure,search_ctr) VALUES('{partition_date}','{device_os_type}','{active_type}','{pid}',{click_num},{exposure},{search_ctr});""".format( partition_date,device_os_type,active_type,pid,click_num,exposure,search_ctr) VALUES('{partition_date}','{device_os_type}','{active_type}','{pid}',{click_num},{exposure},{search_ctr});""".format(
partition_date=partition_date, device_os_type=device_os_type, active_type=active_type, pid=pid, click_num=click_num, partition_date=partition_date, device_os_type=device_os_type, active_type=active_type, pid=pid,
click_num=click_num,
exposure=exposure, search_ctr=search_ctr exposure=exposure, search_ctr=search_ctr
) )
print(instert_sql) print(instert_sql)
# cursor.execute("set names 'UTF8'") # cursor.execute("set names 'UTF8'")
db = pymysql.connect(host='172.16.40.158', port=4000, user='st_user', passwd='aqpuBLYzEV7tML5RPsN1pntUzFy',
db='jerry_prod')
cursor = db.cursor()
res = cursor.execute(instert_sql) res = cursor.execute(instert_sql)
db.commit() db.commit()
print(res) print(res)
\ No newline at end of file
# cursor.executemany()
db.close()
...@@ -73,7 +73,7 @@ spark.sql("CREATE TEMPORARY FUNCTION is_json AS 'com.gmei.hive.common.udf.UDFJso ...@@ -73,7 +73,7 @@ spark.sql("CREATE TEMPORARY FUNCTION is_json AS 'com.gmei.hive.common.udf.UDFJso
spark.sql("CREATE TEMPORARY FUNCTION arrayMerge AS 'com.gmei.hive.common.udf.UDFArryMerge'") spark.sql("CREATE TEMPORARY FUNCTION arrayMerge AS 'com.gmei.hive.common.udf.UDFArryMerge'")
task_list = [] task_list = []
task_days = 60 task_days = 3
for t in range(1, task_days): for t in range(1, task_days):
day_num = 0 - t day_num = 0 - t
now = (datetime.datetime.now() + datetime.timedelta(days=day_num)) now = (datetime.datetime.now() + datetime.timedelta(days=day_num))
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment