Commit e1c6ae8a authored by litaolemo's avatar litaolemo

update

parent ca581e57
......@@ -88,51 +88,59 @@ for t in range(0, task_days):
yesterday_str = (now - datetime.timedelta(days=1)).strftime("%Y%m%d")
one_week_age_str = (now - datetime.timedelta(days=7)).strftime("%Y%m%d")
# CPT daily ad clicks: count qualifying click events per partition_date
# across the welfare search/list/detail/card entry points.
cpt_click_query = """SELECT partition_date,count(1) as pv
FROM online.bl_hdfs_maidian_updates
WHERE partition_date >= '{start_date}'
and partition_date < '{end_date}'
AND ((ACTION = 'search_result_welfare_click_item' AND PAGE_NAME = 'search_result_welfare' AND PARAMS['transaction_type'] = 'advertise')
OR (ACTION = 'goto_welfare_detail' AND PARAMS['from'] = 'category' AND PARAMS['transaction_type'] = 'operating' AND PARAMS['tab_name'] = 'service')
OR (ACTION = 'goto_welfare_detail' AND PARAMS['from'] = 'welfare_home_list_item' and PARAMS['transaction_type'] = 'advertise')
OR (ACTION = 'goto_welfare_detail' AND PARAMS['from'] = 'welfare_list' AND PARAMS['transaction_type'] = 'advertise')
OR (ACTION = 'on_click_card' AND PARAMS['card_content_type'] = 'service' AND PARAMS['page_name'] IN ('new_sign','search_result_welfare','category','welfare_home_list_item','welfare_list') AND PARAMS['transaction_type'] = 'advertise'))
group BY partition_date""".format(start_date=yesterday_str, end_date=today_str)
cpt_click_frame = spark.sql(cpt_click_query)
# Dump every (partition_date, pv) row to stdout for the daily report.
sql_res = cpt_click_frame.collect()
for row in sql_res:
    print(row)
# CPT_daily_click_sql = """SELECT partition_date,count(1) as pv
# FROM online.bl_hdfs_maidian_updates
# WHERE partition_date >= '{start_date}'
# and partition_date < '{end_date}'
# AND ((ACTION = 'search_result_welfare_click_item' AND PAGE_NAME = 'search_result_welfare' AND PARAMS['transaction_type'] = 'advertise')
# OR (ACTION = 'goto_welfare_detail' AND PARAMS['from'] = 'category' AND PARAMS['transaction_type'] = 'operating' AND PARAMS['tab_name'] = 'service')
# OR (ACTION = 'goto_welfare_detail' AND PARAMS['from'] = 'welfare_home_list_item' and PARAMS['transaction_type'] = 'advertise')
# OR (ACTION = 'goto_welfare_detail' AND PARAMS['from'] = 'welfare_list' AND PARAMS['transaction_type'] = 'advertise')
# OR (ACTION = 'on_click_card' AND PARAMS['card_content_type'] = 'service' AND PARAMS['page_name'] IN ('new_sign','search_result_welfare','category','welfare_home_list_item','welfare_list') AND PARAMS['transaction_type'] = 'advertise'))
# group BY partition_date""".format(start_date=yesterday_str, end_date=today_str)
# CPT_daily_click_df = spark.sql(CPT_daily_click_sql)
# # CPT_daily_click_df.createOrReplaceTempView("cpt_daily_click_df")
# sql_res = CPT_daily_click_df.collect()
# for res in sql_res:
# print(res)
#
# print("0-----------------------------------------------------------------------------")
# # 商详页PV
# bus_detail_pv_sql = """SELECT
# a2.partition_date,count(1) welfare_pv
# FROM
# (
# SELECT cl_id,partition_date
# FROM online.bl_hdfs_maidian_updates
# WHERE partition_date >='{start_date}'and partition_date < '{end_date}'
# AND action='page_view'
# AND params['page_name'] = 'welfare_detail'
# )a1
# JOIN
# (
# SELECT device_id,partition_date
# from online.ml_device_day_active_status
# WHERE partition_date >='{start_date}'and partition_date < '{end_date}'
# AND active_type in ('1','2','4')
# )a2
# on a2.device_id = a1.cl_id
# AND a2.partition_date=a1.partition_date
# group by a2.partition_date""".format(start_date=yesterday_str, end_date=today_str, )
# bus_detail_pv_df = spark.sql(bus_detail_pv_sql)
# # bus_detail_pv_df.createOrReplaceTempView("bus_detail_pv_df")
# sql_res = bus_detail_pv_df.collect()
# for res in sql_res:
# print(res)
# print("1-----------------------------------------------------------------------------")
# Sanity-check query against the CPC click-log view (feeds the CPC daily
# budget query built further below).
test_sql = "select * from online.tl_hdfs_cpc_clicklog_view"
cpc_budget_df = spark.sql(test_sql)
print("0-----------------------------------------------------------------------------")
# Welfare (service) detail page PV: page_view events on 'welfare_detail',
# restricted to devices active that day (active_type 1/2/4), per partition_date.
bus_detail_pv_sql = """SELECT
a2.partition_date,count(1) welfare_pv
FROM
(
SELECT cl_id,partition_date
FROM online.bl_hdfs_maidian_updates
WHERE partition_date >='{start_date}'and partition_date < '{end_date}'
AND action='page_view'
AND params['page_name'] = 'welfare_detail'
)a1
JOIN
(
SELECT device_id,partition_date
from online.ml_device_day_active_status
WHERE partition_date >='{start_date}'and partition_date < '{end_date}'
AND active_type in ('1','2','4')
)a2
on a2.device_id = a1.cl_id
AND a2.partition_date=a1.partition_date
group by a2.partition_date""".format(start_date=yesterday_str, end_date=today_str)
bus_detail_pv_df = spark.sql(bus_detail_pv_sql)
# bus_detail_pv_df.createOrReplaceTempView("bus_detail_pv_df")
# BUG FIX: previously this collect() result was assigned to sql_res and then
# immediately overwritten by cpc_budget_df.collect() before being printed,
# so the detail-page PV rows were computed but never shown. Print them first.
sql_res = bus_detail_pv_df.collect()
for res in sql_res:
    print(res)
cpc_budget_df.show(1, False)
sql_res = cpc_budget_df.collect()
for res in sql_res:
    print(res)
print("1-----------------------------------------------------------------------------")
# cpc当日预算(有效口径)
cpc_budget_sql = """SELECT day_id,sum(budget) as budget
FROM
(
......@@ -142,7 +150,7 @@ for t in range(0, task_days):
SELECT
substr(clicklog.create_time,1,10) AS day_id
,clicklog.merchant_doctor_id
,max(merchant_budget) as merchant_budget --商户预算
,max(merchant_budget) as merchant_budget
FROM
(
SELECT id,promote_id,price,service_budget,merchant_budget,merchant_doctor_id,create_time,recharge
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment