Commit b095b2e3 authored by litaolemo

update

parent 6e480e62
......@@ -95,11 +95,12 @@ for t in range(1, task_days):
last_30_day_str = (now + datetime.timedelta(days=-30)).strftime("%Y%m%d")
today_str = now.strftime("%Y%m%d")
today_str_format = now.strftime("%Y-%m-%d")
yesterday_str = (now + datetime.timedelta(days=-1)).strftime("%Y-%m-%d")
yesterday_str = (now + datetime.timedelta(days=-1)).strftime("%Y%m%d")
yesterday_str_format = (now + datetime.timedelta(days=-1)).strftime("%Y-%m-%d")
one_week_age_str = (now + datetime.timedelta(days=-7)).strftime("%Y%m%d")
new_urser_device_id_sql = r"""
select t2.device_id from
(select first_device as device_id from online.ml_user_history_detail where partition_date = '{today_str}' and date_joined >= '{yesterday_str}' and date_joined <= '{today_str_format}') t2
(select first_device as device_id from online.ml_user_history_detail where partition_date = '{today_str}' and date_joined >= '{yesterday_str_format}' and date_joined <= '{today_str_format}') t2
LEFT JOIN
(
select distinct device_id
......@@ -165,7 +166,7 @@ for t in range(1, task_days):
on t2.device_id=dev.device_id
WHERE spam_pv.device_id IS NULL
and dev.device_id is null
""".format(today_str=today_str,yesterday_str=yesterday_str,today_str_format=today_str_format)
""".format(today_str=today_str,yesterday_str_format=yesterday_str_format,today_str_format=today_str_format)
print(new_urser_device_id_sql)
new_urser_device_id_df = spark.sql(new_urser_device_id_sql)
......@@ -178,4 +179,18 @@ WHERE spam_pv.device_id IS NULL
for count,res in enumerate(sql_res):
print(count,res)
# user_behavior_sql =
maidian_sql = """select t1.device_id,t2.track from
(select device_id from device_id_view)t1
left join
(select cl_id, concat_ws(',', collect_list(action )) as track from
(select * from online.bl_hdfs_maidian_updates where partition_date = {partition_date} and cl_id is not null ) group by cl_id) t2
on t1.device_id = t2.cl_id""".format(partition_date=yesterday_str)
print(maidian_sql)
track_df = spark.sql(maidian_sql)
# track_df.createOrReplaceTempView("device_id_view")
track_df.show(1)
sql_res = track_df.collect()
print("-------------------------------")
for count, res in enumerate(sql_res):
print(count, res)
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment