Commit 21aaf34f authored by 赵威

rewrite session data

parent 4e02c168
...@@ -45,9 +45,9 @@ def get_spark(app_name=""): ...@@ -45,9 +45,9 @@ def get_spark(app_name=""):
def get_tracate_click_data(spark, start, end): def get_tracate_click_data(spark, start, end):
reg = r"""^\\d+$""" reg = r"""^\\d+$"""
sql = """ sql = """
SELECT DISTINCT t1.partition_date, t1.cl_id, cast(t1.business_id as int) card_id SELECT DISTINCT t1.partition_date, t1.cl_id, cast(t1.business_id as int) card_id, t1.app_session_id
FROM FROM
(select partition_date,cl_id,business_id,action,page_name,page_stay (select partition_date,cl_id,business_id,action,page_name,page_stay, app_session_id
from online.bl_hdfs_maidian_updates from online.bl_hdfs_maidian_updates
where action = 'page_view' where action = 'page_view'
AND partition_date BETWEEN '{}' AND '{}' AND partition_date BETWEEN '{}' AND '{}'
...@@ -149,7 +149,10 @@ def get_device_click_tractate_ids_dict(click_df): ...@@ -149,7 +149,10 @@ def get_device_click_tractate_ids_dict(click_df):
res = defaultdict(list) res = defaultdict(list)
cols = click_df.orderBy("partition_date", ascending=False).collect() cols = click_df.orderBy("partition_date", ascending=False).collect()
for i in cols: for i in cols:
res[i["cl_id"]].append(i["card_id"]) card_id = i["card_id"]
session_id = i["app_session_id"]
if card_id not in res[session_id]:
res[session_id].append(card_id)
return res return res
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment