Commit 21aaf34f authored by 赵威

rewrite session data

parent 4e02c168
......@@ -45,9 +45,9 @@ def get_spark(app_name=""):
def get_tracate_click_data(spark, start, end):
reg = r"""^\\d+$"""
sql = """
SELECT DISTINCT t1.partition_date, t1.cl_id, cast(t1.business_id as int) card_id
SELECT DISTINCT t1.partition_date, t1.cl_id, cast(t1.business_id as int) card_id, t1.app_session_id
FROM
(select partition_date,cl_id,business_id,action,page_name,page_stay
(select partition_date,cl_id,business_id,action,page_name,page_stay, app_session_id
from online.bl_hdfs_maidian_updates
where action = 'page_view'
AND partition_date BETWEEN '{}' AND '{}'
......@@ -149,7 +149,10 @@ def get_device_click_tractate_ids_dict(click_df):
res = defaultdict(list)
cols = click_df.orderBy("partition_date", ascending=False).collect()
for i in cols:
res[i["cl_id"]].append(i["card_id"])
card_id = i["card_id"]
session_id = i["app_session_id"]
if card_id not in res[session_id]:
res[session_id].append(card_id)
return res
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment