Commit a5098c13 authored by 赵威

add printer

parent 6e43d520
......@@ -35,7 +35,7 @@ def get_answer_click_data(spark, start, end):
where action = 'page_view'
AND partition_date BETWEEN '{}' AND '{}'
AND page_name='answer_detail'
AND page_stay>=0.5
AND page_stay>=1
AND cl_id is not null
AND cl_id != ''
AND business_id is not null
......@@ -145,11 +145,13 @@ def get_online_answer_ids():
def get_device_click_answer_ids_dict(click_df):
    """Group clicked answer card ids by app session, keeping only online answers.

    Rows are collected from ``click_df`` ordered by ``partition_date``
    descending, so each session's list keeps its card ids in that order with
    duplicates dropped.

    Args:
        click_df: object exposing ``orderBy(...).collect()`` whose rows can be
            indexed by ``"card_id"`` and ``"app_session_id"`` (presumably a
            Spark DataFrame -- confirm against the caller).

    Returns:
        A ``defaultdict(list)`` mapping each ``app_session_id`` to the list of
        distinct ``card_id`` values (as stored in the row, not converted)
        whose integer form is among the ids from ``get_online_answer_ids()``.
        A session whose clicks are all offline still appears as a key with an
        empty list.

    Raises:
        ValueError, TypeError: if a row's ``card_id`` cannot be converted
            with ``int()``.
    """
    online_ids = get_online_answer_ids()
    print("online ids: {}".format(len(online_ids)))
    # Build the lookup once so the per-row membership test is O(1).
    # NOTE(review): assumes the online ids are hashable (e.g. ints) -- confirm.
    online_lookup = set(online_ids)

    res = defaultdict(list)
    rows = click_df.orderBy("partition_date", ascending=False).collect()
    for row in rows:
        card_id = row["card_id"]
        session_id = row["app_session_id"]
        # Convert once; the ids from get_online_answer_ids() are compared
        # against the integer form of the card id.
        card_id_int = int(card_id)
        is_online = card_id_int in online_lookup
        print("card_id: {} {}".format(card_id_int, is_online))
        # Looking up the session first registers it in the result even when
        # nothing is appended for it.
        session_cards = res[session_id]
        if is_online and card_id not in session_cards:
            session_cards.append(card_id)
    return res
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment