Commit 3238eed6 authored by litaolemo

update

parent 3bd4430f
......@@ -242,10 +242,17 @@ WHERE spam_pv.device_id IS NULL
db.close()
res_dict = {}
maidian_sql = """select t1.device_id,t2.track as track from
no_protratit_page_stay = {
0: 0,
60: 0,
180: 0,
300: 0,
600: 0,
}
maidian_sql = """select t1.device_id,t2.track as track,page_stay from
(select device_id from device_id_view where device_id in {device_id_tuple})t1
left join
(select cl_id, concat_ws(',', collect_list(action)) as track from
(select cl_id, concat_ws(',', collect_list(action)) as track,sum(page_stay) as page_stay from
(select * from online.bl_hdfs_maidian_updates where partition_date = {partition_date} and cl_id is not null ) group by cl_id) t2
on t1.device_id = t2.cl_id""".format(partition_date=today_str,device_id_tuple=str(tuple(no_portrait_device_id_list)))
print(maidian_sql)
......@@ -254,16 +261,39 @@ WHERE spam_pv.device_id IS NULL
track_df.show(1)
sql_res = track_df.collect()
print("-------------------------------")
for count, res in enumerate(sql_res):
# print(count, res)
track = res.track
if not track:
continue
track_list = track.split(",")
for one_key_word in track_list:
if one_key_word in res_dict:
res_dict[one_key_word] += 1
else:
res_dict[one_key_word] = 1
print(res_dict)
# Bucket every query row by its page_stay value: each row is counted once,
# under the largest threshold (600, 300, 180, 60, 0) that its page_stay reaches.
# Values below 0 match no branch and are not counted.
# NOTE(review): page_stay is selected through a LEFT JOIN in maidian_sql, so it
# is presumably None for devices with no matching rows; on Python 3 the
# comparison `None >= 600` raises TypeError -- confirm and guard if needed.
for res in sql_res:
page_stay = res.page_stay
if page_stay >= 600:
no_protratit_page_stay[600] += 1
elif page_stay >= 300:
no_protratit_page_stay[300] += 1
elif page_stay >= 180:
no_protratit_page_stay[180] += 1
elif page_stay >= 60:
no_protratit_page_stay[60] += 1
elif page_stay >= 0:
no_protratit_page_stay[0] += 1
# Assemble one "replace into" statement per bucket threshold.
for protratit_type in no_protratit_page_stay:
partition_date = today_str
# pid is the MD5 hex digest of the date string concatenated with the threshold.
pid = hashlib.md5((partition_date + str(protratit_type)).encode("utf8")).hexdigest()
# NOTE(review): the count is read from portrait_dict, not from the
# no_protratit_page_stay counters filled above -- confirm this is intended.
action_count = portrait_dict[protratit_type]
instert_sql = """replace into new_user_protratit_page_stay(
partition_day,pid,protratit_count,protratit_type) VALUES('{partition_day}','{pid}',{protratit_count},'{protratit_type}');""".format(
partition_day=today_str, pid=pid, protratit_count=action_count
, protratit_type=str(protratit_type)
)
# NOTE(review): instert_sql is only assembled here; no call that executes it is
# visible in this excerpt -- confirm it is run elsewhere.
# print("-------------------------------")
# for count, res in enumerate(sql_res):
# # print(count, res)
# track = res.track
# if not track:
# continue
# track_list = track.split(",")
# for one_key_word in track_list:
# if one_key_word in res_dict:
# res_dict[one_key_word] += 1
# else:
# res_dict[one_key_word] = 1
# print(res_dict)
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment