Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in
Toggle navigation
M
meta_base_code
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
宋柯
meta_base_code
Commits
3238eed6
Commit
3238eed6
authored
Sep 17, 2020
by
litaolemo
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
update
parent
3bd4430f
Show whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
45 additions
and
15 deletions
+45
-15
new_user_has_protratit_rate.py
new_user_analysis/new_user_has_protratit_rate.py
+45
-15
No files found.
new_user_analysis/new_user_has_protratit_rate.py
View file @
3238eed6
...
...
@@ -242,10 +242,17 @@ WHERE spam_pv.device_id IS NULL
db
.
close
()
# Per-keyword counters for the comma-separated `track` strings returned by
# the query below.
res_dict = {}

# Device counts per page-stay bucket. Each key is the inclusive lower bound
# of its bucket (presumably seconds -- TODO confirm the unit of `page_stay`).
no_protratit_page_stay = {
    0: 0,
    60: 0,
    180: 0,
    300: 0,
    600: 0,
}

# Render the IN (...) list by hand: str(tuple(ids)) produces "('x',)" for a
# single id and "()" for an empty list, both of which are invalid SQL.  For
# two or more ids the text is identical to what str(tuple(ids)) produced.
# An empty list becomes "(NULL)", which is valid and matches no device.
device_id_in_list = "({})".format(
    ", ".join(repr(device_id) for device_id in no_portrait_device_id_list)
    or "NULL"
)

# One row per requested device: its concatenated action track and summed
# page_stay for the partition.  Both columns are NULL when the device has no
# tracking rows, because of the LEFT JOIN.
maidian_sql = """select t1.device_id,t2.track as track,page_stay from
(select device_id from device_id_view where device_id in {device_id_tuple})t1
left join
(select cl_id, concat_ws(',', collect_list(action)) as track
,sum(page_stay) as page_stay
from
(select * from online.bl_hdfs_maidian_updates where partition_date = {partition_date} and cl_id is not null ) group by cl_id) t2
on t1.device_id = t2.cl_id""".format(
    partition_date=today_str,
    device_id_tuple=device_id_in_list,
)
print
(
maidian_sql
)
...
...
@@ -254,16 +261,39 @@ WHERE spam_pv.device_id IS NULL
track_df
.
show
(
1
)
sql_res
=
track_df
.
collect
()
print
(
"-------------------------------"
)
for
count
,
res
in
enumerate
(
sql_res
):
# print(count, res)
track
=
res
.
track
if
not
track
:
continue
track_list
=
track
.
split
(
","
)
for
one_key_word
in
track_list
:
if
one_key_word
in
res_dict
:
res_dict
[
one_key_word
]
+=
1
else
:
res_dict
[
one_key_word
]
=
1
print
(
res_dict
)
# Tally every collected row into the bucket with the largest lower bound that
# its summed page_stay reaches.  Bounds come from the dict itself so the
# buckets and the tally cannot drift apart.
bucket_lower_bounds = sorted(no_protratit_page_stay, reverse=True)
for res in sql_res:
    page_stay = res.page_stay
    # The query LEFT JOINs onto the tracking aggregate, so page_stay is None
    # for devices without tracking rows.  Comparing None with an int raises
    # TypeError on Python 3, so skip those rows instead of crashing.
    if page_stay is None:
        continue
    for lower_bound in bucket_lower_bounds:
        if page_stay >= lower_bound:
            no_protratit_page_stay[lower_bound] += 1
            break
    # Values below the smallest bound (negative) fall through uncounted.
# Build one "replace into" statement per page-stay bucket.
# The row id (pid) is the md5 of partition date + bucket key.
partition_date = today_str
for protratit_type, action_count in no_protratit_page_stay.items():
    pid = hashlib.md5(
        (partition_date + str(protratit_type)).encode("utf8")
    ).hexdigest()
    # The count is taken from the bucket dict being iterated.  The previous
    # lookup in `portrait_dict` used bucket keys against a different dict.
    instert_sql = """replace into new_user_protratit_page_stay(
partition_day,pid,protratit_count,protratit_type) VALUES('{partition_day}','{pid}',{protratit_count},'{protratit_type}');""".format(
        partition_day=today_str,
        pid=pid,
        protratit_count=action_count,
        protratit_type=str(protratit_type),
    )
    # NOTE(review): the statement is only built in the lines visible here;
    # confirm that it is executed and committed further down the file.
# print("-------------------------------")
# for count, res in enumerate(sql_res):
# # print(count, res)
# track = res.track
# if not track:
# continue
# track_list = track.split(",")
# for one_key_word in track_list:
# if one_key_word in res_dict:
# res_dict[one_key_word] += 1
# else:
# res_dict[one_key_word] = 1
# print(res_dict)
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment