Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in
Toggle navigation
F
ffm-baseline
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
ML
ffm-baseline
Commits
681909bc
Commit
681909bc
authored
Nov 12, 2019
by
高雅喆
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
20191111用户行为日志
parent
44b26426
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
6 additions
and
5 deletions
+6
-5
gyz_test.py
eda/smart_rank/gyz_test.py
+6
-5
No files found.
eda/smart_rank/gyz_test.py
View file @
681909bc
...
...
@@ -65,13 +65,15 @@ def get_user_service_portrait(x, all_word_tags, all_tag_tag_type, all_3tag_2tag,
# data
device_info
=
[]
with
open
(
"/home/gmuser/gyz/log/have_search_device_20191105.csv"
,
"r"
)
as
f
:
# sql: select cl_id, collect_set(params["query"]) from bl_hdfs_maidian_updates where partition_date="20191111" and action="do_search" group by cl_id
with
open
(
"/home/gmuser/gyz/log/have_search_device_20191111.csv"
,
"r"
)
as
f
:
for
line
in
f
.
readlines
():
data
=
line
.
strip
()
.
split
(
"="
)
device
=
data
[
0
]
search_words
=
eval
(
data
[
1
])
device_info
.
append
([
device
,
search_words
])
pay_time
=
157
28832
00
pay_time
=
157
34016
00
# 获取搜索词及其近义词对应的tag
all_word_tags
=
get_all_word_tags
()
all_tag_tag_type
=
get_all_tag_tag_type
()
...
...
@@ -100,5 +102,5 @@ result = device_ids_lst_rdd.repartition(100).map(lambda x: get_user_service_port
print
(
result
.
count
())
print
(
result
.
take
(
10
))
df
=
spark
.
createDataFrame
(
result
)
.
na
.
drop
()
.
toDF
(
"device"
,
"search_words"
,
"user_portrait"
)
.
na
.
drop
()
.
toPandas
()
df
.
to_csv
(
"~/test_df.csv"
,
index
=
False
)
spark
.
stop
()
\ No newline at end of file
df
.
to_csv
(
"~/gyz/log/user_action_20191111.csv"
,
index
=
False
)
spark
.
stop
()
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment