Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in
Toggle navigation
F
ffm-baseline
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
ML
ffm-baseline
Commits
dfd16540
Commit
dfd16540
authored
Oct 10, 2019
by
高雅喆
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
用户画像冷启动
parent
a046825a
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
35 additions
and
0 deletions
+35
-0
dist_update_user_portrait_service.py
eda/smart_rank/dist_update_user_portrait_service.py
+35
-0
No files found.
eda/smart_rank/dist_update_user_portrait_service.py
View file @
dfd16540
...
...
@@ -20,6 +20,26 @@ from pyspark.sql.functions import concat_ws
from
tool
import
*
def get_hot_search_words_tag():
    """Fetch the tags that match the current hot-search keywords.

    Runs a fixed query that joins ``api_hot_search_words`` to ``api_tag`` on
    keyword == tag name, keeps non-deleted keywords whose tag is online and
    has a ``tag_type`` numerically below 4, and takes the 10 rows with the
    highest ``sorted`` value.

    Returns:
        Whatever ``get_data_by_mysql`` returns for the query. If anything
        raises, the exception is printed and an empty list is returned.
    """
    # NOTE(review): the WHERE conditions on b.* reject rows without a matching
    # tag, so this LEFT JOIN effectively behaves like an inner join.
    query = """
SELECT a.keywords,
b.id,
b.tag_type
FROM api_hot_search_words a
LEFT JOIN api_tag b ON a.keywords=b.name
WHERE a.is_delete=0
AND b.tag_type+0<'4'+0
AND b.is_online=1
ORDER BY a.sorted DESC LIMIT 10
"""
    try:
        # NOTE(review): connection credentials are hard-coded here; they
        # should come from configuration or a secret store.
        # Positional arguments are presumably host, port, user, password,
        # database and SQL -- confirm against the helper in `tool`.
        return get_data_by_mysql(
            '172.16.30.141',
            3306,
            'work',
            'BJQaT9VzDcuPBqkd',
            'zhengxing',
            query,
        )
    except Exception as err:
        # Best effort: report the failure and let the caller continue with
        # an empty result.
        print(err)
        return []
def
get_user_service_portrait
(
cl_id
,
all_word_tags
,
all_tag_tag_type
,
all_3tag_2tag
,
size
=
10
):
try
:
db_jerry_test
=
pymysql
.
connect
(
host
=
'172.16.40.158'
,
port
=
4000
,
user
=
'root'
,
passwd
=
'3SYz54LS9#^9sBvC'
,
...
...
@@ -140,6 +160,21 @@ if __name__ == '__main__':
# Level-2 tag corresponding to each level-3 tag
all_3tag_2tag = get_all_3tag_2tag()

# Portrait cold start: build a default portrait from the hot-search tags and
# cache it as JSON under a fixed key.
hot_search_words = get_hot_search_words_tag()
# Every hot-search tag gets the same fixed score and weight.
hot_search_words_portrait = [
    {"tag_score": 10, "weight": 10, "tag2": tag_info["id"]}
    for tag_info in hot_search_words
]

# NOTE(review): redis-py interprets socket_timeout in seconds, so 2000 is
# roughly 33 minutes -- confirm this is intended.
gm_kv_cli = redis.Redis(host="172.16.40.135", port=5379, db=2, socket_timeout=2000)
hot_search_words_portrait_portrait_key = "user:service_coldstart_tags:cl_id:"
hot_search_words_portrait_json = json.dumps(hot_search_words_portrait)
# NOTE(review): when get_hot_search_words_tag() returns an empty list (it does
# so on any query error), "[]" is still written and replaces the stored value.
# Store the value and its 30-day TTL in a single command so the key can never
# be left behind without an expiry if the process dies between two calls.
gm_kv_cli.set(
    hot_search_words_portrait_portrait_key,
    hot_search_words_portrait_json,
    ex=30 * 24 * 60 * 60,
)
# rdd
sparkConf
=
SparkConf
()
.
set
(
"spark.hive.mapred.supports.subdirectories"
,
"true"
)
\
.
set
(
"spark.hadoop.mapreduce.input.fileinputformat.input.dir.recursive"
,
"true"
)
\
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment