Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in
Toggle navigation
F
ffm-baseline
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
ML
ffm-baseline
Commits
44e1b0df
Commit
44e1b0df
authored
Oct 29, 2019
by
高雅喆
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
画像V3,冷启动和画像存储tag的name
parent
1187ed7d
Show whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
31 additions
and
2 deletions
+31
-2
dist_update_user_portrait_service.py
eda/smart_rank/dist_update_user_portrait_service.py
+19
-2
tool.py
eda/smart_rank/tool.py
+12
-0
No files found.
eda/smart_rank/dist_update_user_portrait_service.py
View file @
44e1b0df
...
...
@@ -40,7 +40,7 @@ def get_hot_search_words_tag():
return
[]
def
get_user_service_portrait
(
cl_id
,
all_word_tags
,
all_tag_tag_type
,
all_3tag_2tag
,
size
=
10
):
def
get_user_service_portrait
(
cl_id
,
all_word_tags
,
all_tag_tag_type
,
all_3tag_2tag
,
all_tags_name
,
size
=
10
):
try
:
db_jerry_test
=
pymysql
.
connect
(
host
=
'172.16.40.158'
,
port
=
4000
,
user
=
'root'
,
passwd
=
'3SYz54LS9#^9sBvC'
,
db
=
'jerry_test'
,
charset
=
'utf8'
)
...
...
@@ -136,6 +136,13 @@ def get_user_service_portrait(cl_id, all_word_tags, all_tag_tag_type, all_3tag_2
redis_client
.
hmset
(
cl_id_portrait_key2
,
gmkv_tag_score2_sum_dict
)
redis_client
.
expire
(
cl_id_portrait_key2
,
time
=
30
*
24
*
60
*
60
)
# 标签name写redis
cl_id_portrait_key3
=
"user:service_portrait_tags3:cl_id:"
+
str
(
cl_id
)
gmkv_tag_score3_sum_dict
=
{
all_tags_name
[
i
]:
gmkv_tag_score2_sum_dict
[
i
]
for
i
in
gmkv_tag_score2_sum_dict
}
redis_client
.
delete
(
cl_id_portrait_key3
)
redis_client
.
hmset
(
cl_id_portrait_key3
,
gmkv_tag_score3_sum_dict
)
redis_client
.
expire
(
cl_id_portrait_key3
,
time
=
30
*
24
*
60
*
60
)
# 写tidb,gmkv同步
stat_date
=
datetime
.
datetime
.
today
()
.
strftime
(
'
%
Y-
%
m-
%
d'
)
replace_sql
=
"""replace into user_service_portrait_tags (stat_date, cl_id, tag_list) values("{stat_date}","{cl_id}","{tag_list}")"""
\
...
...
@@ -182,6 +189,9 @@ if __name__ == '__main__':
# 3级tag对应的2级tag
all_3tag_2tag
=
get_all_3tag_2tag
()
# 标签id对应的中文名称
all_tags_name
=
get_all_tags_name
()
# 画像冷启动
hot_search_words
=
get_hot_search_words_tag
()
hot_search_words_portrait
=
list
()
...
...
@@ -203,8 +213,15 @@ if __name__ == '__main__':
hot_search_words_portrait_portrait_key2
=
"user:service_coldstart_tags2"
hot_search_words_portrait_dict
=
{
i
[
"id"
]:
0.2
for
i
in
hot_search_words
}
gm_kv_cli
.
hmset
(
hot_search_words_portrait_portrait_key2
,
hot_search_words_portrait_dict
)
redis_client
.
delete
(
hot_search_words_portrait_portrait_key2
)
redis_client
.
hmset
(
hot_search_words_portrait_portrait_key2
,
hot_search_words_portrait_dict
)
hot_search_words_portrait_portrait_key3
=
"user:service_coldstart_tags3"
hot_search_words_portrait3_dict
=
{
i
[
"keywords"
]:
0.2
for
i
in
hot_search_words
}
redis_client
.
delete
(
hot_search_words_portrait_portrait_key3
)
redis_client
.
hmset
(
hot_search_words_portrait_portrait_key3
,
hot_search_words_portrait3_dict
)
# 搜索词tag
search_words_synonym_tags_key
=
"search:words:synonym:tags"
search_words_synonym_tags_json
=
json
.
dumps
(
all_word_tags
)
...
...
@@ -224,7 +241,7 @@ if __name__ == '__main__':
spark
.
sparkContext
.
setLogLevel
(
"WARN"
)
spark
.
sparkContext
.
addPyFile
(
"/srv/apps/ffm-baseline_git/eda/smart_rank/tool.py"
)
device_ids_lst_rdd
=
spark
.
sparkContext
.
parallelize
(
device_ids_lst
)
result
=
device_ids_lst_rdd
.
repartition
(
100
)
.
map
(
lambda
x
:
get_user_service_portrait
(
x
,
all_word_tags
,
all_tag_tag_type
,
all_3tag_2tag
))
result
=
device_ids_lst_rdd
.
repartition
(
100
)
.
map
(
lambda
x
:
get_user_service_portrait
(
x
,
all_word_tags
,
all_tag_tag_type
,
all_3tag_2tag
,
all_tags_name
))
result
.
collect
()
except
Exception
as
e
:
...
...
eda/smart_rank/tool.py
View file @
44e1b0df
...
...
@@ -174,6 +174,18 @@ def get_all_3tag_2tag():
print
(
e
)
def get_all_tags_name():
    """Return a mapping from tag id to tag name.

    Runs the ``api_tag`` query below through ``get_data_by_mysql`` and
    builds ``{row['id']: row['name']}`` from the returned rows.

    Returns:
        dict: ``{id: name}`` for every row returned by the query. An empty
        dict is returned when the query or the row processing fails, so
        callers that index the result always receive a dict rather than
        ``None``.
    """
    try:
        sql = "select id, name from api_tag where tag_type+0<'4'+0 and is_online=1"
        # NOTE(review): the connection parameters are hard-coded here; consider
        # loading them from configuration instead of keeping them in source.
        mysql_results = get_data_by_mysql('172.16.30.141', 3306, 'work', 'BJQaT9VzDcuPBqkd', 'zhengxing', sql)
        # Each row is indexed by the 'id' and 'name' column names.
        return {data['id']: data['name'] for data in mysql_results}
    except Exception as e:
        # Best-effort: report the failure and fall back to an empty mapping
        # instead of implicitly returning None.
        print(e)
        return {}
def
get_tag2_from_tag3
(
tag3
,
all_3tag_2tag
,
user_log_df_tag2_list
):
try
:
tag2s
=
[]
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment