Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in
Toggle navigation
F
ffm-baseline
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
ML
ffm-baseline
Commits
0b23d905
Commit
0b23d905
authored
Nov 11, 2019
by
高雅喆
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
首页feed新用户冷启动的日记、帖子、问答队列
parent
d72242ad
Show whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
65 additions
and
15 deletions
+65
-15
dist_update_user_history_order_tags.py
eda/smart_rank/dist_update_user_history_order_tags.py
+1
-1
gm_feed_cold_start.py
eda/smart_rank/gm_feed_cold_start.py
+64
-14
No files found.
eda/smart_rank/dist_update_user_history_order_tags.py
View file @
0b23d905
...
...
@@ -31,7 +31,7 @@ def get_hot_search_words_tag():
WHERE a.is_delete=0
AND b.tag_type+0<'4'+0
AND b.is_online=1
ORDER BY a.sorted DESC
LIMIT 10
ORDER BY a.sorted DESC
"""
mysql_results
=
get_data_by_mysql
(
'172.16.30.141'
,
3306
,
'work'
,
'BJQaT9VzDcuPBqkd'
,
'zhengxing'
,
hot_search
)
return
mysql_results
...
...
eda/smart_rank/gm_feed_cold_start.py
View file @
0b23d905
import
redis
from
tool
import
es_query
from
tool
import
get_data_by_mysql
...
...
@@ -5,14 +6,14 @@ from tool import get_data_by_mysql
def get_all_city_id():
    """Return every distinct city tag id, plus ``-1`` for "no city".

    Runs ``select distinct tag_id from api_city`` through
    ``get_data_by_mysql`` and collects the ``"tag_id"`` field of each
    returned row.

    Returns:
        list: the ``tag_id`` values followed by a trailing ``-1`` sentinel
        that stands for the case where there is no city.
    """
    sql = "select distinct tag_id from api_city"
    # NOTE(review): the positional arguments are presumably host, port,
    # user, password and database name -- confirm against
    # tool.get_data_by_mysql. The credentials are hard-coded here and
    # should be moved to configuration.
    mysql_results = get_data_by_mysql(
        '172.16.30.141', 3306, 'work', 'BJQaT9VzDcuPBqkd', 'zhengxing', sql)
    city_tag_ids = [row["tag_id"] for row in mysql_results]
    city_tag_ids.append(-1)  # the "no city" case
    return city_tag_ids
def
search_diary_by_match_phrase
(
tag_names
,
city_
pinyin
):
def
search_diary_by_match_phrase
(
tag_names
,
city_
tag_id
):
q
=
dict
()
if
city_
pinyin
==
-
1
:
if
city_
tag_id
==
-
1
:
sort_list
=
[]
else
:
sort_list
=
[
...
...
@@ -21,7 +22,7 @@ def search_diary_by_match_phrase(tag_names, city_pinyin):
'script_file'
:
'sort_diary-recommend'
,
'type'
:
'number'
,
'params'
:
{
'user_city_tag_id'
:
city_
pinyin
,
'user_city_tag_id'
:
city_
tag_id
,
},
'order'
:
'desc'
,
'_cache'
:
True
,
...
...
@@ -74,7 +75,8 @@ def search_topic_by_match_phrase(tag_names):
term_dict2
=
{
"match_phrase"
:
{
"tractate_tag_name_content"
:
{
"query"
:
tag_name
"query"
:
tag_name
,
'analyzer'
:
'gm_default_index'
}
}
}
...
...
@@ -133,14 +135,62 @@ def search_qa_by_match_phrase(tag_names):
if __name__ == "__main__":
    # Rebuild the cold-start candidate queues (diary / topic / qa) for each
    # tag source and store them in Redis.

    # All city tag ids; get_all_city_id() is called exactly once.
    all_city_tag_id = get_all_city_id()

    # NOTE(review): the Redis credentials are embedded in this URL; they
    # should be moved to configuration.
    redis_client = redis.StrictRedis.from_url(
        'redis://:ReDis!GmTx*0aN9@172.16.40.173:6379')

    # One entry per tag source:
    # (hash holding the tag names, topic list key, qa list key, diary hash key)
    queue_specs = [
        (   # hot search words
            "user:service_coldstart_tags2_name",
            "coldstart:hot:search:word:topic:queue",
            "coldstart:hot:search:word:qa:queue",
            "coldstart:hot:search:word:diary:queue",
        ),
        (   # light clinic beauty
            "coldstart:user:service_coldstart_tags3",
            "coldstart:light:clinic:beauty:topic:queue",
            "coldstart:light:clinic:beauty:qa:queue",
            "coldstart:light:clinic:beauty:diary:queue",
        ),
    ]

    # Clear historical data. Delete the real Redis keys (not strings that
    # merely spell the Python variable names), otherwise the lpush calls
    # below keep appending to stale lists on every run.
    for _, topic_key, qa_key, diary_key in queue_specs:
        redis_client.delete(topic_key, qa_key, diary_key)

    for tags_key, topic_key, qa_key, diary_key in queue_specs:
        # Iterating the hgetall() result yields the hash field names as
        # bytes; decode them to str before using them as tag names.
        tag_names = [str(tag, 'utf-8')
                     for tag in redis_client.hgetall(tags_key)]

        # Queue storage. LPUSH needs at least one value, so empty search
        # results are skipped instead of raising.
        topic_queue = search_topic_by_match_phrase(tag_names)
        if topic_queue:
            redis_client.lpush(topic_key, *topic_queue)

        # assumes search_qa_by_match_phrase returns a list of items like
        # the topic search does -- TODO confirm. Unpack it so each item
        # becomes its own list element.
        qa_queue = search_qa_by_match_phrase(tag_names)
        if qa_queue:
            redis_client.lpush(qa_key, *qa_queue)

        # Diary results depend on the city, so keep one entry per city tag id.
        diary_queue_by_city = {
            city_tag_id: search_diary_by_match_phrase(tag_names, city_tag_id)
            for city_tag_id in all_city_tag_id
        }
        # NOTE(review): the hash values are whatever
        # search_diary_by_match_phrase returns; if those are lists, newer
        # redis-py versions reject them -- confirm and serialize if needed.
        redis_client.hmset(diary_key, diary_queue_by_city)
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment