Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in
Toggle navigation
P
physical
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
alpha
physical
Commits
c125262b
Commit
c125262b
authored
Mar 14, 2019
by
段英荣
Browse files
Options
Browse Files
Download
Plain Diff
Merge branch 'master' into 'similar_sort'
# Conflicts: # search/views/topic.py
parents
4654b320
3e502a65
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
75 additions
and
55 deletions
+75
-55
topic.py
search/utils/topic.py
+4
-2
topic.py
search/views/topic.py
+70
-52
trans2es_data2es_parallel.py
trans2es/management/commands/trans2es_data2es_parallel.py
+1
-1
No files found.
search/utils/topic.py
View file @
c125262b
...
...
@@ -121,7 +121,7 @@ class TopicUtils(object):
return
{}
@classmethod
def
get_recommend_topic_ids
(
cls
,
user_id
,
offset
,
size
,
query
=
None
,
query_type
=
TopicPageType
.
FIND_PAGE
,
filter_topic_id_list
=
[],
test_score
=
False
,
must_topic_id_list
=
[],
recommend_tag_list
=
[],
user_similar_score_list
=
[]):
def
get_recommend_topic_ids
(
cls
,
user_id
,
tag_id
,
offset
,
size
,
query
=
None
,
query_type
=
TopicPageType
.
FIND_PAGE
,
filter_topic_id_list
=
[],
test_score
=
False
,
must_topic_id_list
=
[],
recommend_tag_list
=
[],
user_similar_score_list
=
[]):
"""
:需增加打散逻辑
:remark:获取首页推荐帖子列表
...
...
@@ -291,8 +291,10 @@ class TopicUtils(object):
'operator'
:
'and'
,
'fields'
:
query_fields
,
}
query_function_score
[
"query"
][
"bool"
][
"should"
]
=
[
{
'multi_match'
:
multi_match
}
{
'multi_match'
:
multi_match
},
{
"term"
:{
"tag_list"
:
tag_id
}}
]
query_function_score
[
"query"
][
"bool"
][
"minimum_should_match"
]
=
1
...
...
search/views/topic.py
View file @
c125262b
...
...
@@ -13,7 +13,7 @@ from search.utils.common import *
from
libs.es
import
ESPerform
def
get_discover_page_topic_ids
(
user_id
,
device_id
,
size
,
query_type
=
TopicPageType
.
FIND_PAGE
):
def
get_discover_page_topic_ids
(
user_id
,
device_id
,
size
,
query_type
=
TopicPageType
.
FIND_PAGE
):
try
:
if
user_id
==
-
1
:
redis_key
=
"physical:discover_page"
+
":user_id:"
+
str
(
user_id
)
+
":device_id:"
+
device_id
...
...
@@ -21,12 +21,14 @@ def get_discover_page_topic_ids(user_id,device_id,size,query_type=TopicPageType.
redis_key
=
"physical:discover_page"
+
":user_id:"
+
str
(
user_id
)
redis_field_list
=
[
b
'have_read_topic_id'
]
redis_field_val_list
=
redis_client
.
hmget
(
redis_key
,
redis_field_list
)
redis_field_val_list
=
redis_client
.
hmget
(
redis_key
,
redis_field_list
)
have_read_topic_id_list
=
json
.
loads
(
redis_field_val_list
[
0
])
if
redis_field_val_list
[
0
]
else
[]
recommend_topic_ids
=
[]
topic_id_dict
=
TopicUtils
.
get_recommend_topic_ids
(
user_id
=
user_id
,
offset
=
0
,
size
=
size
,
query_type
=
query_type
,
filter_topic_id_list
=
have_read_topic_id_list
)
topic_id_dict
=
TopicUtils
.
get_recommend_topic_ids
(
user_id
=
user_id
,
tag_id
=
0
,
offset
=
0
,
size
=
size
,
query_type
=
query_type
,
filter_topic_id_list
=
have_read_topic_id_list
)
for
topic_id
in
topic_id_dict
:
recommend_topic_ids
.
append
(
topic_id
)
...
...
@@ -35,35 +37,40 @@ def get_discover_page_topic_ids(user_id,device_id,size,query_type=TopicPageType.
redis_dict
=
{
"have_read_topic_id"
:
json
.
dumps
(
have_read_topic_id_list
)
}
redis_client
.
hmset
(
redis_key
,
redis_dict
)
redis_client
.
hmset
(
redis_key
,
redis_dict
)
return
recommend_topic_ids
except
:
logging
.
error
(
"catch exception,err_msg:
%
s"
%
traceback
.
format_exc
())
return
[]
def
get_home_recommend_topic_ids
(
user_id
,
device_id
,
offset
,
size
,
query
=
None
,
query_type
=
TopicPageType
.
HOME_RECOMMEND
):
def
get_home_recommend_topic_ids
(
user_id
,
device_id
,
tag_id
,
offset
,
size
,
query
=
None
,
query_type
=
TopicPageType
.
HOME_RECOMMEND
):
try
:
if
query
is
None
:
if
user_id
==
-
1
:
redis_key
=
"physical:home_recommend"
+
":user_id:"
+
str
(
user_id
)
+
":device_id:"
+
device_id
+
":query_type:"
+
str
(
query_type
)
redis_key
=
"physical:home_recommend"
+
":user_id:"
+
str
(
user_id
)
+
":device_id:"
+
device_id
+
":query_type:"
+
str
(
query_type
)
else
:
redis_key
=
"physical:home_recommend"
+
":user_id:"
+
str
(
user_id
)
+
":query_type:"
+
str
(
query_type
)
else
:
if
user_id
==
-
1
:
redis_key
=
"physical:home_query"
+
":user_id:"
+
str
(
user_id
)
+
":device_id:"
+
device_id
+
":query:"
+
str
(
query
)
+
":query_type:"
+
str
(
query_type
)
redis_key
=
"physical:home_query"
+
":user_id:"
+
str
(
user_id
)
+
":device_id:"
+
device_id
+
":query:"
+
str
(
query
)
+
":query_type:"
+
str
(
query_type
)
else
:
redis_key
=
"physical:home_query"
+
":user_id:"
+
str
(
user_id
)
+
":query:"
+
str
(
query
)
+
":query_type:"
+
str
(
query_type
)
redis_key
=
"physical:home_query"
+
":user_id:"
+
str
(
user_id
)
+
":query:"
+
str
(
query
)
+
":query_type:"
+
str
(
query_type
)
redis_field_list
=
[
b
'have_read_topic_list'
]
redis_field_val_list
=
redis_client
.
hmget
(
redis_key
,
redis_field_list
)
redis_field_val_list
=
redis_client
.
hmget
(
redis_key
,
redis_field_list
)
tag_recommend_redis_key
=
"physical:linucb:tag_recommend:device_id:"
+
str
(
device_id
)
recommend_tag_list
=
[]
tag_recommend_val
=
redis_client
.
get
(
tag_recommend_redis_key
)
if
tag_recommend_val
:
recommend_tag_list
=
json
.
loads
(
str
(
tag_recommend_val
,
encoding
=
"utf-8"
))
recommend_tag_list
=
json
.
loads
(
str
(
tag_recommend_val
,
encoding
=
"utf-8"
))
recommend_topic_ids
=
[]
have_read_topic_id_list
=
list
()
...
...
@@ -73,9 +80,13 @@ def get_home_recommend_topic_ids(user_id,device_id,offset,size,query=None,query_
user_similar_score_redis_key
=
"physical:user_similar_score:user_id:"
+
str
(
user_id
)
redis_user_similar_score_redis_val
=
redis_client
.
get
(
user_similar_score_redis_key
)
user_similar_score_redis_list
=
json
.
loads
(
redis_user_similar_score_redis_val
)
if
redis_user_similar_score_redis_val
else
[]
user_similar_score_redis_list
=
json
.
loads
(
redis_user_similar_score_redis_val
)
if
redis_user_similar_score_redis_val
else
[]
topic_id_dict
=
TopicUtils
.
get_recommend_topic_ids
(
user_id
,
offset
,
size
*
size
,
query
,
query_type
,
filter_topic_id_list
=
have_read_topic_id_list
,
recommend_tag_list
=
recommend_tag_list
,
user_similar_score_list
=
user_similar_score_redis_list
)
topic_id_dict
=
TopicUtils
.
get_recommend_topic_ids
(
user_id
,
tag_id
,
offset
,
size
*
size
,
query
,
query_type
,
filter_topic_id_list
=
have_read_topic_id_list
,
recommend_tag_list
=
recommend_tag_list
,
user_similar_score_list
=
user_similar_score_redis_list
)
have_read_group_id_set
=
set
()
have_read_user_id_set
=
set
()
unread_topic_id_dict
=
dict
()
...
...
@@ -86,16 +97,18 @@ def get_home_recommend_topic_ids(user_id,device_id,offset,size,query=None,query_
cur_page_user_num
=
0
for
topic_id
in
topic_id_dict
:
if
topic_id_dict
[
topic_id
][
0
]
in
have_read_group_id_set
or
topic_id_dict
[
topic_id
][
1
]
in
have_read_user_id_set
:
if
topic_id_dict
[
topic_id
][
0
]
in
have_read_group_id_set
or
topic_id_dict
[
topic_id
][
1
]
in
have_read_user_id_set
:
unread_topic_id_dict
[
topic_id
]
=
topic_id_dict
[
topic_id
]
else
:
if
isinstance
(
topic_id_dict
[
topic_id
][
0
],
int
)
and
topic_id_dict
[
topic_id
][
0
]
>
0
and
cur_page_group_num
<
(
size
*
0.9
):
if
isinstance
(
topic_id_dict
[
topic_id
][
0
],
int
)
and
topic_id_dict
[
topic_id
][
0
]
>
0
and
cur_page_group_num
<
(
size
*
0.9
):
have_read_group_id_set
.
add
(
topic_id_dict
[
topic_id
][
0
])
have_read_user_id_set
.
add
(
topic_id_dict
[
topic_id
][
1
])
have_read_topic_id_list
.
append
(
topic_id
)
cur_page_group_num
+=
1
recommend_topic_ids
.
append
(
topic_id
)
elif
topic_id_dict
[
topic_id
]
and
cur_page_user_num
<
(
size
*
0.1
):
elif
topic_id_dict
[
topic_id
]
and
cur_page_user_num
<
(
size
*
0.1
):
have_read_user_id_set
.
add
(
topic_id_dict
[
topic_id
][
1
])
cur_page_user_num
+=
1
recommend_topic_ids
.
append
(
topic_id
)
...
...
@@ -106,10 +119,9 @@ def get_home_recommend_topic_ids(user_id,device_id,offset,size,query=None,query_
if
len
(
recommend_topic_ids
)
>=
size
:
break
if
len
(
recommend_topic_ids
)
<
size
and
len
(
unread_topic_id_dict
)
>
0
:
if
len
(
recommend_topic_ids
)
<
size
and
len
(
unread_topic_id_dict
)
>
0
:
for
unread_topic_id
in
unread_topic_id_dict
:
if
len
(
recommend_topic_ids
)
<
size
:
if
len
(
recommend_topic_ids
)
<
size
:
recommend_topic_ids
.
append
(
unread_topic_id
)
have_read_topic_id_list
.
append
(
unread_topic_id
)
else
:
...
...
@@ -119,27 +131,29 @@ def get_home_recommend_topic_ids(user_id,device_id,offset,size,query=None,query_
cut_len
=
len
(
have_read_topic_id_list
)
-
5000
have_read_topic_id_list
=
have_read_topic_id_list
[
cut_len
:]
redis_dict
=
{
"have_read_topic_list"
:
json
.
dumps
(
have_read_topic_id_list
),
"have_read_topic_list"
:
json
.
dumps
(
have_read_topic_id_list
),
}
redis_client
.
hmset
(
redis_key
,
redis_dict
)
redis_client
.
hmset
(
redis_key
,
redis_dict
)
# 每个session key保存15分钟
redis_client
.
expire
(
redis_key
,
60
*
60
*
24
*
3
)
redis_client
.
expire
(
redis_key
,
60
*
60
*
24
*
3
)
return
recommend_topic_ids
except
:
logging
.
error
(
"catch exception,err_msg:
%
s"
%
traceback
.
format_exc
())
return
[]
@bind
(
"physical/search/query_tag_id_by_topic"
)
def
query_tag_id_by_topic
(
offset
=
0
,
size
=
10
,
topic_id_list
=
[],
user_id
=-
1
):
def
query_tag_id_by_topic
(
offset
=
0
,
size
=
10
,
topic_id_list
=
[],
user_id
=-
1
):
try
:
return
TopicUtils
.
get_topic_tag_info
(
offset
,
size
,
topic_id_list
,
user_id
)
return
TopicUtils
.
get_topic_tag_info
(
offset
,
size
,
topic_id_list
,
user_id
)
except
:
logging
.
error
(
"catch exception,err_msg:
%
s"
%
traceback
.
format_exc
())
return
{}
@bind
(
"physical/search/home_recommend"
)
def
home_recommend
(
device_id
=
""
,
user_id
=-
1
,
offset
=
0
,
size
=
10
,
query_type
=
TopicPageType
.
HOME_RECOMMEND
):
def
home_recommend
(
device_id
=
""
,
user_id
=-
1
,
offset
=
0
,
size
=
10
,
query_type
=
TopicPageType
.
HOME_RECOMMEND
):
"""
:remark:首页推荐,目前只推荐日记
:param session_id:
...
...
@@ -150,20 +164,21 @@ def home_recommend(device_id="",user_id=-1,offset=0,size=10,query_type=TopicPage
"""
try
:
if
not
user_id
:
user_id
=
-
1
if
not
isinstance
(
device_id
,
str
):
user_id
=
-
1
if
not
isinstance
(
device_id
,
str
):
device_id
=
""
recommend_topic_ids
=
get_home_recommend_topic_ids
(
user_id
,
device_id
,
offset
=
0
,
size
=
size
,
query_type
=
query_type
)
recommend_topic_ids
=
get_home_recommend_topic_ids
(
user_id
,
device_id
,
tag_id
=
0
,
offset
=
0
,
size
=
size
,
query_type
=
query_type
)
return
{
"recommend_topic_ids"
:
recommend_topic_ids
}
return
{
"recommend_topic_ids"
:
recommend_topic_ids
}
except
:
logging
.
error
(
"catch exception,err_msg:
%
s"
%
traceback
.
format_exc
())
return
{
"recommend_topic_ids"
:
[]}
@bind
(
"physical/search/discover_page"
)
def
discover_page
(
device_id
=
""
,
user_id
=-
1
,
size
=
10
):
def
discover_page
(
device_id
=
""
,
user_id
=-
1
,
size
=
10
):
"""
:remark:首页推荐,目前只推荐日记
:param session_id:
...
...
@@ -174,20 +189,20 @@ def discover_page(device_id="",user_id=-1,size=10):
"""
try
:
if
not
user_id
:
user_id
=
-
1
if
not
isinstance
(
device_id
,
str
):
user_id
=
-
1
if
not
isinstance
(
device_id
,
str
):
device_id
=
""
recommend_topic_ids
=
get_discover_page_topic_ids
(
user_id
,
device_id
,
size
,
query_type
=
TopicPageType
.
FIND_PAGE
)
recommend_topic_ids
=
get_discover_page_topic_ids
(
user_id
,
device_id
,
size
,
query_type
=
TopicPageType
.
FIND_PAGE
)
return
{
"recommend_topic_ids"
:
recommend_topic_ids
}
return
{
"recommend_topic_ids"
:
recommend_topic_ids
}
except
:
logging
.
error
(
"catch exception,err_msg:
%
s"
%
traceback
.
format_exc
())
return
{
"recommend_topic_ids"
:
[]}
@bind
(
"physical/search/home_query"
)
def
home_query
(
device_id
=
""
,
user_id
=-
1
,
query
=
""
,
offset
=
0
,
size
=
10
):
def
home_query
(
device_id
=
""
,
tag_id
=-
1
,
user_id
=-
1
,
query
=
""
,
offset
=
0
,
size
=
10
):
"""
:remark:首页搜索,目前只推荐日记
:param session_id:
...
...
@@ -199,19 +214,20 @@ def home_query(device_id="",user_id=-1,query="",offset=0,size=10):
"""
try
:
if
not
user_id
:
user_id
=
-
1
if
not
isinstance
(
device_id
,
str
):
user_id
=
-
1
if
not
isinstance
(
device_id
,
str
):
device_id
=
""
recommend_topic_ids
=
get_home_recommend_topic_ids
(
user_id
,
device_id
,
offset
,
size
,
query
)
return
{
"recommend_topic_ids"
:
recommend_topic_ids
}
recommend_topic_ids
=
get_home_recommend_topic_ids
(
user_id
,
device_id
,
tag_id
,
offset
,
size
,
query
)
return
{
"recommend_topic_ids"
:
recommend_topic_ids
}
except
:
logging
.
error
(
"catch exception,err_msg:
%
s"
%
traceback
.
format_exc
())
return
{
"recommend_topic_ids"
:
[]}
@bind
(
"physical/search/topic_detail_page_recommend"
)
def
topic_detail_page_recommend
(
device_id
=
""
,
user_id
=-
1
,
topic_id
=-
1
,
topic_group_id
=-
1
,
topic_user_id
=-
1
,
filter_topic_user_id
=
False
,
offset
=
0
,
size
=
10
):
def
topic_detail_page_recommend
(
device_id
=
""
,
user_id
=-
1
,
topic_id
=-
1
,
topic_group_id
=-
1
,
topic_user_id
=-
1
,
filter_topic_user_id
=
False
,
offset
=
0
,
size
=
10
):
"""
:remark:帖子详情页推荐策略,缺少第一个卡片策略
:param user_id:
...
...
@@ -221,29 +237,31 @@ def topic_detail_page_recommend(device_id="",user_id=-1,topic_id=-1,topic_group_
:return:
"""
try
:
if
not
isinstance
(
user_id
,
int
):
if
not
isinstance
(
user_id
,
int
):
user_id
=
-
1
redis_key
=
"physical:topic_detail_page_recommend"
+
":user_id:"
+
str
(
user_id
)
+
":device_id:"
+
str
(
device_id
)
have_read_topic_redis_data
=
redis_client
.
get
(
redis_key
)
have_read_topic_list
=
json
.
loads
(
have_read_topic_redis_data
)
if
have_read_topic_redis_data
else
[]
#获取es链接对象
#
获取es链接对象
es_cli_obj
=
ESPerform
.
get_cli
()
# 获取帖子标签列表
topic_tag_list
=
TopicUtils
.
get_topic_tag_id_list
(
topic_id
,
es_cli_obj
)
topic_tag_list
=
TopicUtils
.
get_topic_tag_id_list
(
topic_id
,
es_cli_obj
)
result_list
=
TopicUtils
.
get_topic_detail_recommend_list
(
user_id
,
topic_id
,
topic_tag_list
,
topic_group_id
,
topic_user_id
,
filter_topic_user_id
,
have_read_topic_list
,
offset
,
size
,
es_cli_obj
)
result_list
=
TopicUtils
.
get_topic_detail_recommend_list
(
user_id
,
topic_id
,
topic_tag_list
,
topic_group_id
,
topic_user_id
,
filter_topic_user_id
,
have_read_topic_list
,
offset
,
size
,
es_cli_obj
)
recommend_topic_ids_list
=
list
()
if
len
(
result_list
)
>
0
:
if
len
(
result_list
)
>
0
:
recommend_topic_ids_list
=
[
item
[
"_source"
][
"id"
]
for
item
in
result_list
]
have_read_topic_list
.
extend
(
recommend_topic_ids_list
)
have_read_topic_len
=
len
(
have_read_topic_list
)
if
have_read_topic_len
>
5000
:
have_read_topic_list
=
have_read_topic_list
[(
have_read_topic_len
-
5000
):]
if
have_read_topic_len
>
5000
:
have_read_topic_list
=
have_read_topic_list
[(
have_read_topic_len
-
5000
):]
redis_client
.
set
(
redis_key
,
json
.
dumps
(
have_read_topic_list
))
return
{
"recommend_topic_ids"
:
recommend_topic_ids_list
}
...
...
@@ -253,7 +271,7 @@ def topic_detail_page_recommend(device_id="",user_id=-1,topic_id=-1,topic_group_
@bind
(
"physical/search/query_topic_by_tag_aggregation"
)
def
query_topic_by_tag_aggregation
(
user_id
,
tag_id
,
offset
,
size
):
def
query_topic_by_tag_aggregation
(
user_id
,
tag_id
,
offset
,
size
):
"""
:remark 按标签聚合召回帖子
:param tag_id:
...
...
@@ -267,9 +285,9 @@ def query_topic_by_tag_aggregation(user_id,tag_id, offset, size):
if
not
tag_id
:
tag_id
=
-
1
result_list
=
TopicUtils
.
get_tag_aggregation_topic_id_list
(
user_id
,
tag_id
,
offset
,
size
)
result_list
=
TopicUtils
.
get_tag_aggregation_topic_id_list
(
user_id
,
tag_id
,
offset
,
size
)
recommend_topic_ids_list
=
list
()
if
len
(
result_list
)
>
0
:
if
len
(
result_list
)
>
0
:
recommend_topic_ids_list
=
[
item
[
"_source"
][
"id"
]
for
item
in
result_list
]
return
{
"recommend_topic_id"
:
recommend_topic_ids_list
}
...
...
@@ -303,11 +321,11 @@ def query_topic_by_user_similarity(topic_similarity_score_dict, offset=0, size=1
"""
try
:
must_topic_id_list
=
list
(
topic_similarity_score_dict
.
keys
())
topic_id_dict
=
TopicUtils
.
get_recommend_topic_ids
(
user_id
=-
1
,
offset
=
offset
,
size
=
size
,
must_topic_id_list
=
must_topic_id_list
)
topic_id_dict
=
TopicUtils
.
get_recommend_topic_ids
(
tag_id
=
0
,
user_id
=-
1
,
offset
=
offset
,
size
=
size
,
must_topic_id_list
=
must_topic_id_list
)
recommend_topic_ids
=
list
(
topic_id_dict
.
keys
())
return
{
"recommend_topic_ids"
:
recommend_topic_ids
}
return
{
"recommend_topic_ids"
:
recommend_topic_ids
}
except
:
logging
.
error
(
"catch exception,err_msg:
%
s"
%
traceback
.
format_exc
())
return
{
"recommend_topic_id"
:
[]}
trans2es/management/commands/trans2es_data2es_parallel.py
View file @
c125262b
...
...
@@ -129,7 +129,7 @@ class Command(BaseCommand):
def
generate_topic_score_detail
(
self
):
try
:
topic_id_dict
=
TopicUtils
.
get_recommend_topic_ids
(
241432787
,
0
,
500
,
query_type
=
TopicPageType
.
HOME_RECOMMEND
,
test_score
=
True
)
topic_id_dict
=
TopicUtils
.
get_recommend_topic_ids
(
241432787
,
0
,
0
,
500
,
query_type
=
TopicPageType
.
HOME_RECOMMEND
,
test_score
=
True
)
for
topic_id
in
topic_id_dict
:
offline_score
=
0.0
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment