Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in
Toggle navigation
P
physical
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
alpha
physical
Commits
ef69f2cc
Commit
ef69f2cc
authored
6 years ago
by
段英荣
Browse files
Options
Browse Files
Download
Plain Diff
Merge branch 'master' into 'test'
Master See merge request
!289
parents
4526073e
f5f2c418
master
deploy/like-prod
deploy/like-stage
deploy/like-test
dev
like-pre/r01
test
No related merge requests found
Hide whitespace changes
Inline
Side-by-side
Showing
14 changed files
with
197 additions
and
242 deletions
+197
-242
tasks.py
injection/data_sync/tasks.py
+2
-0
es.py
libs/es.py
+17
-3
tools.py
libs/tools.py
+3
-3
register_user_tag.py
linucb/utils/register_user_tag.py
+37
-2
collect_data.py
linucb/views/collect_data.py
+62
-36
topic.py
search/utils/topic.py
+13
-84
tag.py
search/views/tag.py
+2
-2
topic.py
search/views/topic.py
+41
-102
topic-high-star.json
trans2es/mapping/topic-high-star.json
+2
-1
topic-star-routing.json
trans2es/mapping/topic-star-routing.json
+2
-1
topic-v1.json
trans2es/mapping/topic-v1.json
+4
-2
topic.json
trans2es/mapping/topic.json
+2
-1
topic.py
trans2es/models/topic.py
+4
-4
topic_transfer.py
trans2es/utils/topic_transfer.py
+6
-1
No files found.
injection/data_sync/tasks.py
View file @
ef69f2cc
...
...
@@ -20,6 +20,8 @@ def write_to_es(es_type, pk_list, use_batch_query_set=False):
if
es_type
==
"register_user_tag"
:
RegisterUserTag
.
get_register_user_tag
(
pk_list
)
elif
es_type
==
"attention_user_tag"
:
RegisterUserTag
.
get_user_attention_tag
(
pk_list
)
else
:
type_info_map
=
get_type_info_map
()
type_info
=
type_info_map
[
es_type
]
...
...
This diff is collapsed.
Click to expand it.
libs/es.py
View file @
ef69f2cc
...
...
@@ -264,7 +264,7 @@ class ESPerform(object):
return
True
@classmethod
def
get_tag_topic_list
(
cls
,
tag_id
,
have_read_topic_id_list
):
def
get_tag_topic_list
(
cls
,
tag_id
,
have_read_topic_id_list
,
size
=
100
):
try
:
functions_list
=
list
()
for
id
in
tag_id
:
...
...
@@ -274,13 +274,27 @@ class ESPerform(object):
"weight"
:
1
}
)
functions_list
+=
[
{
"filter"
:
{
"term"
:
{
"content_level"
:
6
}},
"weight"
:
6000
},
{
"filter"
:
{
"term"
:
{
"content_level"
:
5
}},
"weight"
:
5000
},
{
"filter"
:
{
"term"
:
{
"content_level"
:
4
}},
"weight"
:
4000
}
]
q
=
{
"query"
:{
"function_score"
:{
"query"
:
{
"bool"
:
{
"must"
:
[
{
"range"
:
{
"content_level"
:
{
"gte"
:
3
,
"lte"
:
5
}}},
{
"range"
:
{
"content_level"
:
{
"gte"
:
4
,
"lte"
:
6
}}},
{
"term"
:
{
"is_online"
:
True
}},
{
"term"
:
{
"is_deleted"
:
False
}},
{
"terms"
:
{
"tag_list"
:
tag_id
}}
...
...
@@ -308,7 +322,7 @@ class ESPerform(object):
}
}
result_dict
=
ESPerform
.
get_search_results
(
ESPerform
.
get_cli
(),
sub_index_name
=
"topic"
,
query_body
=
q
,
offset
=
0
,
size
=
100
,
routing
=
"3,4,5
"
)
offset
=
0
,
size
=
size
,
routing
=
"4,5,6
"
)
topic_id_list
=
[
item
[
"_source"
][
"id"
]
for
item
in
result_dict
[
"hits"
]]
logging
.
info
(
"topic_id_list:
%
s"
%
str
(
topic_id_list
))
...
...
This diff is collapsed.
Click to expand it.
libs/tools.py
View file @
ef69f2cc
...
...
@@ -25,10 +25,10 @@ def tzlc(dt, truncate_to_sec=True):
def
get_have_read_topic_id_list
(
device_id
,
user_id
,
query_type
):
try
:
if
user_id
==-
1
:
redis_key
=
"physical:home_recommend"
+
":device_id:"
+
str
(
device_id
)
+
":query_type:"
+
str
(
query_type
)
else
:
if
user_id
and
int
(
user_id
)
>
0
:
redis_key
=
"physical:home_recommend"
+
":user_id:"
+
str
(
user_id
)
+
":query_type:"
+
str
(
query_type
)
else
:
redis_key
=
"physical:home_recommend"
+
":device_id:"
+
str
(
device_id
)
+
":query_type:"
+
str
(
query_type
)
have_read_topic_id_list
=
list
()
...
...
This diff is collapsed.
Click to expand it.
linucb/utils/register_user_tag.py
View file @
ef69f2cc
...
...
@@ -9,7 +9,7 @@ import traceback
import
json
import
pickle
from
django.conf
import
settings
from
trans2es.models.tag
import
AccountUserTag
from
trans2es.models.tag
import
AccountUserTag
,
CommunityTagFollow
from
libs.es
import
ESPerform
import
libs.tools
as
Tools
from
search.utils.common
import
*
...
...
@@ -30,8 +30,40 @@ class RegisterUserTag(object):
linucb_user_id_register_tag_topic_id_prefix
=
"physical:linucb:register_tag_topic_recommend:user_id:"
linucb_register_user_tag_key
=
"physical:linucb:register_user_tag_info"
@classmethod
def
get_user_attention_tag
(
cls
,
pk_list
):
"""
:remark 获取用户关注标签
:param pk_list:
:return:
"""
try
:
user_id_dict
=
dict
()
query_results
=
CommunityTagFollow
.
objects
.
filter
(
pk__in
=
pk_list
,
is_deleted
=
False
,
is_online
=
True
)
for
item
in
query_results
:
tag_id
=
item
.
tag_id
user_id
=
item
.
user_id
user_tag_list
=
CommunityTagFollow
.
objects
.
filter
(
user
=
user_id
,
is_deleted
=
False
,
is_online
=
True
)
.
values_list
(
"tag_id"
,
flat
=
True
)
user_id_dict
[
user_id
]
=
user_tag_list
for
user_id
in
user_id_dict
:
redis_user_tag_id_data
=
redis_client
.
hget
(
cls
.
linucb_register_user_tag_key
,
user_id
)
redis_user_tag_id_list
=
json
.
loads
(
redis_user_tag_id_data
)
if
redis_user_tag_id_data
else
[]
redis_user_tag_id_list
.
extend
(
user_id_dict
[
user_id
])
redis_client
.
hset
(
cls
.
linucb_register_user_tag_key
,
user_id
,
json
.
dumps
(
list
(
set
(
redis_user_tag_id_list
))))
except
:
logging
.
error
(
"catch exception,err_msg:
%
s"
%
traceback
.
format_exc
())
@classmethod
def
get_register_user_tag
(
cls
,
pk_list
):
"""
:remark 用户注册时选的标签
:param pk_list:
:return:
"""
try
:
# user_id_set = set()
user_id_dict
=
dict
()
...
...
@@ -43,7 +75,10 @@ class RegisterUserTag(object):
user_id_dict
[
user_id
]
=
user_tag_list
for
user_id
in
user_id_dict
:
redis_client
.
hset
(
cls
.
linucb_register_user_tag_key
,
user_id
,
json
.
dumps
(
list
(
user_id_dict
[
user_id
])))
redis_user_tag_id_data
=
redis_client
.
hget
(
cls
.
linucb_register_user_tag_key
,
user_id
)
redis_user_tag_id_list
=
json
.
loads
(
redis_user_tag_id_data
)
if
redis_user_tag_id_data
else
[]
redis_user_tag_id_list
.
extend
(
user_id_dict
[
user_id
])
redis_client
.
hset
(
cls
.
linucb_register_user_tag_key
,
user_id
,
json
.
dumps
(
list
(
set
(
redis_user_tag_id_list
))))
# if user_id not in user_id_set:
# user_id_set.add(user_id)
...
...
This diff is collapsed.
Click to expand it.
linucb/views/collect_data.py
View file @
ef69f2cc
...
...
@@ -52,11 +52,8 @@ class CollectData(object):
logging
.
error
(
"catch exception,err_msg:
%
s"
%
traceback
.
format_exc
())
return
dict
()
def
update_recommend_tag_list
(
self
,
device_id
,
user_feature
=
None
,
user_id
=
None
,
click_topic_tag_list
=
None
):
def
update_recommend_tag_list
(
self
,
device_id
,
user_feature
=
None
,
user_id
=
None
,
click_topic_tag_list
=
None
,
new_user_click_tag_list
=
[]
):
try
:
recommend_tag_set
=
set
()
recommend_tag_list
=
list
()
recommend_tag_dict
=
dict
()
redis_linucb_tag_data_dict
=
self
.
_get_user_linucb_info
(
device_id
)
if
len
(
redis_linucb_tag_data_dict
)
==
0
:
recommend_tag_list
=
LinUCB
.
get_default_tag_list
(
user_id
)
...
...
@@ -72,9 +69,8 @@ class CollectData(object):
# Todo:设置过期时间,调研set是否支持
redis_client
.
expire
(
tag_recommend_redis_key
,
7
*
24
*
60
*
60
)
have_read_topic_id_list
=
Tools
.
get_have_read_topic_id_list
(
device_id
,
-
1
,
TopicPageType
.
HOME_RECOMMEND
)
promote_recommend_topic_id_list
=
list
()
promote_recommend_topic_id_list
=
TopicHomeRecommend
.
objects
.
using
(
settings
.
SLAVE_DB_NAME
)
.
filter
(
is_online
=
1
)
.
values_list
(
"topic_id"
)
have_read_topic_id_list
=
Tools
.
get_have_read_topic_id_list
(
device_id
,
user_id
,
TopicPageType
.
HOME_RECOMMEND
)
promote_recommend_topic_id_list
=
TopicHomeRecommend
.
objects
.
using
(
settings
.
SLAVE_DB_NAME
)
.
filter
(
is_online
=
1
)
.
values_list
(
"topic_id"
,
flat
=
True
)
have_read_topic_id_list
.
extend
(
promote_recommend_topic_id_list
)
recommend_topic_id_list
=
list
()
...
...
@@ -83,22 +79,17 @@ class CollectData(object):
if
click_topic_tag_list
:
if
len
(
click_topic_tag_list
)
>
0
:
recommend_topic_id_list_click
=
ESPerform
.
get_tag_topic_list
(
click_topic_tag_list
,
have_read_topic_id_list
)
have_read_topic_id_list
,
size
=
2
)
if
len
(
recommend_topic_id_list_click
)
>
0
:
num
=
min
(
len
(
recommend_topic_id_list_click
),
2
)
logging
.
info
(
"recommend_topic_id_list:
%
s"
%
(
str
(
num
)))
for
i
in
range
(
0
,
num
):
recommend_topic_id_list
.
append
(
recommend_topic_id_list_click
[
i
])
have_read_topic_id_list
.
extend
(
recommend_topic_id_list
)
click_recommend_redis_key
=
self
.
click_recommend_redis_key_prefix
+
str
(
device_id
)
click_redis_data_dict
=
{
"data"
:
json
.
dumps
(
recommend_topic_id_list
),
"cursor"
:
0
}
redis_client
.
hmset
(
click_recommend_redis_key
,
click_redis_data_dict
)
total_topic_list
=
list
()
tag_topic_id_list
=
list
()
recommend_topic_id_list
.
extend
(
recommend_topic_id_list_click
)
have_read_topic_id_list
.
extend
(
recommend_topic_id_list
)
click_recommend_redis_key
=
self
.
click_recommend_redis_key_prefix
+
str
(
device_id
)
click_redis_data_dict
=
{
"data"
:
json
.
dumps
(
recommend_topic_id_list
),
"cursor"
:
0
}
redis_client
.
hmset
(
click_recommend_redis_key
,
click_redis_data_dict
)
tag_id_list
=
recommend_tag_list
[
0
:
100
]
topic_recommend_redis_key
=
self
.
linucb_recommend_topic_id_prefix
+
str
(
device_id
)
...
...
@@ -112,9 +103,16 @@ class CollectData(object):
if
len
(
recommend_topic_id_list
)
==
0
and
cursor
==
0
and
len
(
redis_topic_list
)
>
0
:
have_read_topic_id_list
.
extend
(
redis_topic_list
[:
2
])
tag_topic_id_list
=
ESPerform
.
get_tag_topic_list
(
tag_id_list
,
have_read_topic_id_list
)
if
len
(
tag_id_list
)
>
0
:
if
len
(
new_user_click_tag_list
)
>
0
:
tag_topic_id_list
=
ESPerform
.
get_tag_topic_list
(
new_user_click_tag_list
,
have_read_topic_id_list
)
logging
.
warning
(
"tag_topic_id_list:
%
s"
%
str
(
new_user_click_tag_list
))
logging
.
warning
(
"tag_id_list:
%
s"
%
str
(
tag_id_list
))
else
:
tag_topic_id_list
=
ESPerform
.
get_tag_topic_list
(
tag_id_list
,
have_read_topic_id_list
)
if
len
(
recommend_topic_id_list
)
>
0
:
if
len
(
recommend_topic_id_list
)
>
0
or
len
(
new_user_click_tag_list
)
>
0
:
tag_topic_id_list
=
recommend_topic_id_list
+
tag_topic_id_list
redis_data_dict
=
{
"data"
:
json
.
dumps
(
tag_topic_id_list
),
...
...
@@ -165,8 +163,8 @@ class CollectData(object):
tag_list
=
list
()
click_topic_tag_list
=
list
()
collection_tag_sql_query_results
=
TopicTag
.
objects
.
using
(
settings
.
SLAVE_DB_NAME
)
.
filter
(
topic_id
=
topic_id
)
.
values_list
(
"tag_id"
,
"is_online"
,
"is_collection"
)
if
len
(
collection_tag_sql_query_results
)
>
0
:
for
tag_id
,
is_online
,
is_collection
in
collection_tag_sql_query_results
:
#
if len(collection_tag_sql_query_results)>0:
for
tag_id
,
is_online
,
is_collection
in
collection_tag_sql_query_results
:
if
is_online
and
is_collection
==
1
:
click_topic_tag_list
.
append
(
tag_id
)
...
...
@@ -189,8 +187,8 @@ class CollectData(object):
reward
=
1
if
is_click
or
is_vote
else
0
logging
.
info
(
"positive tag_list,device_id:
%
s,topic_id:
%
s,tag_list:
%
s"
%
(
str
(
device_id
),
str
(
topic_id
),
str
(
tag_list
)))
for
tag_id
in
tag_list
:
str
(
device_id
),
str
(
topic_id
),
str
(
click_topic_
tag_list
)))
for
tag_id
in
click_topic_
tag_list
:
self
.
update_user_linucb_tag_info
(
reward
,
device_id
,
tag_id
,
user_feature
)
# 更新该用户的推荐tag数据,放在 更新完成user tag行为信息之后
...
...
@@ -204,7 +202,7 @@ class CollectData(object):
exposure_cards_list
=
list
()
device_id
=
raw_val_dict
[
"device"
][
"device_id"
]
user_id
=
raw_val_dict
[
"user_id"
]
if
"user_id"
in
raw_val_dict
else
None
logging
.
warning
(
"type msg:
%
s"
%
raw_val_dict
.
get
(
"type"
))
exposure_topic_id_list
=
list
()
for
item
in
exposure_cards_list
:
if
"card_id"
not
in
item
:
...
...
@@ -217,14 +215,17 @@ class CollectData(object):
topic_tag_id_dict
=
dict
()
tag_list
=
list
()
exposure_sql_query_results
=
TopicTag
.
objects
.
using
(
settings
.
SLAVE_DB_NAME
)
.
filter
(
topic_id__in
=
exposure_topic_id_list
)
.
values_list
(
"topic_id"
,
"tag_id"
,
"is_online"
)
for
topic_id
,
tag_id
,
is_online
in
exposure_sql_query_results
:
exposure_sql_query_results
=
TopicTag
.
objects
.
using
(
settings
.
SLAVE_DB_NAME
)
.
filter
(
topic_id__in
=
exposure_topic_id_list
)
.
values_list
(
"topic_id"
,
"tag_id"
,
"is_online"
,
"is_collection"
)
# if len(exposure_sql_query_results)>0:
for
topic_id
,
tag_id
,
is_online
,
is_collection
in
exposure_sql_query_results
:
if
is_online
and
is_collection
==
1
:
tag_list
.
append
(
tag_id
)
if
is_online
:
#
tag_sql_query_results = Tag.objects.using(settings.SLAVE_DB_NAME).filter(
#
id=tag_id).values_list("id", "collection", "is_ai")
#
for id, collection, is_ai in tag_sql_query_results:
# if collection == 1 or is_ai == 1
:
tag_list
.
append
(
tag_
id
)
tag_sql_query_results
=
Tag
.
objects
.
using
(
settings
.
SLAVE_DB_NAME
)
.
filter
(
id
=
tag_id
)
.
values_list
(
"id"
,
"collection"
,
"is_ai"
)
for
id
,
collection
,
is_ai
in
tag_sql_query_results
:
if
(
is_ai
==
1
)
and
id
not
in
tag_list
:
tag_list
.
append
(
id
)
if
topic_id
not
in
topic_tag_id_dict
:
topic_tag_id_dict
[
topic_id
]
=
list
()
...
...
@@ -242,6 +243,31 @@ class CollectData(object):
# 更新该用户的推荐tag数据,放在 更新完成user tag行为信息之后
self
.
update_recommend_tag_list
(
device_id
,
user_feature
,
user_id
)
elif
"type"
in
raw_val_dict
and
"interest_choice_click_next"
==
raw_val_dict
[
"type"
]:
if
isinstance
(
raw_val_dict
[
"params"
][
"tagid_list"
],
str
):
tagid_list
=
json
.
loads
(
raw_val_dict
[
"params"
][
"tagid_list"
])
elif
isinstance
(
raw_val_dict
[
"params"
][
"tagid_list"
],
list
):
tagid_list
=
raw_val_dict
[
"params"
][
"tagid_list"
]
else
:
tagid_list
=
list
()
logging
.
warning
(
"unknown type msg:
%
s"
%
raw_val_dict
.
get
(
"type"
,
"missing type"
))
logging
.
info
(
"consume click topic_id:
%
s,device_id:
%
s"
%
(
str
(
tagid_list
),
str
(
device_id
)))
device_id
=
raw_val_dict
[
"device"
][
"device_id"
]
user_id
=
raw_val_dict
[
"user_id"
]
if
"user_id"
in
raw_val_dict
else
None
# if len(exposure_sql_query_results)>0:
if
len
(
tagid_list
)
>
0
:
is_click
=
1
is_vote
=
0
reward
=
1
if
is_click
or
is_vote
else
0
for
tag_id
in
tagid_list
:
self
.
update_user_linucb_tag_info
(
reward
,
device_id
,
tag_id
,
user_feature
)
# 更新该用户的推荐tag数据,放在 更新完成user tag行为信息之后
self
.
update_recommend_tag_list
(
device_id
,
user_feature
,
user_id
,
new_user_click_tag_list
=
tagid_list
)
else
:
logging
.
warning
(
"unknown type msg:
%
s"
%
raw_val_dict
.
get
(
"type"
,
"missing type"
))
except
:
...
...
This diff is collapsed.
Click to expand it.
search/utils/topic.py
View file @
ef69f2cc
...
...
@@ -124,7 +124,6 @@ class TopicUtils(object):
filter_topic_id_list
=
[],
test_score
=
False
,
must_topic_id_list
=
[],
recommend_tag_list
=
[],
user_similar_score_list
=
[],
index_type
=
"topic"
,
routing
=
None
,
attention_tag_list
=
[]):
"""
:需增加打散逻辑
:remark:获取首页推荐帖子列表
:param user_id:
:param offset:
...
...
@@ -197,25 +196,6 @@ class TopicUtils(object):
"weight"
:
30
,
}
)
# if len(pick_user_id_list) > 0:
# functions_list.append(
# {
# "filter": {"bool": {
# "should": {"terms": {"user_id": pick_user_id_list}}}},
# "weight": 2
# }
# )
# if len(same_pictorial_id_list) > 0:
# functions_list.append(
# {
# "filter": {"bool": {
# "should": {"terms": {"user_id": same_pictorial_id_list}}}},
# "weight": 1
# }
# )
# query_tag_term_list = cls.___get_should_term_list(user_tag_list)
if
len
(
attention_tag_list
)
>
0
:
functions_list
.
append
(
{
...
...
@@ -224,40 +204,11 @@ class TopicUtils(object):
"weight"
:
100
}
)
# if len(recommend_tag_list)>0:
# if len(recommend_tag_list)>1:
# functions_list += [
# {
# "filter": {"term": {"tag_list": recommend_tag_list[0]}},
# "weight": 4
# },
# {
# "filter": {"terms": {"tag_list": recommend_tag_list[1:]}},
# "weight": 3
# }
# ]
# else:
# functions_list.append(
# {
# "filter": {"terms": {"tag_list": recommend_tag_list}},
# "weight": 3
# }
# )
# for tag_id in recommend_tag_dict:
# functions_list.append(
# {
# "filter": {"term": {"tag_list": tag_id}},
# "weight": recommend_tag_dict[tag_id]
# }
# )
# low_content_level = 4 if query_type == TopicPageType.FIND_PAGE else 3
query_function_score
=
{
"query"
:
{
"bool"
:
{
"filter"
:
[
{
"range"
:
{
"content_level"
:
{
"gte"
:
4
,
"lte"
:
5
}}},
{
"range"
:
{
"content_level"
:
{
"gte"
:
4
,
"lte"
:
6
}}},
# {"term": {"has_image":True}},
{
"term"
:
{
"is_online"
:
True
}},
{
"term"
:
{
"is_deleted"
:
False
}}
...
...
@@ -292,11 +243,9 @@ class TopicUtils(object):
}
}
if
len
(
filter_topic_id_list
)
>
0
:
query_function_score
[
"query"
][
"bool"
][
"must_not"
]
=
{
"terms"
:
{
"id"
:
filter_topic_id_list
}
}
query_function_score
[
"query"
][
"bool"
][
"must_not"
]
=
[
{
"terms"
:{
"id"
:
filter_topic_id_list
}}
]
if
query
is
not
None
:
# 搜索帖子
multi_fields
=
{
...
...
@@ -318,7 +267,15 @@ class TopicUtils(object):
{
"term"
:
{
"tag_list"
:
tag_id
}}
]
query_function_score
[
"query"
][
"bool"
][
"minimum_should_match"
]
=
1
else
:
if
"must_not"
in
query_function_score
[
"query"
][
"bool"
]:
query_function_score
[
"query"
][
"bool"
][
"must_not"
]
+=
[
{
"term"
:
{
"is_operation_home_recommend"
:
True
}}
]
else
:
query_function_score
[
"query"
][
"bool"
][
"must_not"
]
=
[
{
"term"
:
{
"is_operation_home_recommend"
:
True
}}
]
q
[
"query"
][
"function_score"
]
=
query_function_score
q
[
"collapse"
]
=
{
"field"
:
"user_id"
...
...
@@ -350,37 +307,9 @@ class TopicUtils(object):
offset
=
offset
,
size
=
size
,
routing
=
routing
)
topic_id_list
=
list
()
same_group_id_set
=
set
()
same_user_id_set
=
set
()
for
item
in
result_dict
[
"hits"
]:
topic_id_list
.
append
(
item
[
"_source"
][
"id"
])
# for item in result_dict["hits"]:
# if item["_source"]["group_id"]>0 and item["_source"]["group_id"] not in same_group_id_set:
# same_group_id_set.add(item["_source"]["id"])
# topic_id_list.append(item["_source"]["id"])
# else:
# same_group_id_set.add(item["_source"]["id"])
#
# if item["_source"]["user_id"] not in same_user_id_set:
# same_user_id_set.add(item["_source"]["id"])
# topic_id_list.append(item["_source"]["id"])
# else:
# same_user_id_set.add(item["_source"]["id"])
#
# if len(topic_id_list) >= single_size:
# break
#
# if len(topic_id_list) < single_size:
# for topic_id in same_group_id_set:
# topic_id_list.append(topic_id)
# if len(topic_id_list)>=single_size:
# break
# for topic_id in same_user_id_set:
# topic_id_list.append(topic_id)
# if len(topic_id_list)>=single_size:
# break
logging
.
info
(
"topic_id_list:
%
s,attention_tag_list
%
s"
%
(
str
(
topic_id_list
),
str
(
attention_tag_list
)))
return
topic_id_list
except
:
logging
.
error
(
"catch exception,err_msg:
%
s"
%
traceback
.
format_exc
())
...
...
This diff is collapsed.
Click to expand it.
search/views/tag.py
View file @
ef69f2cc
...
...
@@ -134,10 +134,10 @@ def choice_push_tag(device_id, user_id):
if
len
(
unread_tag_list
)
>
0
:
for
tag_id
in
unread_tag_list
:
valid_tag_topic_num
=
TopicTag
.
objects
.
filter
(
tag_id
=
tag_id
,
is_online
=
True
)
.
count
()
if
valid_tag_topic_num
>
10
0
:
if
valid_tag_topic_num
>
0
:
ret_tag_set
.
add
(
tag_id
)
redis_push_tag_dict
[
tag_id
]
=
now_sec
if
len
(
ret_tag_set
)
>=
2
:
if
len
(
ret_tag_set
)
>=
1
:
break
redis_client
.
set
(
redis_push_tag_key
,
json
.
dumps
(
redis_push_tag_dict
))
...
...
This diff is collapsed.
Click to expand it.
search/views/topic.py
View file @
ef69f2cc
...
...
@@ -30,7 +30,7 @@ def get_discover_page_topic_ids(user_id, device_id, size, query_type=TopicPageTy
recommend_topic_ids
=
TopicUtils
.
get_recommend_topic_ids
(
user_id
=
user_id
,
tag_id
=
0
,
offset
=
0
,
size
=
size
,
single_size
=
size
,
query_type
=
query_type
,
filter_topic_id_list
=
have_read_topic_id_list
,
index_type
=
"topic"
,
routing
=
"4,5"
)
filter_topic_id_list
=
have_read_topic_id_list
,
index_type
=
"topic"
,
routing
=
"4,5
,6
"
)
have_read_topic_id_list
.
extend
(
recommend_topic_ids
)
...
...
@@ -63,46 +63,14 @@ def get_home_recommend_topic_ids(user_id, device_id, tag_id, offset, size, query
redis_field_list
=
[
b
'have_read_topic_list'
]
redis_field_val_list
=
redis_client
.
hmget
(
redis_key
,
redis_field_list
)
topic_recommend_redis_key
=
"physical:linucb:topic_recommend:device_id:"
+
str
(
device_id
)
# click_recommend_redis_key = "physical:click_recommend:device_id:" + str(device_id)
# recommend_tag_dict = dict()
# tag_recommend_val = redis_client.get(tag_recommend_redis_key)
# if tag_recommend_val:
# recommend_tag_dict = json.loads(str(tag_recommend_val, encoding="utf-8"))
recommend_topic_list
=
list
()
recommend_topic_dict
=
redis_client
.
hgetall
(
topic_recommend_redis_key
)
if
b
"data"
in
recommend_topic_dict
:
recommend_topic_id_list
=
json
.
loads
(
recommend_topic_dict
[
b
"data"
])
cursor
=
int
(
str
(
recommend_topic_dict
[
b
"cursor"
],
encoding
=
"utf-8"
))
newcursor
=
cursor
+
6
if
len
(
recommend_topic_id_list
)
>
newcursor
:
recommend_topic_list
=
recommend_topic_id_list
[
cursor
:
newcursor
]
redis_client
.
hset
(
topic_recommend_redis_key
,
"cursor"
,
newcursor
)
# click_recommend_topic_id_list = list()
# click_recommend_topic_list = list()
#
# click_recommend_topic_dict = redis_client.hgetall(click_recommend_redis_key)
# if b"data" in click_recommend_topic_dict:
# click_recommend_topic_id_list = json.loads(click_recommend_topic_dict[b"data"])
# cursor = int(str(click_recommend_topic_dict[b"cursor"], encoding="utf-8"))
# newcursor = cursor + 2
# if newcursor < 4 and len(click_recommend_topic_id_list) ==2:
# for i in range(0,2):
# click_recommend_topic_list.append(click_recommend_topic_id_list[i])
# redis_client.hset(click_recommend_redis_key, "cursor", newcursor)
# combine_recommend_topic_list_tmp = click_recommend_topic_list.extend(recommend_topic_list)
# combine_recommend_topic_list = combine_recommend_topic_list_tmp[0:5]
# 获取已读帖子
have_read_topic_id_list
=
list
()
if
redis_field_val_list
[
0
]:
if
query
is
None
:
have_read_topic_id_list
=
list
(
json
.
loads
(
redis_field_val_list
[
0
]))
else
:
if
offset
>
0
:
if
offset
>
0
:
# 首次搜索时不需要过滤已读
have_read_topic_id_list
=
list
(
json
.
loads
(
redis_field_val_list
[
0
]))
user_similar_score_redis_key
=
"physical:user_similar_score:user_id:"
+
str
(
user_id
)
...
...
@@ -110,31 +78,45 @@ def get_home_recommend_topic_ids(user_id, device_id, tag_id, offset, size, query
user_similar_score_redis_list
=
json
.
loads
(
redis_user_similar_score_redis_val
)
if
redis_user_similar_score_redis_val
else
[]
attention_tag_list
=
list
()
recommend_topic_list
=
list
()
if
query
is
None
:
# linucb 推荐帖子
topic_recommend_redis_key
=
"physical:linucb:topic_recommend:device_id:"
+
str
(
device_id
)
recommend_topic_dict
=
redis_client
.
hgetall
(
topic_recommend_redis_key
)
if
b
"data"
in
recommend_topic_dict
:
recommend_topic_id_list
=
json
.
loads
(
recommend_topic_dict
[
b
"data"
])
# 推荐帖子是强插的,要保证推荐帖子不在已读里
recommend_topic_id_list
=
list
(
set
(
recommend_topic_id_list
)
-
set
(
have_read_topic_id_list
))
cursor
=
int
(
str
(
recommend_topic_dict
[
b
"cursor"
],
encoding
=
"utf-8"
))
newcursor
=
cursor
+
6
if
len
(
recommend_topic_id_list
)
>
newcursor
:
recommend_topic_list
=
recommend_topic_id_list
[
cursor
:
newcursor
]
redis_client
.
hset
(
topic_recommend_redis_key
,
"cursor"
,
newcursor
)
# 用户关注标签
redis_tag_data
=
redis_client
.
hget
(
"physical:linucb:register_user_tag_info"
,
user_id
)
attention_tag_list
=
json
.
loads
(
redis_tag_data
)
if
redis_tag_data
else
[]
if
len
(
recommend_topic_list
)
>
0
:
size
=
size
-
len
(
recommend_topic_list
)
have_read_topic_id_list
.
extend
(
recommend_topic_list
)
# have_read_topic_id_list_add_promote = list()
# have_read_topic_id_list_add_promote.extend(have_read_topic_id_list)
# promote_recommend_topic_id_list = TopicHomeRecommend.objects.using(settings.SLAVE_DB_NAME).filter(
# is_online=1).values_list("topic_id",flat=True)
#
# for topic_id in promote_recommend_topic_id_list:
# have_read_topic_id_list_add_promote.append(topic_id)
redis_tag_data
=
redis_client
.
hget
(
"physical:linucb:register_user_tag_info"
,
user_id
)
attention_tag_list
=
json
.
loads
(
redis_tag_data
)
if
redis_tag_data
else
[]
logging
.
info
(
"attention_tag_list:
%
s"
%
(
str
(
attention_tag_list
)))
if
len
(
recommend_topic_list
)
>
0
:
size
=
size
-
len
(
recommend_topic_list
)
have_read_topic_id_list
.
extend
(
recommend_topic_list
)
have_read_topic_id_list_add_promote
=
list
()
promote_recommend_topic_id_list
=
list
()
have_read_topic_id_list_add_promote
.
extend
(
have_read_topic_id_list
)
promote_recommend_topic_id_list
=
TopicHomeRecommend
.
objects
.
using
(
settings
.
SLAVE_DB_NAME
)
.
filter
(
is_online
=
1
)
.
values_list
(
"topic_id"
,
flat
=
True
)
if
len
(
promote_recommend_topic_id_list
)
>
0
:
for
topic_id
in
promote_recommend_topic_id_list
:
have_read_topic_id_list_add_promote
.
append
(
topic_id
)
topic_id_list
=
list
()
rank_topic_id_list
=
TopicUtils
.
get_recommend_topic_ids
(
user_id
=
user_id
,
tag_id
=
tag_id
,
offset
=
offset
,
size
=
size
,
rank_topic_id_list
=
TopicUtils
.
get_recommend_topic_ids
(
user_id
=
user_id
,
tag_id
=
tag_id
,
offset
=
0
,
size
=
size
,
single_size
=
size
,
query
=
query
,
query_type
=
query_type
,
filter_topic_id_list
=
have_read_topic_id_list_add_promote
,
recommend_tag_list
=
recommend_topic_list
,
user_similar_score_list
=
user_similar_score_redis_list
,
index_type
=
"topic"
,
routing
=
"4,5"
,
attention_tag_list
=
attention_tag_list
)
filter_topic_id_list
=
have_read_topic_id_list
,
user_similar_score_list
=
user_similar_score_redis_list
,
index_type
=
"topic"
,
routing
=
"4,5,6"
,
attention_tag_list
=
attention_tag_list
)
if
(
len
(
recommend_topic_list
)
==
6
)
:
if
len
(
recommend_topic_list
)
==
6
and
query
is
None
:
if
(
size
<
11
):
topic_id_list
.
extend
(
rank_topic_id_list
[
0
:
3
])
topic_id_list
.
extend
(
recommend_topic_list
[
0
:
3
])
...
...
@@ -148,49 +130,6 @@ def get_home_recommend_topic_ids(user_id, device_id, tag_id, offset, size, query
else
:
topic_id_list
.
extend
(
rank_topic_id_list
)
have_read_group_id_set
=
set
()
have_read_user_id_set
=
set
()
unread_topic_id_dict
=
dict
()
logging
.
info
(
"attention_tag_list:
%
s"
%
(
str
(
topic_id_list
)))
# # 当前页小组数量
# cur_page_group_num = 0
# # 当前页用户数量
# cur_page_user_num = 0
#
# for topic_id in topic_id_dict:
# if topic_id_dict[topic_id][0] in have_read_group_id_set or topic_id_dict[topic_id][
# 1] in have_read_user_id_set:
# unread_topic_id_dict[topic_id] = topic_id_dict[topic_id]
# else:
# if isinstance(topic_id_dict[topic_id][0], int) and topic_id_dict[topic_id][
# 0] > 0 and cur_page_group_num < (size * 0.9):
# have_read_group_id_set.add(topic_id_dict[topic_id][0])
# have_read_user_id_set.add(topic_id_dict[topic_id][1])
# have_read_topic_id_list.append(topic_id)
# cur_page_group_num += 1
# recommend_topic_ids.append(topic_id)
# elif topic_id_dict[topic_id] and cur_page_user_num < (size * 0.1):
# have_read_user_id_set.add(topic_id_dict[topic_id][1])
# cur_page_user_num += 1
# recommend_topic_ids.append(topic_id)
# have_read_topic_id_list.append(topic_id)
# else:
# unread_topic_id_dict[topic_id] = topic_id_dict[topic_id]
#
# if len(recommend_topic_ids) >= size:
# break
# if len(recommend_topic_ids) < size and len(unread_topic_id_dict) > 0:
# for unread_topic_id in unread_topic_id_dict:
# if len(recommend_topic_ids) < size:
# recommend_topic_ids.append(unread_topic_id)
# have_read_topic_id_list.append(unread_topic_id)
# else:
# break
# topic_id_list.extend(recommend_topic_list)
# recommend_topic_list.extend(topic_id_list)
have_read_topic_id_list
.
extend
(
topic_id_list
)
if
len
(
have_read_topic_id_list
)
>
30000
:
cut_len
=
len
(
have_read_topic_id_list
)
-
30000
...
...
@@ -305,7 +244,7 @@ def home_query(device_id="", tag_id=-1, user_id=-1, query="", offset=0, size=10)
if
not
isinstance
(
device_id
,
str
):
device_id
=
""
recommend_topic_ids
=
get_home_recommend_topic_ids
(
user_id
,
device_id
,
tag_id
,
offset
,
size
,
query
)
recommend_topic_ids
=
get_home_recommend_topic_ids
(
user_id
,
device_id
,
tag_id
,
offset
=
offset
,
size
=
size
,
query
=
query
)
return
{
"recommend_topic_ids"
:
recommend_topic_ids
}
except
:
logging
.
error
(
"catch exception,err_msg:
%
s"
%
traceback
.
format_exc
())
...
...
@@ -339,7 +278,7 @@ def topic_detail_page_recommend(device_id="", user_id=-1, topic_id=-1, topic_pic
result_list
=
TopicUtils
.
get_topic_detail_recommend_list
(
user_id
,
topic_id
,
topic_tag_list
,
topic_pictorial_id
,
topic_user_id
,
filter_topic_user_id
,
have_read_topic_list
,
offset
,
size
,
es_cli_obj
,
index_type
=
"topic"
,
routing
=
"4,5"
)
have_read_topic_list
,
offset
,
size
,
es_cli_obj
,
index_type
=
"topic"
,
routing
=
"4,5
,6
"
)
recommend_topic_ids_list
=
list
()
if
len
(
result_list
)
>
0
:
recommend_topic_ids_list
=
[
item
[
"_source"
][
"id"
]
for
item
in
result_list
]
...
...
@@ -414,7 +353,7 @@ def query_topic_by_user_similarity(topic_similarity_score_dict, offset=0, size=1
must_topic_id_list
=
list
(
topic_similarity_score_dict
.
keys
())
topic_id_list
=
TopicUtils
.
get_recommend_topic_ids
(
tag_id
=
0
,
user_id
=-
1
,
offset
=
offset
,
size
=
size
,
single_size
=
size
,
must_topic_id_list
=
must_topic_id_list
,
index_type
=
"topic"
,
routing
=
"4,5"
)
must_topic_id_list
=
must_topic_id_list
,
index_type
=
"topic"
,
routing
=
"4,5
,6
"
)
return
{
"recommend_topic_ids"
:
topic_id_list
}
except
:
...
...
This diff is collapsed.
Click to expand it.
trans2es/mapping/topic-high-star.json
View file @
ef69f2cc
...
...
@@ -51,6 +51,7 @@
"analyzer"
:
"gm_default_index"
,
"search_analyzer"
:
"gm_default_index"
},
"is_excellent"
:{
"type"
:
"long"
}
"is_excellent"
:{
"type"
:
"long"
},
"is_operation_home_recommend"
:
{
"type"
:
"boolean"
}
//是否首页运营推荐
}
}
This diff is collapsed.
Click to expand it.
trans2es/mapping/topic-star-routing.json
View file @
ef69f2cc
...
...
@@ -51,6 +51,7 @@
"analyzer"
:
"gm_default_index"
,
"search_analyzer"
:
"gm_default_index"
},
"is_excellent"
:{
"type"
:
"long"
}
"is_excellent"
:{
"type"
:
"long"
},
"is_operation_home_recommend"
:
{
"type"
:
"boolean"
}
//是否首页运营推荐
}
}
This diff is collapsed.
Click to expand it.
trans2es/mapping/topic-v1.json
View file @
ef69f2cc
{
"dynamic"
:
"strict"
,
"_routing"
:
{
"required"
:
true
},
"properties"
:
{
"id"
:{
"type"
:
"long"
},
"is_online"
:{
"type"
:
"boolean"
},
//上线
"is_deleted"
:{
"type"
:
"boolean"
},
"vote_num"
:{
"type"
:
"long"
},
"total_vote_num"
:{
"type"
:
"long"
,
"default"
:
0
},
"total_vote_num"
:{
"type"
:
"long"
},
"reply_num"
:{
"type"
:
"long"
},
"name"
:{
"type"
:
"text"
,
"analyzer"
:
"gm_default_index"
,
"search_analyzer"
:
"gm_default_index"
},
"description"
:{
"type"
:
"text"
,
"analyzer"
:
"gm_default_index"
,
"search_analyzer"
:
"gm_default_index"
},
...
...
@@ -50,6 +51,7 @@
"analyzer"
:
"gm_default_index"
,
"search_analyzer"
:
"gm_default_index"
},
"is_excellent"
:{
"type"
:
"long"
}
"is_excellent"
:{
"type"
:
"long"
},
"is_operation_home_recommend"
:
{
"type"
:
"boolean"
}
//是否首页运营推荐
}
}
This diff is collapsed.
Click to expand it.
trans2es/mapping/topic.json
View file @
ef69f2cc
...
...
@@ -51,6 +51,7 @@
"analyzer"
:
"gm_default_index"
,
"search_analyzer"
:
"gm_default_index"
},
"is_excellent"
:{
"type"
:
"long"
}
"is_excellent"
:{
"type"
:
"long"
},
"is_operation_home_recommend"
:
{
"type"
:
"boolean"
}
//是否首页运营推荐
}
}
This diff is collapsed.
Click to expand it.
trans2es/models/topic.py
View file @
ef69f2cc
...
...
@@ -197,8 +197,8 @@ class Topic(models.Model):
offline_score
+=
6.0
elif
self
.
content_level
==
'4'
:
offline_score
+=
5.0
elif
self
.
content_level
==
'
3
'
:
offline_score
+=
2
.0
elif
self
.
content_level
==
'
6
'
:
offline_score
+=
100
.0
# exposure_count = ActionSumAboutTopic.objects.using(settings.SLAVE_DB_NAME).filter(topic_id=self.id, data_type=1).count()
# click_count = ActionSumAboutTopic.objects.using(settings.SLAVE_DB_NAME).filter(topic_id=self.id, data_type=2).count()
...
...
@@ -289,4 +289,5 @@ class TopicHomeRecommend(models.Model):
db_table
=
"topic_home_recommend"
id
=
models
.
IntegerField
(
verbose_name
=
u"id"
,
primary_key
=
True
)
topic_id
=
models
.
IntegerField
(
verbose_name
=
u"帖子ID"
)
is_online
=
models
.
BooleanField
(
verbose_name
=
u'是否上线'
)
\ No newline at end of file
is_online
=
models
.
BooleanField
(
verbose_name
=
u'是否上线'
)
is_deleted
=
models
.
BooleanField
(
verbose_name
=
u'是否删除'
)
This diff is collapsed.
Click to expand it.
trans2es/utils/topic_transfer.py
View file @
ef69f2cc
...
...
@@ -9,7 +9,7 @@ import time
import
re
import
datetime
from
trans2es.models.user
import
User
from
trans2es.models.topic
import
ExcellentTopic
from
trans2es.models.topic
import
ExcellentTopic
,
TopicHomeRecommend
class
TopicTransfer
(
object
):
...
...
@@ -121,6 +121,11 @@ class TopicTransfer(object):
else
:
res
[
"is_excellent"
]
=
0
res
[
"is_operation_home_recommend"
]
=
False
operation_home_recommend
=
TopicHomeRecommend
.
objects
.
filter
(
topic_id
=
instance
.
id
)
.
first
()
if
operation_home_recommend
and
operation_home_recommend
.
is_online
and
not
operation_home_recommend
.
is_deleted
:
res
[
"is_operation_home_recommend"
]
=
True
logging
.
info
(
"test topic transfer time cost,time0:
%
d,time1:
%
d,time2:
%
d,time3:
%
d,time4:
%
d"
%
(
time0
,
time1
,
time2
,
time3
,
time4
))
return
res
except
:
...
...
This diff is collapsed.
Click to expand it.
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment