Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in
Toggle navigation
P
physical
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
alpha
physical
Commits
b2437cea
Commit
b2437cea
authored
Mar 18, 2019
by
段英荣
Browse files
Options
Browse Files
Download
Plain Diff
Merge branch 'master' into 'test'
Master See merge request
!183
parents
8675f182
d3dde44a
Hide whitespace changes
Inline
Side-by-side
Showing
6 changed files
with
100 additions
and
74 deletions
+100
-74
linucb.py
linucb/views/linucb.py
+2
-2
topic.py
search/utils/topic.py
+36
-18
topic.py
search/views/topic.py
+46
-48
user.py
search/views/user.py
+3
-1
tag_transfer.py
trans2es/utils/tag_transfer.py
+11
-4
user_transfer.py
trans2es/utils/user_transfer.py
+2
-1
No files found.
linucb/views/linucb.py
View file @
b2437cea
...
...
@@ -11,10 +11,10 @@ import json
import
pickle
class
LinUCB
:
d
=
2
d
=
1
alpha
=
0.25
r1
=
1
r0
=
-
16
r0
=
-
0.5
default_tag_list
=
list
()
@classmethod
...
...
search/utils/topic.py
View file @
b2437cea
...
...
@@ -120,8 +120,9 @@ class TopicUtils(object):
return
{}
@classmethod
def
get_recommend_topic_ids
(
cls
,
user_id
,
tag_id
,
offset
,
size
,
query
=
None
,
query_type
=
TopicPageType
.
FIND_PAGE
,
filter_topic_id_list
=
[],
test_score
=
False
,
must_topic_id_list
=
[],
recommend_tag_list
=
[],
user_similar_score_list
=
[]):
def
get_recommend_topic_ids
(
cls
,
user_id
,
tag_id
,
offset
,
size
,
single_size
,
query
=
None
,
query_type
=
TopicPageType
.
FIND_PAGE
,
filter_topic_id_list
=
[],
test_score
=
False
,
must_topic_id_list
=
[],
recommend_tag_list
=
[],
user_similar_score_list
=
[]):
"""
:需增加打散逻辑
:remark:获取首页推荐帖子列表
...
...
@@ -291,9 +292,6 @@ class TopicUtils(object):
'operator'
:
'and'
,
'fields'
:
query_fields
,
}
logging
.
info
(
"get ----------------tag_id:
%
s"
%
query
)
logging
.
info
(
"get ----------------tag_id:
%
s"
%
tag_id
)
logging
.
info
(
"get ----------------tag_id:
%
s"
%
type
(
tag_id
))
query_function_score
[
"query"
][
"bool"
][
"should"
]
=
[
{
'multi_match'
:
multi_match
},
...
...
@@ -326,22 +324,42 @@ class TopicUtils(object):
result_dict
=
ESPerform
.
get_search_results
(
ESPerform
.
get_cli
(),
sub_index_name
=
"topic"
,
query_body
=
q
,
offset
=
offset
,
size
=
size
)
if
not
test_score
:
topic_id_dict
=
dict
()
for
item
in
result_dict
[
"hits"
]:
topic_id_dict
[
item
[
"_source"
][
"id"
]]
=
[
item
[
"_source"
][
"group_id"
],
item
[
"_source"
][
"user_id"
]]
return
topic_id_dict
else
:
topic_id_dict
=
dict
()
for
item
in
result_dict
[
"hits"
]:
topic_id_dict
[
item
[
"_source"
][
"id"
]]
=
[
item
[
"_source"
][
"group_id"
],
item
[
"_source"
][
"user_id"
],
item
[
"_score"
]]
return
topic_id_dict
topic_id_list
=
list
()
same_group_id_set
=
set
()
same_user_id_set
=
set
()
for
item
in
result_dict
[
"hits"
]:
topic_id_list
.
append
(
item
[
"_source"
][
"id"
])
# for item in result_dict["hits"]:
# if item["_source"]["group_id"]>0 and item["_source"]["group_id"] not in same_group_id_set:
# same_group_id_set.add(item["_source"]["id"])
# topic_id_list.append(item["_source"]["id"])
# else:
# same_group_id_set.add(item["_source"]["id"])
#
# if item["_source"]["user_id"] not in same_user_id_set:
# same_user_id_set.add(item["_source"]["id"])
# topic_id_list.append(item["_source"]["id"])
# else:
# same_user_id_set.add(item["_source"]["id"])
#
# if len(topic_id_list) >= single_size:
# break
#
# if len(topic_id_list) < single_size:
# for topic_id in same_group_id_set:
# topic_id_list.append(topic_id)
# if len(topic_id_list)>=single_size:
# break
# for topic_id in same_user_id_set:
# topic_id_list.append(topic_id)
# if len(topic_id_list)>=single_size:
# break
return
topic_id_list
except
:
logging
.
error
(
"catch exception,err_msg:
%
s"
%
traceback
.
format_exc
())
return
dic
t
()
return
lis
t
()
@classmethod
def
get_topic_detail_recommend_list
(
cls
,
user_id
,
topic_id
,
topic_tag_list
,
topic_group_id
,
topic_user_id
,
filter_topic_user_id
,
have_read_topic_list
,
offset
,
size
,
es_cli_obj
=
None
):
...
...
search/views/topic.py
View file @
b2437cea
...
...
@@ -25,13 +25,10 @@ def get_discover_page_topic_ids(user_id, device_id, size, query_type=TopicPageTy
have_read_topic_id_list
=
json
.
loads
(
redis_field_val_list
[
0
])
if
redis_field_val_list
[
0
]
else
[]
recommend_topic_ids
=
[]
topic_id_dict
=
TopicUtils
.
get_recommend_topic_ids
(
user_id
=
user_id
,
tag_id
=
0
,
offset
=
0
,
size
=
size
,
recommend_topic_ids
=
TopicUtils
.
get_recommend_topic_ids
(
user_id
=
user_id
,
tag_id
=
0
,
offset
=
0
,
size
=
size
,
single_size
=
size
,
query_type
=
query_type
,
filter_topic_id_list
=
have_read_topic_id_list
)
for
topic_id
in
topic_id_dict
:
recommend_topic_ids
.
append
(
topic_id
)
have_read_topic_id_list
.
extend
(
recommend_topic_ids
)
redis_dict
=
{
...
...
@@ -83,7 +80,8 @@ def get_home_recommend_topic_ids(user_id, device_id, tag_id, offset, size, query
user_similar_score_redis_list
=
json
.
loads
(
redis_user_similar_score_redis_val
)
if
redis_user_similar_score_redis_val
else
[]
topic_id_dict
=
TopicUtils
.
get_recommend_topic_ids
(
user_id
,
tag_id
,
offset
,
size
*
size
,
query
,
query_type
,
topic_id_list
=
TopicUtils
.
get_recommend_topic_ids
(
user_id
=
user_id
,
tag_id
=
tag_id
,
offset
=
offset
,
size
=
size
,
single_size
=
size
,
query
=
query
,
query_type
=
query_type
,
filter_topic_id_list
=
have_read_topic_id_list
,
recommend_tag_list
=
recommend_tag_list
,
user_similar_score_list
=
user_similar_score_redis_list
)
...
...
@@ -91,44 +89,45 @@ def get_home_recommend_topic_ids(user_id, device_id, tag_id, offset, size, query
have_read_user_id_set
=
set
()
unread_topic_id_dict
=
dict
()
# 当前页小组数量
cur_page_group_num
=
0
# 当前页用户数量
cur_page_user_num
=
0
for
topic_id
in
topic_id_dict
:
if
topic_id_dict
[
topic_id
][
0
]
in
have_read_group_id_set
or
topic_id_dict
[
topic_id
][
1
]
in
have_read_user_id_set
:
unread_topic_id_dict
[
topic_id
]
=
topic_id_dict
[
topic_id
]
else
:
if
isinstance
(
topic_id_dict
[
topic_id
][
0
],
int
)
and
topic_id_dict
[
topic_id
][
0
]
>
0
and
cur_page_group_num
<
(
size
*
0.9
):
have_read_group_id_set
.
add
(
topic_id_dict
[
topic_id
][
0
])
have_read_user_id_set
.
add
(
topic_id_dict
[
topic_id
][
1
])
have_read_topic_id_list
.
append
(
topic_id
)
cur_page_group_num
+=
1
recommend_topic_ids
.
append
(
topic_id
)
elif
topic_id_dict
[
topic_id
]
and
cur_page_user_num
<
(
size
*
0.1
):
have_read_user_id_set
.
add
(
topic_id_dict
[
topic_id
][
1
])
cur_page_user_num
+=
1
recommend_topic_ids
.
append
(
topic_id
)
have_read_topic_id_list
.
append
(
topic_id
)
else
:
unread_topic_id_dict
[
topic_id
]
=
topic_id_dict
[
topic_id
]
if
len
(
recommend_topic_ids
)
>=
size
:
break
if
len
(
recommend_topic_ids
)
<
size
and
len
(
unread_topic_id_dict
)
>
0
:
for
unread_topic_id
in
unread_topic_id_dict
:
if
len
(
recommend_topic_ids
)
<
size
:
recommend_topic_ids
.
append
(
unread_topic_id
)
have_read_topic_id_list
.
append
(
unread_topic_id
)
else
:
break
if
len
(
have_read_topic_id_list
)
>
10000
:
cut_len
=
len
(
have_read_topic_id_list
)
-
10000
# # 当前页小组数量
# cur_page_group_num = 0
# # 当前页用户数量
# cur_page_user_num = 0
#
# for topic_id in topic_id_dict:
# if topic_id_dict[topic_id][0] in have_read_group_id_set or topic_id_dict[topic_id][
# 1] in have_read_user_id_set:
# unread_topic_id_dict[topic_id] = topic_id_dict[topic_id]
# else:
# if isinstance(topic_id_dict[topic_id][0], int) and topic_id_dict[topic_id][
# 0] > 0 and cur_page_group_num < (size * 0.9):
# have_read_group_id_set.add(topic_id_dict[topic_id][0])
# have_read_user_id_set.add(topic_id_dict[topic_id][1])
# have_read_topic_id_list.append(topic_id)
# cur_page_group_num += 1
# recommend_topic_ids.append(topic_id)
# elif topic_id_dict[topic_id] and cur_page_user_num < (size * 0.1):
# have_read_user_id_set.add(topic_id_dict[topic_id][1])
# cur_page_user_num += 1
# recommend_topic_ids.append(topic_id)
# have_read_topic_id_list.append(topic_id)
# else:
# unread_topic_id_dict[topic_id] = topic_id_dict[topic_id]
#
# if len(recommend_topic_ids) >= size:
# break
# if len(recommend_topic_ids) < size and len(unread_topic_id_dict) > 0:
# for unread_topic_id in unread_topic_id_dict:
# if len(recommend_topic_ids) < size:
# recommend_topic_ids.append(unread_topic_id)
# have_read_topic_id_list.append(unread_topic_id)
# else:
# break
have_read_topic_id_list
.
extend
(
topic_id_list
)
if
len
(
have_read_topic_id_list
)
>
5000
:
cut_len
=
len
(
have_read_topic_id_list
)
-
5000
have_read_topic_id_list
=
have_read_topic_id_list
[
cut_len
:]
redis_dict
=
{
"have_read_topic_list"
:
json
.
dumps
(
have_read_topic_id_list
),
...
...
@@ -137,7 +136,7 @@ def get_home_recommend_topic_ids(user_id, device_id, tag_id, offset, size, query
# 每个session key保存15分钟
redis_client
.
expire
(
redis_key
,
60
*
60
*
24
*
3
)
return
recommend_topic_ids
return
topic_id_list
except
:
logging
.
error
(
"catch exception,err_msg:
%
s"
%
traceback
.
format_exc
())
return
[]
...
...
@@ -202,7 +201,7 @@ def discover_page(device_id="", user_id=-1, size=10):
@bind
(
"physical/search/home_query"
)
def
home_query
(
device_id
=
""
,
tag_id
=
0
,
user_id
=-
1
,
query
=
""
,
offset
=
0
,
size
=
10
):
def
home_query
(
device_id
=
""
,
tag_id
=
-
1
,
user_id
=-
1
,
query
=
""
,
offset
=
0
,
size
=
10
):
"""
:remark:首页搜索,目前只推荐日记
:param session_id:
...
...
@@ -325,11 +324,10 @@ def query_topic_by_user_similarity(topic_similarity_score_dict, offset=0, size=1
try
:
must_topic_id_list
=
list
(
topic_similarity_score_dict
.
keys
())
topic_id_
dict
=
TopicUtils
.
get_recommend_topic_ids
(
tag_id
=
0
,
user_id
=-
1
,
offset
=
offset
,
size
=
size
,
topic_id_
list
=
TopicUtils
.
get_recommend_topic_ids
(
tag_id
=
0
,
user_id
=-
1
,
offset
=
offset
,
size
=
size
,
single_
size
=
size
,
must_topic_id_list
=
must_topic_id_list
)
recommend_topic_ids
=
list
(
topic_id_dict
.
keys
())
return
{
"recommend_topic_ids"
:
recommend_topic_ids
}
return
{
"recommend_topic_ids"
:
topic_id_list
}
except
:
logging
.
error
(
"catch exception,err_msg:
%
s"
%
traceback
.
format_exc
())
return
{
"recommend_topic_id"
:
[]}
search/views/user.py
View file @
b2437cea
...
...
@@ -24,8 +24,10 @@ def recommend_user(self_user_id,interesting_user_id,offset=0,size=10):
:return:
"""
try
:
if
not
isinstance
(
self_user_id
,
int
):
if
not
isinstance
(
self_user_id
,
int
):
self_user_id
=
-
1
if
not
isinstance
(
interesting_user_id
,
int
):
interesting_user_id
=
-
1
#获取es链接对象
es_cli_obj
=
ESPerform
.
get_cli
()
...
...
trans2es/utils/tag_transfer.py
View file @
b2437cea
...
...
@@ -10,6 +10,9 @@ from libs.tools import tzlc
from
trans2es.models.topic
import
Topic
from
trans2es.models.tag
import
TopicTag
,
CommunityTagType
,
CommunityTagTypeRelation
import
datetime
from
django.conf
import
settings
class
TagTransfer
(
object
):
...
...
@@ -38,20 +41,24 @@ class TagTransfer(object):
res
[
"is_online"
]
=
instance
.
is_online
res
[
"is_deleted"
]
=
instance
.
is_deleted
res
[
"near_new_topic_num"
]
=
0
topic_num
=
0
res
[
"near_new_topic_num"
]
=
topic_num
if
instance
.
is_online
==
True
and
instance
.
is_deleted
==
False
:
topic_id_list
=
list
()
sql_result_list
=
TopicTag
.
objects
.
filter
(
tag_id
=
instance
.
id
)
.
values_list
(
"topic_id"
,
flat
=
True
)
sql_result_list
=
TopicTag
.
objects
.
using
(
settings
.
SLAVE_DB_NAME
)
.
filter
(
tag_id
=
instance
.
id
,
is_online
=
True
)
.
values_list
(
"topic_id"
,
flat
=
True
)
for
item_id
in
sql_result_list
:
topic_id_list
.
append
(
item_id
)
time_base_val
=
datetime
.
datetime
.
strftime
(
datetime
.
datetime
.
now
()
+
datetime
.
timedelta
(
-
7
),
"
%
Y-
%
m-
%
d"
)
topic_num
=
Topic
.
objects
.
filter
(
id__in
=
topic_id_list
,
create_time__gte
=
time_base_val
)
.
count
()
for
topic_begin_index
in
range
(
0
,
len
(
topic_id_list
),
100
):
cur_topic_num
=
Topic
.
objects
.
using
(
settings
.
SLAVE_DB_NAME
)
.
filter
(
id__in
=
topic_id_list
[
topic_begin_index
:
topic_begin_index
+
100
],
create_time__gte
=
time_base_val
)
.
count
()
topic_num
+=
cur_topic_num
res
[
"near_new_topic_num"
]
=
topic_num
tag_type_sql_list
=
CommunityTagTypeRelation
.
objects
.
filter
(
tag_id
=
instance
.
id
)
.
values_list
(
"tag_type_id"
,
flat
=
True
)
tag_type_sql_list
=
CommunityTagTypeRelation
.
objects
.
using
(
settings
.
SLAVE_DB_NAME
)
.
filter
(
tag_id
=
instance
.
id
)
.
values_list
(
"tag_type_id"
,
flat
=
True
)
tag_type_list
=
list
()
for
tag_type_id
in
tag_type_sql_list
:
tag_type_list
.
append
(
tag_type_id
)
...
...
trans2es/utils/user_transfer.py
View file @
b2437cea
...
...
@@ -7,6 +7,7 @@ import traceback
import
time
from
libs.tools
import
tzlc
from
trans2es.models.user
import
User
from
django.conf
import
settings
class
UserTransfer
(
object
):
...
...
@@ -20,7 +21,7 @@ class UserTransfer(object):
follow_user_detail_list
=
list
()
for
i
in
range
(
0
,
len
(
follow_user_id_list
),
1000
):
sql_data_list
=
User
.
objects
.
filter
(
user_id__in
=
follow_user_id_list
[
i
:
i
+
1000
],
is_online
=
True
,
sql_data_list
=
User
.
objects
.
using
(
settings
.
SLAVE_DB_NAME
)
.
filter
(
user_id__in
=
follow_user_id_list
[
i
:
i
+
1000
],
is_online
=
True
,
is_deleted
=
False
)
for
detail_data
in
sql_data_list
:
item
=
{
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment