Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in
Toggle navigation
P
physical
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
alpha
physical
Commits
8aa90ee9
Commit
8aa90ee9
authored
Apr 01, 2019
by
段英荣
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
modify
parent
cf209f71
Hide whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
116 additions
and
40 deletions
+116
-40
collect_data.py
linucb/views/collect_data.py
+69
-3
linucb.py
linucb/views/linucb.py
+7
-5
topic.py
search/utils/topic.py
+22
-22
topic.py
search/views/topic.py
+17
-10
topic.json
trans2es/mapping/topic.json
+1
-0
No files found.
linucb/views/collect_data.py
View file @
8aa90ee9
...
...
@@ -9,6 +9,9 @@ import json
from
trans2es.models.tag
import
TopicTag
import
traceback
from
django.conf
import
settings
from
libs.es
import
ESPerform
from
search.utils.common
import
*
class
KafkaManager
(
object
):
consumser_obj
=
None
...
...
@@ -28,6 +31,8 @@ class CollectData(object):
def
__init__
(
self
):
self
.
linucb_matrix_redis_prefix
=
"physical:linucb:device_id:"
self
.
linucb_recommend_redis_prefix
=
"physical:linucb:tag_recommend:device_id:"
self
.
linucb_recommend_topic_id_prefix
=
"physical:linucb:topic_recommend:device_id:"
self
.
tag_topic_id_redis_prefix
=
"physical:tag_id:topic_id_list:"
# 默认
self
.
user_feature
=
[
0
,
1
]
...
...
@@ -44,8 +49,38 @@ class CollectData(object):
logging
.
error
(
"catch exception,err_msg:
%
s"
%
traceback
.
format_exc
())
return
dict
()
def
get_tag_topic_list
(
self
,
tag_id
):
try
:
q
=
{
"query"
:{
"bool"
:{
"must"
:[
{
"term"
:{
"is_online"
:
True
}},
{
"term"
:{
"is_deleted"
:
False
}},
{
"term"
:{
"tag_list"
:
tag_id
}}
]
}
},
"_source"
:{
"include"
:[
"id"
]
},
"sort"
:[
{
"create_time_val"
:{
"order"
:
"desc"
}},
{
"language_type"
:{
"order"
:
"asc"
}},
]
}
result_dict
=
ESPerform
.
get_search_results
(
ESPerform
.
get_cli
(),
sub_index_name
=
"topic-high-star"
,
query_body
=
q
,
offset
=
0
,
size
=
5000
)
topic_id_list
=
[
item
[
"_source"
][
"id"
]
for
item
in
result_dict
[
"hits"
]]
return
topic_id_list
except
:
logging
.
error
(
"catch exception,err_msg:
%
s"
%
traceback
.
format_exc
())
return
list
()
def
update_recommend_tag_list
(
self
,
device_id
,
user_feature
=
None
):
try
:
recommend_tag_set
=
set
()
recommend_tag_list
=
list
()
recommend_tag_dict
=
dict
()
redis_linucb_tag_data_dict
=
self
.
_get_user_linucb_info
(
device_id
)
...
...
@@ -54,15 +89,46 @@ class CollectData(object):
LinUCB
.
init_device_id_linucb_info
(
redis_client
,
self
.
linucb_matrix_redis_prefix
,
device_id
,
recommend_tag_list
)
else
:
user_feature
=
user_feature
if
user_feature
else
self
.
user_feature
recommend_tag_dict
=
LinUCB
.
linucb_recommend_tag
(
device_id
,
redis_linucb_tag_data_dict
,
user_feature
,
list
(
redis_linucb_tag_data_dict
.
keys
()))
(
recommend_tag_dict
,
recommend_tag_set
)
=
LinUCB
.
linucb_recommend_tag
(
device_id
,
redis_linucb_tag_data_dict
,
user_feature
,
list
(
redis_linucb_tag_data_dict
.
keys
()))
logging
.
info
(
"duan add,device_id:
%
s,recommend_tag_list:
%
s"
%
(
str
(
device_id
),
str
(
recommend_tag_list
)))
if
len
(
recommend_tag_dict
)
>
0
:
recommend_tag_list
=
list
(
recommend_tag_set
)
tag_recommend_redis_key
=
self
.
linucb_recommend_redis_prefix
+
str
(
device_id
)
redis_client
.
set
(
tag_recommend_redis_key
,
json
.
dumps
(
list
(
recommend_tag_dict
.
keys
())
))
redis_client
.
set
(
tag_recommend_redis_key
,
json
.
dumps
(
recommend_tag_list
))
# Todo:设置过期时间,调研set是否支持
redis_client
.
expire
(
tag_recommend_redis_key
,
7
*
24
*
60
*
60
)
redis_key
=
"physical:home_recommend"
+
":device_id:"
+
device_id
+
":query_type:"
+
str
(
TopicPageType
.
HOME_RECOMMEND
)
have_read_topic_id_list
=
list
()
redis_field_list
=
[
b
'have_read_topic_list'
]
redis_field_val_list
=
redis_client
.
hmget
(
redis_key
,
redis_field_list
)
if
redis_field_val_list
[
0
]:
have_read_topic_id_list
=
list
(
json
.
loads
(
redis_field_val_list
[
0
]))
recommend_topic_id_list
=
list
()
for
index
in
range
(
0
,
1000
):
for
tag_id
in
recommend_tag_list
[
0
:
5
]:
redis_tag_id_key
=
self
.
tag_topic_id_redis_prefix
+
str
(
tag_id
)
redis_tag_id_data
=
redis_client
.
get
(
redis_tag_id_key
)
tag_topic_id_list
=
json
.
loads
(
redis_tag_id_data
)
if
redis_tag_id_data
else
[]
if
not
redis_tag_id_data
:
tag_topic_id_list
=
self
.
get_tag_topic_list
(
tag_id
)
redis_client
.
set
(
redis_tag_id_key
,
json
.
dumps
(
tag_topic_id_list
))
redis_client
.
expire
(
redis_tag_id_key
,
1
*
24
*
60
*
60
)
for
topic_id
in
tag_topic_id_list
:
if
topic_id
not
in
have_read_topic_id_list
:
recommend_topic_id_list
.
append
(
topic_id
)
break
topic_recommend_redis_key
=
self
.
linucb_recommend_topic_id_prefix
+
str
(
device_id
)
redis_data_dict
=
{
"data"
:
json
.
dumps
(
recommend_topic_id_list
),
"cursor"
:
0
}
redis_client
.
hmset
(
topic_recommend_redis_key
,
redis_data_dict
)
return
True
except
:
logging
.
error
(
"catch exception,err_msg:
%
s"
%
traceback
.
format_exc
())
...
...
linucb/views/linucb.py
View file @
8aa90ee9
...
...
@@ -14,7 +14,7 @@ from django.conf import settings
class
LinUCB
:
d
=
2
alpha
=
0.1
alpha
=
0.
0
1
r1
=
10
r0
=
-
0.1
default_tag_list
=
list
()
...
...
@@ -70,6 +70,7 @@ class LinUCB:
#art_max = tag_list[np.argmax(np.dot(xaT, theta_tmp) + cls.alpha * np.sqrt(np.dot(np.dot(xaT, AaI_tmp), xa)))]
top_tag_set
=
set
()
top_tag_dict
=
dict
()
np_score_list
=
list
()
np_score_dict
=
dict
()
...
...
@@ -87,17 +88,18 @@ class LinUCB:
for
top_score_index
in
np_score_dict
[
top_score
]:
tag_id
=
str
(
tag_list
[
top_score_index
],
encoding
=
"utf-8"
)
top_tag_dict
[
tag_id
]
=
top_score
if
len
(
top_tag_dict
)
>=
10
:
top_tag_set
.
add
(
tag_id
)
if
len
(
top_tag_dict
)
>=
20
:
break
if
len
(
top_tag_dict
)
>=
1
0
:
if
len
(
top_tag_dict
)
>=
2
0
:
break
logging
.
info
(
"duan add,device_id:
%
s,sorted_np_score_list:
%
s,np_score_dict:
%
s"
%
(
str
(
device_id
),
str
(
sorted_np_score_list
),
str
(
np_score_dict
)))
return
top_tag_dict
return
(
top_tag_dict
,
top_tag_set
)
except
:
logging
.
error
(
"catch exception,err_msg:
%
s"
%
traceback
.
format_exc
())
return
{}
return
({},())
@classmethod
def
init_device_id_linucb_info
(
cls
,
redis_cli
,
redis_prefix
,
device_id
,
tag_list
):
...
...
search/utils/topic.py
View file @
8aa90ee9
...
...
@@ -165,7 +165,7 @@ class TopicUtils(object):
"language_type"
:
1
}
},
"weight"
:
4
"weight"
:
6
},
{
"gauss"
:
{
...
...
@@ -224,25 +224,25 @@ class TopicUtils(object):
"weight"
:
1
}
)
if
len
(
recommend_tag_list
)
>
0
:
if
len
(
recommend_tag_list
)
>
1
:
functions_list
+=
[
{
"filter"
:
{
"term"
:
{
"tag_list"
:
recommend_tag_list
[
0
]}},
"weight"
:
4
},
{
"filter"
:
{
"terms"
:
{
"tag_list"
:
recommend_tag_list
[
1
:]}},
"weight"
:
3
}
]
else
:
functions_list
.
append
(
{
"filter"
:
{
"terms"
:
{
"tag_list"
:
recommend_tag_list
}},
"weight"
:
3
}
)
#
if len(recommend_tag_list)>0:
#
if len(recommend_tag_list)>1:
#
functions_list += [
#
{
#
"filter": {"term": {"tag_list": recommend_tag_list[0]}},
#
"weight": 4
#
},
#
{
#
"filter": {"terms": {"tag_list": recommend_tag_list[1:]}},
#
"weight": 3
#
}
#
]
#
else:
#
functions_list.append(
#
{
#
"filter": {"terms": {"tag_list": recommend_tag_list}},
#
"weight": 3
#
}
#
)
# for tag_id in recommend_tag_dict:
# functions_list.append(
...
...
@@ -252,13 +252,13 @@ class TopicUtils(object):
# }
# )
low_content_level
=
4
if
query_type
==
TopicPageType
.
FIND_PAGE
else
3
#
low_content_level = 4 if query_type == TopicPageType.FIND_PAGE else 3
query_function_score
=
{
"query"
:
{
"bool"
:
{
"filter"
:
[
# {"range": {"content_level": {"gte": low_content_level, "lte": 5}}},
{
"term"
:
{
"has_image"
:
True
}},
#
{"term": {"has_image":True}},
{
"term"
:
{
"is_online"
:
True
}},
{
"term"
:
{
"is_deleted"
:
False
}}
],
...
...
search/views/topic.py
View file @
8aa90ee9
...
...
@@ -46,11 +46,14 @@ def get_home_recommend_topic_ids(user_id, device_id, tag_id, offset, size, query
query_type
=
TopicPageType
.
HOME_RECOMMEND
):
try
:
if
query
is
None
:
redis_key
=
"physical:home_recommend"
+
":user_id:"
+
str
(
user_id
)
+
":device_id:"
+
device_id
+
":query_type:"
+
str
(
query_type
)
# redis_key = "physical:home_recommend" + ":user_id:" + str(
# user_id) + ":device_id:" + device_id + ":query_type:" + str(query_type)
redis_key
=
"physical:home_recommend"
+
":device_id:"
+
device_id
+
":query_type:"
+
str
(
query_type
)
else
:
redis_key
=
"physical:home_query"
+
":user_id:"
+
str
(
user_id
)
+
":device_id:"
+
device_id
+
":query:"
+
str
(
query
)
+
":query_type:"
+
str
(
query_type
)
# redis_key = "physical:home_query" + ":user_id:" + str(
# user_id) + ":device_id:" + device_id + ":query:" + str(query) + ":query_type:" + str(query_type)
redis_key
=
"physical:home_query"
+
":device_id:"
+
device_id
+
":query:"
+
str
(
query
)
+
":query_type:"
+
str
(
query_type
)
redis_field_list
=
[
b
'have_read_topic_list'
]
redis_field_val_list
=
redis_client
.
hmget
(
redis_key
,
redis_field_list
)
...
...
@@ -63,12 +66,14 @@ def get_home_recommend_topic_ids(user_id, device_id, tag_id, offset, size, query
# if tag_recommend_val:
# recommend_tag_dict = json.loads(str(tag_recommend_val, encoding="utf-8"))
recommend_tag_list
=
list
()
tag_recommend_val
=
redis_client
.
get
(
tag_recommend_redis_key
)
if
tag_recommend_val
:
recommend_tag_list
=
json
.
loads
(
str
(
tag_recommend_val
,
encoding
=
"utf-8"
))
recommend_topic_list
=
list
()
recommend_tag_dict
=
redis_client
.
hgetall
(
tag_recommend_redis_key
)
if
"data"
in
recommend_tag_dict
:
recommend_topic_id_list
=
json
.
loads
(
recommend_tag_dict
[
"data"
])
cursor
=
recommend_tag_dict
[
"cursor"
]
if
len
(
recommend_topic_id_list
)
>
(
cursor
+
5
):
recommend_topic_list
=
recommend_topic_id_list
[
cursor
:
cursor
+
5
]
recommend_topic_ids
=
[]
have_read_topic_id_list
=
list
()
if
redis_field_val_list
[
0
]:
...
...
@@ -83,10 +88,11 @@ def get_home_recommend_topic_ids(user_id, device_id, tag_id, offset, size, query
user_similar_score_redis_list
=
json
.
loads
(
redis_user_similar_score_redis_val
)
if
redis_user_similar_score_redis_val
else
[]
size
=
size
-
len
(
recommend_topic_list
)
topic_id_list
=
TopicUtils
.
get_recommend_topic_ids
(
user_id
=
user_id
,
tag_id
=
tag_id
,
offset
=
offset
,
size
=
size
,
single_size
=
size
,
query
=
query
,
query_type
=
query_type
,
filter_topic_id_list
=
have_read_topic_id_list
,
recommend_tag_list
=
recommend_t
ag
_list
,
recommend_tag_list
=
recommend_t
opic
_list
,
user_similar_score_list
=
user_similar_score_redis_list
,
index_type
=
"topic-high-star"
)
have_read_group_id_set
=
set
()
have_read_user_id_set
=
set
()
...
...
@@ -128,6 +134,7 @@ def get_home_recommend_topic_ids(user_id, device_id, tag_id, offset, size, query
# else:
# break
topic_id_list
=
recommend_topic_list
.
extend
(
topic_id_list
)
have_read_topic_id_list
.
extend
(
topic_id_list
)
if
len
(
have_read_topic_id_list
)
>
30000
:
cut_len
=
len
(
have_read_topic_id_list
)
-
30000
...
...
trans2es/mapping/topic.json
View file @
8aa90ee9
...
...
@@ -11,6 +11,7 @@
"content"
:{
"type"
:
"text"
,
"analyzer"
:
"gm_default_index"
,
"search_analyzer"
:
"gm_default_index"
},
"content_level"
:{
"type"
:
"text"
},
"user_id"
:{
"type"
:
"long"
},
"user_nick_name"
:{
"type"
:
"text"
,
"analyzer"
:
"gm_default_index"
,
"search_analyzer"
:
"gm_default_index"
},
//帖子用户名
"group_id"
:{
"type"
:
"long"
},
//所在组ID
"tag_list"
:{
"type"
:
"long"
},
//标签属性
"edit_tag_list"
:{
"type"
:
"long"
},
//编辑标签
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment