Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in
Toggle navigation
P
physical
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
alpha
physical
Commits
8aa90ee9
Commit
8aa90ee9
authored
Apr 01, 2019
by
段英荣
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
modify
parent
cf209f71
Hide whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
116 additions
and
40 deletions
+116
-40
collect_data.py
linucb/views/collect_data.py
+69
-3
linucb.py
linucb/views/linucb.py
+7
-5
topic.py
search/utils/topic.py
+22
-22
topic.py
search/views/topic.py
+17
-10
topic.json
trans2es/mapping/topic.json
+1
-0
No files found.
linucb/views/collect_data.py
View file @
8aa90ee9
...
@@ -9,6 +9,9 @@ import json
...
@@ -9,6 +9,9 @@ import json
from
trans2es.models.tag
import
TopicTag
from
trans2es.models.tag
import
TopicTag
import
traceback
import
traceback
from
django.conf
import
settings
from
django.conf
import
settings
from
libs.es
import
ESPerform
from
search.utils.common
import
*
class
KafkaManager
(
object
):
class
KafkaManager
(
object
):
consumser_obj
=
None
consumser_obj
=
None
...
@@ -28,6 +31,8 @@ class CollectData(object):
...
@@ -28,6 +31,8 @@ class CollectData(object):
def
__init__
(
self
):
def
__init__
(
self
):
self
.
linucb_matrix_redis_prefix
=
"physical:linucb:device_id:"
self
.
linucb_matrix_redis_prefix
=
"physical:linucb:device_id:"
self
.
linucb_recommend_redis_prefix
=
"physical:linucb:tag_recommend:device_id:"
self
.
linucb_recommend_redis_prefix
=
"physical:linucb:tag_recommend:device_id:"
self
.
linucb_recommend_topic_id_prefix
=
"physical:linucb:topic_recommend:device_id:"
self
.
tag_topic_id_redis_prefix
=
"physical:tag_id:topic_id_list:"
# 默认
# 默认
self
.
user_feature
=
[
0
,
1
]
self
.
user_feature
=
[
0
,
1
]
...
@@ -44,8 +49,38 @@ class CollectData(object):
...
@@ -44,8 +49,38 @@ class CollectData(object):
logging
.
error
(
"catch exception,err_msg:
%
s"
%
traceback
.
format_exc
())
logging
.
error
(
"catch exception,err_msg:
%
s"
%
traceback
.
format_exc
())
return
dict
()
return
dict
()
def get_tag_topic_list(self, tag_id):
    """Return the ids of online, non-deleted topics tagged with ``tag_id``.

    Runs a search through ``ESPerform.get_search_results`` against the
    "topic-high-star" sub index, requesting up to 5000 documents sorted by
    ``create_time_val`` descending and then ``language_type`` ascending.
    Only the ``id`` source field is requested.

    Args:
        tag_id: value matched (term query) against the ``tag_list`` field.

    Returns:
        list: the ``_source.id`` of every entry in ``result_dict["hits"]``,
        in result order. An empty list is returned if the search or the
        result parsing raises; the traceback is logged in that case.
    """
    # The query is a plain literal and cannot raise, so it lives outside
    # the try block to keep the guarded region minimal.
    query_body = {
        "query": {
            "bool": {
                "must": [
                    {"term": {"is_online": True}},
                    {"term": {"is_deleted": False}},
                    {"term": {"tag_list": tag_id}},
                ]
            }
        },
        "_source": {
            "include": ["id"]
        },
        "sort": [
            {"create_time_val": {"order": "desc"}},
            {"language_type": {"order": "asc"}},
        ],
    }

    try:
        result_dict = ESPerform.get_search_results(
            ESPerform.get_cli(),
            sub_index_name="topic-high-star",
            query_body=query_body,
            offset=0,
            size=5000,
        )
        return [item["_source"]["id"] for item in result_dict["hits"]]
    except Exception:
        # Best-effort lookup: log and fall back to "no topics". Catching
        # Exception (not a bare except) lets KeyboardInterrupt/SystemExit
        # propagate instead of being silently swallowed.
        logging.error("catch exception,err_msg:%s" % traceback.format_exc())
        return list()
def
update_recommend_tag_list
(
self
,
device_id
,
user_feature
=
None
):
def
update_recommend_tag_list
(
self
,
device_id
,
user_feature
=
None
):
try
:
try
:
recommend_tag_set
=
set
()
recommend_tag_list
=
list
()
recommend_tag_list
=
list
()
recommend_tag_dict
=
dict
()
recommend_tag_dict
=
dict
()
redis_linucb_tag_data_dict
=
self
.
_get_user_linucb_info
(
device_id
)
redis_linucb_tag_data_dict
=
self
.
_get_user_linucb_info
(
device_id
)
...
@@ -54,15 +89,46 @@ class CollectData(object):
...
@@ -54,15 +89,46 @@ class CollectData(object):
LinUCB
.
init_device_id_linucb_info
(
redis_client
,
self
.
linucb_matrix_redis_prefix
,
device_id
,
recommend_tag_list
)
LinUCB
.
init_device_id_linucb_info
(
redis_client
,
self
.
linucb_matrix_redis_prefix
,
device_id
,
recommend_tag_list
)
else
:
else
:
user_feature
=
user_feature
if
user_feature
else
self
.
user_feature
user_feature
=
user_feature
if
user_feature
else
self
.
user_feature
recommend_tag_dict
=
LinUCB
.
linucb_recommend_tag
(
device_id
,
redis_linucb_tag_data_dict
,
user_feature
,
list
(
redis_linucb_tag_data_dict
.
keys
()))
(
recommend_tag_dict
,
recommend_tag_set
)
=
LinUCB
.
linucb_recommend_tag
(
device_id
,
redis_linucb_tag_data_dict
,
user_feature
,
list
(
redis_linucb_tag_data_dict
.
keys
()))
logging
.
info
(
"duan add,device_id:
%
s,recommend_tag_list:
%
s"
%
(
str
(
device_id
),
str
(
recommend_tag_list
)))
if
len
(
recommend_tag_dict
)
>
0
:
if
len
(
recommend_tag_dict
)
>
0
:
recommend_tag_list
=
list
(
recommend_tag_set
)
tag_recommend_redis_key
=
self
.
linucb_recommend_redis_prefix
+
str
(
device_id
)
tag_recommend_redis_key
=
self
.
linucb_recommend_redis_prefix
+
str
(
device_id
)
redis_client
.
set
(
tag_recommend_redis_key
,
json
.
dumps
(
list
(
recommend_tag_dict
.
keys
())
))
redis_client
.
set
(
tag_recommend_redis_key
,
json
.
dumps
(
recommend_tag_list
))
# Todo:设置过期时间,调研set是否支持
# Todo:设置过期时间,调研set是否支持
redis_client
.
expire
(
tag_recommend_redis_key
,
7
*
24
*
60
*
60
)
redis_client
.
expire
(
tag_recommend_redis_key
,
7
*
24
*
60
*
60
)
redis_key
=
"physical:home_recommend"
+
":device_id:"
+
device_id
+
":query_type:"
+
str
(
TopicPageType
.
HOME_RECOMMEND
)
have_read_topic_id_list
=
list
()
redis_field_list
=
[
b
'have_read_topic_list'
]
redis_field_val_list
=
redis_client
.
hmget
(
redis_key
,
redis_field_list
)
if
redis_field_val_list
[
0
]:
have_read_topic_id_list
=
list
(
json
.
loads
(
redis_field_val_list
[
0
]))
recommend_topic_id_list
=
list
()
for
index
in
range
(
0
,
1000
):
for
tag_id
in
recommend_tag_list
[
0
:
5
]:
redis_tag_id_key
=
self
.
tag_topic_id_redis_prefix
+
str
(
tag_id
)
redis_tag_id_data
=
redis_client
.
get
(
redis_tag_id_key
)
tag_topic_id_list
=
json
.
loads
(
redis_tag_id_data
)
if
redis_tag_id_data
else
[]
if
not
redis_tag_id_data
:
tag_topic_id_list
=
self
.
get_tag_topic_list
(
tag_id
)
redis_client
.
set
(
redis_tag_id_key
,
json
.
dumps
(
tag_topic_id_list
))
redis_client
.
expire
(
redis_tag_id_key
,
1
*
24
*
60
*
60
)
for
topic_id
in
tag_topic_id_list
:
if
topic_id
not
in
have_read_topic_id_list
:
recommend_topic_id_list
.
append
(
topic_id
)
break
topic_recommend_redis_key
=
self
.
linucb_recommend_topic_id_prefix
+
str
(
device_id
)
redis_data_dict
=
{
"data"
:
json
.
dumps
(
recommend_topic_id_list
),
"cursor"
:
0
}
redis_client
.
hmset
(
topic_recommend_redis_key
,
redis_data_dict
)
return
True
return
True
except
:
except
:
logging
.
error
(
"catch exception,err_msg:
%
s"
%
traceback
.
format_exc
())
logging
.
error
(
"catch exception,err_msg:
%
s"
%
traceback
.
format_exc
())
...
...
linucb/views/linucb.py
View file @
8aa90ee9
...
@@ -14,7 +14,7 @@ from django.conf import settings
...
@@ -14,7 +14,7 @@ from django.conf import settings
class
LinUCB
:
class
LinUCB
:
d
=
2
d
=
2
alpha
=
0.1
alpha
=
0.
0
1
r1
=
10
r1
=
10
r0
=
-
0.1
r0
=
-
0.1
default_tag_list
=
list
()
default_tag_list
=
list
()
...
@@ -70,6 +70,7 @@ class LinUCB:
...
@@ -70,6 +70,7 @@ class LinUCB:
#art_max = tag_list[np.argmax(np.dot(xaT, theta_tmp) + cls.alpha * np.sqrt(np.dot(np.dot(xaT, AaI_tmp), xa)))]
#art_max = tag_list[np.argmax(np.dot(xaT, theta_tmp) + cls.alpha * np.sqrt(np.dot(np.dot(xaT, AaI_tmp), xa)))]
top_tag_set
=
set
()
top_tag_dict
=
dict
()
top_tag_dict
=
dict
()
np_score_list
=
list
()
np_score_list
=
list
()
np_score_dict
=
dict
()
np_score_dict
=
dict
()
...
@@ -87,17 +88,18 @@ class LinUCB:
...
@@ -87,17 +88,18 @@ class LinUCB:
for
top_score_index
in
np_score_dict
[
top_score
]:
for
top_score_index
in
np_score_dict
[
top_score
]:
tag_id
=
str
(
tag_list
[
top_score_index
],
encoding
=
"utf-8"
)
tag_id
=
str
(
tag_list
[
top_score_index
],
encoding
=
"utf-8"
)
top_tag_dict
[
tag_id
]
=
top_score
top_tag_dict
[
tag_id
]
=
top_score
if
len
(
top_tag_dict
)
>=
10
:
top_tag_set
.
add
(
tag_id
)
if
len
(
top_tag_dict
)
>=
20
:
break
break
if
len
(
top_tag_dict
)
>=
1
0
:
if
len
(
top_tag_dict
)
>=
2
0
:
break
break
logging
.
info
(
"duan add,device_id:
%
s,sorted_np_score_list:
%
s,np_score_dict:
%
s"
%
(
str
(
device_id
),
str
(
sorted_np_score_list
),
str
(
np_score_dict
)))
logging
.
info
(
"duan add,device_id:
%
s,sorted_np_score_list:
%
s,np_score_dict:
%
s"
%
(
str
(
device_id
),
str
(
sorted_np_score_list
),
str
(
np_score_dict
)))
return
top_tag_dict
return
(
top_tag_dict
,
top_tag_set
)
except
:
except
:
logging
.
error
(
"catch exception,err_msg:
%
s"
%
traceback
.
format_exc
())
logging
.
error
(
"catch exception,err_msg:
%
s"
%
traceback
.
format_exc
())
return
{}
return
({},())
@classmethod
@classmethod
def
init_device_id_linucb_info
(
cls
,
redis_cli
,
redis_prefix
,
device_id
,
tag_list
):
def
init_device_id_linucb_info
(
cls
,
redis_cli
,
redis_prefix
,
device_id
,
tag_list
):
...
...
search/utils/topic.py
View file @
8aa90ee9
...
@@ -165,7 +165,7 @@ class TopicUtils(object):
...
@@ -165,7 +165,7 @@ class TopicUtils(object):
"language_type"
:
1
"language_type"
:
1
}
}
},
},
"weight"
:
4
"weight"
:
6
},
},
{
{
"gauss"
:
{
"gauss"
:
{
...
@@ -224,25 +224,25 @@ class TopicUtils(object):
...
@@ -224,25 +224,25 @@ class TopicUtils(object):
"weight"
:
1
"weight"
:
1
}
}
)
)
if
len
(
recommend_tag_list
)
>
0
:
#
if len(recommend_tag_list)>0:
if
len
(
recommend_tag_list
)
>
1
:
#
if len(recommend_tag_list)>1:
functions_list
+=
[
#
functions_list += [
{
#
{
"filter"
:
{
"term"
:
{
"tag_list"
:
recommend_tag_list
[
0
]}},
#
"filter": {"term": {"tag_list": recommend_tag_list[0]}},
"weight"
:
4
#
"weight": 4
},
#
},
{
#
{
"filter"
:
{
"terms"
:
{
"tag_list"
:
recommend_tag_list
[
1
:]}},
#
"filter": {"terms": {"tag_list": recommend_tag_list[1:]}},
"weight"
:
3
#
"weight": 3
}
#
}
]
#
]
else
:
#
else:
functions_list
.
append
(
#
functions_list.append(
{
#
{
"filter"
:
{
"terms"
:
{
"tag_list"
:
recommend_tag_list
}},
#
"filter": {"terms": {"tag_list": recommend_tag_list}},
"weight"
:
3
#
"weight": 3
}
#
}
)
#
)
# for tag_id in recommend_tag_dict:
# for tag_id in recommend_tag_dict:
# functions_list.append(
# functions_list.append(
...
@@ -252,13 +252,13 @@ class TopicUtils(object):
...
@@ -252,13 +252,13 @@ class TopicUtils(object):
# }
# }
# )
# )
low_content_level
=
4
if
query_type
==
TopicPageType
.
FIND_PAGE
else
3
#
low_content_level = 4 if query_type == TopicPageType.FIND_PAGE else 3
query_function_score
=
{
query_function_score
=
{
"query"
:
{
"query"
:
{
"bool"
:
{
"bool"
:
{
"filter"
:
[
"filter"
:
[
# {"range": {"content_level": {"gte": low_content_level, "lte": 5}}},
# {"range": {"content_level": {"gte": low_content_level, "lte": 5}}},
{
"term"
:
{
"has_image"
:
True
}},
#
{"term": {"has_image":True}},
{
"term"
:
{
"is_online"
:
True
}},
{
"term"
:
{
"is_online"
:
True
}},
{
"term"
:
{
"is_deleted"
:
False
}}
{
"term"
:
{
"is_deleted"
:
False
}}
],
],
...
...
search/views/topic.py
View file @
8aa90ee9
...
@@ -46,11 +46,14 @@ def get_home_recommend_topic_ids(user_id, device_id, tag_id, offset, size, query
...
@@ -46,11 +46,14 @@ def get_home_recommend_topic_ids(user_id, device_id, tag_id, offset, size, query
query_type
=
TopicPageType
.
HOME_RECOMMEND
):
query_type
=
TopicPageType
.
HOME_RECOMMEND
):
try
:
try
:
if
query
is
None
:
if
query
is
None
:
redis_key
=
"physical:home_recommend"
+
":user_id:"
+
str
(
# redis_key = "physical:home_recommend" + ":user_id:" + str(
user_id
)
+
":device_id:"
+
device_id
+
":query_type:"
+
str
(
query_type
)
# user_id) + ":device_id:" + device_id + ":query_type:" + str(query_type)
redis_key
=
"physical:home_recommend"
+
":device_id:"
+
device_id
+
":query_type:"
+
str
(
query_type
)
else
:
else
:
redis_key
=
"physical:home_query"
+
":user_id:"
+
str
(
# redis_key = "physical:home_query" + ":user_id:" + str(
user_id
)
+
":device_id:"
+
device_id
+
":query:"
+
str
(
query
)
+
":query_type:"
+
str
(
query_type
)
# user_id) + ":device_id:" + device_id + ":query:" + str(query) + ":query_type:" + str(query_type)
redis_key
=
"physical:home_query"
+
":device_id:"
+
device_id
+
":query:"
+
str
(
query
)
+
":query_type:"
+
str
(
query_type
)
redis_field_list
=
[
b
'have_read_topic_list'
]
redis_field_list
=
[
b
'have_read_topic_list'
]
redis_field_val_list
=
redis_client
.
hmget
(
redis_key
,
redis_field_list
)
redis_field_val_list
=
redis_client
.
hmget
(
redis_key
,
redis_field_list
)
...
@@ -63,12 +66,14 @@ def get_home_recommend_topic_ids(user_id, device_id, tag_id, offset, size, query
...
@@ -63,12 +66,14 @@ def get_home_recommend_topic_ids(user_id, device_id, tag_id, offset, size, query
# if tag_recommend_val:
# if tag_recommend_val:
# recommend_tag_dict = json.loads(str(tag_recommend_val, encoding="utf-8"))
# recommend_tag_dict = json.loads(str(tag_recommend_val, encoding="utf-8"))
recommend_tag_list
=
list
()
recommend_topic_list
=
list
()
tag_recommend_val
=
redis_client
.
get
(
tag_recommend_redis_key
)
recommend_tag_dict
=
redis_client
.
hgetall
(
tag_recommend_redis_key
)
if
tag_recommend_val
:
if
"data"
in
recommend_tag_dict
:
recommend_tag_list
=
json
.
loads
(
str
(
tag_recommend_val
,
encoding
=
"utf-8"
))
recommend_topic_id_list
=
json
.
loads
(
recommend_tag_dict
[
"data"
])
cursor
=
recommend_tag_dict
[
"cursor"
]
if
len
(
recommend_topic_id_list
)
>
(
cursor
+
5
):
recommend_topic_list
=
recommend_topic_id_list
[
cursor
:
cursor
+
5
]
recommend_topic_ids
=
[]
have_read_topic_id_list
=
list
()
have_read_topic_id_list
=
list
()
if
redis_field_val_list
[
0
]:
if
redis_field_val_list
[
0
]:
...
@@ -83,10 +88,11 @@ def get_home_recommend_topic_ids(user_id, device_id, tag_id, offset, size, query
...
@@ -83,10 +88,11 @@ def get_home_recommend_topic_ids(user_id, device_id, tag_id, offset, size, query
user_similar_score_redis_list
=
json
.
loads
(
user_similar_score_redis_list
=
json
.
loads
(
redis_user_similar_score_redis_val
)
if
redis_user_similar_score_redis_val
else
[]
redis_user_similar_score_redis_val
)
if
redis_user_similar_score_redis_val
else
[]
size
=
size
-
len
(
recommend_topic_list
)
topic_id_list
=
TopicUtils
.
get_recommend_topic_ids
(
user_id
=
user_id
,
tag_id
=
tag_id
,
offset
=
offset
,
size
=
size
,
topic_id_list
=
TopicUtils
.
get_recommend_topic_ids
(
user_id
=
user_id
,
tag_id
=
tag_id
,
offset
=
offset
,
size
=
size
,
single_size
=
size
,
query
=
query
,
query_type
=
query_type
,
single_size
=
size
,
query
=
query
,
query_type
=
query_type
,
filter_topic_id_list
=
have_read_topic_id_list
,
filter_topic_id_list
=
have_read_topic_id_list
,
recommend_tag_list
=
recommend_t
ag
_list
,
recommend_tag_list
=
recommend_t
opic
_list
,
user_similar_score_list
=
user_similar_score_redis_list
,
index_type
=
"topic-high-star"
)
user_similar_score_list
=
user_similar_score_redis_list
,
index_type
=
"topic-high-star"
)
have_read_group_id_set
=
set
()
have_read_group_id_set
=
set
()
have_read_user_id_set
=
set
()
have_read_user_id_set
=
set
()
...
@@ -128,6 +134,7 @@ def get_home_recommend_topic_ids(user_id, device_id, tag_id, offset, size, query
...
@@ -128,6 +134,7 @@ def get_home_recommend_topic_ids(user_id, device_id, tag_id, offset, size, query
# else:
# else:
# break
# break
topic_id_list
=
recommend_topic_list
.
extend
(
topic_id_list
)
have_read_topic_id_list
.
extend
(
topic_id_list
)
have_read_topic_id_list
.
extend
(
topic_id_list
)
if
len
(
have_read_topic_id_list
)
>
30000
:
if
len
(
have_read_topic_id_list
)
>
30000
:
cut_len
=
len
(
have_read_topic_id_list
)
-
30000
cut_len
=
len
(
have_read_topic_id_list
)
-
30000
...
...
trans2es/mapping/topic.json
View file @
8aa90ee9
...
@@ -11,6 +11,7 @@
...
@@ -11,6 +11,7 @@
"content"
:{
"type"
:
"text"
,
"analyzer"
:
"gm_default_index"
,
"search_analyzer"
:
"gm_default_index"
},
"content"
:{
"type"
:
"text"
,
"analyzer"
:
"gm_default_index"
,
"search_analyzer"
:
"gm_default_index"
},
"content_level"
:{
"type"
:
"text"
},
"content_level"
:{
"type"
:
"text"
},
"user_id"
:{
"type"
:
"long"
},
"user_id"
:{
"type"
:
"long"
},
"user_nick_name"
:{
"type"
:
"text"
,
"analyzer"
:
"gm_default_index"
,
"search_analyzer"
:
"gm_default_index"
},
//帖子用户名
"group_id"
:{
"type"
:
"long"
},
//所在组ID
"group_id"
:{
"type"
:
"long"
},
//所在组ID
"tag_list"
:{
"type"
:
"long"
},
//标签属性
"tag_list"
:{
"type"
:
"long"
},
//标签属性
"edit_tag_list"
:{
"type"
:
"long"
},
//编辑标签
"edit_tag_list"
:{
"type"
:
"long"
},
//编辑标签
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment