Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in
Toggle navigation
P
physical
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
alpha
physical
Commits
e3387b7b
Commit
e3387b7b
authored
Mar 29, 2019
by
段英荣
Browse files
Options
Browse Files
Download
Plain Diff
Merge branch 'master' into 'test'
Master See merge request
!216
parents
0ad7f0ca
cf209f71
Hide whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
75 additions
and
41 deletions
+75
-41
collect_data.py
linucb/views/collect_data.py
+4
-3
linucb.py
linucb/views/linucb.py
+10
-9
topic.py
search/utils/topic.py
+49
-24
topic.py
search/views/topic.py
+10
-4
user_extra.py
trans2es/models/user_extra.py
+2
-1
No files found.
linucb/views/collect_data.py
View file @
e3387b7b
...
@@ -47,18 +47,19 @@ class CollectData(object):
...
@@ -47,18 +47,19 @@ class CollectData(object):
def
update_recommend_tag_list
(
self
,
device_id
,
user_feature
=
None
):
def
update_recommend_tag_list
(
self
,
device_id
,
user_feature
=
None
):
try
:
try
:
recommend_tag_list
=
list
()
recommend_tag_list
=
list
()
recommend_tag_dict
=
dict
()
redis_linucb_tag_data_dict
=
self
.
_get_user_linucb_info
(
device_id
)
redis_linucb_tag_data_dict
=
self
.
_get_user_linucb_info
(
device_id
)
if
len
(
redis_linucb_tag_data_dict
)
==
0
:
if
len
(
redis_linucb_tag_data_dict
)
==
0
:
recommend_tag_list
=
LinUCB
.
get_default_tag_list
()
recommend_tag_list
=
LinUCB
.
get_default_tag_list
()
LinUCB
.
init_device_id_linucb_info
(
redis_client
,
self
.
linucb_matrix_redis_prefix
,
device_id
,
recommend_tag_list
)
LinUCB
.
init_device_id_linucb_info
(
redis_client
,
self
.
linucb_matrix_redis_prefix
,
device_id
,
recommend_tag_list
)
else
:
else
:
user_feature
=
user_feature
if
user_feature
else
self
.
user_feature
user_feature
=
user_feature
if
user_feature
else
self
.
user_feature
recommend_tag_
lis
t
=
LinUCB
.
linucb_recommend_tag
(
device_id
,
redis_linucb_tag_data_dict
,
user_feature
,
list
(
redis_linucb_tag_data_dict
.
keys
()))
recommend_tag_
dic
t
=
LinUCB
.
linucb_recommend_tag
(
device_id
,
redis_linucb_tag_data_dict
,
user_feature
,
list
(
redis_linucb_tag_data_dict
.
keys
()))
logging
.
info
(
"duan add,device_id:
%
s,recommend_tag_list:
%
s"
%
(
str
(
device_id
),
str
(
recommend_tag_list
)))
logging
.
info
(
"duan add,device_id:
%
s,recommend_tag_list:
%
s"
%
(
str
(
device_id
),
str
(
recommend_tag_list
)))
if
len
(
recommend_tag_
lis
t
)
>
0
:
if
len
(
recommend_tag_
dic
t
)
>
0
:
tag_recommend_redis_key
=
self
.
linucb_recommend_redis_prefix
+
str
(
device_id
)
tag_recommend_redis_key
=
self
.
linucb_recommend_redis_prefix
+
str
(
device_id
)
redis_client
.
set
(
tag_recommend_redis_key
,
json
.
dumps
(
recommend_tag_list
))
redis_client
.
set
(
tag_recommend_redis_key
,
json
.
dumps
(
list
(
recommend_tag_dict
.
keys
())
))
# Todo:设置过期时间,调研set是否支持
# Todo:设置过期时间,调研set是否支持
redis_client
.
expire
(
tag_recommend_redis_key
,
7
*
24
*
60
*
60
)
redis_client
.
expire
(
tag_recommend_redis_key
,
7
*
24
*
60
*
60
)
...
...
linucb/views/linucb.py
View file @
e3387b7b
...
@@ -14,9 +14,9 @@ from django.conf import settings
...
@@ -14,9 +14,9 @@ from django.conf import settings
class
LinUCB
:
class
LinUCB
:
d
=
2
d
=
2
alpha
=
0.
25
alpha
=
0.
1
r1
=
1
r1
=
1
0
r0
=
-
0.
5
r0
=
-
0.
1
default_tag_list
=
list
()
default_tag_list
=
list
()
@classmethod
@classmethod
...
@@ -70,7 +70,7 @@ class LinUCB:
...
@@ -70,7 +70,7 @@ class LinUCB:
#art_max = tag_list[np.argmax(np.dot(xaT, theta_tmp) + cls.alpha * np.sqrt(np.dot(np.dot(xaT, AaI_tmp), xa)))]
#art_max = tag_list[np.argmax(np.dot(xaT, theta_tmp) + cls.alpha * np.sqrt(np.dot(np.dot(xaT, AaI_tmp), xa)))]
top_tag_
set
=
se
t
()
top_tag_
dict
=
dic
t
()
np_score_list
=
list
()
np_score_list
=
list
()
np_score_dict
=
dict
()
np_score_dict
=
dict
()
...
@@ -85,18 +85,19 @@ class LinUCB:
...
@@ -85,18 +85,19 @@ class LinUCB:
sorted_np_score_list
=
sorted
(
np_score_list
,
reverse
=
True
)
sorted_np_score_list
=
sorted
(
np_score_list
,
reverse
=
True
)
for
top_score
in
sorted_np_score_list
:
for
top_score
in
sorted_np_score_list
:
for
top_score_index
in
np_score_dict
[
top_score
]:
for
top_score_index
in
np_score_dict
[
top_score
]:
top_tag_set
.
add
(
str
(
tag_list
[
top_score_index
],
encoding
=
"utf-8"
))
tag_id
=
str
(
tag_list
[
top_score_index
],
encoding
=
"utf-8"
)
if
len
(
top_tag_set
)
>=
10
:
top_tag_dict
[
tag_id
]
=
top_score
if
len
(
top_tag_dict
)
>=
10
:
break
break
if
len
(
top_tag_
se
t
)
>=
10
:
if
len
(
top_tag_
dic
t
)
>=
10
:
break
break
logging
.
info
(
"duan add,device_id:
%
s,sorted_np_score_list:
%
s,np_score_dict:
%
s"
%
(
str
(
device_id
),
str
(
sorted_np_score_list
),
str
(
np_score_dict
)))
logging
.
info
(
"duan add,device_id:
%
s,sorted_np_score_list:
%
s,np_score_dict:
%
s"
%
(
str
(
device_id
),
str
(
sorted_np_score_list
),
str
(
np_score_dict
)))
return
list
(
top_tag_set
)
return
top_tag_dict
except
:
except
:
logging
.
error
(
"catch exception,err_msg:
%
s"
%
traceback
.
format_exc
())
logging
.
error
(
"catch exception,err_msg:
%
s"
%
traceback
.
format_exc
())
return
[]
return
{}
@classmethod
@classmethod
def
init_device_id_linucb_info
(
cls
,
redis_cli
,
redis_prefix
,
device_id
,
tag_list
):
def
init_device_id_linucb_info
(
cls
,
redis_cli
,
redis_prefix
,
device_id
,
tag_list
):
...
...
search/utils/topic.py
View file @
e3387b7b
...
@@ -134,7 +134,7 @@ class TopicUtils(object):
...
@@ -134,7 +134,7 @@ class TopicUtils(object):
"""
"""
try
:
try
:
attention_user_id_list
=
list
()
attention_user_id_list
=
list
()
pick_user_id_list
=
list
()
#
pick_user_id_list = list()
# same_group_id_list = list()
# same_group_id_list = list()
user_tag_list
=
list
()
user_tag_list
=
list
()
...
@@ -145,8 +145,8 @@ class TopicUtils(object):
...
@@ -145,8 +145,8 @@ class TopicUtils(object):
attention_user_info_list
=
result_dict
[
"hits"
][
0
][
"_source"
][
"attention_user_id_list"
]
attention_user_info_list
=
result_dict
[
"hits"
][
0
][
"_source"
][
"attention_user_id_list"
]
attention_user_id_list
=
[
item
[
"user_id"
]
for
item
in
attention_user_info_list
]
attention_user_id_list
=
[
item
[
"user_id"
]
for
item
in
attention_user_info_list
]
pick_user_info_list
=
result_dict
[
"hits"
][
0
][
"_source"
][
"pick_user_id_list"
]
#
pick_user_info_list = result_dict["hits"][0]["_source"]["pick_user_id_list"]
pick_user_id_list
=
[
item
[
"user_id"
]
for
item
in
pick_user_info_list
]
#
pick_user_id_list = [item["user_id"] for item in pick_user_info_list]
# same_pictorial_user_info_list = result_dict["hits"][0]["_source"]["same_pictorial_user_id_list"]
# same_pictorial_user_info_list = result_dict["hits"][0]["_source"]["same_pictorial_user_id_list"]
#
#
...
@@ -165,22 +165,22 @@ class TopicUtils(object):
...
@@ -165,22 +165,22 @@ class TopicUtils(object):
"language_type"
:
1
"language_type"
:
1
}
}
},
},
"weight"
:
3
"weight"
:
4
},
},
{
{
"
linear
"
:
{
"
gauss
"
:
{
"create_time"
:
{
"create_time"
:
{
"scale"
:
"1d"
,
"scale"
:
"1d"
,
"decay"
:
0.99
"decay"
:
0.99
}
}
},
},
"weight"
:
5
00
"weight"
:
5
}
}
]
]
if
len
(
user_similar_score_list
)
>
0
:
if
len
(
user_similar_score_list
)
>
0
:
for
item
in
user_similar_score_list
[:
100
]:
for
item
in
user_similar_score_list
[:
100
]:
score_item
=
3
*
10
*
item
[
1
]
score_item
=
2
+
item
[
1
]
functions_list
.
append
(
functions_list
.
append
(
{
{
"filter"
:
{
"bool"
:
{
"filter"
:
{
"bool"
:
{
...
@@ -197,14 +197,14 @@ class TopicUtils(object):
...
@@ -197,14 +197,14 @@ class TopicUtils(object):
"weight"
:
3
,
"weight"
:
3
,
}
}
)
)
if
len
(
pick_user_id_list
)
>
0
:
#
if len(pick_user_id_list) > 0:
functions_list
.
append
(
#
functions_list.append(
{
#
{
"filter"
:
{
"bool"
:
{
#
"filter": {"bool": {
"should"
:
{
"terms"
:
{
"user_id"
:
pick_user_id_list
}}}},
#
"should": {"terms": {"user_id": pick_user_id_list}}}},
"weight"
:
2
#
"weight": 2
}
#
}
)
#
)
# if len(same_pictorial_id_list) > 0:
# if len(same_pictorial_id_list) > 0:
# functions_list.append(
# functions_list.append(
...
@@ -224,14 +224,33 @@ class TopicUtils(object):
...
@@ -224,14 +224,33 @@ class TopicUtils(object):
"weight"
:
1
"weight"
:
1
}
}
)
)
if
len
(
recommend_tag_list
)
>
0
:
if
len
(
recommend_tag_list
)
>
0
:
functions_list
.
append
(
if
len
(
recommend_tag_list
)
>
1
:
{
functions_list
+=
[
"filter"
:
{
"bool"
:
{
{
"should"
:
{
"terms"
:
{
"edit_tag_list"
:
recommend_tag_list
}}}},
"filter"
:
{
"term"
:
{
"tag_list"
:
recommend_tag_list
[
0
]}},
"weight"
:
3
"weight"
:
4
}
},
)
{
"filter"
:
{
"terms"
:
{
"tag_list"
:
recommend_tag_list
[
1
:]}},
"weight"
:
3
}
]
else
:
functions_list
.
append
(
{
"filter"
:
{
"terms"
:
{
"tag_list"
:
recommend_tag_list
}},
"weight"
:
3
}
)
# for tag_id in recommend_tag_dict:
# functions_list.append(
# {
# "filter": {"term": {"tag_list": tag_id}},
# "weight": recommend_tag_dict[tag_id]
# }
# )
low_content_level
=
4
if
query_type
==
TopicPageType
.
FIND_PAGE
else
3
low_content_level
=
4
if
query_type
==
TopicPageType
.
FIND_PAGE
else
3
query_function_score
=
{
query_function_score
=
{
...
@@ -319,7 +338,13 @@ class TopicUtils(object):
...
@@ -319,7 +338,13 @@ class TopicUtils(object):
},
},
"order"
:
"desc"
"order"
:
"desc"
}
}
}
},
# {
# "offline_score":{
# "order": "desc"
# }
# },
"_score"
]
]
result_dict
=
ESPerform
.
get_search_results
(
ESPerform
.
get_cli
(),
sub_index_name
=
index_type
,
query_body
=
q
,
result_dict
=
ESPerform
.
get_search_results
(
ESPerform
.
get_cli
(),
sub_index_name
=
index_type
,
query_body
=
q
,
offset
=
offset
,
size
=
size
)
offset
=
offset
,
size
=
size
)
...
...
search/views/topic.py
View file @
e3387b7b
...
@@ -57,7 +57,13 @@ def get_home_recommend_topic_ids(user_id, device_id, tag_id, offset, size, query
...
@@ -57,7 +57,13 @@ def get_home_recommend_topic_ids(user_id, device_id, tag_id, offset, size, query
tag_recommend_redis_key
=
"physical:linucb:tag_recommend:device_id:"
+
str
(
device_id
)
tag_recommend_redis_key
=
"physical:linucb:tag_recommend:device_id:"
+
str
(
device_id
)
recommend_tag_list
=
[]
# recommend_tag_dict = dict()
# tag_recommend_val = redis_client.get(tag_recommend_redis_key)
# if tag_recommend_val:
# recommend_tag_dict = json.loads(str(tag_recommend_val, encoding="utf-8"))
recommend_tag_list
=
list
()
tag_recommend_val
=
redis_client
.
get
(
tag_recommend_redis_key
)
tag_recommend_val
=
redis_client
.
get
(
tag_recommend_redis_key
)
if
tag_recommend_val
:
if
tag_recommend_val
:
recommend_tag_list
=
json
.
loads
(
str
(
tag_recommend_val
,
encoding
=
"utf-8"
))
recommend_tag_list
=
json
.
loads
(
str
(
tag_recommend_val
,
encoding
=
"utf-8"
))
...
@@ -123,15 +129,15 @@ def get_home_recommend_topic_ids(user_id, device_id, tag_id, offset, size, query
...
@@ -123,15 +129,15 @@ def get_home_recommend_topic_ids(user_id, device_id, tag_id, offset, size, query
# break
# break
have_read_topic_id_list
.
extend
(
topic_id_list
)
have_read_topic_id_list
.
extend
(
topic_id_list
)
if
len
(
have_read_topic_id_list
)
>
5
000
:
if
len
(
have_read_topic_id_list
)
>
30
000
:
cut_len
=
len
(
have_read_topic_id_list
)
-
5
000
cut_len
=
len
(
have_read_topic_id_list
)
-
30
000
have_read_topic_id_list
=
have_read_topic_id_list
[
cut_len
:]
have_read_topic_id_list
=
have_read_topic_id_list
[
cut_len
:]
redis_dict
=
{
redis_dict
=
{
"have_read_topic_list"
:
json
.
dumps
(
have_read_topic_id_list
),
"have_read_topic_list"
:
json
.
dumps
(
have_read_topic_id_list
),
}
}
redis_client
.
hmset
(
redis_key
,
redis_dict
)
redis_client
.
hmset
(
redis_key
,
redis_dict
)
# 每个session key保存15分钟
# 每个session key保存15分钟
redis_client
.
expire
(
redis_key
,
60
*
60
*
24
*
3
)
redis_client
.
expire
(
redis_key
,
60
*
60
*
24
*
3
0
)
return
topic_id_list
return
topic_id_list
except
:
except
:
...
...
trans2es/models/user_extra.py
View file @
e3387b7b
...
@@ -15,7 +15,8 @@ class UserExtra(models.Model):
...
@@ -15,7 +15,8 @@ class UserExtra(models.Model):
db_table
=
"user_extra"
db_table
=
"user_extra"
id
=
models
.
IntegerField
(
verbose_name
=
"主键ID"
,
primary_key
=
True
)
id
=
models
.
IntegerField
(
verbose_name
=
"主键ID"
,
primary_key
=
True
)
user_id
=
models
.
BigIntegerField
(
verbose_name
=
u"用户ID"
)
user_id
=
models
.
CharField
(
verbose_name
=
u"用户ID"
,
max_length
=
100
)
is_shadow
=
models
.
BooleanField
(
verbose_name
=
u"是否是马甲账户"
)
is_shadow
=
models
.
BooleanField
(
verbose_name
=
u"是否是马甲账户"
)
is_online
=
models
.
BooleanField
(
verbose_name
=
u"是否上线"
)
is_online
=
models
.
BooleanField
(
verbose_name
=
u"是否上线"
)
is_recommend
=
models
.
BooleanField
(
verbose_name
=
u"是否推荐"
)
is_recommend
=
models
.
BooleanField
(
verbose_name
=
u"是否推荐"
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment