Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in
Toggle navigation
P
physical
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
alpha
physical
Commits
d964c416
Commit
d964c416
authored
Mar 11, 2019
by
段英荣
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
modify
parent
a9ac8ab8
Hide whitespace changes
Inline
Side-by-side
Showing
7 changed files
with
73 additions
and
131 deletions
+73
-131
linucb.py
linucb/views/linucb.py
+10
-2
topic.py
search/utils/topic.py
+18
-5
topic.py
search/views/topic.py
+0
-86
topic.json
trans2es/mapping/topic.json
+1
-1
topic.py
trans2es/models/topic.py
+1
-0
type_info.py
trans2es/type_info.py
+42
-37
topic_transfer.py
trans2es/utils/topic_transfer.py
+1
-0
No files found.
linucb/views/linucb.py
View file @
d964c416
...
...
@@ -59,9 +59,17 @@ class LinUCB:
AaI_tmp
=
np
.
array
(
Aa_list
)
theta_tmp
=
np
.
array
(
theta_list
)
art_max
=
tag_list
[
np
.
argmax
(
np
.
dot
(
xaT
,
theta_tmp
)
+
cls
.
alpha
*
np
.
sqrt
(
np
.
dot
(
np
.
dot
(
xaT
,
AaI_tmp
),
xa
)))]
top_tag_list_len
=
len
(
tag_list
)
/
3
top_np_ind
=
np
.
argpartition
(
np
.
dot
(
xaT
,
theta_tmp
)
+
cls
.
alpha
*
np
.
sqrt
(
np
.
dot
(
np
.
dot
(
xaT
,
AaI_tmp
),
xa
)),
-
top_tag_list_len
)[
-
top_tag_list_len
:]
return
[
int
(
art_max
)]
top_tag_list
=
list
()
top_np_list
=
top_np_ind
.
tolist
()
for
tag_id
in
top_np_list
:
top_tag_list
.
append
(
tag_id
)
#art_max = tag_list[np.argmax(np.dot(xaT, theta_tmp) + cls.alpha * np.sqrt(np.dot(np.dot(xaT, AaI_tmp), xa)))]
return
top_tag_list
except
:
logging
.
error
(
"catch exception,err_msg:
%
s"
%
traceback
.
format_exc
())
return
[]
...
...
search/utils/topic.py
View file @
d964c416
...
...
@@ -153,10 +153,6 @@ class TopicUtils(object):
user_tag_list
=
result_dict
[
"hits"
][
0
][
"_source"
][
"tag_list"
]
# attention_user_id_term_list = cls.___get_should_term_list(attention_user_id_list,field_name="user_id")
# pick_user_id_term_list = cls.___get_should_term_list(pick_user_id_list,field_name="user_id")
# same_group_user_id_term_list = cls.___get_should_term_list(same_group_id_list,field_name="user_id")
q
=
dict
()
q
[
"query"
]
=
dict
()
...
...
@@ -244,7 +240,24 @@ class TopicUtils(object):
{
"term"
:
{
"has_image"
:
True
}},
{
"term"
:
{
"is_online"
:
True
}},
{
"term"
:
{
"is_deleted"
:
False
}}
]
],
"should"
:
[
{
"bool"
:{
"must"
:[
{
"term"
:{
"has_image"
:
True
}},
{
"term"
:
{
"has_video"
:
False
}}
]
}
},{
"bool"
:{
"must"
:{
"term"
:{
"has_video"
:
True
}
}
}
}
],
"minimum_should_match"
:
1
}
},
"score_mode"
:
"sum"
,
...
...
search/views/topic.py
View file @
d964c416
...
...
@@ -130,92 +130,6 @@ def get_home_recommend_topic_ids(user_id,device_id,offset,size,query=None,query_
logging
.
error
(
"catch exception,err_msg:
%
s"
%
traceback
.
format_exc
())
return
[]
"""
def get_home_recommend_topic_ids(user_id,device_id,offset,size,query=None,query_type=TopicPageType.HOME_RECOMMEND):
    # Return up to ``size`` topic ids for the home-feed page at ``offset``.
    # A redis hash (one per user session, or per device for the anonymous
    # user_id == -1) remembers which topics were already served on earlier
    # pages, so a topic is recommended at most once per session.
    # NOTE(review): this function sits inside a triple-quoted string in the
    # original file (i.e. it is commented-out legacy code); only ``#``
    # comments are used here so the enclosing string is not terminated.
    # NOTE(review): line structure was reconstructed from a scraped diff —
    # verify branch nesting against the original file.
    try:
        # Build the session key: anonymous users (-1) are additionally keyed
        # by device_id; a non-None query switches to the "home_query" keyspace.
        if query is None:
            if user_id == -1:
                redis_key = "physical:home_recommend" + ":user_id:" + str(user_id) + ":device_id:" + device_id + ":query_type:" + str(query_type)
            else:
                redis_key = "physical:home_recommend" + ":user_id:" + str(user_id) + ":query_type:" + str(query_type)
        else:
            if user_id == -1:
                redis_key = "physical:home_query" + ":user_id:" + str(user_id) + ":device_id:" + device_id + ":query:" + str(query) + ":query_type:" + str(query_type)
            else:
                redis_key = "physical:home_query" + ":user_id:" + str(user_id) + ":query:" + str(query) + ":query_type:" + str(query_type)
        # Fetch bookkeeping fields plus one "already served ids" field per
        # previously delivered page (pages 0, size, 2*size, … below offset).
        redis_field_list = [b'last_offset_num', b'unread_topic_id']
        for page_id in range(0,offset,size):
            redis_field_list.append(str(page_id))
        redis_field_val_list = redis_client.hmget(redis_key,redis_field_list)
        last_offset_num = int(redis_field_val_list[0]) if redis_field_val_list[0] else -1
        recommend_topic_ids = []
        # Over-fetch (size*size candidates) so dedup/quota filtering below
        # can still fill a full page.
        topic_id_dict = TopicUtils.get_recommend_topic_ids(user_id, offset, size*size,query,query_type=query_type)
        have_read_group_id_set = set()
        unread_topic_id_dict = dict()
        have_read_topic_id_set = set()
        # Merge back candidates deferred on a previous page, but only when
        # paging is consistent (same session offset) or the user is anonymous.
        if redis_field_val_list[1] and offset>0:
            if (user_id>0 and offset==last_offset_num) or user_id==-1:
                ori_unread_topic_id_dict = json.loads(redis_field_val_list[1])
                if len(ori_unread_topic_id_dict) > 0:
                    topic_id_dict.update(ori_unread_topic_id_dict)
        # Union of all topic ids already served on earlier pages.
        for have_read_item in redis_field_val_list[2:]:
            if have_read_item:
                have_read_topic_id_set=have_read_topic_id_set.union(json.loads(have_read_item))
        # number of group-topics on the current page
        cur_page_group_num = 0
        # number of user-topics on the current page
        cur_page_user_num = 0
        for topic_id in topic_id_dict:
            # topic_id_dict maps topic_id -> group id (or a falsy/other value
            # for user topics) — presumably; verify against
            # TopicUtils.get_recommend_topic_ids.
            if topic_id_dict[topic_id] in have_read_group_id_set:
                # One topic per group per page: defer extra topics of an
                # already-used group to a later page.
                unread_topic_id_dict[topic_id] = topic_id_dict[topic_id]
            else:
                if topic_id not in have_read_topic_id_set:
                    # Page quota: ~90% group topics, ~10% user topics.
                    if isinstance(topic_id_dict[topic_id],int) and topic_id_dict[topic_id]>0 and cur_page_group_num<(size*0.9):
                        have_read_group_id_set.add(topic_id_dict[topic_id])
                        have_read_topic_id_set.add(topic_id)
                        cur_page_group_num += 1
                        recommend_topic_ids.append(topic_id)
                    elif topic_id_dict[topic_id] and cur_page_user_num<(size*0.1):
                        cur_page_user_num += 1
                        recommend_topic_ids.append(topic_id)
                    else:
                        # Quota exhausted — defer to a later page.
                        unread_topic_id_dict[topic_id] = topic_id_dict[topic_id]
            if len(recommend_topic_ids) >= size:
                break
        # If the quota filter left the page short, backfill from the deferred
        # pool; whatever is still left over is persisted for the next page.
        offi_unread_topic_id_dict = dict()
        if len(recommend_topic_ids) < size and len(unread_topic_id_dict)>0:
            for unread_topic_id in unread_topic_id_dict:
                if len(recommend_topic_ids)<size:
                    recommend_topic_ids.append(unread_topic_id)
                else:
                    offi_unread_topic_id_dict[unread_topic_id] = unread_topic_id_dict[unread_topic_id]
        # Persist session state: deferred candidates, next expected offset,
        # and the ids served for this page (field name = current offset).
        redis_dict = {
            "unread_topic_id":json.dumps(offi_unread_topic_id_dict),
            "last_offset_num":offset+size,
            offset: json.dumps(recommend_topic_ids)
        }
        redis_client.hmset(redis_key,redis_dict)
        # each session key is kept for 15 minutes
        redis_client.expire(redis_key,15*60)
        return recommend_topic_ids
    except:
        logging.error("catch exception,err_msg:%s" % traceback.format_exc())
        return []
"""
@bind
(
"physical/search/query_tag_id_by_topic"
)
def
query_tag_id_by_topic
(
offset
=
0
,
size
=
10
,
topic_id_list
=
[],
user_id
=-
1
):
try
:
...
...
trans2es/mapping/topic.json
View file @
d964c416
...
...
@@ -14,13 +14,13 @@
"group_id"
:{
"type"
:
"long"
},
//所在组ID
"tag_list"
:{
"type"
:
"long"
},
//标签属性
"edit_tag_list"
:{
"type"
:
"long"
},
//编辑标签
"tag_name_list"
:{
"type"
:
"text"
,
"analyzer"
:
"gm_default_index"
,
"search_analyzer"
:
"gm_default_index"
},
"share_num"
:{
"type"
:
"long"
},
"pick_id_list"
:{
"type"
:
"long"
},
"offline_score"
:{
"type"
:
"double"
},
//离线算分
"manual_score"
:{
"type"
:
"double"
},
//人工赋分
"has_image"
:{
"type"
:
"boolean"
},
//是否有图
"has_video"
:{
"type"
:
"boolean"
},
//是否是视频
"create_time"
:{
"type"
:
"date"
,
"format"
:
"date_time_no_millis"
},
"update_time"
:{
"type"
:
"date"
,
"format"
:
"date_time_no_millis"
},
"create_time_val"
:{
"type"
:
"long"
},
...
...
trans2es/models/topic.py
View file @
d964c416
...
...
@@ -54,6 +54,7 @@ class Topic(models.Model):
Group
,
verbose_name
=
u"关联的小组"
,
related_name
=
u"group_topics"
,
null
=
True
,
blank
=
True
,
default
=
None
,
on_delete
=
models
.
CASCADE
)
user_id
=
models
.
IntegerField
(
verbose_name
=
u'用户ID'
)
has_video
=
models
.
IntegerField
(
verbose_name
=
u'是否是视频日记'
)
drop_score
=
models
.
IntegerField
(
verbose_name
=
u'人工赋分'
,
default
=
0
)
description
=
models
.
CharField
(
verbose_name
=
u'日记本描述'
,
max_length
=
200
)
content
=
models
.
CharField
(
verbose_name
=
u'日记本内容'
,
max_length
=
1000
)
...
...
trans2es/type_info.py
View file @
d964c416
...
...
@@ -185,43 +185,48 @@ class TypeInfo(object):
)
def insert_table_chunk(self, sub_index_name, table_chunk, es=None):
    # Bulk-insert one chunk of table rows into Elasticsearch and return a
    # one-line timing/result report.
    #
    # Stages (each timed separately for the report):
    #   1. materialize the chunk into a list of model instances,
    #   2. serialize them via ``self.bulk_get_data``,
    #   3. write them with ``ESPerform.es_helpers_bulk``.
    #
    # :param sub_index_name: target (sub-)index name, also echoed in logs.
    # :param table_chunk: iterable of model rows; must expose
    #     ``get_pk_start()`` / ``get_pk_stop()`` for the report.
    # :param es: optional ES client handed through to the bulk helper.
    # :returns: formatted report string on success, ``None`` on any failure
    #     (the exception is logged, not propagated).
    try:
        # NOTE(review): time.clock() was deprecated in 3.3 and removed in
        # 3.8 — switch to time.perf_counter() once the runtime allows.
        start_clock = time.clock()
        start_time = time.time()
        instance_list = list(table_chunk)
        stage_1_time = time.time()
        data_list = self.bulk_get_data(instance_list)
        stage_2_time = time.time()
        es_result = ESPerform.es_helpers_bulk(
            es_cli=es,
            data_list=data_list,
            sub_index_name=sub_index_name,
            auto_create_index=True,
        )
        logging.info("es_helpers_bulk,sub_index_name:%s,data_list len:%d" % (sub_index_name, len(data_list)))
        stage_3_time = time.time()
        end_clock = time.clock()
        return (
            '{datetime} {index_prefix} {type_name:10s} {pk_start:>15s} {pk_stop:>15s} {count:5d} '
            '{stage_1_duration:6.3f} {stage_2_duration:6.3f} {stage_3_duration:6.3f} {clock_duration:6.3f} '
            '{response}'
        ).format(
            datetime=datetime.datetime.now().strftime('%Y-%m-%dT%H:%M:%S.%f'),
            index_prefix=sub_index_name,
            type_name=self.name,
            pk_start=repr(table_chunk.get_pk_start()),
            pk_stop=repr(table_chunk.get_pk_stop()),
            count=len(instance_list),
            stage_1_duration=stage_1_time - start_time,
            stage_2_duration=stage_2_time - stage_1_time,
            stage_3_duration=stage_3_time - stage_2_time,
            clock_duration=end_clock - start_clock,
            response=es_result,
        )
    except Exception:
        # Narrowed from a bare ``except:`` so SystemExit/KeyboardInterrupt
        # still propagate; the best-effort return-None contract is kept.
        logging.error("catch exception,err_msg:%s" % traceback.format_exc())
        return None
_get_type_info_map_result
=
None
...
...
trans2es/utils/topic_transfer.py
View file @
d964c416
...
...
@@ -57,6 +57,7 @@ class TopicTransfer(object):
res
[
"manual_score"
]
=
instance
.
drop_score
res
[
"has_image"
]
=
instance
.
topic_has_image
()
res
[
"has_video"
]
=
instance
.
has_video
res
[
"language_type"
]
=
instance
.
language_type
# # 片假名
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment