Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in
Toggle navigation
P
physical
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
钟尚武
physical
Commits
8959570e
Commit
8959570e
authored
Nov 28, 2018
by
段英荣
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
modify
parent
8fcf5a5c
Hide whitespace changes
Inline
Side-by-side
Showing
9 changed files
with
255 additions
and
64 deletions
+255
-64
pick.py
search/utils/pick.py
+3
-2
topic.py
search/utils/topic.py
+122
-4
pick.py
search/views/pick.py
+2
-1
topic.py
search/views/topic.py
+40
-50
topic.json
trans2es/mapping/topic.json
+2
-0
group.py
trans2es/models/group.py
+3
-2
topic.py
trans2es/models/topic.py
+64
-3
user.py
trans2es/models/user.py
+17
-2
topic_transfer.py
trans2es/utils/topic_transfer.py
+2
-0
No files found.
search/utils/pick.py
View file @
8959570e
...
...
@@ -4,6 +4,7 @@
import
logging
import
traceback
import
datetime
import
json
from
libs.es
import
ESPerform
from
libs.cache
import
redis_client
...
...
@@ -13,7 +14,7 @@ class PickUtils(object):
def
refresh_redis_data
(
cls
,
have_read_celebrity_list
,
redis_cli
,
redis_key
,
key_exist
):
try
:
if
key_exist
:
redis_cli
.
set
(
redis_key
,
have_read_celebrity_list
)
redis_cli
.
set
(
redis_key
,
json
.
dumps
(
have_read_celebrity_list
)
)
else
:
now
=
datetime
.
datetime
.
now
()
...
...
@@ -22,7 +23,7 @@ class PickUtils(object):
valid_seconds
=
(
end_time
-
now
)
.
seconds
redis_cli
.
setex
(
redis_key
,
have_read_celebrity_list
,
valid_seconds
)
redis_cli
.
setex
(
redis_key
,
json
.
dumps
(
have_read_celebrity_list
)
,
valid_seconds
)
return
True
except
:
...
...
search/utils/topic.py
View file @
8959570e
...
...
@@ -23,9 +23,9 @@ class TopicUtils(object):
"user_id"
:
user_id
}
q
[
"_souce"
]
=
[
"attention_user_id_list"
,
"pick_user_id_list"
,
"same_group_user_id_list"
]
q
[
"_souce"
]
=
[
"
tag_list"
,
"
attention_user_id_list"
,
"pick_user_id_list"
,
"same_group_user_id_list"
]
result_dict
=
ESPerform
.
get_search_results
(
ESPerform
.
get_cli
(),
"
topic
"
,
q
,
offset
,
size
)
result_dict
=
ESPerform
.
get_search_results
(
ESPerform
.
get_cli
(),
"
user
"
,
q
,
offset
,
size
)
return
result_dict
except
:
...
...
@@ -56,4 +56,122 @@ class TopicUtils(object):
return
(
chinese_user_id_list
,
japan_user_id_list
,
korea_user_id_list
)
except
:
logging
.
error
(
"catch exception,err_msg:
%
s"
%
traceback
.
format_exc
())
return
([],[],[])
\ No newline at end of file
return
([],[],[])
@classmethod
def refresh_redis_hash_data(cls, redis_cli, redis_key, redis_data_dict):
    """Store ``redis_data_dict`` in the Redis hash named ``redis_key``.

    :param redis_cli: client object exposing ``hmset(key, mapping)``.
    :param redis_key: name of the hash to write.
    :param redis_data_dict: field/value mapping, passed to ``hmset`` unchanged.
    :return: ``True`` when ``hmset`` completed, ``False`` when it raised.
    """
    try:
        redis_cli.hmset(redis_key, redis_data_dict)
    except Exception:
        # Best-effort write: report the failure to the caller through the
        # return value instead of raising.  ``Exception`` (rather than a bare
        # ``except``) lets KeyboardInterrupt / SystemExit propagate.
        logging.error("catch exception,err_msg:%s" % traceback.format_exc())
        return False
    return True
@classmethod
def get_recommend_topic_ids(cls, user_id, offset, size, is_first_time,
                            group_topic_ids_index, not_group_topic_ids_index):
    """Fetch one page of home-page recommended topic ids for a user.

    TODO: result shuffling/diversification logic still needs to be added.

    :param user_id: id of the user whose related-user info drives scoring.
    :param offset: ignored; recomputed below from the page indexes.
    :param size: ignored; every request fetches a fixed page of 1000 hits.
    :param is_first_time: when true, read from offset 0 and apply no
        ``group_id`` filter.
    :param group_topic_ids_index: page index used for the offset (and the
        ``group_id > 0`` filter) when it is greater than zero.
    :param not_group_topic_ids_index: page index used for the offset when
        ``group_topic_ids_index`` is zero.
    :return: ``(group_topic_ids, not_group_topic_ids)`` -- ids of hits whose
        ``group_id`` is positive, and ids of all other hits.  ``([], [])``
        when the user is not found or any error occurs.
    """
    try:
        # Override the caller-supplied offset/size.
        size = 1000
        if is_first_time:
            offset = 0
        elif group_topic_ids_index > 0:
            offset = 1000 * group_topic_ids_index
        else:
            offset = 1000 * not_group_topic_ids_index

        result_dict = TopicUtils.get_related_user_info(user_id, 0, 1)
        if not result_dict["hits"]:
            logging.error("not find user_id:%d in es!" % int(user_id))
            # Callers unpack a pair, so the "user not found" result must have
            # the same shape as every other return of this method.
            return ([], [])

        user_hit = result_dict["hits"][0]

        # One weighted scoring function per kind of related user: followed
        # users weigh 5, picked users 3, same-group users 2.
        # NOTE(review): "match" is handed a list of ids here -- confirm that
        # ESPerform / the ES version in use accepts this form.
        functions_list = []
        for field_name, weight in (("attention_user_id_list", 5),
                                   ("pick_user_id_list", 3),
                                   ("same_group_user_id_list", 2)):
            (chinese_ids, japan_ids, korea_ids) = \
                TopicUtils.analyze_related_user_id_list(user_hit[field_name])
            functions_list.append({
                "filter": {"match": {"user_id": chinese_ids + japan_ids + korea_ids}},
                "weight": weight,
            })

        user_tag_list = user_hit["tag_list"]

        q = dict()
        q["filter"] = {
            "range": {"content_level": {"gte": 3, "lte": 5}}
        }
        if not is_first_time:
            # Later pages are restricted to either grouped or ungrouped topics.
            if group_topic_ids_index:
                q["filter"]["range"]["group_id"] = {"gt": 0}
            else:
                q["filter"]["range"]["group_id"] = {"lt": 0}

        query_function_score = {
            "query": {"match": {"tag_list": user_tag_list}},
            "score_mode": "sum",
            "boost_mode": "sum",
            "functions": functions_list,
        }
        q["query"] = {"function_score": query_function_score}
        q["_source"] = {"include": ["id", "group_id"]}

        logging.info("duan add,home_recommend es query:%s" % str(q).encode("utf-8"))
        result_dict = ESPerform.get_search_results(ESPerform.get_cli(), sub_index_name="topic",
                                                   query_body=q, offset=offset, size=size)

        group_topic_ids = list()
        not_group_topic_ids = list()
        for item in result_dict["hits"]:
            if item["group_id"] > 0:
                group_topic_ids.append(item["id"])
            else:
                not_group_topic_ids.append(item["id"])

        return (group_topic_ids, not_group_topic_ids)
    except Exception:
        # Best-effort: any failure yields an empty recommendation pair.
        logging.error("catch exception,err_msg:%s" % traceback.format_exc())
        return ([], [])
\ No newline at end of file
search/views/pick.py
View file @
8959570e
...
...
@@ -18,9 +18,10 @@ def pick_celebrity(user_id,pick_id,offset=0,size=10):
try
:
redis_celebrity_info
=
redis_client
.
get
(
redis_key
)
have_read_pick_celebrity_ids
=
json
.
loads
(
redis_celebrity_info
)
have_read_pick_celebrity_ids
=
json
.
loads
(
redis_celebrity_info
)
if
redis_celebrity_info
except
:
logging
.
error
(
"catch exception,err_msg:
%
s"
%
traceback
.
format_exc
())
pass
q
=
dict
()
q
[
"query"
]
=
{
...
...
search/views/topic.py
View file @
8959570e
...
...
@@ -4,7 +4,10 @@
from
gm_rpcd.all
import
bind
import
logging
import
traceback
import
json
from
search.utils.topic
import
TopicUtils
from
libs.es
import
ESPerform
from
libs.cache
import
redis_client
@bind
(
"physical/search/home_recommend"
)
...
...
@@ -14,61 +17,48 @@ def home_recommend(user_id=-1,offset=0,size=10):
:return:
"""
try
:
# 马甲账号集合
vest_account_list
=
list
()
redis_key
=
"physical:home_recommend:"
+
"user_id:"
+
str
(
user_id
)
redis_val_dict
=
redis_client
.
hgetall
(
redis_key
)
if
len
(
redis_val_dict
)
>
0
:
group_topic_ids
=
json
.
loads
(
redis_val_dict
[
"group_topic_ids"
])
not_group_topic_ids
=
json
.
loads
(
redis_val_dict
[
"not_group_topic_ids"
])
group_topic_ids_index
=
redis_val_dict
[
"group_topic_ids_index"
]
not_group_topic_ids_index
=
redis_val_dict
[
"not_group_topic_ids_index"
]
result_dict
=
TopicUtils
.
get_related_user_info
(
user_id
,
offset
,
size
)
if
len
(
result_dict
[
"hits"
])
==
0
:
logging
.
error
(
"Not find user_id:
%
d in es!"
%
int
(
user_id
))
return
{
"recommend_topic_ids"
:[]}
if
len
(
group_topic_ids
)
<
(
size
-
1
)
or
len
(
not_group_topic_ids
)
<
1
:
if
len
(
group_topic_ids
)
<
(
size
-
1
):
group_topic_ids_index
+=
1
(
new_group_topic_ids
,
new_not_group_topic_ids
)
=
TopicUtils
.
get_recommend_topic_ids
(
user_id
,
offset
,
size
,
False
,
group_topic_ids_index
,
not_group_topic_ids_index
)
group_topic_ids
+=
new_group_topic_ids
else
:
not_group_topic_ids_index
+=
1
(
new_group_topic_ids
,
new_not_group_topic_ids
)
=
TopicUtils
.
get_recommend_topic_ids
(
user_id
,
offset
,
size
,
False
,
group_topic_ids_index
,
not_group_topic_ids_index
)
not_group_topic_ids
+=
new_not_group_topic_ids
attention_user_info_list
=
result_dict
[
"hits"
][
0
][
"attention_user_id_list"
]
(
attention_chinese_user_id_list
,
attention_japan_user_id_list
,
attention_korea_user_id_list
)
=
TopicUtils
.
analyze_related_user_id_list
(
related_user_id_list
=
attention_user_info_list
)
pick_user_info_list
=
result_dict
[
"hits"
][
0
][
"pick_user_id_list"
]
(
pick_chinese_user_id_list
,
pick_japan_user_id_list
,
pick_korea_user_id_list
)
=
TopicUtils
.
analyze_related_user_id_list
(
pick_user_info_list
)
same_group_user_info_list
=
result_dict
[
"hits"
][
0
][
"same_group_user_id_list"
]
(
same_group_chinese_user_id_list
,
same_group_japan_user_id_list
,
same_group_korea_user_id_list
)
=
TopicUtils
.
analyze_related_user_id_list
(
same_group_user_info_list
)
q
=
dict
()
q
[
"filter"
]
=
{
"range"
:{
"content_level"
:{
"gte"
:
3
,
"lte"
:
5
}}
}
query_function_score
=
dict
()
functions
=
[
{
"filter"
:{
"match"
:{
"user_id"
:
attention_chinese_user_id_list
}},
"weight"
:
5
,
},
{
"filter"
:{
"match"
:{
"user_id"
:
pick_chinese_user_id_list
}},
"weight"
:
3
},
{
"filter"
:
{
"match"
:
{
"user_id"
:
same_group_chinese_user_id_list
}},
"weight"
:
2
},
{
"filter"
:
{
"match"
:
{
"user_id"
:
vest_account_list
}},
"weight"
:
0.5
},
{
"filter"
:{
"match"
:{
"content_level"
:
5
}},
"weight"
:
5
},
{
"filter"
:{
"match"
:{
"content_level"
:
4
}},
"weight"
:
3
},
{
"filter"
:{
"match"
:{
"content_level"
:
3
}},
"weight"
:
2
recommend_topic_ids
=
group_topic_ids
[:(
size
-
1
)]
+
not_group_topic_ids
[:
1
]
redis_hash_dict
=
{
"group_topic_ids"
:
group_topic_ids
[(
size
-
1
):],
"not_group_topic_ids"
:
not_group_topic_ids
[
1
:],
"group_topic_ids_index"
:
group_topic_ids_index
,
"not_group_topic_ids_index"
:
not_group_topic_ids_index
}
]
TopicUtils
.
refresh_redis_hash_data
(
redis_client
,
redis_key
,
redis_hash_dict
)
return
{
"recommend_topic_ids"
:
recommend_topic_ids
}
else
:
(
group_topic_ids
,
not_group_topic_ids
)
=
TopicUtils
.
get_recommend_topic_ids
(
user_id
,
offset
,
size
,
True
,
0
,
0
)
recommend_topic_ids
=
group_topic_ids
[:(
size
-
1
)]
+
not_group_topic_ids
[:
1
]
redis_hash_dict
=
{
"group_topic_ids"
:
group_topic_ids
[(
size
-
1
):],
"not_group_topic_ids"
:
not_group_topic_ids
[
1
:],
"group_topic_ids_index"
:
0
,
"not_group_topic_ids_index"
:
0
}
TopicUtils
.
refresh_redis_hash_data
(
redis_client
,
redis_key
,
redis_hash_dict
)
return
{
"recommend_topic_ids"
:
recommend_topic_ids
}
except
:
logging
.
error
(
"catch exception,err_msg:
%
s"
%
traceback
.
format_exc
())
...
...
trans2es/mapping/topic.json
View file @
8959570e
...
...
@@ -10,9 +10,11 @@
"content"
:{
"type"
:
"text"
},
"content_level"
:{
"type"
:
"text"
},
"user_id"
:{
"type"
:
"long"
},
"group_id"
:{
"type"
:
"long"
},
//所在组ID
"tag_list"
:{
"type"
:
"long"
},
//标签属性
"share_num"
:{
"type"
:
"long"
},
"pick_id_list"
:{
"type"
:
"long"
},
"offline_score"
:{
"type"
:
"double"
},
//离线算分
"create_time"
:{
"type"
:
"date"
,
"format"
:
"date_time_no_millis"
},
"update_time"
:{
"type"
:
"date"
,
"format"
:
"date_time_no_millis"
}
}
...
...
trans2es/models/group.py
View file @
8959570e
...
...
@@ -14,14 +14,15 @@ class Group(models.Model):
verbose_name
=
u'小组'
db_table
=
'group'
id
=
models
.
IntegerField
(
verbose_name
=
u"小组ID"
,
primary_key
=
True
)
name
=
models
.
CharField
(
verbose_name
=
u'名称'
,
max_length
=
100
,
default
=
''
)
description
=
models
.
CharField
(
verbose_name
=
u'描述'
,
max_length
=
200
,
default
=
''
)
creator_id
=
models
.
IntegerField
(
verbose_name
=
u'创建者ID'
)
#icon = ImgUrlField('图标', max_length=255, img_type=IMG_TYPE.WATERMARK, default='')
icon
=
models
.
CharField
(
verbose_name
=
u'图标'
,
max_length
=
128
,
default
=
''
)
topic_num
=
models
.
IntegerField
(
verbose_name
=
u'帖子数'
,
default
=
0
)
is_online
=
models
.
BooleanField
(
verbose_name
=
u"是否有效"
,
default
=
True
,
db_index
=
True
)
is_recommend
=
models
.
BooleanField
(
verbose_name
=
u"是否推荐"
,
default
=
False
,
db_index
=
True
)
is_online
=
models
.
BooleanField
(
verbose_name
=
u"是否有效"
,
default
=
True
)
is_recommend
=
models
.
BooleanField
(
verbose_name
=
u"是否推荐"
,
default
=
False
)
create_time
=
models
.
DateTimeField
(
verbose_name
=
u'创建时间'
,
default
=
datetime
.
datetime
.
fromtimestamp
(
0
))
update_time
=
models
.
DateTimeField
(
verbose_name
=
u'更新时间'
,
default
=
datetime
.
datetime
.
fromtimestamp
(
0
))
...
...
trans2es/models/topic.py
View file @
8959570e
...
...
@@ -10,6 +10,21 @@ from django.db import models
import
datetime
from
.pick_topic
import
PickTopic
from
.tag
import
TopicTag
from
.user
import
UserExtra
from
.group
import
Group
class ActionSumAboutTopic(models.Model):
    """Django model over the ``action_sum_about_topic`` table.

    The table's verbose name describes it as a summary of topic tracking
    (event) data.
    """

    class Meta:
        db_table = "action_sum_about_topic"
        verbose_name = u"帖子埋点数据汇总"

    # Date (stored as a string).
    partiton_date = models.CharField(
        max_length=20,
        verbose_name=u"日期",
    )
    # User device number.
    device_id = models.CharField(
        max_length=50,
        verbose_name=u"用户设备号",
    )
    # Topic ID.
    topic_id = models.CharField(
        max_length=50,
        verbose_name=u"帖子ID",
    )
    # User ID.
    user_id = models.CharField(
        max_length=50,
        verbose_name=u"用户ID",
    )
    # Action type.
    data_type = models.IntegerField(
        verbose_name=u"动作类型",
    )
    # Value recorded for the action.
    data_value = models.BigIntegerField(
        verbose_name=u"值",
    )
class
Topic
(
models
.
Model
):
class
Meta
:
...
...
@@ -18,7 +33,7 @@ class Topic(models.Model):
id
=
models
.
IntegerField
(
verbose_name
=
'日记ID'
,
primary_key
=
True
)
name
=
models
.
CharField
(
verbose_name
=
'日记名称'
,
max_length
=
100
)
group_id
=
models
.
IntegerField
(
verbose_name
=
'用户所在组ID'
)
group_id
=
models
.
IntegerField
(
verbose_name
=
'用户所在组ID'
,
default
=-
1
)
user_id
=
models
.
IntegerField
(
verbose_name
=
'用户ID'
)
description
=
models
.
CharField
(
verbose_name
=
'日记本描述'
,
max_length
=
200
)
content
=
models
.
CharField
(
verbose_name
=
'日记本内容'
,
max_length
=
1000
)
...
...
@@ -56,4 +71,50 @@ class Topic(models.Model):
return
topic_tag_id_list
except
:
logging
.
error
(
"catch exception,err_msg:
%
s"
%
traceback
.
format_exc
())
return
None
\ No newline at end of file
return
None
def get_topic_offline_score(self):
    """Compute this topic's offline ranking score.

    The score is the sum of:
      * +2.0 when the author's ``UserExtra`` row is flagged ``is_recommend``;
      * +4.0 when the topic's ``Group`` row is flagged ``is_recommend``;
      * +5.0 / +3.0 / +2.0 for ``content_level`` ``'5'`` / ``'4'`` / ``'3'``;
      * ``click_count / exposure_count`` when there is at least one
        exposure row;
      * ``vote_num / uv_num + reply_num / uv_num`` when there is at least
        one uv row.
    The total is halved when the author is a shadow account (checked only
    if the author is not flagged as recommended).

    :return: the score as a float; ``0.0`` when any error occurs.
    """
    try:
        offline_score = 0.0
        user_is_shadow = False

        # Is the author an officially recommended user?
        # A single LIMIT-1 fetch replaces count() followed by repeated [0].
        user_rows = list(UserExtra.objects.filter(user_id=self.user_id)[:1])
        if user_rows:
            user_extra = user_rows[0]
            if user_extra.is_recommend:
                offline_score += 2.0
            elif user_extra.is_shadow:
                user_is_shadow = True

        # Is the topic's group an officially recommended group?
        group_rows = list(Group.objects.filter(id=self.group_id)[:1])
        if group_rows and group_rows[0].is_recommend:
            offline_score += 4.0

        # Topic content level (compared as strings).
        level_bonus = {'5': 5.0, '4': 3.0, '3': 2.0}
        offline_score += level_bonus.get(self.content_level, 0.0)

        # NOTE(review): these count matching rows; data_value is not
        # summed -- confirm that is the intended metric.
        exposure_count = ActionSumAboutTopic.objects.filter(topic_id=self.id, data_type=1).count()
        click_count = ActionSumAboutTopic.objects.filter(topic_id=self.id, data_type=2).count()
        uv_num = ActionSumAboutTopic.objects.filter(topic_id=self.id, data_type=3).count()

        # float() keeps these ratios fractional even where "/" on two
        # integers would truncate.
        if exposure_count > 0:
            offline_score += float(click_count) / exposure_count
        if uv_num > 0:
            offline_score += (float(self.vote_num) / uv_num + float(self.reply_num) / uv_num)

        # Open question from the author: should shadow (sock-puppet)
        # accounts have their total score down-weighted?  For now: halved.
        if user_is_shadow:
            offline_score = offline_score * 0.5

        return offline_score
    except Exception:
        # Best-effort: a topic whose score cannot be computed scores 0.0.
        logging.error("catch exception,err_msg:%s" % traceback.format_exc())
        return 0.0
\ No newline at end of file
trans2es/models/user.py
View file @
8959570e
...
...
@@ -94,4 +94,20 @@ class User(models.Model):
return
user_tag_id_list
except
:
logging
.
error
(
"catch exception,err_msg:
%
s"
%
traceback
.
format_exc
())
return
None
\ No newline at end of file
return
None
class UserExtra(models.Model):
    """Django model over the ``user_extra`` table.

    The table's verbose name describes it as the recommended-user table.
    """

    class Meta:
        db_table = "user_extra"
        verbose_name = u"推荐用户表"

    # Primary key ID.
    id = models.IntegerField(
        primary_key=True,
        verbose_name="主键ID",
    )
    # User ID.
    user_id = models.IntegerField(
        verbose_name=u"用户ID",
    )
    # Whether this is a shadow (sock-puppet) account.
    is_shadow = models.BooleanField(
        verbose_name=u"是否是马甲账户",
    )
    # Whether the user is online.
    is_online = models.BooleanField(
        verbose_name=u"是否上线",
    )
    # Whether the user is recommended.
    is_recommend = models.BooleanField(
        verbose_name=u"是否推荐",
    )
    # No verbose name is given for the next two flags.
    has_answered = models.BooleanField(
        verbose_name=u"",
    )
    is_deleted = models.BooleanField(
        verbose_name=u"",
    )
    # Creation time; the default is evaluated once, when the class is defined.
    create_time = models.DateTimeField(
        default=datetime.datetime.fromtimestamp(0),
        verbose_name=u'创建时间',
    )
    # Update time; the default is evaluated once, when the class is defined.
    update_time = models.DateTimeField(
        default=datetime.datetime.fromtimestamp(0),
        verbose_name=u'更新时间',
    )
trans2es/utils/topic_transfer.py
View file @
8959570e
...
...
@@ -22,10 +22,12 @@ class TopicTransfer(object):
res
[
"content"
]
=
instance
.
content
res
[
"content_level"
]
=
instance
.
content_level
res
[
"user_id"
]
=
instance
.
user_id
res
[
"group_id"
]
=
instance
.
group_id
res
[
"share_num"
]
=
instance
.
share_num
res
[
"pick_id_list"
]
=
instance
.
get_pick_id_info
()
res
[
"tag_list"
]
=
instance
.
get_topic_tag_id_list
()
res
[
"offline_score"
]
=
instance
.
get_topic_offline_score
()
create_time
=
instance
.
create_time
res
[
"create_time"
]
=
tzlc
(
create_time
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment