Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in
Toggle navigation
P
physical
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
alpha
physical
Commits
8365b095
Commit
8365b095
authored
Apr 12, 2019
by
zhanglu
Browse files
Options
Browse Files
Download
Plain Diff
Merge branch 'master' into 'test'
Master See merge request
!243
parents
cdde760f
f4561b4c
Expand all
Hide whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
291 additions
and
176 deletions
+291
-176
Project.xml
.idea/codeStyles/Project.xml
+0
-25
misc.xml
.idea/misc.xml
+1
-1
physical.iml
.idea/physical.iml
+1
-1
tasks.py
injection/data_sync/tasks.py
+18
-14
es.py
libs/es.py
+49
-25
register_user_tag.py
linucb/utils/register_user_tag.py
+48
-40
collect_data.py
linucb/views/collect_data.py
+0
-0
linucb.py
linucb/views/linucb.py
+7
-2
topic.py
search/utils/topic.py
+5
-5
tag.py
search/views/tag.py
+8
-28
topic.py
search/views/topic.py
+9
-4
topic-high-star.json
trans2es/mapping/topic-high-star.json
+4
-2
topic-star-routing.json
trans2es/mapping/topic-star-routing.json
+4
-2
topic.json
trans2es/mapping/topic.json
+3
-2
pictorial.py
trans2es/models/pictorial.py
+12
-5
tag.py
trans2es/models/tag.py
+2
-0
topic.py
trans2es/models/topic.py
+32
-13
type_info.py
trans2es/type_info.py
+43
-6
excellect_topic_transfer.py
trans2es/utils/excellect_topic_transfer.py
+34
-0
topic_transfer.py
trans2es/utils/topic_transfer.py
+11
-1
No files found.
.idea/codeStyles/Project.xml
View file @
8365b095
...
...
@@ -24,29 +24,5 @@
<option
name=
"ITERATION_ELEMENTS_WRAPPING"
value=
"chop_down_if_not_single"
/>
</formatting-settings>
</DBN-SQL>
<DBN-PSQL>
<case-options
enabled=
"false"
>
<option
name=
"KEYWORD_CASE"
value=
"lower"
/>
<option
name=
"FUNCTION_CASE"
value=
"lower"
/>
<option
name=
"PARAMETER_CASE"
value=
"lower"
/>
<option
name=
"DATATYPE_CASE"
value=
"lower"
/>
<option
name=
"OBJECT_CASE"
value=
"preserve"
/>
</case-options>
<formatting-settings
enabled=
"false"
/>
</DBN-PSQL>
<DBN-SQL>
<case-options
enabled=
"false"
>
<option
name=
"KEYWORD_CASE"
value=
"lower"
/>
<option
name=
"FUNCTION_CASE"
value=
"lower"
/>
<option
name=
"PARAMETER_CASE"
value=
"lower"
/>
<option
name=
"DATATYPE_CASE"
value=
"lower"
/>
<option
name=
"OBJECT_CASE"
value=
"preserve"
/>
</case-options>
<formatting-settings
enabled=
"false"
>
<option
name=
"STATEMENT_SPACING"
value=
"one_line"
/>
<option
name=
"CLAUSE_CHOP_DOWN"
value=
"chop_down_if_statement_long"
/>
<option
name=
"ITERATION_ELEMENTS_WRAPPING"
value=
"chop_down_if_not_single"
/>
</formatting-settings>
</DBN-SQL>
</code_scheme>
</component>
\ No newline at end of file
.idea/misc.xml
View file @
8365b095
<?xml version="1.0" encoding="UTF-8"?>
<project
version=
"4"
>
<component
name=
"ProjectRootManager"
version=
"2"
project-jdk-name=
"Python 3.6 (
venv
)"
project-jdk-type=
"Python SDK"
/>
<component
name=
"ProjectRootManager"
version=
"2"
project-jdk-name=
"Python 3.6 (
physical1
)"
project-jdk-type=
"Python SDK"
/>
<component
name=
"PyCharmProfessionalAdvertiser"
>
<option
name=
"shown"
value=
"true"
/>
</component>
...
...
.idea/physical.iml
View file @
8365b095
...
...
@@ -2,7 +2,7 @@
<module
type=
"PYTHON_MODULE"
version=
"4"
>
<component
name=
"NewModuleRootManager"
>
<content
url=
"file://$MODULE_DIR$"
/>
<orderEntry
type=
"
inheritedJdk
"
/>
<orderEntry
type=
"
jdk"
jdkName=
"Python 3.6 (physical1)"
jdkType=
"Python SDK
"
/>
<orderEntry
type=
"sourceFolder"
forTests=
"false"
/>
</component>
<component
name=
"TestRunnerService"
>
...
...
injection/data_sync/tasks.py
View file @
8365b095
...
...
@@ -11,22 +11,26 @@ import traceback
from
libs.cache
import
redis_client
from
trans2es.models.face_user_contrast_similar
import
FaceUserContrastSimilar
,
UserSimilarScore
import
json
from
linucb.utils.register_user_tag
import
RegisterUserTag
@shared_task
def
write_to_es
(
es_type
,
pk_list
,
use_batch_query_set
=
False
):
try
:
pk_list
=
list
(
frozenset
(
pk_list
))
type_info_map
=
get_type_info_map
()
type_info
=
type_info_map
[
es_type
]
logging
.
info
(
"duan add,es_type:
%
s"
%
str
(
es_type
))
type_info
.
insert_table_by_pk_list
(
sub_index_name
=
es_type
,
pk_list
=
pk_list
,
use_batch_query_set
=
use_batch_query_set
,
es
=
ESPerform
.
get_cli
()
)
if
es_type
==
"register_user_tag"
:
RegisterUserTag
.
get_register_user_tag
(
pk_list
)
else
:
type_info_map
=
get_type_info_map
()
type_info
=
type_info_map
[
es_type
]
logging
.
info
(
"consume es_type:
%
s"
%
str
(
es_type
))
type_info
.
insert_table_by_pk_list
(
sub_index_name
=
es_type
,
pk_list
=
pk_list
,
use_batch_query_set
=
use_batch_query_set
,
es
=
ESPerform
.
get_cli
()
)
except
:
logging
.
error
(
"catch exception,err_msg:
%
s"
%
traceback
.
format_exc
())
...
...
@@ -37,7 +41,7 @@ def sync_face_similar_data_to_redis():
result_items
=
FaceUserContrastSimilar
.
objects
.
filter
(
is_online
=
True
,
is_deleted
=
False
)
.
distinct
()
.
values
(
"participant_user_id"
)
.
values_list
(
"participant_user_id"
,
flat
=
True
)
logging
.
info
(
"
duan add,
begin sync_face_similar_data_to_redis!"
)
logging
.
info
(
"begin sync_face_similar_data_to_redis!"
)
redis_key_prefix
=
"physical:user_similar:participant_user_id:"
for
participant_user_id
in
result_items
:
...
...
@@ -58,7 +62,7 @@ def sync_face_similar_data_to_redis():
)
redis_client
.
set
(
redis_key
,
json
.
dumps
(
item_list
))
logging
.
info
(
"
duan add,
participant_user_id:
%
d set data done!"
%
participant_user_id
)
logging
.
info
(
"participant_user_id:
%
d set data done!"
%
participant_user_id
)
except
:
logging
.
error
(
"catch exception,err_msg:
%
s"
%
traceback
.
format_exc
())
...
...
@@ -68,7 +72,7 @@ def sync_user_similar_score():
results_items
=
UserSimilarScore
.
objects
.
filter
(
is_deleted
=
False
)
.
distinct
()
.
values
(
"user_id"
)
.
values_list
(
"user_id"
,
flat
=
True
)
redis_key_prefix
=
"physical:user_similar_score:user_id:"
logging
.
info
(
"
duan add,
begin sync user_similar_score!"
)
logging
.
info
(
"begin sync user_similar_score!"
)
for
user_id
in
results_items
:
redis_key
=
redis_key_prefix
+
str
(
user_id
)
similar_results_items
=
UserSimilarScore
.
objects
.
filter
(
is_deleted
=
False
,
user_id
=
user_id
)
.
order_by
(
"-score"
)
...
...
libs/es.py
View file @
8365b095
...
...
@@ -155,25 +155,27 @@ class ESPerform(object):
bulk_actions
=
[]
if
sub_index_name
==
"topic"
:
if
sub_index_name
==
"topic"
or
sub_index_name
==
"topic-star-routing"
:
for
data
in
data_list
:
bulk_actions
.
append
({
'_op_type'
:
'index'
,
'_index'
:
official_index_name
,
'_type'
:
doc_type
,
'_id'
:
data
[
'id'
],
'_source'
:
data
,
'routing'
:
data
[
"content_level"
]
})
if
data
:
bulk_actions
.
append
({
'_op_type'
:
'index'
,
'_index'
:
official_index_name
,
'_type'
:
doc_type
,
'_id'
:
data
[
'id'
],
'_source'
:
data
,
'routing'
:
data
[
"content_level"
]
})
else
:
for
data
in
data_list
:
bulk_actions
.
append
({
'_op_type'
:
'index'
,
'_index'
:
official_index_name
,
'_type'
:
doc_type
,
'_id'
:
data
[
'id'
],
'_source'
:
data
,
})
if
data
:
bulk_actions
.
append
({
'_op_type'
:
'index'
,
'_index'
:
official_index_name
,
'_type'
:
doc_type
,
'_id'
:
data
[
'id'
],
'_source'
:
data
,
})
elasticsearch
.
helpers
.
bulk
(
es_cli
,
bulk_actions
)
return
True
...
...
@@ -262,29 +264,51 @@ class ESPerform(object):
return
True
@classmethod
def
get_tag_topic_list
(
cls
,
tag_id
):
def
get_tag_topic_list
(
cls
,
tag_id
,
have_read_topic_id_list
):
try
:
functions_list
=
list
()
for
id
in
tag_id
:
functions_list
.
append
(
{
"filter"
:
{
"term"
:
{
"tag_list"
:
id
}},
"weight"
:
1
}
)
q
=
{
"query"
:{
"bool"
:{
"must"
:[
{
"range"
:
{
"content_level"
:
{
"gte"
:
4
,
"lte"
:
5
}}},
{
"term"
:{
"is_online"
:
True
}},
{
"term"
:{
"is_deleted"
:
False
}},
{
"term"
:{
"tag_list"
:
tag_id
}}
]
"function_score"
:{
"query"
:
{
"bool"
:
{
"must"
:
[
{
"range"
:
{
"content_level"
:
{
"gte"
:
3
,
"lte"
:
5
}}},
{
"term"
:
{
"is_online"
:
True
}},
{
"term"
:
{
"is_deleted"
:
False
}},
{
"terms"
:
{
"tag_list"
:
tag_id
}}
]
}
},
"boost_mode"
:
"sum"
,
"score_mode"
:
"sum"
,
"functions"
:
functions_list
}
},
"_source"
:{
"include"
:[
"id"
]
},
"sort"
:[
{
"_score"
:
{
"order"
:
"desc"
}},
{
"create_time_val"
:{
"order"
:
"desc"
}},
{
"language_type"
:{
"order"
:
"asc"
}},
]
}
if
len
(
have_read_topic_id_list
)
>
0
:
q
[
"query"
][
"function_score"
][
"query"
][
"bool"
][
"must_not"
]
=
{
"terms"
:{
"id"
:
have_read_topic_id_list
}
}
result_dict
=
ESPerform
.
get_search_results
(
ESPerform
.
get_cli
(),
sub_index_name
=
"topic"
,
query_body
=
q
,
offset
=
0
,
size
=
5000
,
routing
=
"
4,5"
)
offset
=
0
,
size
=
100
,
routing
=
"3,
4,5"
)
topic_id_list
=
[
item
[
"_source"
][
"id"
]
for
item
in
result_dict
[
"hits"
]]
return
topic_id_list
...
...
linucb/utils/register_user_tag.py
View file @
8365b095
...
...
@@ -28,54 +28,62 @@ class RegisterUserTag(object):
linucb_device_id_register_tag_topic_id_prefix
=
"physical:linucb:register_tag_topic_recommend:device_id:"
linucb_user_id_register_tag_topic_id_prefix
=
"physical:linucb:register_tag_topic_recommend:user_id:"
linucb_register_user_tag_key
=
"physical:linucb:register_user_tag_info"
@classmethod
def
get_register_user_tag
(
cls
,
pk_list
):
try
:
user_id_set
=
set
()
# user_id_set = set()
user_id_dict
=
dict
()
query_results
=
AccountUserTag
.
objects
.
filter
(
pk__in
=
pk_list
)
for
item
in
query_results
:
tag_id
=
item
.
tag_id
user_id
=
item
.
user
user_tag_list
=
AccountUserTag
.
objects
.
filter
(
user
=
user_id
)
.
values_list
(
"tag_id"
,
flat
=
True
)
user_id_dict
[
user_id
]
=
user_tag_list
if
user_id
not
in
user_id_set
:
user_id_set
.
add
(
user_id
)
user_tag_list
=
AccountUserTag
.
objects
.
filter
(
user
=
user_id
)
.
values_list
(
"tag_id"
,
flat
=
True
)
have_read_topic_id_list
=
Tools
.
get_have_read_topic_id_list
(
-
1
,
user_id
,
TopicPageType
.
HOME_RECOMMEND
)
recommend_topic_id_list
=
list
()
cycle_num
=
int
(
10000
/
len
(
user_tag_list
))
for
index
in
range
(
0
,
cycle_num
):
for
tag_id
in
user_tag_list
:
redis_tag_id_key
=
cls
.
tag_topic_id_redis_prefix
+
str
(
tag_id
)
redis_tag_id_data
=
redis_client
.
get
(
redis_tag_id_key
)
tag_topic_id_list
=
json
.
loads
(
redis_tag_id_data
)
if
redis_tag_id_data
else
[]
if
not
redis_tag_id_data
:
tag_topic_id_list
=
ESPerform
.
get_tag_topic_list
(
tag_id
)
redis_client
.
set
(
redis_tag_id_key
,
json
.
dumps
(
tag_topic_id_list
))
redis_client
.
expire
(
redis_tag_id_key
,
1
*
24
*
60
*
60
)
if
len
(
tag_topic_id_list
)
>
index
:
for
topic_id
in
tag_topic_id_list
[
index
:]:
if
topic_id
not
in
have_read_topic_id_list
and
topic_id
not
in
recommend_topic_id_list
:
recommend_topic_id_list
.
append
(
topic_id
)
break
redis_register_tag_topic_data
=
{
"data"
:
json
.
dumps
(
recommend_topic_id_list
),
"cursor"
:
0
}
redis_client
.
hmset
(
cls
.
linucb_user_id_register_tag_topic_id_prefix
,
redis_register_tag_topic_data
)
redis_client
.
expire
(
cls
.
linucb_user_id_register_tag_topic_id_prefix
,
30
*
24
*
60
*
60
)
topic_recommend_redis_key
=
cls
.
linucb_user_id_recommend_topic_id_prefix
+
str
(
user_id
)
redis_data_dict
=
{
"data"
:
json
.
dumps
(
recommend_topic_id_list
),
"cursor"
:
0
}
redis_client
.
hmset
(
topic_recommend_redis_key
,
redis_data_dict
)
redis_client
.
expire
(
topic_recommend_redis_key
,
30
*
24
*
60
*
60
)
for
user_id
in
user_id_dict
:
redis_client
.
hset
(
cls
.
linucb_register_user_tag_key
,
user_id
,
json
.
dumps
(
list
(
user_id_dict
[
user_id
])))
# if user_id not in user_id_set:
# user_id_set.add(user_id)
#
# user_tag_list = AccountUserTag.objects.filter(user=user_id).values_list("tag_id",flat=True)
#
# have_read_topic_id_list = Tools.get_have_read_topic_id_list(-1, user_id,
# TopicPageType.HOME_RECOMMEND)
# recommend_topic_id_list = list()
# cycle_num = int(10000/len(user_tag_list))
# for index in range(0,cycle_num):
# for tag_id in user_tag_list:
# redis_tag_id_key = cls.tag_topic_id_redis_prefix + str(tag_id)
# redis_tag_id_data = redis_client.get(redis_tag_id_key)
# tag_topic_id_list = json.loads(redis_tag_id_data) if redis_tag_id_data else []
# if not redis_tag_id_data:
# tag_topic_id_list = ESPerform.get_tag_topic_list(tag_id)
# redis_client.set(redis_tag_id_key,json.dumps(tag_topic_id_list))
# redis_client.expire(redis_tag_id_key,1*24*60*60)
#
# if len(tag_topic_id_list)>index:
# for topic_id in tag_topic_id_list[index:]:
# if topic_id not in have_read_topic_id_list and topic_id not in recommend_topic_id_list:
# recommend_topic_id_list.append(topic_id)
# break
#
# redis_register_tag_topic_data = {
# "data": json.dumps(recommend_topic_id_list),
# "cursor": 0
# }
# redis_client.hmset(cls.linucb_user_id_register_tag_topic_id_prefix,redis_register_tag_topic_data)
# redis_client.expire(cls.linucb_user_id_register_tag_topic_id_prefix,30*24*60*60)
#
# topic_recommend_redis_key = cls.linucb_user_id_recommend_topic_id_prefix + str(user_id)
# redis_data_dict = {
# "data": json.dumps(recommend_topic_id_list),
# "cursor":0
# }
# redis_client.hmset(topic_recommend_redis_key,redis_data_dict)
# redis_client.expire(topic_recommend_redis_key,30*24*60*60)
#
except
:
logging
.
error
(
"catch exception,err_msg:
%
s"
%
traceback
.
format_exc
())
linucb/views/collect_data.py
View file @
8365b095
This diff is collapsed.
Click to expand it.
linucb/views/linucb.py
View file @
8365b095
...
...
@@ -20,11 +20,16 @@ class LinUCB:
default_tag_list
=
list
()
@classmethod
def
get_default_tag_list
(
cls
):
def
get_default_tag_list
(
cls
,
user_id
):
try
:
if
len
(
cls
.
default_tag_list
)
==
0
:
cls
.
default_tag_list
=
Tag
.
objects
.
using
(
settings
.
SLAVE_DB_NAME
)
.
filter
(
is_online
=
True
,
collection
=
1
)
.
values_list
(
"id"
,
flat
=
True
)[
0
:
100
]
if
user_id
:
redis_tag_data
=
redis_client
.
hget
(
"physical:linucb:register_user_tag_info"
,
user_id
)
cls
.
default_tag_list
=
json
.
loads
(
redis_tag_data
)
if
redis_tag_data
else
[]
if
len
(
cls
.
default_tag_list
)
==
0
:
cls
.
default_tag_list
=
Tag
.
objects
.
using
(
settings
.
SLAVE_DB_NAME
)
.
filter
(
is_online
=
True
,
collection
=
1
)
.
values_list
(
"id"
,
flat
=
True
)[
0
:
100
]
return
cls
.
default_tag_list
except
:
...
...
search/utils/topic.py
View file @
8365b095
...
...
@@ -122,7 +122,7 @@ class TopicUtils(object):
@classmethod
def
get_recommend_topic_ids
(
cls
,
user_id
,
tag_id
,
offset
,
size
,
single_size
,
query
=
None
,
query_type
=
TopicPageType
.
FIND_PAGE
,
filter_topic_id_list
=
[],
test_score
=
False
,
must_topic_id_list
=
[],
recommend_tag_list
=
[],
user_similar_score_list
=
[],
index_type
=
"topic"
,
routing
=
None
):
user_similar_score_list
=
[],
index_type
=
"topic"
,
routing
=
None
,
attention_tag_list
=
[]
):
"""
:需增加打散逻辑
:remark:获取首页推荐帖子列表
...
...
@@ -216,12 +216,12 @@ class TopicUtils(object):
# )
# query_tag_term_list = cls.___get_should_term_list(user_tag_list)
if
len
(
user
_tag_list
)
>
0
:
if
len
(
attention
_tag_list
)
>
0
:
functions_list
.
append
(
{
"filter"
:
{
"bool"
:
{
"should"
:
{
"terms"
:
{
"tag_list"
:
user
_tag_list
}}}},
"weight"
:
1
"should"
:
{
"terms"
:
{
"tag_list"
:
attention
_tag_list
}}}},
"weight"
:
60
}
)
# if len(recommend_tag_list)>0:
...
...
@@ -622,7 +622,7 @@ class TopicUtils(object):
})
if
not
v
:
if
v
in
(
None
,
''
)
:
continue
if
k
in
[
"create_time_gte"
,
"create_time_lte"
]:
...
...
search/views/tag.py
View file @
8365b095
...
...
@@ -24,34 +24,6 @@ def get_highlight(fields=[]):
@bind
(
"physical/search/query_tag"
)
def
query_tag
(
query
,
offset
,
size
):
try
:
"""
q = {
"query":{
"bool":{
"must":[
{"term":{"is_online":True}},
{"term": {"is_deleted": False}}
],
"should":[
{"multi_match":{
"query": query,
"fields":["name"],
"operator":"and"}}
],
"minimum_should_match":1
}
},
"sort":[
{"near_new_topic_num":{"order":"desc"}},
{'_score': {"order": "desc"}}
],
"_source": {
"includes": ["id", "name"]
}
}
q["highlight"] = get_highlight(["name"])
"""
q
=
{
"suggest"
:{
"tips-suggest"
:{
...
...
@@ -121,3 +93,11 @@ def query_by_tag_type(tag_type_id,offset,size):
logging
.
error
(
"catch exception,err_msg:
%
s"
%
traceback
.
format_exc
())
return
{
"tag_list"
:
[]}
@bind
(
"physical/search/choice_push_tag"
)
def
choice_push_tag
(
device_id
,
user_id
):
try
:
pass
except
:
logging
.
error
(
"catch exception,err_msg:
%
s"
%
traceback
.
format_exc
())
return
{
"tag_list"
:
[]}
search/views/topic.py
View file @
8365b095
...
...
@@ -90,12 +90,16 @@ def get_home_recommend_topic_ids(user_id, device_id, tag_id, offset, size, query
user_similar_score_redis_list
=
json
.
loads
(
redis_user_similar_score_redis_val
)
if
redis_user_similar_score_redis_val
else
[]
redis_tag_data
=
redis_client
.
hget
(
"physical:linucb:register_user_tag_info"
,
user_id
)
attention_tag_list
=
json
.
loads
(
redis_tag_data
)
if
redis_tag_data
else
[]
size
=
size
-
len
(
recommend_topic_list
)
topic_id_list
=
TopicUtils
.
get_recommend_topic_ids
(
user_id
=
user_id
,
tag_id
=
tag_id
,
offset
=
offset
,
size
=
size
,
single_size
=
size
,
query
=
query
,
query_type
=
query_type
,
filter_topic_id_list
=
have_read_topic_id_list
,
recommend_tag_list
=
recommend_topic_list
,
user_similar_score_list
=
user_similar_score_redis_list
,
index_type
=
"topic"
,
routing
=
"4,5"
)
user_similar_score_list
=
user_similar_score_redis_list
,
index_type
=
"topic"
,
routing
=
"4,5"
,
attention_tag_list
=
attention_tag_list
)
have_read_group_id_set
=
set
()
have_read_user_id_set
=
set
()
unread_topic_id_dict
=
dict
()
...
...
@@ -136,8 +140,9 @@ def get_home_recommend_topic_ids(user_id, device_id, tag_id, offset, size, query
# else:
# break
recommend_topic_list
.
extend
(
topic_id_list
)
have_read_topic_id_list
.
extend
(
recommend_topic_list
)
topic_id_list
.
extend
(
recommend_topic_list
)
# recommend_topic_list.extend(topic_id_list)
have_read_topic_id_list
.
extend
(
topic_id_list
)
if
len
(
have_read_topic_id_list
)
>
30000
:
cut_len
=
len
(
have_read_topic_id_list
)
-
30000
have_read_topic_id_list
=
have_read_topic_id_list
[
cut_len
:]
...
...
@@ -148,7 +153,7 @@ def get_home_recommend_topic_ids(user_id, device_id, tag_id, offset, size, query
# 每个session key保存15分钟
redis_client
.
expire
(
redis_key
,
60
*
60
*
24
*
30
)
return
recommend_topic
_list
return
topic_id
_list
except
:
logging
.
error
(
"catch exception,err_msg:
%
s"
%
traceback
.
format_exc
())
return
[]
...
...
trans2es/mapping/topic-high-star.json
View file @
8365b095
{
"dynamic"
:
"strict"
,
"_routing"
:
{
"required"
:
true
},
"properties"
:
{
"id"
:{
"type"
:
"long"
},
"is_online"
:{
"type"
:
"boolean"
},
//上线
"is_deleted"
:{
"type"
:
"boolean"
},
"vote_num"
:{
"type"
:
"long"
},
"total_vote_num"
:{
"type"
:
"long"
},
"reply_num"
:{
"type"
:
"long"
},
"name"
:{
"type"
:
"text"
,
"analyzer"
:
"gm_default_index"
,
"search_analyzer"
:
"gm_default_index"
},
"description"
:{
"type"
:
"text"
,
"analyzer"
:
"gm_default_index"
,
"search_analyzer"
:
"gm_default_index"
},
...
...
@@ -48,7 +50,7 @@
"type"
:
"text"
,
"analyzer"
:
"gm_default_index"
,
"search_analyzer"
:
"gm_default_index"
}
}
,
"is_excellent"
:{
"type"
:
"long"
}
}
}
trans2es/mapping/topic-star-routing.json
View file @
8365b095
{
"dynamic"
:
"strict"
,
"_routing"
:
{
"required"
:
true
},
"properties"
:
{
"id"
:{
"type"
:
"long"
},
"is_online"
:{
"type"
:
"boolean"
},
//上线
"is_deleted"
:{
"type"
:
"boolean"
},
"vote_num"
:{
"type"
:
"long"
},
"total_vote_num"
:{
"type"
:
"long"
},
"reply_num"
:{
"type"
:
"long"
},
"name"
:{
"type"
:
"text"
,
"analyzer"
:
"gm_default_index"
,
"search_analyzer"
:
"gm_default_index"
},
"description"
:{
"type"
:
"text"
,
"analyzer"
:
"gm_default_index"
,
"search_analyzer"
:
"gm_default_index"
},
...
...
@@ -48,7 +50,7 @@
"type"
:
"text"
,
"analyzer"
:
"gm_default_index"
,
"search_analyzer"
:
"gm_default_index"
}
}
,
"is_excellent"
:{
"type"
:
"long"
}
}
}
trans2es/mapping/topic.json
View file @
8365b095
{
"dynamic"
:
"strict"
,
"_routing"
:
{
"required"
:
true
},
"properties"
:
{
"id"
:{
"type"
:
"long"
},
"is_online"
:{
"type"
:
"boolean"
},
//上线
...
...
@@ -49,7 +50,7 @@
"type"
:
"text"
,
"analyzer"
:
"gm_default_index"
,
"search_analyzer"
:
"gm_default_index"
}
}
,
"is_excellent"
:{
"type"
:
"long"
}
}
}
trans2es/models/pictorial.py
View file @
8365b095
...
...
@@ -75,12 +75,19 @@ class Pictorial(models.Model):
def
get_effective
(
self
,
topic_id_list
):
try
:
topic_id_list
=
Topic
.
objects
.
filter
(
id__in
=
topic_id_list
,
content_level__in
=
[
3
,
4
,
5
,
0
],
is_online
=
True
)
.
count
()
if
topic_id_list
>=
5
:
return
True
else
:
return
False
effective_num
=
0
ret
=
False
for
topic_id
in
topic_id_list
:
topic_id_object
=
Topic
.
objects
.
filter
(
id
=
int
(
topic_id
))
.
first
()
if
topic_id_object
and
topic_id_object
.
is_online
and
int
(
topic_id_object
.
content_level
)
in
[
0
,
3
,
4
,
5
]:
effective_num
+=
1
if
effective_num
>=
5
:
ret
=
True
break
return
ret
except
:
logging
.
error
(
"catch exception,err_msg:
%
s"
%
traceback
.
format_exc
())
return
False
...
...
trans2es/models/tag.py
View file @
8365b095
...
...
@@ -19,6 +19,8 @@ class TopicTag(models.Model):
tag_id
=
models
.
IntegerField
(
verbose_name
=
u"标签ID"
)
topic_id
=
models
.
IntegerField
(
verbose_name
=
u"帖子ID"
)
is_online
=
models
.
BooleanField
(
verbose_name
=
u"是否在线"
)
is_collection
=
models
.
IntegerField
(
verbose_name
=
u"是否编辑标签"
)
class
AccountUserTag
(
models
.
Model
):
...
...
trans2es/models/topic.py
View file @
8365b095
...
...
@@ -107,11 +107,14 @@ class Topic(models.Model):
@property
def
is_complaint
(
self
):
"""是否被举报"""
try
:
if
TopicComplaint
.
objects
.
filter
(
topic_id
=
self
.
id
,
is_online
=
True
)
.
exists
():
return
True
if
TopicComplaint
.
objects
.
filter
(
topic_id
=
self
.
id
,
is_online
=
True
)
.
exists
():
return
True
return
False
return
False
except
:
logging
.
error
(
"catch exception,err_msg:
%
s"
%
traceback
.
format_exc
())
return
False
def
topic_has_image
(
self
):
try
:
...
...
@@ -197,14 +200,14 @@ class Topic(models.Model):
elif
self
.
content_level
==
'3'
:
offline_score
+=
2.0
exposure_count
=
ActionSumAboutTopic
.
objects
.
using
(
settings
.
SLAVE_DB_NAME
)
.
filter
(
topic_id
=
self
.
id
,
data_type
=
1
)
.
count
()
click_count
=
ActionSumAboutTopic
.
objects
.
using
(
settings
.
SLAVE_DB_NAME
)
.
filter
(
topic_id
=
self
.
id
,
data_type
=
2
)
.
count
()
uv_num
=
ActionSumAboutTopic
.
objects
.
using
(
settings
.
SLAVE_DB_NAME
)
.
filter
(
topic_id
=
self
.
id
,
data_type
=
3
)
.
count
()
if
exposure_count
>
0
:
offline_score
+=
click_count
/
exposure_count
if
uv_num
>
0
:
offline_score
+=
(
self
.
vote_num
/
uv_num
+
self
.
reply_num
/
uv_num
)
#
exposure_count = ActionSumAboutTopic.objects.using(settings.SLAVE_DB_NAME).filter(topic_id=self.id, data_type=1).count()
#
click_count = ActionSumAboutTopic.objects.using(settings.SLAVE_DB_NAME).filter(topic_id=self.id, data_type=2).count()
#
uv_num = ActionSumAboutTopic.objects.using(settings.SLAVE_DB_NAME).filter(topic_id=self.id, data_type=3).count()
#
#
if exposure_count > 0:
#
offline_score += click_count / exposure_count
#
if uv_num > 0:
#
offline_score += (self.vote_num / uv_num + self.reply_num / uv_num)
"""
1:马甲账号是否对总分降权?
...
...
@@ -247,7 +250,6 @@ class PictorialTopic(models.Model):
pictorial_id
=
models
.
BigIntegerField
(
verbose_name
=
u'画报ID'
)
topic_id
=
models
.
BigIntegerField
(
verbose_name
=
u'帖子ID'
)
is_online
=
models
.
BooleanField
(
verbose_name
=
u"是否有效"
,
default
=
True
)
is_online
=
models
.
BooleanField
(
verbose_name
=
u'是否上线'
)
is_deleted
=
models
.
BooleanField
(
verbose_name
=
u'是否删除'
)
...
...
@@ -262,3 +264,20 @@ class TopicExtra(models.Model):
id
=
models
.
IntegerField
(
verbose_name
=
u'ID'
,
primary_key
=
True
)
topic_id
=
models
.
IntegerField
(
verbose_name
=
u"帖子ID"
,
db_index
=
True
)
virtual_vote_num
=
models
.
IntegerField
(
verbose_name
=
"帖子虚拟点赞"
)
class
ExcellentTopic
(
models
.
Model
):
"""优质帖子"""
class
Meta
:
verbose_name
=
"优质帖子"
db_table
=
"excellent_topic"
id
=
models
.
IntegerField
(
verbose_name
=
u'ID'
,
primary_key
=
True
)
topic_id
=
models
.
IntegerField
(
verbose_name
=
u"帖子ID"
,
db_index
=
True
)
is_online
=
models
.
BooleanField
(
verbose_name
=
u'是否上线'
)
is_deleted
=
models
.
BooleanField
(
verbose_name
=
u'是否删除'
)
excellent_type
=
models
.
IntegerField
(
verbose_name
=
u"优质类型"
,
db_index
=
True
)
create_time
=
models
.
DateTimeField
(
verbose_name
=
u'创建时间'
,
default
=
datetime
.
datetime
.
fromtimestamp
(
0
))
update_time
=
models
.
DateTimeField
(
verbose_name
=
u'更新时间'
,
default
=
datetime
.
datetime
.
fromtimestamp
(
0
))
trans2es/type_info.py
View file @
8365b095
...
...
@@ -11,17 +11,19 @@ from libs.es import ESPerform
import
elasticsearch
import
elasticsearch.helpers
import
sys
from
libs.cache
import
redis_client
import
copy
from
trans2es.models
import
topic
,
user
,
pick_celebrity
,
group
,
celebrity
,
tag
,
contrast_similar
,
pictorial
from
trans2es.utils.user_transfer
import
UserTransfer
from
trans2es.utils.pick_celebrity_transfer
import
PickCelebrityTransfer
from
trans2es.utils.group_transfer
import
GroupTransfer
from
trans2es.utils.topic_transfer
import
TopicTransfer
from
trans2es.utils.excellect_topic_transfer
import
ExcellectTopicTransfer
from
trans2es.utils.pictorial_transfer
import
PictorialTransfer
from
trans2es.utils.celebrity_transfer
import
CelebrityTransfer
from
trans2es.utils.tag_transfer
import
TagTransfer
from
trans2es.utils.contrast_similar_transfer
import
Contrast_Similar_Transfer
from
libs.es
import
ESPerform
__es
=
None
...
...
@@ -62,6 +64,7 @@ class TypeInfo(object):
self
.
round_insert_chunk_size
=
round_insert_chunk_size
self
.
round_insert_period
=
round_insert_period
self
.
logic_database_id
=
logic_database_id
self
.
physical_topic_star
=
"physical:topic_star"
@property
def
query
(
self
):
...
...
@@ -139,9 +142,25 @@ class TypeInfo(object):
pk
,
))
else
:
data_list
.
append
(
data
)
if
self
.
type
==
"topic"
and
instance
.
content_level
and
int
(
instance
.
content_level
)
>=
4
:
topic_data_high_star_list
.
append
(
data
)
if
data
:
if
self
.
type
==
"topic"
:
ori_topic_star
=
redis_client
.
hget
(
self
.
physical_topic_star
,
data
[
"id"
])
if
not
ori_topic_star
:
redis_client
.
hset
(
self
.
physical_topic_star
,
data
[
"id"
],
data
[
"content_level"
])
else
:
int_ori_topic_star
=
int
(
str
(
ori_topic_star
,
encoding
=
"utf-8"
))
if
int_ori_topic_star
!=
data
[
"content_level"
]:
old_data
=
copy
.
deepcopy
(
data
)
old_data
[
"is_online"
]
=
False
old_data
[
"is_deleted"
]
=
True
old_data
[
"content_level"
]
=
int_ori_topic_star
data_list
.
append
(
old_data
)
redis_client
.
hset
(
self
.
physical_topic_star
,
data
[
"id"
],
data
[
"content_level"
])
data_list
.
append
(
data
)
# if self.type=="topic" and instance.content_level and int(instance.content_level)>=4:
# topic_data_high_star_list.append(data)
return
(
data_list
,
topic_data_high_star_list
)
...
...
@@ -199,14 +218,22 @@ class TypeInfo(object):
time2
=
end
-
begin
begin
=
time
.
time
()
logging
.
info
(
"get sub_index_name:
%
s"
%
sub_index_name
)
logging
.
info
(
"get data_list:
%
s"
%
data_list
)
#
logging.info("get sub_index_name:%s"%sub_index_name)
#
logging.info("get data_list:%s"%data_list)
self
.
elasticsearch_bulk_insert_data
(
sub_index_name
=
sub_index_name
,
data_list
=
data_list
,
es
=
es
,
)
if
sub_index_name
==
"topic"
:
self
.
elasticsearch_bulk_insert_data
(
sub_index_name
=
"topic-star-routing"
,
data_list
=
data_list
,
es
=
es
,
)
# # 同时写4星及以上的帖子
# if len(topic_data_high_star_list)>0:
# self.elasticsearch_bulk_insert_data(
...
...
@@ -294,6 +321,16 @@ def get_type_info_map():
round_insert_chunk_size
=
5
,
round_insert_period
=
2
,
),
TypeInfo
(
name
=
'excellect-topic'
,
# 优质帖子
type
=
'excellect-topic'
,
model
=
topic
.
ExcellentTopic
,
query_deferred
=
lambda
:
topic
.
ExcellentTopic
.
objects
.
all
()
.
query
,
get_data_func
=
ExcellectTopicTransfer
.
get_excellect_topic_data
,
bulk_insert_chunk_size
=
100
,
round_insert_chunk_size
=
5
,
round_insert_period
=
2
,
),
TypeInfo
(
name
=
'topic'
,
# 日记
type
=
'topic'
,
...
...
trans2es/utils/excellect_topic_transfer.py
0 → 100644
View file @
8365b095
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import
os
import
sys
import
logging
import
traceback
from
libs.tools
import
tzlc
import
time
import
re
import
datetime
from
trans2es.models.user
import
User
from
trans2es.models.topic
import
Topic
from
trans2es.utils.topic_transfer
import
TopicTransfer
class
ExcellectTopicTransfer
(
object
):
@classmethod
def
get_excellect_topic_data
(
cls
,
instance
):
try
:
topic_id
=
instance
.
topic_id
is_online
=
instance
.
is_online
is_deleted
=
instance
.
is_deleted
res
=
None
if
is_online
and
not
is_deleted
:
topic_ins
=
Topic
.
objects
.
filter
(
id
=
topic_id
)
.
first
()
if
topic_ins
:
res
=
TopicTransfer
.
get_topic_data
(
topic_ins
,
is_excellect
=
True
)
return
res
except
:
logging
.
error
(
"catch exception,err_msg:
%
s"
%
traceback
.
format_exc
())
return
None
trans2es/utils/topic_transfer.py
View file @
8365b095
...
...
@@ -9,11 +9,12 @@ import time
import
re
import
datetime
from
trans2es.models.user
import
User
from
trans2es.models.topic
import
ExcellentTopic
class
TopicTransfer
(
object
):
@classmethod
def
get_topic_data
(
cls
,
instance
):
def
get_topic_data
(
cls
,
instance
,
is_excellect
=
False
):
try
:
res
=
dict
()
...
...
@@ -111,6 +112,15 @@ class TopicTransfer(object):
res
[
"total_vote_num"
]
=
instance
.
get_virtual_vote_num
()
+
instance
.
vote_num
if
is_excellect
:
res
[
"is_excellent"
]
=
1
else
:
excelllect_object
=
ExcellentTopic
.
objects
.
filter
(
topic_id
=
instance
.
id
)
.
first
()
if
excelllect_object
and
excelllect_object
.
is_online
and
not
excelllect_object
.
is_deleted
:
res
[
"is_excellent"
]
=
1
else
:
res
[
"is_excellent"
]
=
0
logging
.
info
(
"test topic transfer time cost,time0:
%
d,time1:
%
d,time2:
%
d,time3:
%
d,time4:
%
d"
%
(
time0
,
time1
,
time2
,
time3
,
time4
))
return
res
except
:
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment