alpha / physical · Commits · 0ad7f0ca

Commit 0ad7f0ca, authored Mar 27, 2019 by lixiaofang
Merge branch 'test' of git.wanmeizhensuo.com:alpha/physical into test

Parents: 8419b60e, 5ccf3977
Showing 18 changed files with 192 additions and 103 deletions.
.gitignore                                                   +14   −0
.idea/workspace.xml                                           +0   −0
injection/data_sync/tasks.py                                  +0   −1
libs/es.py                                                    +9   −8
linucb/views/collect_data.py                                 +13  −10
linucb/views/linucb.py                                        +6   −5
search/utils/topic.py                                         +0   −0
search/views/business_topic.py                                +4   −1
search/views/group.py                                         +4   −9
search/views/topic.py                                        +14  −17
trans2es/management/commands/trans2es_data2es_parallel.py     +1   −0
trans2es/mapping/topic-high-star.json                        +54   −0
trans2es/models/group.py                                      +9   −9
trans2es/models/pictorial.py                                  +1  −11
trans2es/models/topic.py                                     +13  −19
trans2es/type_info.py                                        +29   −6
trans2es/utils/group_transfer.py                              +1   −1
trans2es/utils/topic_transfer.py                             +20   −6
.gitignore

@@ -58,6 +58,7 @@ docs/_build/
# config
fabfile.py
settings.online.py
/gaia/settings.py
settings_local.py
media/
log/

@@ -69,3 +70,16 @@ Vagrantfile
*.DS_Store
dump.rdb
# .gitignore for yangchuncheng
api/management/commands/ycc*
settings_override*
.script/
.tmp.sql
.env
*.pem
/gaia/hospital_list_settings.py
coverage_html/
gaia/rpcd.json
*.swp
dbmw_deploy/config.dir/

.idea/workspace.xml

(This diff is collapsed.)

injection/data_sync/tasks.py

@@ -21,7 +21,6 @@ def write_to_es(es_type, pk_list, use_batch_query_set=False):
    type_info = type_info_map[es_type]
    logging.info("duan add,es_type:%s" % str(es_type))
    logging.info("get es_type:%s" % es_type)
    type_info.insert_table_by_pk_list(
        sub_index_name=es_type,
        pk_list=pk_list,

libs/es.py

@@ -111,10 +111,6 @@ class ESPerform(object):
                return False

        mapping_dict = cls.__load_mapping(sub_index_name)
        logging.info("get write_alias_name:%s" % write_alias_name)
        logging.info("get mapping_dict:%s" % mapping_dict)
        logging.info("get mapping_type:%s" % mapping_type)
        es_cli.indices.put_mapping(index=write_alias_name, body=mapping_dict, doc_type=mapping_type)

        return True

@@ -226,17 +222,22 @@ class ESPerform(object):
        for item in es_nodes_info_list:
            try:
                item_list = item.split(" ")
                if len(item_list) > 4:
                if len(item_list) == 11:
                    cpu_load = item_list[4]
                elif len(item_list) == 10:
                    cpu_load = item_list[3]
                    if int(cpu_load) > 60:
                else:
                    continue

                int_cpu_load = int(cpu_load)
                if int_cpu_load > 60:
                    high_num += 1
                es_nodes_list.append(int(cpu_load))
                es_nodes_list.append(int_cpu_load)
            except:
                logging.error("catch exception,item:%s,err_msg:%s" % (str(item), traceback.format_exc()))
                return True

        if high_num > 3:
            logging.info("check es_nodes_load high,cpu load:%s" % str(es_nodes_info_list))
            logging.info("check es_nodes_load high,cpu load:%s,ori_cpu_info:%s" % (str(es_nodes_list), str(es_nodes_info_list)))
            return True
        else:
            return False

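For context, the revised load check in the second hunk keys the CPU column off the row's column count (11 vs. 10 fields) instead of the earlier "> 4" guard, and skips rows it cannot classify. Below is a minimal standalone sketch of that parsing logic, assuming rows shaped like Elasticsearch's `GET _cat/nodes` text output; the function and variable names here are illustrative, not the project's, and unlike the real method it simply skips unparsable rows instead of returning early.

def is_load_high(rows, cpu_threshold=60, high_node_limit=3):
    high_num = 0
    loads = []  # mirrors es_nodes_list in the diff
    for row in rows:
        cols = row.split(" ")
        # The CPU column shifts depending on how many columns the row has.
        if len(cols) == 11:
            cpu_load = cols[4]
        elif len(cols) == 10:
            cpu_load = cols[3]
        else:
            continue
        try:
            cpu = int(cpu_load)
        except ValueError:
            continue
        if cpu > cpu_threshold:
            high_num += 1
        loads.append(cpu)
    return high_num > high_node_limit

sample_rows = [
    "10.0.0.1 62 99 4 85 0.5 0.6 0.7 di - node-1",   # 11 columns, cpu at index 4
    "10.0.0.2 55 98 3 0.4 0.5 0.6 di - node-2",      # 10 columns, cpu at index 3
]
print(is_load_high(sample_rows))  # False: only one node is above the threshold
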
linucb/views/collect_data.py

@@ -98,9 +98,10 @@ class CollectData(object):
            logging.info("consume topic_id:%s,device_id:%s" % (str(topic_id), str(device_id)))
            tag_list = list()
            sql_query_results = TopicTag.objects.filter(is_online=True, topic_id=topic_id)
            for sql_item in sql_query_results:
                tag_list.append(sql_item.tag_id)
            click_sql_query_results = TopicTag.objects.using(settings.SLAVE_DB_NAME).filter(topic_id=topic_id).values_list("tag_id", "is_online")
            for tag_id, is_online in click_sql_query_results:
                if is_online:
                    tag_list.append(tag_id)

            is_click = 1
            is_vote = 0

@@ -130,17 +131,19 @@ class CollectData(object):
                exposure_topic_id = item["card_id"]
                logging.info("consume exposure topic_id:%s,device_id:%s" % (str(exposure_topic_id), str(device_id)))
                if exposure_topic_id:
                    exposure_topic_id_list.append(exposure_topic_id)

            topic_tag_id_dict = dict()
            tag_list = list()
            sql_query_results = TopicTag.objects.filter(is_online=True, topic_id__in=exposure_topic_id_list)
            for sql_item in sql_query_results:
                tag_list.append(sql_item.tag_id)
                if sql_item.topic_id not in topic_tag_id_dict:
                    topic_tag_id_dict[sql_item.topic_id] = list()
                topic_tag_id_dict[sql_item.topic_id].append(sql_item.tag_id)
            exposure_sql_query_results = TopicTag.objects.using(settings.SLAVE_DB_NAME).filter(topic_id__in=exposure_topic_id_list).values_list("topic_id", "tag_id", "is_online")
            for topic_id, tag_id, is_online in exposure_sql_query_results:
                if is_online:
                    tag_list.append(tag_id)
                    if topic_id not in topic_tag_id_dict:
                        topic_tag_id_dict[topic_id] = list()
                    topic_tag_id_dict[topic_id].append(tag_id)

            is_click = 0
            is_vote = 0

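The new exposure-handling code reads (topic_id, tag_id, is_online) rows via values_list from the slave DB and folds them into a per-topic tag dict. One plausible reading of that loop is sketched below with made-up rows standing in for the query; the extracted diff loses indentation, so treating the dict update as applying only to online tags is an assumption.

rows = [(101, 7, True), (101, 8, False), (102, 9, True)]
tag_list = []
topic_tag_id_dict = {}
for topic_id, tag_id, is_online in rows:
    if is_online:
        tag_list.append(tag_id)
        if topic_id not in topic_tag_id_dict:
            topic_tag_id_dict[topic_id] = list()
        topic_tag_id_dict[topic_id].append(tag_id)
print(tag_list)            # [7, 9]
print(topic_tag_id_dict)   # {101: [7], 102: [9]}
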
linucb/views/linucb.py

@@ -9,6 +9,8 @@ import logging
import traceback
import json
import pickle

from django.conf import settings


class LinUCB:
    d = 2

@@ -22,12 +24,9 @@ class LinUCB:
        try:
            if len(cls.default_tag_list) == 0:
                query_item_results = Tag.objects.filter(is_online=True)
                for item in query_item_results:
                    cls.default_tag_list.append(item.id)
                cls.default_tag_list = Tag.objects.using(settings.SLAVE_DB_NAME).filter(is_online=True, collection=1).values_list("id", flat=True)[0:100]

            return cls.default_tag_list[:20]
            return cls.default_tag_list
        except:
            logging.error("catch exception,err_msg:%s" % traceback.format_exc())
            return list()

@@ -87,6 +86,8 @@ class LinUCB:
        for top_score in sorted_np_score_list:
            for top_score_index in np_score_dict[top_score]:
                top_tag_set.add(str(tag_list[top_score_index], encoding="utf-8"))
                if len(top_tag_set) >= 10:
                    break
            if len(top_tag_set) >= 10:
                break

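The last hunk adds an early exit to the nested tag-selection loops so that at most ten distinct tags are collected. A toy, runnable version of that double-break pattern is shown below; the scores and byte-string tags are fabricated.

np_score_dict = {0.9: [0, 1], 0.8: [2, 3, 4]}
tag_list = [b"tag-%d" % i for i in range(5)]
top_tag_set = set()
for top_score in sorted(np_score_dict.keys(), reverse=True):
    for top_score_index in np_score_dict[top_score]:
        top_tag_set.add(str(tag_list[top_score_index], encoding="utf-8"))
        if len(top_tag_set) >= 10:   # stop the inner loop once ten tags are gathered
            break
    if len(top_tag_set) >= 10:       # and then stop the outer loop as well
        break
print(top_tag_set)  # at most 10 decoded tag names
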
search/utils/topic.py

(This diff is collapsed.)

search/views/business_topic.py

@@ -18,11 +18,14 @@ def business_topic_search(filters, nfilters=None, sorts_by=None, offset=0, size=
    """帖子搜索。"""
    try:
        result_list = TopicUtils.list_topic_ids(
        result_list = TopicUtils.business_topic_ids(
            filters=filters, nfilters=nfilters, sorts_by=sorts_by, offset=offset, size=size,
            filter_online=False, index_name="topic")
        logging.info("get result_lsit:%s" % result_list)
        topic_ids = [item["_source"]["id"] for item in result_list["hits"]]
        return {"topic_ids": topic_ids, "total_count": result_list["total_count"]}
    except:

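For reference, the return payload is assembled from an Elasticsearch-style result the same way before and after the rename; a fabricated result_list shows the shape.

result_list = {"hits": [{"_source": {"id": 11}}, {"_source": {"id": 12}}], "total_count": 2}
topic_ids = [item["_source"]["id"] for item in result_list["hits"]]
print({"topic_ids": topic_ids, "total_count": result_list["total_count"]})
# {'topic_ids': [11, 12], 'total_count': 2}
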
search/views/group.py

@@ -13,7 +13,7 @@ from libs.es import ESPerform
@bind("physical/search/query_pictorial")
def query_group(query="", offset=0, size=10):
def query_pictorial(query="", offset=0, size=10):
    """
    :remark:小组搜索排序策略,缺少排序策略
    :param query:

@@ -103,7 +103,7 @@ def pictorial_topic(topic_id=-1, offset=0, size=10):
            }
        }
        q["_source"] = {
            "include": ["id", "pictorial_id", "tag_list"]
            "includes": ["id", "pictorial_id", "tag_list"]
        }
        result_dict = ESPerform.get_search_results(es_cli_obj, "topic", q, offset, size)
        logging.info("get result_dict:%s" % result_dict)

@@ -140,7 +140,7 @@ def pictorial_topic(topic_id=-1, offset=0, size=10):
        }
        q["_source"] = {
            "include": ["id", "update_time"]
            "includes": ["id", "update_time"]
        }
        q["sort"] = {
            'update_time': {

@@ -153,8 +153,6 @@ def pictorial_topic(topic_id=-1, offset=0, size=10):
        for item in result_dict["hits"]:
            pictorial_id = item["_source"]["id"]
            pictorial_id_list.append(pictorial_id)
        logging.info("get pictorial_id_list:%s" % pictorial_id_list)
        logging.info("get topic_tag_list:%s" % topic_tag_list)
        if len(pictorial_id_list) < 10:
            num = 10 - len(pictorial_id_list)

@@ -175,7 +173,7 @@ def pictorial_topic(topic_id=-1, offset=0, size=10):
                }
            }
            q["_source"] = {
                "include": ["id", "tag_id"]}
                "includes": ["id", "tag_id"]}
            q["sort"] = {
                'update_time': {
                    'order': 'desc'

@@ -192,9 +190,6 @@ def pictorial_topic(topic_id=-1, offset=0, size=10):
                if id not in pictorial_id_list:
                    pictorial_id_list.append(id)
            # logging.info("get result_dict tag:%s" % result_dict)
            logging.info("get pictorial_id_list tag:%s" % pictorial_id_list)
        pictorial_list = pictorial_id_list if len(pictorial_id_list) < 10 else pictorial_id_list[:10]
        return {"pictorial_ids_list": pictorial_list}

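The three `_source` hunks make the same correction: the source-filtering keys Elasticsearch documents are "includes"/"excludes" rather than "include". A sketch of the corrected query body follows; the field names come from the diff, while the surrounding query and sort clauses are only illustrative.

q = {
    "query": {"term": {"pictorial_id": 123}},  # placeholder query, not taken from the diff
    "_source": {
        "includes": ["id", "pictorial_id", "tag_list"]
    },
    "sort": {"update_time": {"order": "desc"}},
}
# result_dict = ESPerform.get_search_results(es_cli_obj, "topic", q, offset, size)
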
search/views/topic.py

@@ -27,7 +27,7 @@ def get_discover_page_topic_ids(user_id, device_id, size, query_type=TopicPageTy
        recommend_topic_ids = TopicUtils.get_recommend_topic_ids(user_id=user_id, tag_id=0, offset=0, size=size,
                                                                 single_size=size, query_type=query_type,
                                                                 filter_topic_id_list=have_read_topic_id_list)
                                                                 filter_topic_id_list=have_read_topic_id_list,
                                                                 index_type="topic-high-star")
        have_read_topic_id_list.extend(recommend_topic_ids)

@@ -46,18 +46,11 @@ def get_home_recommend_topic_ids(user_id, device_id, tag_id, offset, size, query
                                 query_type=TopicPageType.HOME_RECOMMEND):
    try:
        if query is None:
            if user_id == -1:
                redis_key = "physical:home_recommend" + ":user_id:" + str(user_id) + ":device_id:" + device_id + ":query_type:" + str(query_type)
            else:
                redis_key = "physical:home_recommend" + ":user_id:" + str(user_id) + ":query_type:" + str(query_type)
        else:
            if user_id == -1:
                redis_key = "physical:home_query" + ":user_id:" + str(user_id) + ":device_id:" + device_id + ":query:" + str(query) + ":query_type:" + str(query_type)
            else:
                redis_key = "physical:home_query" + ":user_id:" + str(user_id) + ":query:" + str(query) + ":query_type:" + str(query_type)

        redis_field_list = [b'have_read_topic_list']
        redis_field_val_list = redis_client.hmget(redis_key, redis_field_list)

@@ -72,7 +65,11 @@ def get_home_recommend_topic_ids(user_id, device_id, tag_id, offset, size, query
        recommend_topic_ids = []
        have_read_topic_id_list = list()
        if redis_field_val_list[0] and query is None:
        if redis_field_val_list[0]:
            if query is None:
                have_read_topic_id_list = list(json.loads(redis_field_val_list[0]))
            else:
                if offset > 0:
                    have_read_topic_id_list = list(json.loads(redis_field_val_list[0]))

        user_similar_score_redis_key = "physical:user_similar_score:user_id:" + str(user_id)

@@ -84,7 +81,7 @@ def get_home_recommend_topic_ids(user_id, device_id, tag_id, offset, size, query
                                                                  single_size=size, query=query, query_type=query_type,
                                                                  filter_topic_id_list=have_read_topic_id_list,
                                                                  recommend_tag_list=recommend_tag_list,
                                                                  user_similar_score_list=user_similar_score_redis_list)
                                                                  user_similar_score_list=user_similar_score_redis_list,
                                                                  index_type="topic-high-star")
        have_read_group_id_set = set()
        have_read_user_id_set = set()
        unread_topic_id_dict = dict()

@@ -170,11 +167,11 @@ def home_recommend(device_id="", user_id=-1, offset=0, size=10, query_type=Topic
    recommend_topic_ids = list()
    es_node_load_high_flag = False
    try:
        es_node_load_high_flag = ESPerform.if_es_node_load_high(ESPerform.get_cli())
    except:
        logging.error("catch exception,err_msg:%s" % traceback.format_exc())
        es_node_load_high_flag = True
    # try:
    #     es_node_load_high_flag = ESPerform.if_es_node_load_high(ESPerform.get_cli())
    # except:
    #     logging.error("catch exception,err_msg:%s" % traceback.format_exc())
    #     es_node_load_high_flag = True

    if es_node_load_high_flag:
        temp_downgrading_key = "physical:home_recommend:user_id:241407656:query_type:1"

@@ -273,7 +270,7 @@ def topic_detail_page_recommend(device_id="", user_id=-1, topic_id=-1, topic_pic
        result_list = TopicUtils.get_topic_detail_recommend_list(user_id, topic_id, topic_tag_list, topic_pictorial_id, topic_user_id, filter_topic_user_id,
                                                                 have_read_topic_list, offset, size, es_cli_obj)
                                                                 have_read_topic_list, offset, size, es_cli_obj, index_type="topic-high-star")
        recommend_topic_ids_list = list()
        if len(result_list) > 0:
            recommend_topic_ids_list = [item["_source"]["id"] for item in result_list]

@@ -348,7 +345,7 @@ def query_topic_by_user_similarity(topic_similarity_score_dict, offset=0, size=1
        must_topic_id_list = list(topic_similarity_score_dict.keys())
        topic_id_list = TopicUtils.get_recommend_topic_ids(tag_id=0, user_id=-1, offset=offset, size=size, single_size=size,
                                                           must_topic_id_list=must_topic_id_list)
                                                           must_topic_id_list=must_topic_id_list, index_type="topic-high-star")
        return {"recommend_topic_ids": topic_id_list}
    except:

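The reworked cache lookup boils down to one key scheme with optional device and query parts. A small helper that reproduces the key formats visible in the diff is sketched below; the helper itself is illustrative, since the real code builds the keys inline.

def build_redis_key(user_id, device_id, query, query_type):
    # "physical:home_recommend" for the default feed, "physical:home_query" for searches
    prefix = "physical:home_recommend" if query is None else "physical:home_query"
    key = prefix + ":user_id:" + str(user_id)
    if user_id == -1:
        key += ":device_id:" + device_id   # anonymous users are keyed by device
    if query is not None:
        key += ":query:" + str(query)
    key += ":query_type:" + str(query_type)
    return key

print(build_redis_key(-1, "dev-1", None, 1))
# physical:home_recommend:user_id:-1:device_id:dev-1:query_type:1
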
trans2es/management/commands/trans2es_data2es_parallel.py

@@ -49,6 +49,7 @@ class Job(object):
    def __call__(self):
        type_info = get_type_info_map()[self._type_name]
        assert isinstance(type_info, TypeInfo)
        result = type_info.insert_table_chunk(
            sub_index_name=self._sub_index_name,
            table_chunk=self._chunk,

trans2es/mapping/topic-high-star.json (new file, 0 → 100644)

{
  "dynamic": "strict",
  "properties": {
    "id": {"type": "long"},
    "is_online": {"type": "boolean"},  //上线
    "is_deleted": {"type": "boolean"},
    "vote_num": {"type": "long"},
    "reply_num": {"type": "long"},
    "name": {"type": "text", "analyzer": "gm_default_index", "search_analyzer": "gm_default_index"},
    "description": {"type": "text", "analyzer": "gm_default_index", "search_analyzer": "gm_default_index"},
    "content": {"type": "text", "analyzer": "gm_default_index", "search_analyzer": "gm_default_index"},
    "content_level": {"type": "text"},
    "user_id": {"type": "long"},
    "group_id": {"type": "long"},  //所在组ID
    "tag_list": {"type": "long"},  //标签属性
    "edit_tag_list": {"type": "long"},  //编辑标签
    "tag_name_list": {"type": "text", "analyzer": "gm_default_index", "search_analyzer": "gm_default_index"},
    "share_num": {"type": "long"},
    "pick_id_list": {"type": "long"},
    "offline_score": {"type": "double"},  //离线算分
    "manual_score": {"type": "double"},  //人工赋分
    "has_image": {"type": "boolean"},  //是否有图
    "has_video": {"type": "boolean"},  //是否是视频
    "create_time": {"type": "date", "format": "date_time_no_millis"},
    "update_time": {"type": "date", "format": "date_time_no_millis"},
    "create_time_val": {"type": "long"},
    "update_time_val": {"type": "long"},
    "language_type": {"type": "long"},
    "is_shadow": {"type": "boolean"},
    "is_recommend": {"type": "boolean"},
    "is_complaint": {"type": "boolean"},  //是否被举报
    "virtual_content_level": {"type": "text"},
    "like_num_crawl": {"type": "long"},  //爬取点赞数
    "comment_num_crawl": {"type": "long"},  //爬取评论数
    "is_crawl": {"type": "boolean"},
    "platform": {"type": "long"},
    "platform_id": {"type": "long"},
    "drop_score": {"type": "double"},  //人工降分
    "sort_score": {"type": "double"},  //排序分
    "pictorial_id": {"type": "long"},  //所在组ID
    "pictorial_name": {  //所在组名称
      "type": "text",
      "analyzer": "gm_default_index",
      "search_analyzer": "gm_default_index"
    }
  }
}
\ No newline at end of file
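Note that the new mapping file carries // comments, which plain json.loads() rejects, so whatever loads it (the earlier libs/es.py hunk shows cls.__load_mapping being called) has to tolerate or strip them; the diff does not show how that is done. A hypothetical loader sketch under that assumption:

import json
import re

def load_mapping_with_comments(path):
    with open(path, "r", encoding="utf-8") as f:
        text = f.read()
    # Strip // comments; good enough for this file, where no quoted value contains "//".
    text = re.sub(r"//[^\n]*", "", text)
    return json.loads(text)

# mapping = load_mapping_with_comments("trans2es/mapping/topic-high-star.json")
# The result could then be passed as the put_mapping body, as libs/es.py does above.
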
trans2es/models/group.py

@@ -27,15 +27,15 @@ class Group(models.Model):
    create_time = models.DateTimeField(verbose_name=u'创建时间', default=datetime.datetime.fromtimestamp(0))
    update_time = models.DateTimeField(verbose_name=u'更新时间', default=datetime.datetime.fromtimestamp(0))

    # # 获取前一天4,5星发帖数
    # def get_high_quality_topic_num(self):
    #     yesterday = datetime.datetime.now()-datetime.timedelta(days=1)
    #     yesterday_begin_time = "%s-%s-%s 00:00:00" % (yesterday.year, yesterday.month, yesterday.day)
    #     yesterday_end_time = "%s-%s-%s 23:59:59" % (yesterday.year, yesterday.month, yesterday.day)
    #
    #     topic_num = self.group_topics.filter(content_level__in=("4","5"),create_time__gte=yesterday_begin_time,create_time__lte=yesterday_end_time).count()
    #
    #     return topic_num

    # 获取前一天4,5星发帖数
    def get_high_quality_topic_num(self):
        yesterday = datetime.datetime.now() - datetime.timedelta(days=1)
        yesterday_begin_time = "%s-%s-%s 00:00:00" % (yesterday.year, yesterday.month, yesterday.day)
        yesterday_end_time = "%s-%s-%s 23:59:59" % (yesterday.year, yesterday.month, yesterday.day)

        topic_num = self.group_topics.filter(content_level__in=("4", "5"), create_time__gte=yesterday_begin_time, create_time__lte=yesterday_end_time).count()

        return topic_num

    def detail(self):
        result = {

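As a quick sanity check on the date strings this method builds, note that %s formatting does not zero-pad the month and day (assuming "today" is the commit date, 2019-03-27):

import datetime

yesterday = datetime.datetime(2019, 3, 27) - datetime.timedelta(days=1)
print("%s-%s-%s 00:00:00" % (yesterday.year, yesterday.month, yesterday.day))  # 2019-3-26 00:00:00
print("%s-%s-%s 23:59:59" % (yesterday.year, yesterday.month, yesterday.day))  # 2019-3-26 23:59:59
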
trans2es/models/pictorial.py

from django.db import models
import datetime
from django.db import models
import logging
import traceback

@@ -81,13 +81,9 @@ class Pictorial(models.Model):
            topic_id_list = PictorialTopics.objects.filter(pictorial_id=self.id).values_list("topic_id", flat=True)
            logging.info("get topic_id_list:%s" % topic_id_list)
            topic_num = Topic.objects.filter(content_level__in=["4", "5"], create_time__gte=yesterday_begin_time, create_time__lte=yesterday_end_time, id__in=topic_id_list).count()
            logging.info("get topic_num:%s" % topic_num)

            return topic_num
        except:

@@ -98,9 +94,7 @@ class Pictorial(models.Model):
        try:
            tag_id_list = []
            tags = PictorialTag.objects.filter(pictorial_id=self.id, is_online=True).values_list("tag_id", flat=True)
            logging.info("get tags:%s" % tags)
            for i in tags:
                logging.info("get tags i:%s" % i)
                tag_id_list.append(i)

            return tag_id_list

@@ -112,13 +106,9 @@ class Pictorial(models.Model):
    def get_tag_by_name(self, tag_id):
        try:
            tag_name_list = []
            logging.info("get tag_id:%s" % tag_id)
            tags = Tag.objects.filter(id__in=tag_id, is_online=True).values_list("name", flat=True)
            logging.info("get tags name :%s" % tags)
            for i in tags:
                tag_name_list.append(i)
                logging.info("get tags name i:%s" % i)

            return tag_name_list

trans2es/models/topic.py

@@ -52,9 +52,9 @@ class Topic(models.Model):
    id = models.IntegerField(verbose_name=u'日记ID', primary_key=True)
    name = models.CharField(verbose_name=u'日记名称', max_length=100)
    # group_id = models.IntegerField(verbose_name='用户所在组ID',default=-1)
    # group = models.ForeignKey(
    #     Group, verbose_name=u"关联的小组", related_name=u"group_topics", null=True, blank=True, default=None,
    #     on_delete=models.CASCADE)
    group = models.ForeignKey(Group, verbose_name=u"关联的小组", related_name=u"group_topics", null=True, blank=True, default=None, on_delete=models.CASCADE)
    user_id = models.IntegerField(verbose_name=u'用户ID')
    has_video = models.BooleanField(verbose_name=u'是否是视频日记')
    drop_score = models.IntegerField(verbose_name=u'人工赋分', default=0)

@@ -106,8 +106,7 @@ class Topic(models.Model):
        try:
            has_image = False
            query_list = TopicImage.objects.using(settings.SLAVE_DB_NAME).filter(topic_id=self.id, is_deleted=False, is_online=True)
            query_list = TopicImage.objects.using(settings.SLAVE_DB_NAME).filter(topic_id=self.id, is_deleted=False, is_online=True)
            if len(query_list) > 0:
                has_image = True

@@ -133,13 +132,12 @@ class Topic(models.Model):
            topic_tag_id_list = list()
            edit_tag_id_list = list()
            tag_id_list = TopicTag.objects.using(settings.SLAVE_DB_NAME).filter(topic_id=self.id).values_list("tag_id", flat=True)
            tag_id_list = TopicTag.objects.using(settings.SLAVE_DB_NAME).filter(topic_id=self.id).values_list("tag_id", flat=True)
            tag_query_results = Tag.objects.using(settings.SLAVE_DB_NAME).filter(id__in=tag_id_list)
            for tag_item in tag_query_results:
                is_online = tag_item.is_online
                is_deleted = tag_item.is_deleted
                collection = tag_item.collection
                is_online = tag_item.is_online
                is_deleted = tag_item.is_deleted
                collection = tag_item.collection
                if is_online and not is_deleted:
                    topic_tag_id_list.append(tag_item.id)

@@ -149,7 +147,7 @@ class Topic(models.Model):
            return (topic_tag_id_list, edit_tag_id_list)
        except:
            logging.error("catch exception,err_msg:%s" % traceback.format_exc())
            return ([], [])
            return ([],[])

    def get_tag_name_list(self, tag_id_list):
        try:

@@ -188,12 +186,9 @@ class Topic(models.Model):
            elif self.content_level == '3':
                offline_score += 2.0

            exposure_count = ActionSumAboutTopic.objects.using(settings.SLAVE_DB_NAME).filter(topic_id=self.id, data_type=1).count()
            click_count = ActionSumAboutTopic.objects.using(settings.SLAVE_DB_NAME).filter(topic_id=self.id, data_type=2).count()
            uv_num = ActionSumAboutTopic.objects.using(settings.SLAVE_DB_NAME).filter(topic_id=self.id, data_type=3).count()
            exposure_count = ActionSumAboutTopic.objects.using(settings.SLAVE_DB_NAME).filter(topic_id=self.id, data_type=1).count()
            click_count = ActionSumAboutTopic.objects.using(settings.SLAVE_DB_NAME).filter(topic_id=self.id, data_type=2).count()
            uv_num = ActionSumAboutTopic.objects.using(settings.SLAVE_DB_NAME).filter(topic_id=self.id, data_type=3).count()

            if exposure_count > 0:
                offline_score += click_count / exposure_count

@@ -224,8 +219,7 @@ class TopicComplaint(models.Model):
    id = models.IntegerField(verbose_name='日记图片ID', primary_key=True)
    user_id = models.BigIntegerField(verbose_name=u'用户ID', db_index=True)
    topic = models.ForeignKey(Topic, verbose_name=u"关联的帖子", null=True, blank=True, default=None, on_delete=models.CASCADE, related_name='complaints')
        Topic, verbose_name=u"关联的帖子", null=True, blank=True, default=None, on_delete=models.CASCADE, related_name='complaints')
    is_online = models.BooleanField(verbose_name=u"是否有效", default=True)

trans2es/type_info.py

@@ -81,6 +81,9 @@ class TypeInfo(object):
    def bulk_get_data(self, instance_iterable):
        data_list = []
        # 4,5星帖子单独索引
        topic_data_high_star_list = list()

        if self.batch_get_data_func:
            _pk_list = [getattr(instance, 'pk', None) for instance in instance_iterable]
            not_found_pk_list = []

@@ -137,7 +140,10 @@
                ))
            else:
                data_list.append(data)
        return data_list
                if self.type == "topic" and instance.content_level and int(instance.content_level) >= 4:
                    topic_data_high_star_list.append(data)

        return (data_list, topic_data_high_star_list)

@@ -162,10 +168,10 @@
            # traceback.print_exc()
            # es_result = 'error'

        return ESPerform.es_helpers_bulk(es, data_list, sub_index_name, True)
        return ESPerform.es_helpers_bulk(es, data_list, sub_index_name)

    def elasticsearch_bulk_insert(self, sub_index_name, instance_iterable, es=None):
        data_list = self.bulk_get_data(instance_iterable)
        data_list, topic_data_high_star_list = self.bulk_get_data(instance_iterable)
        return self.elasticsearch_bulk_insert_data(
            sub_index_name=sub_index_name,
            data_list=data_list,

@@ -188,7 +194,7 @@
        time1 = end - begin

        begin = time.time()
        data_list = self.bulk_get_data(instance_list)
        data_list, topic_data_high_star_list = self.bulk_get_data(instance_list)
        end = time.time()
        time2 = end - begin

@@ -201,6 +207,14 @@
            data_list=data_list,
            es=es,
        )

        # 同时写4星及以上的帖子
        if len(topic_data_high_star_list) > 0:
            self.elasticsearch_bulk_insert_data(
                sub_index_name="topic-high-star",
                data_list=topic_data_high_star_list,
                es=es,
            )

        end = time.time()
        time3 = end - begin
        logging.info("duan add,insert_table_by_pk_list time cost:%ds,%ds,%ds,%ds" % (time0, time1, time2, time3))

@@ -215,7 +229,7 @@
        stage_1_time = time.time()
        data_list = self.bulk_get_data(instance_list)
        data_list, topic_data_high_star_list = self.bulk_get_data(instance_list)
        stage_2_time = time.time()

@@ -260,7 +274,16 @@ def get_type_info_map():
        return _get_type_info_map_result

    type_info_list = [
        TypeInfo(
            name='topic-high-star',  # >=4星日记
            type='topic-high-star',
            model=topic.Topic,
            query_deferred=lambda: topic.Topic.objects.all().query,
            get_data_func=TopicTransfer.get_topic_data,
            bulk_insert_chunk_size=100,
            round_insert_chunk_size=5,
            round_insert_period=2,
        ),
        TypeInfo(
            name='topic',  # 日记
            type='topic',

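The contract change that threads through this file: bulk_get_data now returns a (data_list, topic_data_high_star_list) tuple, and callers push the second list into the separate topic-high-star index. A simplified, dict-based sketch of that routing follows; the real code works on model instances and transfer functions rather than plain dicts.

def split_high_star(doc_type, docs):
    data_list = []
    topic_data_high_star_list = []
    for data in docs:
        data_list.append(data)
        # 4- and 5-star topics are additionally routed to their own index.
        if doc_type == "topic" and data.get("content_level") and int(data["content_level"]) >= 4:
            topic_data_high_star_list.append(data)
    return data_list, topic_data_high_star_list

docs = [{"id": 1, "content_level": "5"}, {"id": 2, "content_level": "2"}]
data_list, high_star = split_high_star("topic", docs)
print(len(data_list), len(high_star))  # 2 1
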
trans2es/utils/group_transfer.py

@@ -33,7 +33,7 @@ class GroupTransfer(object):
            update_time = instance.update_time
            tzlc_udpate_time = tzlc(update_time)
            res["update_time"] = tzlc_udpate_time

            # res["high_quality_topic_num"] = instance.get_high_quality_topic_num()
            res["high_quality_topic_num"] = instance.get_high_quality_topic_num()

            return res
        except:

trans2es/utils/topic_transfer.py

@@ -28,11 +28,11 @@ class TopicTransfer(object):
        res["content_level"] = instance.content_level
        res["user_id"] = instance.user_id

        # if instance.group:
        #     res["group_id"] = instance.group.id
        # else:
        #     res["group_id"] = -1
        if instance.group:
            res["group_id"] = instance.group.id
        else:
            res["group_id"] = -1

        res["share_num"] = instance.share_num
        res["pictorial_id"] = instance.get_pictorial_id()

@@ -82,6 +82,20 @@
        # else:
        #     res["language_type"] = instance.language_type

        res["is_shadow"] = instance.is_shadow
        res["is_recommend"] = True if instance.is_recommend else False
        res["is_complaint"] = instance.is_complaint
        res["virtual_content_level"] = instance.virtual_content_level
        res["like_num_crawl"] = instance.like_num_crawl
        res["comment_num_crawl"] = instance.comment_num_crawl
        res["is_crawl"] = instance.is_crawl
        res["platform"] = instance.platform
        res["platform_id"] = instance.platform_id
        res["drop_score"] = instance.drop_score
        res["sort_score"] = instance.sort_score

        create_time = instance.create_time
        tzlc_create_time = tzlc(create_time)

@@ -91,7 +105,7 @@
        update_time = instance.update_time
        tzlc_update_time = tzlc(update_time)

        # res["update_time"] = tzlc_update_time
        res["update_time"] = tzlc_update_time
        res["update_time_val"] = int(time.mktime(tzlc_update_time.timetuple()))

        logging.info("test topic transfer time cost,time0:%d,time1:%d,time2:%d,time3:%d,time4:%d" % (time0, time1, time2, time3, time4))
