Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in
Toggle navigation
P
physical
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
alpha
physical
Commits
9ae99879
Commit
9ae99879
authored
Mar 25, 2019
by
段英荣
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
merge
parent
12c63120
Show whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
97 additions
and
25 deletions
+97
-25
topic-v1.json
trans2es/mapping/topic-v1.json
+53
-0
topic.json
trans2es/mapping/topic.json
+0
-0
user.json
trans2es/mapping/user.json
+17
-3
type_info.py
trans2es/type_info.py
+27
-22
No files found.
trans2es/mapping/topic-v1.json
0 → 100644
View file @
9ae99879
{
"dynamic"
:
"strict"
,
"properties"
:
{
"id"
:{
"type"
:
"long"
},
"is_online"
:{
"type"
:
"boolean"
},
//上线
"is_deleted"
:{
"type"
:
"boolean"
},
"vote_num"
:{
"type"
:
"long"
},
"reply_num"
:{
"type"
:
"long"
},
"name"
:{
"type"
:
"text"
,
"analyzer"
:
"gm_default_index"
,
"search_analyzer"
:
"gm_default_index"
},
"description"
:{
"type"
:
"text"
,
"analyzer"
:
"gm_default_index"
,
"search_analyzer"
:
"gm_default_index"
},
"content"
:{
"type"
:
"text"
,
"analyzer"
:
"gm_default_index"
,
"search_analyzer"
:
"gm_default_index"
},
"content_level"
:{
"type"
:
"text"
},
"user_id"
:{
"type"
:
"long"
},
"group_id"
:{
"type"
:
"long"
},
//所在组ID
"tag_list"
:{
"type"
:
"long"
},
//标签属性
"edit_tag_list"
:{
"type"
:
"long"
},
//编辑标签
"tag_name_list"
:{
"type"
:
"text"
,
"analyzer"
:
"gm_default_index"
,
"search_analyzer"
:
"gm_default_index"
},
"share_num"
:{
"type"
:
"long"
},
"pick_id_list"
:{
"type"
:
"long"
},
"offline_score"
:{
"type"
:
"double"
},
//离线算分
"manual_score"
:{
"type"
:
"double"
},
//人工赋分
"has_image"
:{
"type"
:
"boolean"
},
//是否有图
"has_video"
:{
"type"
:
"boolean"
},
//是否是视频
"create_time"
:{
"type"
:
"date"
,
"format"
:
"date_time_no_millis"
},
"update_time"
:{
"type"
:
"date"
,
"format"
:
"date_time_no_millis"
},
"create_time_val"
:{
"type"
:
"long"
},
"update_time_val"
:{
"type"
:
"long"
},
"language_type"
:{
"type"
:
"long"
},
"is_shadow"
:
{
"type"
:
"boolean"
},
"is_recommend"
:
{
"type"
:
"boolean"
},
"is_complaint"
:
{
"type"
:
"boolean"
},
//
是否被举报
"virtual_content_level"
:{
"type"
:
"text"
},
"like_num_crawl"
:
{
"type"
:
"long"
},
//
爬取点赞数
"comment_num_crawl"
:
{
"type"
:
"long"
},
//
爬取评论数
"is_crawl"
:
{
"type"
:
"boolean"
},
"platform"
:
{
"type"
:
"long"
},
"platform_id"
:
{
"type"
:
"long"
},
"drop_score"
:{
"type"
:
"double"
},
//
人工降分
"sort_score"
:{
"type"
:
"double"
},
//
排序分
"pictorial_id"
:{
"type"
:
"long"
},
// ID of the pictorial (画报) the topic is in
"pictorial_name"
:{
// name of the pictorial (画报) the topic is in
"type"
:
"text"
,
"analyzer"
:
"gm_default_index"
,
"search_analyzer"
:
"gm_default_index"
}
}
}
trans2es/mapping/topic.json
View file @
9ae99879
trans2es/mapping/user.json
View file @
9ae99879
...
@@ -28,17 +28,31 @@
...
@@ -28,17 +28,31 @@
"country_id"
:{
"type"
:
"text"
}
"country_id"
:{
"type"
:
"text"
}
}
}
},
},
"same_group_user_id_list"
:{
//同组用户列表
//
"same_group_user_id_list"
:{
//同组用户列表
//
"type"
:
"nested"
,
//
"properties"
:{
//
"user_id"
:{
"type"
:
"long"
},
//
"country_id"
:{
"type"
:
"text"
}
//
}
//
},
//
"attention_group_id_list"
:{
//关注小组列表
//
"type"
:
"nested"
,
//
"properties"
:{
//
"group_id"
:{
"type"
:
"long"
},
//
"update_time_val"
:{
"type"
:
"long"
}
//
}
//
},
"same_pictorial_user_id_list"
:{
//同画报用户列表
"type"
:
"nested"
,
"type"
:
"nested"
,
"properties"
:{
"properties"
:{
"user_id"
:{
"type"
:
"long"
},
"user_id"
:{
"type"
:
"long"
},
"country_id"
:{
"type"
:
"text"
}
"country_id"
:{
"type"
:
"text"
}
}
}
},
},
"attention_
group_id_list"
:{
//关注小组
列表
"attention_
pictorial_id_list"
:{
//关注画报
列表
"type"
:
"nested"
,
"type"
:
"nested"
,
"properties"
:{
"properties"
:{
"
group
_id"
:{
"type"
:
"long"
},
"
pictorial
_id"
:{
"type"
:
"long"
},
"update_time_val"
:{
"type"
:
"long"
}
"update_time_val"
:{
"type"
:
"long"
}
}
}
},
},
...
...
trans2es/type_info.py
View file @
9ae99879
...
@@ -12,11 +12,12 @@ import elasticsearch
...
@@ -12,11 +12,12 @@ import elasticsearch
import
elasticsearch.helpers
import
elasticsearch.helpers
import
sys
import
sys
from
trans2es.models
import
topic
,
user
,
pick_celebrity
,
group
,
celebrity
,
tag
,
contrast_similar
from
trans2es.models
import
topic
,
user
,
pick_celebrity
,
group
,
celebrity
,
tag
,
contrast_similar
,
pictorial
from
trans2es.utils.user_transfer
import
UserTransfer
from
trans2es.utils.user_transfer
import
UserTransfer
from
trans2es.utils.pick_celebrity_transfer
import
PickCelebrityTransfer
from
trans2es.utils.pick_celebrity_transfer
import
PickCelebrityTransfer
from
trans2es.utils.group_transfer
import
GroupTransfer
from
trans2es.utils.group_transfer
import
GroupTransfer
from
trans2es.utils.topic_transfer
import
TopicTransfer
from
trans2es.utils.topic_transfer
import
TopicTransfer
from
trans2es.utils.pictorial_transfer
import
PictorialTransfer
from
trans2es.utils.celebrity_transfer
import
CelebrityTransfer
from
trans2es.utils.celebrity_transfer
import
CelebrityTransfer
from
trans2es.utils.tag_transfer
import
TagTransfer
from
trans2es.utils.tag_transfer
import
TagTransfer
from
trans2es.utils.contrast_similar_transfer
import
Contrast_Similar_Transfer
from
trans2es.utils.contrast_similar_transfer
import
Contrast_Similar_Transfer
...
@@ -80,8 +81,6 @@ class TypeInfo(object):
...
@@ -80,8 +81,6 @@ class TypeInfo(object):
def
bulk_get_data
(
self
,
instance_iterable
):
def
bulk_get_data
(
self
,
instance_iterable
):
data_list
=
[]
data_list
=
[]
# 4,5星帖子单独索引
topic_data_high_star_list
=
list
()
if
self
.
batch_get_data_func
:
if
self
.
batch_get_data_func
:
_pk_list
=
[
getattr
(
instance
,
'pk'
,
None
)
for
instance
in
instance_iterable
]
_pk_list
=
[
getattr
(
instance
,
'pk'
,
None
)
for
instance
in
instance_iterable
]
not_found_pk_list
=
[]
not_found_pk_list
=
[]
...
@@ -138,9 +137,7 @@ class TypeInfo(object):
...
@@ -138,9 +137,7 @@ class TypeInfo(object):
))
))
else
:
else
:
data_list
.
append
(
data
)
data_list
.
append
(
data
)
if
self
.
type
==
"topic"
and
instance
.
content_level
and
int
(
instance
.
content_level
)
>=
4
:
return
data_list
topic_data_high_star_list
.
append
(
data
)
return
(
data_list
,
topic_data_high_star_list
)
def
elasticsearch_bulk_insert_data
(
self
,
sub_index_name
,
data_list
,
es
=
None
):
def
elasticsearch_bulk_insert_data
(
self
,
sub_index_name
,
data_list
,
es
=
None
):
...
@@ -165,10 +162,10 @@ class TypeInfo(object):
...
@@ -165,10 +162,10 @@ class TypeInfo(object):
# traceback.print_exc()
# traceback.print_exc()
# es_result = 'error'
# es_result = 'error'
return
ESPerform
.
es_helpers_bulk
(
es
,
data_list
,
sub_index_name
)
return
ESPerform
.
es_helpers_bulk
(
es
,
data_list
,
sub_index_name
,
True
)
def
elasticsearch_bulk_insert
(
self
,
sub_index_name
,
instance_iterable
,
es
=
None
):
def
elasticsearch_bulk_insert
(
self
,
sub_index_name
,
instance_iterable
,
es
=
None
):
data_list
,
topic_data_high_star_list
=
self
.
bulk_get_data
(
instance_iterable
)
data_list
=
self
.
bulk_get_data
(
instance_iterable
)
return
self
.
elasticsearch_bulk_insert_data
(
return
self
.
elasticsearch_bulk_insert_data
(
sub_index_name
=
sub_index_name
,
sub_index_name
=
sub_index_name
,
data_list
=
data_list
,
data_list
=
data_list
,
...
@@ -191,24 +188,19 @@ class TypeInfo(object):
...
@@ -191,24 +188,19 @@ class TypeInfo(object):
time1
=
end
-
begin
time1
=
end
-
begin
begin
=
time
.
time
()
begin
=
time
.
time
()
data_list
,
topic_data_high_star_list
=
self
.
bulk_get_data
(
instance_list
)
data_list
=
self
.
bulk_get_data
(
instance_list
)
end
=
time
.
time
()
end
=
time
.
time
()
time2
=
end
-
begin
time2
=
end
-
begin
begin
=
time
.
time
()
begin
=
time
.
time
()
logging
.
info
(
"get sub_index_name:
%
s"
%
sub_index_name
)
logging
.
info
(
"get data_list:
%
s"
%
data_list
)
self
.
elasticsearch_bulk_insert_data
(
self
.
elasticsearch_bulk_insert_data
(
sub_index_name
=
sub_index_name
,
sub_index_name
=
sub_index_name
,
data_list
=
data_list
,
data_list
=
data_list
,
es
=
es
,
es
=
es
,
)
)
# 同时写4星及以上的帖子
if
len
(
topic_data_high_star_list
)
>
0
:
self
.
elasticsearch_bulk_insert_data
(
sub_index_name
=
"topic-high-star"
,
data_list
=
topic_data_high_star_list
,
es
=
es
,
)
end
=
time
.
time
()
end
=
time
.
time
()
time3
=
end
-
begin
time3
=
end
-
begin
logging
.
info
(
"duan add,insert_table_by_pk_list time cost:
%
ds,
%
ds,
%
ds,
%
ds"
%
(
time0
,
time1
,
time2
,
time3
))
logging
.
info
(
"duan add,insert_table_by_pk_list time cost:
%
ds,
%
ds,
%
ds,
%
ds"
%
(
time0
,
time1
,
time2
,
time3
))
...
@@ -223,7 +215,7 @@ class TypeInfo(object):
...
@@ -223,7 +215,7 @@ class TypeInfo(object):
stage_1_time
=
time
.
time
()
stage_1_time
=
time
.
time
()
data_list
,
topic_data_high_star_list
=
self
.
bulk_get_data
(
instance_list
)
data_list
=
self
.
bulk_get_data
(
instance_list
)
stage_2_time
=
time
.
time
()
stage_2_time
=
time
.
time
()
...
@@ -268,9 +260,10 @@ def get_type_info_map():
...
@@ -268,9 +260,10 @@ def get_type_info_map():
return
_get_type_info_map_result
return
_get_type_info_map_result
type_info_list
=
[
type_info_list
=
[
TypeInfo
(
TypeInfo
(
name
=
'topic
-high-star'
,
# >=4星
日记
name
=
'topic
'
,
#
日记
type
=
'topic
-high-star
'
,
type
=
'topic'
,
model
=
topic
.
Topic
,
model
=
topic
.
Topic
,
query_deferred
=
lambda
:
topic
.
Topic
.
objects
.
all
()
.
query
,
query_deferred
=
lambda
:
topic
.
Topic
.
objects
.
all
()
.
query
,
get_data_func
=
TopicTransfer
.
get_topic_data
,
get_data_func
=
TopicTransfer
.
get_topic_data
,
...
@@ -279,8 +272,8 @@ def get_type_info_map():
...
@@ -279,8 +272,8 @@ def get_type_info_map():
round_insert_period
=
2
,
round_insert_period
=
2
,
),
),
TypeInfo
(
TypeInfo
(
name
=
'topic'
,
# 日记
name
=
'topic
-v1
'
,
# 日记
type
=
'topic'
,
type
=
'topic
-v1
'
,
model
=
topic
.
Topic
,
model
=
topic
.
Topic
,
query_deferred
=
lambda
:
topic
.
Topic
.
objects
.
all
()
.
query
,
query_deferred
=
lambda
:
topic
.
Topic
.
objects
.
all
()
.
query
,
get_data_func
=
TopicTransfer
.
get_topic_data
,
get_data_func
=
TopicTransfer
.
get_topic_data
,
...
@@ -352,7 +345,18 @@ def get_type_info_map():
...
@@ -352,7 +345,18 @@ def get_type_info_map():
bulk_insert_chunk_size
=
100
,
bulk_insert_chunk_size
=
100
,
round_insert_chunk_size
=
5
,
round_insert_chunk_size
=
5
,
round_insert_period
=
2
round_insert_period
=
2
),
TypeInfo
(
name
=
"pictorial"
,
# 画报
type
=
"pictorial"
,
model
=
pictorial
.
Pictorial
,
query_deferred
=
lambda
:
pictorial
.
Pictorial
.
objects
.
all
()
.
query
,
get_data_func
=
PictorialTransfer
.
get_poctorial_data
,
bulk_insert_chunk_size
=
100
,
round_insert_chunk_size
=
5
,
round_insert_period
=
2
,
)
)
]
]
type_info_map
=
{
type_info_map
=
{
...
@@ -362,3 +366,4 @@ def get_type_info_map():
...
@@ -362,3 +366,4 @@ def get_type_info_map():
_get_type_info_map_result
=
type_info_map
_get_type_info_map_result
=
type_info_map
return
type_info_map
return
type_info_map
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment