alpha / physical · Commits

Commit 2010fc66, authored May 31, 2019 by lixiaofang

Merge hotword (合并hotword)

Parents: 3251c6a9, 414af3cf
Showing 3 changed files with 48 additions and 47 deletions:

    libs/es.py                        +16  -17
    trans2es/type_info.py             +17  -14
    trans2es/utils/tag_transfer.py    +15  -16
libs/es.py
@@ -155,9 +155,9 @@ class ESPerform(object):
         bulk_actions = []
-        if sub_index_name == "topic" or \
-                sub_index_name == "topic-star-routing" or \
-                sub_index_name == "topic-high-star":
+        if sub_index_name == "topic" or \
+                sub_index_name == "topic-star-routing" or \
+                sub_index_name == "topic-high-star":
             for data in data_list:
                 if data:
                     bulk_actions.append({
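For context, actions accumulated in bulk_actions like this are normally flushed through the Elasticsearch bulk helper. A minimal sketch, assuming elasticsearch-py with placeholder host, id and document values (only the index name comes from this diff):

    from elasticsearch import Elasticsearch, helpers

    es = Elasticsearch(["127.0.0.1:9200"])              # placeholder host
    bulk_actions = [{
        "_op_type": "index",                            # index (upsert) the document
        "_index": "topic-high-star",                    # index name taken from the diff
        "_type": "_doc",                                # doc type is an assumption (ES 6.x style)
        "_id": 1,                                       # placeholder document id
        "_source": {"id": 1, "content_level": 6},       # placeholder body
    }]
    helpers.bulk(es, bulk_actions)                      # send all accumulated actions in one request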
@@ -232,7 +232,7 @@ class ESPerform(object):
             return {"total_count": 0, "hits": []}

     @classmethod
-    def get_analyze_results(cls, es_cli, sub_index_name, query_body):
+    def get_analyze_results(cls, es_cli, sub_index_name, query_body):
         try:
             assert (es_cli is not None)
@@ -242,7 +242,7 @@ class ESPerform(object):
                 logging.error("index:%s is not existing,get_search_results error!" % official_index_name)
                 return None
-            res = es_cli.indices.analyze(index=official_index_name, body=query_body)
+            res = es_cli.indices.analyze(index=official_index_name, body=query_body)
             return res
         except:
             logging.error("catch exception,err_msg:%s" % traceback.format_exc())
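get_analyze_results is a thin wrapper over the standard analyze API; a minimal usage sketch (the analyzer name and sample text are placeholders, not from this commit):

    query_body = {"analyzer": "ik_smart", "text": "hotword sample"}   # analyzer name is an assumption
    res = es_cli.indices.analyze(index=official_index_name, body=query_body)
    tokens = [t["token"] for t in res["tokens"]]        # the analyze response carries a "tokens" list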
@@ -276,7 +276,7 @@ class ESPerform(object):
         if high_num > 3:
             logging.info("check es_nodes_load high,cpu load:%s,ori_cpu_info:%s" % (
-                str(es_nodes_list), str(es_nodes_info_list)))
+                str(es_nodes_list), str(es_nodes_info_list)))
             return True
         else:
             return False
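The load check above compares per-node CPU figures against a threshold; one way such figures can be gathered is the nodes-stats API. A sketch under that assumption (the commit does not show how es_nodes_list is actually built):

    stats = es_cli.nodes.stats(metric="os")
    es_nodes_list = [node["os"]["cpu"]["percent"]            # OS-level CPU percent reported per node
                     for node in stats["nodes"].values()]
    high_num = sum(1 for cpu in es_nodes_list if cpu > 80)   # the threshold 80 is illustrative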
@@ -298,8 +298,8 @@ class ESPerform(object):
             functions_list += [
                 {
                     "filter": {
-                        "constant_score": {
-                            "filter": {
+                        "constant_score":{
+                            "filter":{
                                 "term": {"content_level": 6}}
                         }
                     },
@@ -307,8 +307,8 @@ class ESPerform(object):
                 },
                 {
                     "filter": {
-                        "constant_score": {
-                            "filter": {
+                        "constant_score":{
+                            "filter":{
                                 "term": {"content_level": 5}}
                         }
                     },
@@ -316,8 +316,8 @@ class ESPerform(object):
                 },
                 {
                     "filter": {
-                        "constant_score": {
-                            "filter": {
+                        "constant_score":{
+                            "filter":{
                                 "term": {"content_level": 4}}
                         }
                     },
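The three filter entries above are appended to a functions_list, which is the shape expected by a function_score query. A simplified sketch of how such a body fits together, with illustrative weights and the constant_score nesting flattened to plain term filters (neither detail is taken from the commit):

    query_body = {
        "query": {
            "function_score": {
                "query": {"match_all": {}},
                "functions": [
                    {"filter": {"term": {"content_level": 6}}, "weight": 3},
                    {"filter": {"term": {"content_level": 5}}, "weight": 2},
                    {"filter": {"term": {"content_level": 4}}, "weight": 1},
                ],
                "score_mode": "max",     # take the highest matching function score
                "boost_mode": "sum",     # add it to the query score
            }
        }
    }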
@@ -411,7 +411,7 @@ class ESPerform(object):
                     }
                 },
                 "_source": {
-                    "include": ["id", "user_id"]
+                    "include": ["id", "user_id"]
                 },
                 "sort": [
                     {"_score": {"order": "desc"}},
@@ -420,7 +420,7 @@ class ESPerform(object):
                 ],
                 "collapse": {
                     "field": "user_id"
                 }
             }
         }
         if len(have_read_topic_id_list) > 0:
@@ -429,8 +429,7 @@ class ESPerform(object):
                             "id": have_read_topic_id_list
                         }
                     }
-            result_dict = ESPerform.get_search_results(ESPerform.get_cli(), sub_index_name="topic-high-star", query_body=q,
+            result_dict = ESPerform.get_search_results(ESPerform.get_cli(), sub_index_name="topic-high-star", query_body=q,
                                                        offset=0, size=size, routing="6")
             topic_id_list = [item["_source"]["id"] for item in result_dict["hits"]]
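Pulling the previous hunks together: the query restricts _source to id and user_id, sorts by score, collapses on user_id, and the search is routed to shard routing key "6". A condensed sketch of the assembled request; the bool/must_not wrapper around the already-read ids is an assumption, the rest mirrors the diff:

    q = {
        "query": {"bool": {"must_not": [
            {"terms": {"id": have_read_topic_id_list}}   # shape of the exclusion clause is assumed
        ]}},
        "_source": {"include": ["id", "user_id"]},
        "sort": [{"_score": {"order": "desc"}}],
        "collapse": {"field": "user_id"},                # keep at most one hit per user
    }
    result_dict = ESPerform.get_search_results(
        ESPerform.get_cli(), sub_index_name="topic-high-star",
        query_body=q, offset=0, size=size, routing="6")
    topic_id_list = [item["_source"]["id"] for item in result_dict["hits"]]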
@@ -442,7 +441,7 @@ class ESPerform(object):
             logging.info("topic_id_list:%s" % str(topic_id_dict))
-            return topic_id_list, topic_id_dict
+            return topic_id_list, topic_id_dict
         except:
             logging.error("catch exception,err_msg:%s" % traceback.format_exc())
             return list()
trans2es/type_info.py
@@ -14,7 +14,7 @@ import sys
 from libs.cache import redis_client
 import copy
-from trans2es.models import topic, user, pick_celebrity, group, celebrity, tag, contrast_similar, pictorial
+from trans2es.models import topic, user, pick_celebrity, group, celebrity, tag, contrast_similar, pictorial
 from trans2es.utils.user_transfer import UserTransfer
 from trans2es.utils.pick_celebrity_transfer import PickCelebrityTransfer
 from trans2es.utils.group_transfer import GroupTransfer
@@ -196,7 +196,7 @@ class TypeInfo(object):
                     old_data["is_history"] = True
                     data_list.append(old_data)
-                    if int_ori_topic_star >= 4:
+                    if int_ori_topic_star >= 4:
                         topic_data_high_star_list.append(old_data)
                 redis_client.hset(self.physical_topic_star, data["id"], data["content_level"])
                 # data_list = [
@@ -207,11 +207,12 @@ class TypeInfo(object):
             #     ]
             # ESPerform.es_helpers_bulk(ESPerform.get_cli(), data_list, "topic-star")
-            if data["content_level"] and int(data["content_level"]) >= 4:
+            if data["content_level"] and int(data["content_level"]) >= 4:
                 topic_data_high_star_list.append(data)
             data_list.append(data)
-        return (data_list, topic_data_high_star_list)
+        return (data_list, topic_data_high_star_list)

     def elasticsearch_bulk_insert_data(self, sub_index_name, data_list, es=None):
@@ -254,17 +255,17 @@ class TypeInfo(object):
         else:
             qs = self.model.objects.all()
         end = time.time()
-        time0 = end - begin
+        time0 = end - begin
         begin = time.time()
         instance_list = qs.filter(pk__in=pk_list)
         end = time.time()
-        time1 = end - begin
+        time1 = end - begin
         begin = time.time()
         data_list, topic_data_high_star_list = self.bulk_get_data(instance_list)
         end = time.time()
-        time2 = end - begin
+        time2 = end - begin
         begin = time.time()
         # logging.info("get sub_index_name:%s"%sub_index_name)
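The repeated end - begin bookkeeping around each stage could also be written with a small context manager; a self-contained sketch of that alternative pattern (not what the commit does, the workloads are stand-ins):

    import time
    from contextlib import contextmanager

    @contextmanager
    def timed(label, timings):
        begin = time.time()
        yield
        timings[label] = time.time() - begin     # elapsed seconds recorded under the label

    timings = {}
    with timed("time1", timings):
        sum(range(10 ** 6))                      # stand-in for qs.filter(pk__in=pk_list)
    with timed("time2", timings):
        sorted(range(10 ** 6), reverse=True)     # stand-in for self.bulk_get_data(instance_list)
    print(timings)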
@@ -276,7 +277,7 @@ class TypeInfo(object):
                 es=es,
             )
-            if sub_index_name == "topic":
+            if sub_index_name == "topic":
                 self.elasticsearch_bulk_insert_data(
                     sub_index_name="topic-star-routing",
                     data_list=data_list,
@@ -284,7 +285,7 @@ class TypeInfo(object):
                 )
             # 同时写4星及以上的帖子 (also write topics rated 4 stars and above)
-            if len(topic_data_high_star_list) > 0:
+            if len(topic_data_high_star_list) > 0:
                 self.elasticsearch_bulk_insert_data(
                     sub_index_name="topic-high-star",
                     data_list=topic_data_high_star_list,
@@ -292,8 +293,9 @@ class TypeInfo(object):
             )
             end = time.time()
-            time3 = end - begin
-            logging.info("duan add,insert_table_by_pk_list time cost:%ds,%ds,%ds,%ds" % (time0, time1, time2, time3))
+            time3 = end - begin
+            logging.info("duan add,insert_table_by_pk_list time cost:%ds,%ds,%ds,%ds" % (time0, time1, time2, time3))

     def insert_table_chunk(self, sub_index_name, table_chunk, es=None):
         try:
@@ -315,7 +317,7 @@ class TypeInfo(object):
                 auto_create_index=True)
-            logging.info("es_helpers_bulk,sub_index_name:%s,data_list len:%d" % (sub_index_name, len(data_list)))
+            logging.info("es_helpers_bulk,sub_index_name:%s,data_list len:%d" % (sub_index_name, len(data_list)))
             stage_3_time = time.time()
             end_clock = time.clock()
@@ -353,8 +355,8 @@ def get_type_info_map():
             name='topic-star',
             type='topic-star',
             model=topic.Topic,
-            query_deferred=lambda: topic.Topic.objects.all().query,  # 假的 (fake/placeholder)
-            get_data_func=TopicTransfer.get_topic_data,  # 假的 (fake/placeholder)
+            query_deferred=lambda: topic.Topic.objects.all().query,  # 假的 (fake/placeholder)
+            get_data_func=TopicTransfer.get_topic_data,  # 假的 (fake/placeholder)
             bulk_insert_chunk_size=100,
             round_insert_chunk_size=5,
             round_insert_period=2,
@@ -523,3 +525,4 @@ def get_type_info_map():
     _get_type_info_map_result = type_info_map
     return type_info_map
trans2es/utils/tag_transfer.py
@@ -8,15 +8,16 @@ import logging
 import traceback
 from libs.tools import tzlc
 from trans2es.models.topic import Topic
-from trans2es.models.tag import TopicTag, CommunityTagType, CommunityTagTypeRelation
+from trans2es.models.tag import TopicTag, CommunityTagType, CommunityTagTypeRelation
 import datetime
 from django.conf import settings


 class TagTransfer(object):

     @classmethod
-    def get_tag_name_data(cls, instance):
+    def get_tag_name_data(cls, instance):
         try:
             res = dict()
             res["name"] = instance.name
@@ -27,7 +28,7 @@ class TagTransfer(object):
             return dict()

     @classmethod
-    def get_tag_data(cls, instance):
+    def get_tag_data(cls, instance):
         try:
             res = dict()
@@ -35,14 +36,14 @@ class TagTransfer(object):
             tag_name_terms_list = list()
             for i in range(len(instance.name)):
-                for j in range(i, len(instance.name) + 1):
+                for j in range(i, len(instance.name) + 1):
                     name_term = instance.name[i:j].strip()
                     if name_term:
                         tag_name_terms_list.append(name_term.lower())

             res["suggest"] = {
-                "input": tag_name_terms_list,
-                "contexts": {
+                "input": tag_name_terms_list,
+                "contexts":{
                     "is_online": [instance.is_online],
                     "is_deleted": [instance.is_deleted]
                 }
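A suggest payload with is_online / is_deleted contexts only indexes if the mapping declares a completion field with matching category contexts; the sketch below shows such a mapping fragment (an assumption, not shown in the commit) and, for a short tag name, what the substring loop above feeds into "input":

    suggest_mapping = {
        "suggest": {
            "type": "completion",
            "contexts": [
                {"name": "is_online", "type": "category"},
                {"name": "is_deleted", "type": "category"},
            ],
        }
    }

    name = "口红"                                 # illustrative tag name
    terms = []
    for i in range(len(name)):
        for j in range(i, len(name) + 1):
            term = name[i:j].strip()
            if term:
                terms.append(term.lower())
    # terms == ['口', '口红', '红']: every non-empty contiguous substring becomes a suggest input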
@@ -53,27 +54,24 @@ class TagTransfer(object):
             topic_num = 0
             res["near_new_topic_num"] = topic_num
-            if instance.is_online == True and instance.is_deleted == False:
+            if instance.is_online == True and instance.is_deleted == False:
                 topic_id_list = list()
                 sql_result_results = list(TopicTag.objects.using(settings.SLAVE_DB_NAME).filter(tag_id=instance.id).values_list("topic_id", "is_online"))
-                for topic_id, is_online in sql_result_results:
+                for topic_id, is_online in sql_result_results:
                     if is_online:
                         topic_id_list.append(topic_id)
-                time_base_val = datetime.datetime.strftime(datetime.datetime.now() + datetime.timedelta(-7), "%Y-%m-%d")
+                time_base_val = datetime.datetime.strftime(datetime.datetime.now() + datetime.timedelta(-7), "%Y-%m-%d")
-                for topic_begin_index in range(0, len(topic_id_list), 100):
-                    cur_topic_num = Topic.objects.using(settings.SLAVE_DB_NAME).filter(id__in=topic_id_list[topic_begin_index:topic_begin_index + 100], create_time__gte=time_base_val).count()
+                for topic_begin_index in range(0, len(topic_id_list), 100):
+                    cur_topic_num = Topic.objects.using(settings.SLAVE_DB_NAME).filter(id__in=topic_id_list[topic_begin_index:topic_begin_index + 100], create_time__gte=time_base_val).count()
                     topic_num += cur_topic_num
                 res["near_new_topic_num"] = topic_num
-            tag_type_sql_list = CommunityTagTypeRelation.objects.using(settings.SLAVE_DB_NAME).filter(tag_id=instance.id).values_list("tag_type_id", flat=True)
+            tag_type_sql_list = CommunityTagTypeRelation.objects.using(settings.SLAVE_DB_NAME).filter(tag_id=instance.id).values_list("tag_type_id", flat=True)
             tag_type_list = list()
             for tag_type_id in tag_type_sql_list:
                 tag_type_list.append(tag_type_id)
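near_new_topic_num counts topics created in the last seven days, walking the id list 100 at a time so each IN clause stays bounded; a small worked example of the cutoff and the batching (the ids are placeholders):

    import datetime

    # datetime.timedelta(-7) is minus seven days, so time_base_val is the date one week ago;
    # e.g. run on 2019-05-31 this yields "2019-05-24"
    time_base_val = datetime.datetime.strftime(
        datetime.datetime.now() + datetime.timedelta(-7), "%Y-%m-%d")

    topic_id_list = list(range(1, 251))           # placeholder ids -> batches of 100, 100, 50
    for topic_begin_index in range(0, len(topic_id_list), 100):
        batch = topic_id_list[topic_begin_index:topic_begin_index + 100]
        # each batch feeds one .filter(id__in=batch, create_time__gte=time_base_val).count() call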
@@ -85,4 +83,4 @@ class TagTransfer(object):
             return res
         except:
             logging.error("catch exception,err_msg:%s" % traceback.format_exc())
-            return dict()
+            return dict()
\ No newline at end of file