Commit 2010fc66 authored by lixiaofang

合并hotword

parents 3251c6a9 414af3cf
...@@ -155,9 +155,9 @@ class ESPerform(object): ...@@ -155,9 +155,9 @@ class ESPerform(object):
bulk_actions = [] bulk_actions = []
if sub_index_name == "topic" or \ if sub_index_name=="topic" or \
sub_index_name == "topic-star-routing" or \ sub_index_name=="topic-star-routing" or \
sub_index_name == "topic-high-star": sub_index_name=="topic-high-star":
for data in data_list: for data in data_list:
if data: if data:
bulk_actions.append({ bulk_actions.append({
...@@ -232,7 +232,7 @@ class ESPerform(object): ...@@ -232,7 +232,7 @@ class ESPerform(object):
return {"total_count": 0, "hits": []} return {"total_count": 0, "hits": []}
@classmethod @classmethod
def get_analyze_results(cls, es_cli, sub_index_name, query_body): def get_analyze_results(cls,es_cli, sub_index_name, query_body):
try: try:
assert (es_cli is not None) assert (es_cli is not None)
...@@ -242,7 +242,7 @@ class ESPerform(object): ...@@ -242,7 +242,7 @@ class ESPerform(object):
logging.error("index:%s is not existing,get_search_results error!" % official_index_name) logging.error("index:%s is not existing,get_search_results error!" % official_index_name)
return None return None
res = es_cli.indices.analyze(index=official_index_name, body=query_body) res = es_cli.indices.analyze(index=official_index_name,body=query_body)
return res return res
except: except:
logging.error("catch exception,err_msg:%s" % traceback.format_exc()) logging.error("catch exception,err_msg:%s" % traceback.format_exc())
...@@ -276,7 +276,7 @@ class ESPerform(object): ...@@ -276,7 +276,7 @@ class ESPerform(object):
if high_num > 3: if high_num > 3:
logging.info("check es_nodes_load high,cpu load:%s,ori_cpu_info:%s" % ( logging.info("check es_nodes_load high,cpu load:%s,ori_cpu_info:%s" % (
str(es_nodes_list), str(es_nodes_info_list))) str(es_nodes_list), str(es_nodes_info_list)))
return True return True
else: else:
return False return False
...@@ -298,8 +298,8 @@ class ESPerform(object): ...@@ -298,8 +298,8 @@ class ESPerform(object):
functions_list += [ functions_list += [
{ {
"filter": { "filter": {
"constant_score": { "constant_score":{
"filter": { "filter":{
"term": {"content_level": 6}} "term": {"content_level": 6}}
} }
}, },
...@@ -307,8 +307,8 @@ class ESPerform(object): ...@@ -307,8 +307,8 @@ class ESPerform(object):
}, },
{ {
"filter": { "filter": {
"constant_score": { "constant_score":{
"filter": { "filter":{
"term": {"content_level": 5}} "term": {"content_level": 5}}
} }
}, },
...@@ -316,8 +316,8 @@ class ESPerform(object): ...@@ -316,8 +316,8 @@ class ESPerform(object):
}, },
{ {
"filter": { "filter": {
"constant_score": { "constant_score":{
"filter": { "filter":{
"term": {"content_level": 4}} "term": {"content_level": 4}}
} }
}, },
...@@ -411,7 +411,7 @@ class ESPerform(object): ...@@ -411,7 +411,7 @@ class ESPerform(object):
} }
}, },
"_source": { "_source": {
"include": ["id", "user_id"] "include": ["id","user_id"]
}, },
"sort": [ "sort": [
{"_score": {"order": "desc"}}, {"_score": {"order": "desc"}},
...@@ -420,7 +420,7 @@ class ESPerform(object): ...@@ -420,7 +420,7 @@ class ESPerform(object):
], ],
"collapse": { "collapse": {
"field": "user_id" "field": "user_id"
} }
} }
if len(have_read_topic_id_list) > 0: if len(have_read_topic_id_list) > 0:
...@@ -429,8 +429,7 @@ class ESPerform(object): ...@@ -429,8 +429,7 @@ class ESPerform(object):
"id": have_read_topic_id_list "id": have_read_topic_id_list
} }
} }
result_dict = ESPerform.get_search_results(ESPerform.get_cli(), sub_index_name="topic-high-star", result_dict = ESPerform.get_search_results(ESPerform.get_cli(), sub_index_name="topic-high-star", query_body=q,
query_body=q,
offset=0, size=size, routing="6") offset=0, size=size, routing="6")
topic_id_list = [item["_source"]["id"] for item in result_dict["hits"]] topic_id_list = [item["_source"]["id"] for item in result_dict["hits"]]
...@@ -442,7 +441,7 @@ class ESPerform(object): ...@@ -442,7 +441,7 @@ class ESPerform(object):
logging.info("topic_id_list:%s" % str(topic_id_dict)) logging.info("topic_id_list:%s" % str(topic_id_dict))
return topic_id_list, topic_id_dict return topic_id_list,topic_id_dict
except: except:
logging.error("catch exception,err_msg:%s" % traceback.format_exc()) logging.error("catch exception,err_msg:%s" % traceback.format_exc())
return list() return list()
...@@ -14,7 +14,7 @@ import sys ...@@ -14,7 +14,7 @@ import sys
from libs.cache import redis_client from libs.cache import redis_client
import copy import copy
from trans2es.models import topic, user, pick_celebrity, group, celebrity, tag, contrast_similar, pictorial from trans2es.models import topic, user, pick_celebrity, group, celebrity, tag, contrast_similar,pictorial
from trans2es.utils.user_transfer import UserTransfer from trans2es.utils.user_transfer import UserTransfer
from trans2es.utils.pick_celebrity_transfer import PickCelebrityTransfer from trans2es.utils.pick_celebrity_transfer import PickCelebrityTransfer
from trans2es.utils.group_transfer import GroupTransfer from trans2es.utils.group_transfer import GroupTransfer
...@@ -196,7 +196,7 @@ class TypeInfo(object): ...@@ -196,7 +196,7 @@ class TypeInfo(object):
old_data["is_history"] = True old_data["is_history"] = True
data_list.append(old_data) data_list.append(old_data)
if int_ori_topic_star >= 4: if int_ori_topic_star>=4:
topic_data_high_star_list.append(old_data) topic_data_high_star_list.append(old_data)
redis_client.hset(self.physical_topic_star, data["id"], data["content_level"]) redis_client.hset(self.physical_topic_star, data["id"], data["content_level"])
# data_list = [ # data_list = [
...@@ -207,11 +207,12 @@ class TypeInfo(object): ...@@ -207,11 +207,12 @@ class TypeInfo(object):
# ] # ]
# ESPerform.es_helpers_bulk(ESPerform.get_cli(), data_list, "topic-star") # ESPerform.es_helpers_bulk(ESPerform.get_cli(), data_list, "topic-star")
if data["content_level"] and int(data["content_level"]) >= 4:
if data["content_level"] and int(data["content_level"])>=4:
topic_data_high_star_list.append(data) topic_data_high_star_list.append(data)
data_list.append(data) data_list.append(data)
return (data_list, topic_data_high_star_list) return (data_list,topic_data_high_star_list)
def elasticsearch_bulk_insert_data(self, sub_index_name, data_list, es=None): def elasticsearch_bulk_insert_data(self, sub_index_name, data_list, es=None):
...@@ -254,17 +255,17 @@ class TypeInfo(object): ...@@ -254,17 +255,17 @@ class TypeInfo(object):
else: else:
qs = self.model.objects.all() qs = self.model.objects.all()
end = time.time() end = time.time()
time0 = end - begin time0=end-begin
begin = time.time() begin = time.time()
instance_list = qs.filter(pk__in=pk_list) instance_list = qs.filter(pk__in=pk_list)
end = time.time() end = time.time()
time1 = end - begin time1=end-begin
begin = time.time() begin = time.time()
data_list, topic_data_high_star_list = self.bulk_get_data(instance_list) data_list, topic_data_high_star_list = self.bulk_get_data(instance_list)
end = time.time() end = time.time()
time2 = end - begin time2=end-begin
begin = time.time() begin = time.time()
# logging.info("get sub_index_name:%s"%sub_index_name) # logging.info("get sub_index_name:%s"%sub_index_name)
...@@ -276,7 +277,7 @@ class TypeInfo(object): ...@@ -276,7 +277,7 @@ class TypeInfo(object):
es=es, es=es,
) )
if sub_index_name == "topic": if sub_index_name=="topic":
self.elasticsearch_bulk_insert_data( self.elasticsearch_bulk_insert_data(
sub_index_name="topic-star-routing", sub_index_name="topic-star-routing",
data_list=data_list, data_list=data_list,
...@@ -284,7 +285,7 @@ class TypeInfo(object): ...@@ -284,7 +285,7 @@ class TypeInfo(object):
) )
# 同时写4星及以上的帖子 # 同时写4星及以上的帖子
if len(topic_data_high_star_list) > 0: if len(topic_data_high_star_list)>0:
self.elasticsearch_bulk_insert_data( self.elasticsearch_bulk_insert_data(
sub_index_name="topic-high-star", sub_index_name="topic-high-star",
data_list=topic_data_high_star_list, data_list=topic_data_high_star_list,
...@@ -292,8 +293,9 @@ class TypeInfo(object): ...@@ -292,8 +293,9 @@ class TypeInfo(object):
) )
end = time.time() end = time.time()
time3 = end - begin time3=end-begin
logging.info("duan add,insert_table_by_pk_list time cost:%ds,%ds,%ds,%ds" % (time0, time1, time2, time3)) logging.info("duan add,insert_table_by_pk_list time cost:%ds,%ds,%ds,%ds" % (time0,time1,time2,time3))
def insert_table_chunk(self, sub_index_name, table_chunk, es=None): def insert_table_chunk(self, sub_index_name, table_chunk, es=None):
try: try:
...@@ -315,7 +317,7 @@ class TypeInfo(object): ...@@ -315,7 +317,7 @@ class TypeInfo(object):
auto_create_index=True auto_create_index=True
) )
logging.info("es_helpers_bulk,sub_index_name:%s,data_list len:%d" % (sub_index_name, len(data_list))) logging.info("es_helpers_bulk,sub_index_name:%s,data_list len:%d" % (sub_index_name,len(data_list)))
stage_3_time = time.time() stage_3_time = time.time()
end_clock = time.clock() end_clock = time.clock()
...@@ -353,8 +355,8 @@ def get_type_info_map(): ...@@ -353,8 +355,8 @@ def get_type_info_map():
name='topic-star', name='topic-star',
type='topic-star', type='topic-star',
model=topic.Topic, model=topic.Topic,
query_deferred=lambda: topic.Topic.objects.all().query, # 假的 query_deferred=lambda: topic.Topic.objects.all().query,#假的
get_data_func=TopicTransfer.get_topic_data, # 假的 get_data_func=TopicTransfer.get_topic_data,#假的
bulk_insert_chunk_size=100, bulk_insert_chunk_size=100,
round_insert_chunk_size=5, round_insert_chunk_size=5,
round_insert_period=2, round_insert_period=2,
...@@ -523,3 +525,4 @@ def get_type_info_map(): ...@@ -523,3 +525,4 @@ def get_type_info_map():
_get_type_info_map_result = type_info_map _get_type_info_map_result = type_info_map
return type_info_map return type_info_map
...@@ -8,15 +8,16 @@ import logging ...@@ -8,15 +8,16 @@ import logging
import traceback import traceback
from libs.tools import tzlc from libs.tools import tzlc
from trans2es.models.topic import Topic from trans2es.models.topic import Topic
from trans2es.models.tag import TopicTag, CommunityTagType, CommunityTagTypeRelation from trans2es.models.tag import TopicTag,CommunityTagType,CommunityTagTypeRelation
import datetime import datetime
from django.conf import settings from django.conf import settings
class TagTransfer(object): class TagTransfer(object):
@classmethod @classmethod
def get_tag_name_data(cls, instance): def get_tag_name_data(cls,instance):
try: try:
res = dict() res = dict()
res["name"] = instance.name res["name"] = instance.name
...@@ -27,7 +28,7 @@ class TagTransfer(object): ...@@ -27,7 +28,7 @@ class TagTransfer(object):
return dict() return dict()
@classmethod @classmethod
def get_tag_data(cls, instance): def get_tag_data(cls,instance):
try: try:
res = dict() res = dict()
...@@ -35,14 +36,14 @@ class TagTransfer(object): ...@@ -35,14 +36,14 @@ class TagTransfer(object):
tag_name_terms_list = list() tag_name_terms_list = list()
for i in range(len(instance.name)): for i in range(len(instance.name)):
for j in range(i, len(instance.name) + 1): for j in range(i,len(instance.name)+1):
name_term = instance.name[i:j].strip() name_term = instance.name[i:j].strip()
if name_term: if name_term:
tag_name_terms_list.append(name_term.lower()) tag_name_terms_list.append(name_term.lower())
res["suggest"] = { res["suggest"] = {
"input": tag_name_terms_list, "input":tag_name_terms_list,
"contexts": { "contexts":{
"is_online": [instance.is_online], "is_online": [instance.is_online],
"is_deleted": [instance.is_deleted] "is_deleted": [instance.is_deleted]
} }
...@@ -53,27 +54,24 @@ class TagTransfer(object): ...@@ -53,27 +54,24 @@ class TagTransfer(object):
topic_num = 0 topic_num = 0
res["near_new_topic_num"] = topic_num res["near_new_topic_num"] = topic_num
if instance.is_online == True and instance.is_deleted == False: if instance.is_online==True and instance.is_deleted==False:
topic_id_list = list() topic_id_list = list()
sql_result_results = list(TopicTag.objects.using(settings.SLAVE_DB_NAME).filter( sql_result_results = list(TopicTag.objects.using(settings.SLAVE_DB_NAME).filter(
tag_id=instance.id).values_list("topic_id", "is_online")) tag_id=instance.id).values_list("topic_id", "is_online"))
for topic_id, is_online in sql_result_results: for topic_id,is_online in sql_result_results:
if is_online: if is_online:
topic_id_list.append(topic_id) topic_id_list.append(topic_id)
time_base_val = datetime.datetime.strftime(datetime.datetime.now() + datetime.timedelta(-7), "%Y-%m-%d") time_base_val = datetime.datetime.strftime(datetime.datetime.now()+datetime.timedelta(-7), "%Y-%m-%d")
for topic_begin_index in range(0, len(topic_id_list), 100): for topic_begin_index in range(0,len(topic_id_list),100):
cur_topic_num = Topic.objects.using(settings.SLAVE_DB_NAME).filter( cur_topic_num = Topic.objects.using(settings.SLAVE_DB_NAME).filter(id__in=topic_id_list[topic_begin_index:topic_begin_index+100],create_time__gte=time_base_val).count()
id__in=topic_id_list[topic_begin_index:topic_begin_index + 100],
create_time__gte=time_base_val).count()
topic_num += cur_topic_num topic_num += cur_topic_num
res["near_new_topic_num"] = topic_num res["near_new_topic_num"] = topic_num
tag_type_sql_list = CommunityTagTypeRelation.objects.using(settings.SLAVE_DB_NAME).filter( tag_type_sql_list = CommunityTagTypeRelation.objects.using(settings.SLAVE_DB_NAME).filter(tag_id=instance.id).values_list("tag_type_id",flat=True)
tag_id=instance.id).values_list("tag_type_id", flat=True)
tag_type_list = list() tag_type_list = list()
for tag_type_id in tag_type_sql_list: for tag_type_id in tag_type_sql_list:
tag_type_list.append(tag_type_id) tag_type_list.append(tag_type_id)
...@@ -85,4 +83,4 @@ class TagTransfer(object): ...@@ -85,4 +83,4 @@ class TagTransfer(object):
return res return res
except: except:
logging.error("catch exception,err_msg:%s" % traceback.format_exc()) logging.error("catch exception,err_msg:%s" % traceback.format_exc())
return dict() return dict()
\ No newline at end of file
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment