Commit 2010fc66 authored by lixiaofang

合并hotword

parents 3251c6a9 414af3cf
......@@ -155,9 +155,9 @@ class ESPerform(object):
bulk_actions = []
if sub_index_name == "topic" or \
sub_index_name == "topic-star-routing" or \
sub_index_name == "topic-high-star":
if sub_index_name=="topic" or \
sub_index_name=="topic-star-routing" or \
sub_index_name=="topic-high-star":
for data in data_list:
if data:
bulk_actions.append({
......@@ -232,7 +232,7 @@ class ESPerform(object):
return {"total_count": 0, "hits": []}
@classmethod
def get_analyze_results(cls, es_cli, sub_index_name, query_body):
def get_analyze_results(cls,es_cli, sub_index_name, query_body):
try:
assert (es_cli is not None)
......@@ -242,7 +242,7 @@ class ESPerform(object):
logging.error("index:%s is not existing,get_search_results error!" % official_index_name)
return None
res = es_cli.indices.analyze(index=official_index_name, body=query_body)
res = es_cli.indices.analyze(index=official_index_name,body=query_body)
return res
except:
logging.error("catch exception,err_msg:%s" % traceback.format_exc())
......@@ -276,7 +276,7 @@ class ESPerform(object):
if high_num > 3:
logging.info("check es_nodes_load high,cpu load:%s,ori_cpu_info:%s" % (
str(es_nodes_list), str(es_nodes_info_list)))
str(es_nodes_list), str(es_nodes_info_list)))
return True
else:
return False
......@@ -298,8 +298,8 @@ class ESPerform(object):
functions_list += [
{
"filter": {
"constant_score": {
"filter": {
"constant_score":{
"filter":{
"term": {"content_level": 6}}
}
},
......@@ -307,8 +307,8 @@ class ESPerform(object):
},
{
"filter": {
"constant_score": {
"filter": {
"constant_score":{
"filter":{
"term": {"content_level": 5}}
}
},
......@@ -316,8 +316,8 @@ class ESPerform(object):
},
{
"filter": {
"constant_score": {
"filter": {
"constant_score":{
"filter":{
"term": {"content_level": 4}}
}
},
......@@ -411,7 +411,7 @@ class ESPerform(object):
}
},
"_source": {
"include": ["id", "user_id"]
"include": ["id","user_id"]
},
"sort": [
{"_score": {"order": "desc"}},
......@@ -420,7 +420,7 @@ class ESPerform(object):
],
"collapse": {
"field": "user_id"
}
}
}
if len(have_read_topic_id_list) > 0:
......@@ -429,8 +429,7 @@ class ESPerform(object):
"id": have_read_topic_id_list
}
}
result_dict = ESPerform.get_search_results(ESPerform.get_cli(), sub_index_name="topic-high-star",
query_body=q,
result_dict = ESPerform.get_search_results(ESPerform.get_cli(), sub_index_name="topic-high-star", query_body=q,
offset=0, size=size, routing="6")
topic_id_list = [item["_source"]["id"] for item in result_dict["hits"]]
......@@ -442,7 +441,7 @@ class ESPerform(object):
logging.info("topic_id_list:%s" % str(topic_id_dict))
return topic_id_list, topic_id_dict
return topic_id_list,topic_id_dict
except:
logging.error("catch exception,err_msg:%s" % traceback.format_exc())
return list()
......@@ -14,7 +14,7 @@ import sys
from libs.cache import redis_client
import copy
from trans2es.models import topic, user, pick_celebrity, group, celebrity, tag, contrast_similar, pictorial
from trans2es.models import topic, user, pick_celebrity, group, celebrity, tag, contrast_similar,pictorial
from trans2es.utils.user_transfer import UserTransfer
from trans2es.utils.pick_celebrity_transfer import PickCelebrityTransfer
from trans2es.utils.group_transfer import GroupTransfer
......@@ -196,7 +196,7 @@ class TypeInfo(object):
old_data["is_history"] = True
data_list.append(old_data)
if int_ori_topic_star >= 4:
if int_ori_topic_star>=4:
topic_data_high_star_list.append(old_data)
redis_client.hset(self.physical_topic_star, data["id"], data["content_level"])
# data_list = [
......@@ -207,11 +207,12 @@ class TypeInfo(object):
# ]
# ESPerform.es_helpers_bulk(ESPerform.get_cli(), data_list, "topic-star")
if data["content_level"] and int(data["content_level"]) >= 4:
if data["content_level"] and int(data["content_level"])>=4:
topic_data_high_star_list.append(data)
data_list.append(data)
return (data_list, topic_data_high_star_list)
return (data_list,topic_data_high_star_list)
def elasticsearch_bulk_insert_data(self, sub_index_name, data_list, es=None):
......@@ -254,17 +255,17 @@ class TypeInfo(object):
else:
qs = self.model.objects.all()
end = time.time()
time0 = end - begin
time0=end-begin
begin = time.time()
instance_list = qs.filter(pk__in=pk_list)
end = time.time()
time1 = end - begin
time1=end-begin
begin = time.time()
data_list, topic_data_high_star_list = self.bulk_get_data(instance_list)
end = time.time()
time2 = end - begin
time2=end-begin
begin = time.time()
# logging.info("get sub_index_name:%s"%sub_index_name)
......@@ -276,7 +277,7 @@ class TypeInfo(object):
es=es,
)
if sub_index_name == "topic":
if sub_index_name=="topic":
self.elasticsearch_bulk_insert_data(
sub_index_name="topic-star-routing",
data_list=data_list,
......@@ -284,7 +285,7 @@ class TypeInfo(object):
)
# 同时写4星及以上的帖子
if len(topic_data_high_star_list) > 0:
if len(topic_data_high_star_list)>0:
self.elasticsearch_bulk_insert_data(
sub_index_name="topic-high-star",
data_list=topic_data_high_star_list,
......@@ -292,8 +293,9 @@ class TypeInfo(object):
)
end = time.time()
time3 = end - begin
logging.info("duan add,insert_table_by_pk_list time cost:%ds,%ds,%ds,%ds" % (time0, time1, time2, time3))
time3=end-begin
logging.info("duan add,insert_table_by_pk_list time cost:%ds,%ds,%ds,%ds" % (time0,time1,time2,time3))
def insert_table_chunk(self, sub_index_name, table_chunk, es=None):
try:
......@@ -315,7 +317,7 @@ class TypeInfo(object):
auto_create_index=True
)
logging.info("es_helpers_bulk,sub_index_name:%s,data_list len:%d" % (sub_index_name, len(data_list)))
logging.info("es_helpers_bulk,sub_index_name:%s,data_list len:%d" % (sub_index_name,len(data_list)))
stage_3_time = time.time()
end_clock = time.clock()
......@@ -353,8 +355,8 @@ def get_type_info_map():
name='topic-star',
type='topic-star',
model=topic.Topic,
query_deferred=lambda: topic.Topic.objects.all().query, # 假的
get_data_func=TopicTransfer.get_topic_data, # 假的
query_deferred=lambda: topic.Topic.objects.all().query,#假的
get_data_func=TopicTransfer.get_topic_data,#假的
bulk_insert_chunk_size=100,
round_insert_chunk_size=5,
round_insert_period=2,
......@@ -523,3 +525,4 @@ def get_type_info_map():
_get_type_info_map_result = type_info_map
return type_info_map
......@@ -8,15 +8,16 @@ import logging
import traceback
from libs.tools import tzlc
from trans2es.models.topic import Topic
from trans2es.models.tag import TopicTag, CommunityTagType, CommunityTagTypeRelation
from trans2es.models.tag import TopicTag,CommunityTagType,CommunityTagTypeRelation
import datetime
from django.conf import settings
class TagTransfer(object):
@classmethod
def get_tag_name_data(cls, instance):
def get_tag_name_data(cls,instance):
try:
res = dict()
res["name"] = instance.name
......@@ -27,7 +28,7 @@ class TagTransfer(object):
return dict()
@classmethod
def get_tag_data(cls, instance):
def get_tag_data(cls,instance):
try:
res = dict()
......@@ -35,14 +36,14 @@ class TagTransfer(object):
tag_name_terms_list = list()
for i in range(len(instance.name)):
for j in range(i, len(instance.name) + 1):
for j in range(i,len(instance.name)+1):
name_term = instance.name[i:j].strip()
if name_term:
tag_name_terms_list.append(name_term.lower())
res["suggest"] = {
"input": tag_name_terms_list,
"contexts": {
"input":tag_name_terms_list,
"contexts":{
"is_online": [instance.is_online],
"is_deleted": [instance.is_deleted]
}
......@@ -53,27 +54,24 @@ class TagTransfer(object):
topic_num = 0
res["near_new_topic_num"] = topic_num
if instance.is_online == True and instance.is_deleted == False:
if instance.is_online==True and instance.is_deleted==False:
topic_id_list = list()
sql_result_results = list(TopicTag.objects.using(settings.SLAVE_DB_NAME).filter(
tag_id=instance.id).values_list("topic_id", "is_online"))
for topic_id, is_online in sql_result_results:
for topic_id,is_online in sql_result_results:
if is_online:
topic_id_list.append(topic_id)
time_base_val = datetime.datetime.strftime(datetime.datetime.now() + datetime.timedelta(-7), "%Y-%m-%d")
time_base_val = datetime.datetime.strftime(datetime.datetime.now()+datetime.timedelta(-7), "%Y-%m-%d")
for topic_begin_index in range(0, len(topic_id_list), 100):
cur_topic_num = Topic.objects.using(settings.SLAVE_DB_NAME).filter(
id__in=topic_id_list[topic_begin_index:topic_begin_index + 100],
create_time__gte=time_base_val).count()
for topic_begin_index in range(0,len(topic_id_list),100):
cur_topic_num = Topic.objects.using(settings.SLAVE_DB_NAME).filter(id__in=topic_id_list[topic_begin_index:topic_begin_index+100],create_time__gte=time_base_val).count()
topic_num += cur_topic_num
res["near_new_topic_num"] = topic_num
tag_type_sql_list = CommunityTagTypeRelation.objects.using(settings.SLAVE_DB_NAME).filter(
tag_id=instance.id).values_list("tag_type_id", flat=True)
tag_type_sql_list = CommunityTagTypeRelation.objects.using(settings.SLAVE_DB_NAME).filter(tag_id=instance.id).values_list("tag_type_id",flat=True)
tag_type_list = list()
for tag_type_id in tag_type_sql_list:
tag_type_list.append(tag_type_id)
......@@ -85,4 +83,4 @@ class TagTransfer(object):
return res
except:
logging.error("catch exception,err_msg:%s" % traceback.format_exc())
return dict()
return dict()
\ No newline at end of file
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment