Commit 3665c234 authored by zhanglu

Merge branch 'feature/virtual_vote_num' into 'dev'

Feature/virtual vote num

See merge request !217
parents 165eded0 66943362
@@ -58,6 +58,7 @@ docs/_build/
 # config
 fabfile.py
 settings.online.py
+/gaia/settings.py
 settings_local.py
 media/
 log/
@@ -69,3 +70,16 @@ Vagrantfile
 *.DS_Store
 dump.rdb
+
+# .gitignore for yangchuncheng
+api/management/commands/ycc*
+settings_override*
+.script/
+.tmp.sql
+.env
+*.pem
+/gaia/hospital_list_settings.py
+coverage_html/
+gaia/rpcd.json
+*.swp
+dbmw_deploy/config.dir/
This diff is collapsed.
@@ -21,7 +21,6 @@ def write_to_es(es_type, pk_list, use_batch_query_set=False):
     type_info = type_info_map[es_type]
 
     logging.info("duan add,es_type:%s" % str(es_type))
-    logging.info("get es_type:%s"%es_type)
     type_info.insert_table_by_pk_list(
         sub_index_name=es_type,
         pk_list=pk_list,
...
@@ -111,10 +111,6 @@ class ESPerform(object):
                 return False
 
             mapping_dict = cls.__load_mapping(sub_index_name)
-            logging.info("get write_alias_name:%s"%write_alias_name)
-            logging.info("get mapping_dict:%s"%mapping_dict)
-            logging.info("get mapping_type:%s"%mapping_type)
            es_cli.indices.put_mapping(index=write_alias_name,body=mapping_dict,doc_type=mapping_type)

            return True
@@ -158,14 +154,26 @@ class ESPerform(object):
             cls.put_index_mapping(es_cli,sub_index_name)
 
             bulk_actions = []
-            for data in data_list:
-                bulk_actions.append({
-                    '_op_type': 'index',
-                    '_index': official_index_name,
-                    '_type': doc_type,
-                    '_id': data['id'],
-                    '_source': data,
-                })
+            if sub_index_name=="topic":
+                for data in data_list:
+                    bulk_actions.append({
+                        '_op_type': 'index',
+                        '_index': official_index_name,
+                        '_type': doc_type,
+                        '_id': data['id'],
+                        '_source': data,
+                        'routing': data["content_level"]
+                    })
+            else:
+                for data in data_list:
+                    bulk_actions.append({
+                        '_op_type': 'index',
+                        '_index': official_index_name,
+                        '_type': doc_type,
+                        '_id': data['id'],
+                        '_source': data,
+                    })
             elasticsearch.helpers.bulk(es_cli,bulk_actions)
 
             return True
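Since the `topic` index is now written with a per-document `routing` value equal to `content_level`, reads that filter on a single content level can pass the same routing and touch fewer shards. A minimal sketch with elasticsearch-py (host, index and field names follow the diff; treat the exact call shape as an assumption, not part of this change):

```python
from elasticsearch import Elasticsearch

es = Elasticsearch(["http://localhost:9200"])  # placeholder host

# routing must match what was used at index time ('routing': data["content_level"])
q = {"query": {"term": {"content_level": "4"}}}
res = es.search(index="topic", body=q, routing="4", size=10)
```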
@@ -226,17 +234,22 @@ class ESPerform(object):
             for item in es_nodes_info_list:
                 try:
                     item_list = item.split(" ")
-                    if len(item_list)>4:
-                        cpu_load = item_list[3]
-                        if int(cpu_load) > 60:
-                            high_num += 1
-                        es_nodes_list.append(int(cpu_load))
+                    if len(item_list)==11:
+                        cpu_load = item_list[4]
+                    elif len(item_list)==10:
+                        cpu_load = item_list[3]
+                    else:
+                        continue
+                    int_cpu_load = int(cpu_load)
+                    if int_cpu_load > 60:
+                        high_num += 1
+                    es_nodes_list.append(int_cpu_load)
                 except:
                     logging.error("catch exception,item:%s,err_msg:%s" % (str(item),traceback.format_exc()))
                     return True
 
             if high_num > 3:
-                logging.info("check es_nodes_load high,cpu load:%s" % str(es_nodes_info_list))
+                logging.info("check es_nodes_load high,cpu load:%s,ori_cpu_info:%s" % (str(es_nodes_list), str(es_nodes_info_list)))
                 return True
             else:
                 return False
...
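The load check above parses whitespace-split rows from the nodes cat API and infers the CPU column from the field count. As a possible alternative (my suggestion, not something this MR does), the cat API's `h` parameter can request only the cpu column, which removes the dependency on the default column layout:

```python
from elasticsearch import Elasticsearch

es = Elasticsearch(["http://localhost:9200"])  # placeholder host

# Ask _cat/nodes for just the cpu column: one integer per node, one node per line.
lines = es.cat.nodes(h="cpu").splitlines()
cpu_loads = [int(line.strip()) for line in lines if line.strip()]
high_num = sum(1 for load in cpu_loads if load > 60)
```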
@@ -9,6 +9,9 @@ import json
 from trans2es.models.tag import TopicTag
 import traceback
 from django.conf import settings
+from libs.es import ESPerform
+from search.utils.common import *
 
 
 class KafkaManager(object):
     consumser_obj = None
@@ -28,6 +31,8 @@ class CollectData(object):
     def __init__(self):
         self.linucb_matrix_redis_prefix = "physical:linucb:device_id:"
         self.linucb_recommend_redis_prefix = "physical:linucb:tag_recommend:device_id:"
+        self.linucb_recommend_topic_id_prefix = "physical:linucb:topic_recommend:device_id:"
+        self.tag_topic_id_redis_prefix = "physical:tag_id:topic_id_list:"
 
         # default
         self.user_feature = [0,1]
@@ -44,24 +49,88 @@ class CollectData(object):
             logging.error("catch exception,err_msg:%s" % traceback.format_exc())
             return dict()
 
+    def get_tag_topic_list(self,tag_id):
+        try:
+            q = {
+                "query":{
+                    "bool":{
+                        "must":[
+                            {"term":{"is_online": True}},
+                            {"term":{"is_deleted": False}},
+                            {"term":{"tag_list":tag_id}}
+                        ]
+                    }
+                },
+                "_source":{
+                    "include":["id"]
+                },
+                "sort":[
+                    {"create_time_val":{"order":"desc"}},
+                    {"language_type":{"order":"asc"}},
+                ]
+            }
+            result_dict = ESPerform.get_search_results(ESPerform.get_cli(), sub_index_name="topic-high-star",
+                                                       query_body=q, offset=0, size=5000)
+            topic_id_list = [item["_source"]["id"] for item in result_dict["hits"]]
+            return topic_id_list
+        except:
+            logging.error("catch exception,err_msg:%s" % traceback.format_exc())
+            return list()
+
     def update_recommend_tag_list(self, device_id,user_feature=None):
         try:
+            recommend_tag_set = set()
             recommend_tag_list = list()
+            recommend_tag_dict = dict()
             redis_linucb_tag_data_dict = self._get_user_linucb_info(device_id)
             if len(redis_linucb_tag_data_dict) == 0:
                 recommend_tag_list = LinUCB.get_default_tag_list()
                 LinUCB.init_device_id_linucb_info(redis_client, self.linucb_matrix_redis_prefix,device_id,recommend_tag_list)
             else:
                 user_feature = user_feature if user_feature else self.user_feature
-                recommend_tag_list = LinUCB.linucb_recommend_tag(device_id,redis_linucb_tag_data_dict,user_feature,list(redis_linucb_tag_data_dict.keys()))
-                logging.info("duan add,device_id:%s,recommend_tag_list:%s" % (str(device_id), str(recommend_tag_list)))
-                if len(recommend_tag_list) > 0:
+                (recommend_tag_dict,recommend_tag_set) = LinUCB.linucb_recommend_tag(device_id,redis_linucb_tag_data_dict,user_feature,list(redis_linucb_tag_data_dict.keys()))
+                if len(recommend_tag_dict) > 0:
+                    recommend_tag_list = list(recommend_tag_set)
                     tag_recommend_redis_key = self.linucb_recommend_redis_prefix + str(device_id)
                     redis_client.set(tag_recommend_redis_key, json.dumps(recommend_tag_list))
                     # TODO: set an expiration; check whether set supports it
                     redis_client.expire(tag_recommend_redis_key, 7*24*60*60)
+
+                    redis_key = "physical:home_recommend" + ":device_id:" + device_id + ":query_type:" + str(TopicPageType.HOME_RECOMMEND)
+                    have_read_topic_id_list = list()
+                    redis_field_list = [b'have_read_topic_list']
+                    redis_field_val_list = redis_client.hmget(redis_key, redis_field_list)
+                    if redis_field_val_list[0]:
+                        have_read_topic_id_list = list(json.loads(redis_field_val_list[0]))
+
+                    recommend_topic_id_list = list()
+                    for index in range(0,1000):
+                        for tag_id in recommend_tag_list[0:5]:
+                            redis_tag_id_key = self.tag_topic_id_redis_prefix + str(tag_id)
+                            redis_tag_id_data = redis_client.get(redis_tag_id_key)
+                            tag_topic_id_list = json.loads(redis_tag_id_data) if redis_tag_id_data else []
+                            if not redis_tag_id_data:
+                                tag_topic_id_list = self.get_tag_topic_list(tag_id)
+                                redis_client.set(redis_tag_id_key,json.dumps(tag_topic_id_list))
+                                redis_client.expire(redis_tag_id_key,1*24*60*60)
+                            if len(tag_topic_id_list)>index:
+                                for topic_id in tag_topic_id_list[index:]:
+                                    if topic_id not in have_read_topic_id_list and topic_id not in recommend_topic_id_list:
+                                        recommend_topic_id_list.append(topic_id)
+                                        break
+
+                    topic_recommend_redis_key = self.linucb_recommend_topic_id_prefix + str(device_id)
+                    redis_data_dict = {
+                        "data": json.dumps(recommend_topic_id_list),
+                        "cursor":0
+                    }
+                    redis_client.hmset(topic_recommend_redis_key,redis_data_dict)
+
             return True
         except:
             logging.error("catch exception,err_msg:%s" % traceback.format_exc())
@@ -98,9 +167,10 @@ class CollectData(object):
                     logging.info("consume topic_id:%s,device_id:%s" % (str(topic_id), str(device_id)))
                     tag_list = list()
-                    sql_query_results = TopicTag.objects.filter(is_online=True, topic_id=topic_id)
-                    for sql_item in sql_query_results:
-                        tag_list.append(sql_item.tag_id)
+                    click_sql_query_results = TopicTag.objects.using(settings.SLAVE_DB_NAME).filter(topic_id=topic_id).values_list("tag_id","is_online")
+                    for tag_id,is_online in click_sql_query_results:
+                        if is_online:
+                            tag_list.append(tag_id)
 
                     is_click = 1
                     is_vote = 0
@@ -130,17 +200,19 @@ class CollectData(object):
                         exposure_topic_id = item["card_id"]
                         logging.info(
                             "consume exposure topic_id:%s,device_id:%s" % (str(exposure_topic_id), str(device_id)))
-                        exposure_topic_id_list.append(exposure_topic_id)
+                        if exposure_topic_id:
+                            exposure_topic_id_list.append(exposure_topic_id)
 
                     topic_tag_id_dict = dict()
                     tag_list = list()
-                    sql_query_results = TopicTag.objects.filter(is_online=True, topic_id__in=exposure_topic_id_list)
-                    for sql_item in sql_query_results:
-                        tag_list.append(sql_item.tag_id)
-
-                        if sql_item.topic_id not in topic_tag_id_dict:
-                            topic_tag_id_dict[sql_item.topic_id] = list()
-                        topic_tag_id_dict[sql_item.topic_id].append(sql_item.tag_id)
+                    exposure_sql_query_results = TopicTag.objects.using(settings.SLAVE_DB_NAME).filter(topic_id__in=exposure_topic_id_list).values_list("topic_id","tag_id","is_online")
+                    for topic_id,tag_id,is_online in exposure_sql_query_results:
+                        if is_online:
+                            tag_list.append(tag_id)
+
+                            if topic_id not in topic_tag_id_dict:
+                                topic_tag_id_dict[topic_id] = list()
+                            topic_tag_id_dict[topic_id].append(tag_id)
 
                     is_click = 0
                     is_vote = 0
...
@@ -9,12 +9,14 @@ import logging
 import traceback
 import json
 import pickle
+from django.conf import settings
 
 
 class LinUCB:
     d = 2
-    alpha = 0.25
-    r1 = 1
-    r0 = -0.5
+    alpha = 0.01
+    r1 = 10
+    r0 = -0.1
     default_tag_list = list()
 
     @classmethod
@@ -22,12 +24,9 @@ class LinUCB:
         try:
             if len(cls.default_tag_list) == 0:
-                query_item_results = Tag.objects.filter(is_online=True)
-                for item in query_item_results:
-                    cls.default_tag_list.append(item.id)
-
-            return cls.default_tag_list[:20]
+                cls.default_tag_list = Tag.objects.using(settings.SLAVE_DB_NAME).filter(is_online=True,collection=1).values_list("id",flat=True)[0:100]
+
+            return cls.default_tag_list
         except:
             logging.error("catch exception,err_msg:%s" % traceback.format_exc())
             return list()
@@ -72,6 +71,7 @@ class LinUCB:
 
             top_tag_set = set()
+            top_tag_dict = dict()
             np_score_list = list()
             np_score_dict = dict()
@@ -86,16 +86,20 @@ class LinUCB:
             sorted_np_score_list = sorted(np_score_list,reverse=True)
             for top_score in sorted_np_score_list:
                 for top_score_index in np_score_dict[top_score]:
-                    top_tag_set.add(str(tag_list[top_score_index], encoding="utf-8"))
-
-                if len(top_tag_set) >= 10:
+                    tag_id = str(tag_list[top_score_index], encoding="utf-8")
+                    top_tag_dict[tag_id] = top_score
+                    top_tag_set.add(tag_id)
+                    if len(top_tag_dict) >= 20:
+                        break
+
+                if len(top_tag_dict) >= 20:
                     break
 
             logging.info("duan add,device_id:%s,sorted_np_score_list:%s,np_score_dict:%s" % (str(device_id), str(sorted_np_score_list), str(np_score_dict)))
-            return list(top_tag_set)
+            return (top_tag_dict,top_tag_set)
         except:
             logging.error("catch exception,err_msg:%s" % traceback.format_exc())
-            return []
+            return ({},())
 
     @classmethod
     def init_device_id_linucb_info(cls, redis_cli,redis_prefix, device_id, tag_list):
...
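For context on the retuned constants: in standard LinUCB the score of an arm a with context x is p_a = theta_a^T x + alpha * sqrt(x^T A_a^-1 x), so dropping `alpha` from 0.25 to 0.01 sharply reduces the exploration bonus, while `r1`/`r0` are presumably the rewards applied for positive vs. negative feedback. A generic numpy sketch of the scoring step (not this project's exact implementation):

```python
import numpy as np

def linucb_score(A, b, x, alpha=0.01):
    """Upper-confidence score for one arm.

    A: d x d design matrix, b: d-dim response vector, x: d-dim context vector.
    """
    A_inv = np.linalg.inv(A)
    theta = A_inv.dot(b)                           # ridge-regression estimate of the arm weights
    bonus = alpha * np.sqrt(x.dot(A_inv).dot(x))   # exploration term
    return float(theta.dot(x) + bonus)
```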
This diff is collapsed.
@@ -13,7 +13,7 @@ from libs.es import ESPerform
 
 @bind("physical/search/query_pictorial")
-def query_group(query="", offset=0, size=10):
+def query_pictorial(query="", offset=0, size=10):
     """
     :remark: group search ranking strategy; ranking strategy is missing
     :param query:
@@ -105,7 +105,7 @@ def pictorial_topic(topic_id=-1, offset=0, size=10):
             }
         }
         q["_source"] = {
-            "include": ["id", "pictorial_id", "tag_list"]
+            "includes": ["id", "pictorial_id", "tag_list"]
         }
         result_dict = ESPerform.get_search_results(es_cli_obj, "topic", q, offset, size)
         logging.info("get result_dict:%s" % result_dict)
@@ -142,7 +142,7 @@ def pictorial_topic(topic_id=-1, offset=0, size=10):
             }
             q["_source"] = {
-                "include": ["id", "update_time"]
+                "includes": ["id", "update_time"]
             }
             q["sort"] = {
                 'update_time': {
@@ -155,8 +155,6 @@ def pictorial_topic(topic_id=-1, offset=0, size=10):
             for item in result_dict["hits"]:
                 pictorial_id = item["_source"]["id"]
                 pictorial_id_list.append(pictorial_id)
-                logging.info("get pictorial_id_list:%s" % pictorial_id_list)
-                logging.info("get topic_tag_list:%s" % topic_tag_list)
 
             if len(pictorial_id_list) < 10:
                 num = 10 - len(pictorial_id_list)
@@ -177,7 +175,7 @@ def pictorial_topic(topic_id=-1, offset=0, size=10):
                     }
                 }
                 q["_source"] = {
-                    "include": ["id", "tag_id"]}
+                    "includes": ["id", "tag_id"]}
                 q["sort"] = {
                     'update_time': {
                         'order': 'desc'
@@ -194,9 +192,6 @@ def pictorial_topic(topic_id=-1, offset=0, size=10):
                     if id not in pictorial_id_list:
                         pictorial_id_list.append(id)  #
-                logging.info("get result_dict tag:%s" % result_dict)
-                logging.info("get pictorial_id_list tag:%s" % pictorial_id_list)
-
             pictorial_list = pictorial_id_list if len(pictorial_id_list) < 10 else pictorial_id_list[:10]
             return {"pictorial_ids_list": pictorial_list}
...
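The `include` to `includes` edits above move to the field names Elasticsearch documents for `_source` filtering (`includes`/`excludes`); the singular spellings were older aliases that were later deprecated. A minimal illustration of the corrected shape (the term filter and id are made up):

```python
# Hypothetical query body showing _source filtering with "includes"
q = {
    "query": {"term": {"pictorial_id": 123}},          # made-up id
    "_source": {"includes": ["id", "update_time"]},    # was "include" before this MR
    "sort": [{"update_time": {"order": "desc"}}],
}
```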
@@ -27,7 +27,7 @@ def get_discover_page_topic_ids(user_id, device_id, size, query_type=TopicPageTy
     recommend_topic_ids = TopicUtils.get_recommend_topic_ids(user_id=user_id, tag_id=0, offset=0, size=size,single_size=size,
                                                              query_type=query_type,
-                                                             filter_topic_id_list=have_read_topic_id_list)
+                                                             filter_topic_id_list=have_read_topic_id_list,index_type="topic-high-star")
 
     have_read_topic_id_list.extend(recommend_topic_ids)
@@ -46,45 +46,56 @@ def get_home_recommend_topic_ids(user_id, device_id, tag_id, offset, size, query
                                  query_type=TopicPageType.HOME_RECOMMEND):
     try:
         if query is None:
-            if user_id == -1:
-                redis_key = "physical:home_recommend" + ":user_id:" + str(
-                    user_id) + ":device_id:" + device_id + ":query_type:" + str(query_type)
-            else:
-                redis_key = "physical:home_recommend" + ":user_id:" + str(user_id) + ":query_type:" + str(query_type)
+            # redis_key = "physical:home_recommend" + ":user_id:" + str(
+            #     user_id) + ":device_id:" + device_id + ":query_type:" + str(query_type)
+            redis_key = "physical:home_recommend" + ":device_id:" + device_id + ":query_type:" + str(query_type)
         else:
-            if user_id == -1:
-                redis_key = "physical:home_query" + ":user_id:" + str(
-                    user_id) + ":device_id:" + device_id + ":query:" + str(query) + ":query_type:" + str(query_type)
-            else:
-                redis_key = "physical:home_query" + ":user_id:" + str(user_id) + ":query:" + str(
-                    query) + ":query_type:" + str(query_type)
+            # redis_key = "physical:home_query" + ":user_id:" + str(
+            #     user_id) + ":device_id:" + device_id + ":query:" + str(query) + ":query_type:" + str(query_type)
+            redis_key = "physical:home_query" + ":device_id:" + device_id + ":query:" + str(query) + ":query_type:" + str(query_type)
 
         redis_field_list = [b'have_read_topic_list']
         redis_field_val_list = redis_client.hmget(redis_key, redis_field_list)
 
-        tag_recommend_redis_key = "physical:linucb:tag_recommend:device_id:" + str(device_id)
-        recommend_tag_list = []
-        tag_recommend_val = redis_client.get(tag_recommend_redis_key)
-        if tag_recommend_val:
-            recommend_tag_list = json.loads(str(tag_recommend_val, encoding="utf-8"))
-
-        recommend_topic_ids = []
+        topic_recommend_redis_key = "physical:linucb:topic_recommend:device_id:" + str(device_id)
+        # recommend_tag_dict = dict()
+        # tag_recommend_val = redis_client.get(tag_recommend_redis_key)
+        # if tag_recommend_val:
+        #     recommend_tag_dict = json.loads(str(tag_recommend_val, encoding="utf-8"))
+
+        recommend_topic_list=list()
+        recommend_topic_dict = redis_client.hgetall(topic_recommend_redis_key)
+        if b"data" in recommend_topic_dict:
+            recommend_topic_id_list = json.loads(recommend_topic_dict[b"data"])
+            cursor = int(str(recommend_topic_dict[b"cursor"], encoding="utf-8"))
+            newcursor = cursor + 5
+            if len(recommend_topic_id_list) > newcursor:
+                recommend_topic_list = recommend_topic_id_list[cursor:newcursor]
+                redis_client.hset(topic_recommend_redis_key,"cursor",newcursor)
 
         have_read_topic_id_list = list()
-        if redis_field_val_list[0] and query is None:
-            have_read_topic_id_list = list(json.loads(redis_field_val_list[0]))
+        if redis_field_val_list[0]:
+            if query is None:
+                have_read_topic_id_list = list(json.loads(redis_field_val_list[0]))
+            else:
+                if offset>0:
+                    have_read_topic_id_list = list(json.loads(redis_field_val_list[0]))
 
         user_similar_score_redis_key = "physical:user_similar_score:user_id:" + str(user_id)
         redis_user_similar_score_redis_val = redis_client.get(user_similar_score_redis_key)
         user_similar_score_redis_list = json.loads(
             redis_user_similar_score_redis_val) if redis_user_similar_score_redis_val else []
+        size = size-len(recommend_topic_list)
         topic_id_list = TopicUtils.get_recommend_topic_ids(user_id=user_id, tag_id=tag_id, offset=offset, size=size,
                                                            single_size=size,query=query, query_type=query_type,
                                                            filter_topic_id_list=have_read_topic_id_list,
-                                                           recommend_tag_list=recommend_tag_list,
-                                                           user_similar_score_list=user_similar_score_redis_list)
+                                                           recommend_tag_list=recommend_topic_list,
+                                                           user_similar_score_list=user_similar_score_redis_list,index_type="topic-high-star")
         have_read_group_id_set = set()
         have_read_user_id_set = set()
         unread_topic_id_dict = dict()
@@ -125,18 +136,19 @@ def get_home_recommend_topic_ids(user_id, device_id, tag_id, offset, size, query
             # else:
             #     break
 
-        have_read_topic_id_list.extend(topic_id_list)
-        if len(have_read_topic_id_list) > 5000:
-            cut_len = len(have_read_topic_id_list)-5000
+        recommend_topic_list.extend(topic_id_list)
+        have_read_topic_id_list.extend(recommend_topic_list)
+        if len(have_read_topic_id_list) > 30000:
+            cut_len = len(have_read_topic_id_list)-30000
             have_read_topic_id_list = have_read_topic_id_list[cut_len:]
         redis_dict = {
             "have_read_topic_list": json.dumps(have_read_topic_id_list),
         }
         redis_client.hmset(redis_key, redis_dict)
         # keep each session key for 15 minutes
-        redis_client.expire(redis_key, 60 * 60 * 24 * 3)
+        redis_client.expire(redis_key, 60 * 60 * 24 * 30)
 
-        return topic_id_list
+        return recommend_topic_list
     except:
         logging.error("catch exception,err_msg:%s" % traceback.format_exc())
         return []
@@ -170,11 +182,11 @@ def home_recommend(device_id="", user_id=-1, offset=0, size=10, query_type=Topic
         recommend_topic_ids = list()
         es_node_load_high_flag = False
-        try:
-            es_node_load_high_flag = ESPerform.if_es_node_load_high(ESPerform.get_cli())
-        except:
-            logging.error("catch exception,err_msg:%s" % traceback.format_exc())
-            es_node_load_high_flag = True
+        # try:
+        #     es_node_load_high_flag = ESPerform.if_es_node_load_high(ESPerform.get_cli())
+        # except:
+        #     logging.error("catch exception,err_msg:%s" % traceback.format_exc())
+        #     es_node_load_high_flag = True
 
         if es_node_load_high_flag:
             temp_downgrading_key = "physical:home_recommend:user_id:241407656:query_type:1"
@@ -273,7 +285,7 @@ def topic_detail_page_recommend(device_id="", user_id=-1, topic_id=-1, topic_pic
         result_list = TopicUtils.get_topic_detail_recommend_list(user_id, topic_id, topic_tag_list, topic_pictorial_id,
                                                                  topic_user_id, filter_topic_user_id,
-                                                                 have_read_topic_list, offset, size, es_cli_obj)
+                                                                 have_read_topic_list, offset, size, es_cli_obj,index_type="topic-high-star")
         recommend_topic_ids_list = list()
         if len(result_list) > 0:
             recommend_topic_ids_list = [item["_source"]["id"] for item in result_list]
@@ -347,7 +359,7 @@ def query_topic_by_user_similarity(topic_similarity_score_dict, offset=0, size=1
     try:
         must_topic_id_list = list(topic_similarity_score_dict.keys())
         topic_id_list = TopicUtils.get_recommend_topic_ids(tag_id=0, user_id=-1, offset=offset, size=size,single_size=size,
-                                                           must_topic_id_list=must_topic_id_list)
+                                                           must_topic_id_list=must_topic_id_list,index_type="topic-high-star")
         return {"recommend_topic_ids": topic_id_list}
     except:
...
@@ -49,6 +49,7 @@ class Job(object):
     def __call__(self):
         type_info = get_type_info_map()[self._type_name]
         assert isinstance(type_info, TypeInfo)
 
         result = type_info.insert_table_chunk(
             sub_index_name=self._sub_index_name,
             table_chunk=self._chunk,
...
{
    "dynamic":"strict",
    "properties": {
        "id":{"type":"long"},
        "is_online":{"type":"boolean"},// online
        "is_deleted":{"type":"boolean"},
        "vote_num":{"type":"long"},
        "reply_num":{"type":"long"},
        "name":{"type":"text","analyzer":"gm_default_index","search_analyzer":"gm_default_index"},
        "description":{"type":"text","analyzer":"gm_default_index","search_analyzer":"gm_default_index"},
        "content":{"type":"text","analyzer":"gm_default_index","search_analyzer":"gm_default_index"},
        "content_level":{"type":"text"},
        "user_id":{"type":"long"},
        "user_nick_name":{"type":"text","analyzer":"gm_default_index","search_analyzer":"gm_default_index"},// post author nickname
        "group_id":{"type":"long"}, // ID of the group it belongs to
        "tag_list":{"type":"long"},// tag attributes
        "edit_tag_list":{"type":"long"},// editor tags
        "tag_name_list":{"type":"text","analyzer":"gm_default_index","search_analyzer":"gm_default_index"},
        "share_num":{"type":"long"},
        "pick_id_list":{"type":"long"},
        "offline_score":{"type":"double"},// offline score
        "manual_score":{"type":"double"},// manual score
        "has_image":{"type":"boolean"},// has image
        "has_video":{"type":"boolean"},// is a video
        "create_time":{"type":"date", "format":"date_time_no_millis"},
        "update_time":{"type":"date", "format":"date_time_no_millis"},
        "create_time_val":{"type":"long"},
        "update_time_val":{"type":"long"},
        "language_type":{"type":"long"},
        "is_shadow": {"type": "boolean"},
        "is_recommend": {"type": "boolean"},
        "is_complaint": {"type": "boolean"}, // whether it has been reported
        "virtual_content_level":{"type": "text"},
        "like_num_crawl": {"type": "long"}, // crawled like count
        "comment_num_crawl": {"type": "long"}, // crawled comment count
        "is_crawl": {"type": "boolean"},
        "platform": {"type": "long"},
        "platform_id": {"type": "long"},
        "drop_score":{"type": "double"}, // manual demotion score
        "sort_score":{"type": "double"}, // sort score
        "pictorial_id":{"type": "long"}, // ID of the group it belongs to
        "pictorial_name":{ // name of the group it belongs to
            "type": "text",
            "analyzer": "gm_default_index",
            "search_analyzer": "gm_default_index"
        }
    }
}
{
    "dynamic":"strict",
    "properties": {
        "id":{"type":"long"},
        "is_online":{"type":"boolean"},// online
        "is_deleted":{"type":"boolean"},
        "vote_num":{"type":"long"},
        "reply_num":{"type":"long"},
        "name":{"type":"text","analyzer":"gm_default_index","search_analyzer":"gm_default_index"},
        "description":{"type":"text","analyzer":"gm_default_index","search_analyzer":"gm_default_index"},
        "content":{"type":"text","analyzer":"gm_default_index","search_analyzer":"gm_default_index"},
        "content_level":{"type":"text"},
        "user_id":{"type":"long"},
        "user_nick_name":{"type":"text","analyzer":"gm_default_index","search_analyzer":"gm_default_index"},// post author nickname
        "group_id":{"type":"long"}, // ID of the group it belongs to
        "tag_list":{"type":"long"},// tag attributes
        "edit_tag_list":{"type":"long"},// editor tags
        "tag_name_list":{"type":"text","analyzer":"gm_default_index","search_analyzer":"gm_default_index"},
        "share_num":{"type":"long"},
        "pick_id_list":{"type":"long"},
        "offline_score":{"type":"double"},// offline score
        "manual_score":{"type":"double"},// manual score
        "has_image":{"type":"boolean"},// has image
        "has_video":{"type":"boolean"},// is a video
        "create_time":{"type":"date", "format":"date_time_no_millis"},
        "update_time":{"type":"date", "format":"date_time_no_millis"},
        "create_time_val":{"type":"long"},
        "update_time_val":{"type":"long"},
        "language_type":{"type":"long"},
        "is_shadow": {"type": "boolean"},
        "is_recommend": {"type": "boolean"},
        "is_complaint": {"type": "boolean"}, // whether it has been reported
        "virtual_content_level":{"type": "text"},
        "like_num_crawl": {"type": "long"}, // crawled like count
        "comment_num_crawl": {"type": "long"}, // crawled comment count
        "is_crawl": {"type": "boolean"},
        "platform": {"type": "long"},
        "platform_id": {"type": "long"},
        "drop_score":{"type": "double"}, // manual demotion score
        "sort_score":{"type": "double"}, // sort score
        "pictorial_id":{"type": "long"}, // ID of the group it belongs to
        "pictorial_name":{ // name of the group it belongs to
            "type": "text",
            "analyzer": "gm_default_index",
            "search_analyzer": "gm_default_index"
        }
    }
}
@@ -5,12 +5,14 @@
         "is_online":{"type":"boolean"},// online
         "is_deleted":{"type":"boolean"},
         "vote_num":{"type":"long"},
+        "total_vote_num":{"type":"long"},
         "reply_num":{"type":"long"},
         "name":{"type":"text","analyzer":"gm_default_index","search_analyzer":"gm_default_index"},
         "description":{"type":"text","analyzer":"gm_default_index","search_analyzer":"gm_default_index"},
         "content":{"type":"text","analyzer":"gm_default_index","search_analyzer":"gm_default_index"},
         "content_level":{"type":"text"},
         "user_id":{"type":"long"},
+        "user_nick_name":{"type":"text","analyzer":"gm_default_index","search_analyzer":"gm_default_index"},// post author nickname
         "group_id":{"type":"long"}, // ID of the group it belongs to
         "tag_list":{"type":"long"},// tag attributes
         "edit_tag_list":{"type":"long"},// editor tags
...
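The new `total_vote_num` field (real votes plus the virtual votes added elsewhere in this MR) is a plain long, so it can be used directly for sorting or range filters. A hypothetical query body for illustration:

```python
# Hypothetical ranking query using the new total_vote_num field
q = {
    "query": {"bool": {"must": [{"term": {"is_online": True}}]}},
    "sort": [{"total_vote_num": {"order": "desc"}}],
    "_source": {"includes": ["id", "total_vote_num"]},
}
```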
@@ -27,15 +27,15 @@ class Group(models.Model):
     create_time = models.DateTimeField(verbose_name=u'创建时间',default=datetime.datetime.fromtimestamp(0))
     update_time = models.DateTimeField(verbose_name=u'更新时间', default=datetime.datetime.fromtimestamp(0))
 
-    # # number of 4/5-star posts from the previous day
-    # def get_high_quality_topic_num(self):
-    #     yesterday = datetime.datetime.now()-datetime.timedelta(days=1)
-    #     yesterday_begin_time = "%s-%s-%s 00:00:00" % (yesterday.year, yesterday.month, yesterday.day)
-    #     yesterday_end_time = "%s-%s-%s 23:59:59" % (yesterday.year, yesterday.month, yesterday.day)
-    #
-    #     topic_num = self.group_topics.filter(content_level__in=("4","5"),create_time__gte=yesterday_begin_time,create_time__lte=yesterday_end_time).count()
-    #
-    #     return topic_num
+    # number of 4/5-star posts from the previous day
+    def get_high_quality_topic_num(self):
+        yesterday = datetime.datetime.now()-datetime.timedelta(days=1)
+        yesterday_begin_time = "%s-%s-%s 00:00:00" % (yesterday.year, yesterday.month, yesterday.day)
+        yesterday_end_time = "%s-%s-%s 23:59:59" % (yesterday.year, yesterday.month, yesterday.day)
+
+        topic_num = self.group_topics.filter(content_level__in=("4","5"),create_time__gte=yesterday_begin_time,create_time__lte=yesterday_end_time).count()
+
+        return topic_num
 
     def detail(self):
         result = {
...
-from django.db import models
 import datetime
+from django.db import models
 import logging
 import traceback
@@ -109,7 +109,6 @@ class Pictorial(models.Model):
             tags = Tag.objects.filter(id__in=tag_id, is_online=True).values_list("name", flat=True)
             for i in tags:
                 tag_name_list.append(i)
-                logging.info("get tags name i:%s" % i)
 
             return tag_name_list
...
@@ -52,9 +52,9 @@ class Topic(models.Model):
     id = models.IntegerField(verbose_name=u'日记ID', primary_key=True)
     name = models.CharField(verbose_name=u'日记名称', max_length=100)
     # group_id = models.IntegerField(verbose_name='用户所在组ID',default=-1)
-    # group = models.ForeignKey(
-    #     Group, verbose_name=u"关联的小组", related_name=u"group_topics", null=True, blank=True, default=None,
-    #     on_delete=models.CASCADE)
+    group = models.ForeignKey(
+        Group, verbose_name=u"关联的小组", related_name=u"group_topics", null=True, blank=True, default=None,
+        on_delete=models.CASCADE)
     user_id = models.IntegerField(verbose_name=u'用户ID')
     has_video = models.BooleanField(verbose_name=u'是否是视频日记')
     drop_score = models.IntegerField(verbose_name=u'人工赋分', default=0)
@@ -82,6 +82,14 @@ class Topic(models.Model):
     platform = models.IntegerField(verbose_name=u'平台来源', choices=GRAP_PLATFORM, default=GRAP_PLATFORM.ALPHA)
     platform_id = models.BigIntegerField(verbose_name='用平台ID', null=True)
 
+    def get_virtual_vote_num(self):
+        try:
+            topic_extra = TopicExtra.objects.get(topic_id=self.id)
+            return topic_extra.virtual_vote_num
+        except:
+            return 0
+
     def get_pictorial_id(self):
         try:
             pictorial_id_list =[]
@@ -106,8 +114,7 @@ class Topic(models.Model):
         try:
             has_image = False
-            query_list = TopicImage.objects.using(settings.SLAVE_DB_NAME).filter(topic_id=self.id, is_deleted=False,
-                                                                                 is_online=True)
+            query_list = TopicImage.objects.using(settings.SLAVE_DB_NAME).filter(topic_id=self.id, is_deleted=False, is_online=True)
             if len(query_list) > 0:
                 has_image = True
@@ -133,13 +140,12 @@ class Topic(models.Model):
             topic_tag_id_list = list()
             edit_tag_id_list = list()
-            tag_id_list = TopicTag.objects.using(settings.SLAVE_DB_NAME).filter(topic_id=self.id).values_list("tag_id",
-                                                                                                              flat=True)
+            tag_id_list = TopicTag.objects.using(settings.SLAVE_DB_NAME).filter(topic_id=self.id).values_list("tag_id", flat=True)
             tag_query_results = Tag.objects.using(settings.SLAVE_DB_NAME).filter(id__in=tag_id_list)
             for tag_item in tag_query_results:
-                is_online = tag_item.is_online
-                is_deleted = tag_item.is_deleted
-                collection = tag_item.collection
+                is_online=tag_item.is_online
+                is_deleted=tag_item.is_deleted
+                collection=tag_item.collection
                 if is_online and not is_deleted:
                     topic_tag_id_list.append(tag_item.id)
@@ -149,7 +155,7 @@ class Topic(models.Model):
             return (topic_tag_id_list, edit_tag_id_list)
         except:
             logging.error("catch exception,err_msg:%s" % traceback.format_exc())
-            return ([], [])
+            return ([],[])
 
     def get_tag_name_list(self, tag_id_list):
         try:
@@ -188,12 +194,9 @@ class Topic(models.Model):
             elif self.content_level == '3':
                 offline_score += 2.0
 
-            exposure_count = ActionSumAboutTopic.objects.using(settings.SLAVE_DB_NAME).filter(topic_id=self.id,
-                                                                                              data_type=1).count()
-            click_count = ActionSumAboutTopic.objects.using(settings.SLAVE_DB_NAME).filter(topic_id=self.id,
-                                                                                           data_type=2).count()
-            uv_num = ActionSumAboutTopic.objects.using(settings.SLAVE_DB_NAME).filter(topic_id=self.id,
-                                                                                      data_type=3).count()
+            exposure_count = ActionSumAboutTopic.objects.using(settings.SLAVE_DB_NAME).filter(topic_id=self.id, data_type=1).count()
+            click_count = ActionSumAboutTopic.objects.using(settings.SLAVE_DB_NAME).filter(topic_id=self.id, data_type=2).count()
+            uv_num = ActionSumAboutTopic.objects.using(settings.SLAVE_DB_NAME).filter(topic_id=self.id, data_type=3).count()
 
             if exposure_count > 0:
                 offline_score += click_count / exposure_count
@@ -224,8 +227,7 @@ class TopicComplaint(models.Model):
     id = models.IntegerField(verbose_name='日记图片ID', primary_key=True)
     user_id = models.BigIntegerField(verbose_name=u'用户ID', db_index=True)
     topic = models.ForeignKey(
-        Topic, verbose_name=u"关联的帖子", null=True, blank=True, default=None, on_delete=models.CASCADE,
-        related_name='complaints')
+        Topic, verbose_name=u"关联的帖子", null=True, blank=True, default=None, on_delete=models.CASCADE, related_name='complaints')
     is_online = models.BooleanField(verbose_name=u"是否有效", default=True)
@@ -244,3 +246,15 @@ class PictorialTopic(models.Model):
     is_online = models.BooleanField(verbose_name=u"是否有效", default=True)
     is_online = models.BooleanField(verbose_name=u'是否上线')
     is_deleted = models.BooleanField(verbose_name=u'是否删除')
+
+
+class TopicExtra(BaseModel):
+    """Extra information attached to a post (topic)."""
+
+    class Meta:
+        verbose_name = '帖子额外信息'
+        app_label = 'community'
+        db_table = 'topic_extra'
+
+    topic_id = models.IntegerField(verbose_name=u"帖子ID",db_index=True)
+    virtual_vote_num = models.IntegerField(verbose_name="帖子虚拟点赞")
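For readers following the feature, a minimal sketch of how the virtual count could feed the `total_vote_num` written to ES (this mirrors `Topic.get_virtual_vote_num` above plus `vote_num`; the read-replica `.using(...)` call and the helper name are assumptions, not part of this MR):

```python
from django.conf import settings

def build_total_vote_num(topic):
    """Real votes plus the virtual votes stored in TopicExtra (0 if absent)."""
    try:
        extra = TopicExtra.objects.using(settings.SLAVE_DB_NAME).get(topic_id=topic.id)
        virtual = extra.virtual_vote_num or 0
    except TopicExtra.DoesNotExist:
        virtual = 0
    return virtual + topic.vote_num
```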
@@ -34,6 +34,16 @@ class User(models.Model):
     create_time = models.DateTimeField(verbose_name=u'创建时间', default=datetime.datetime.fromtimestamp(0))
     update_time = models.DateTimeField(verbose_name=u'更新时间', default=datetime.datetime.fromtimestamp(0))
 
+    @classmethod
+    def get_user_nick_name(cls,user_id):
+        try:
+            nick_name = User.objects.using(settings.SLAVE_DB_NAME).filter(user_id=user_id).values_list("nick_name").first()
+            return nick_name[0]
+        except:
+            logging.error("catch exception,err_msg:%s" % traceback.format_exc())
+            return ""
+
     def get_is_recommend_flag(self):
         is_shadow = False
         is_recommend = False
...
@@ -15,7 +15,8 @@ class UserExtra(models.Model):
         db_table="user_extra"
 
     id = models.IntegerField(verbose_name="主键ID",primary_key=True)
-    user_id = models.BigIntegerField(verbose_name=u"用户ID")
+    user_id = models.CharField(verbose_name=u"用户ID",max_length=100)
     is_shadow = models.BooleanField(verbose_name=u"是否是马甲账户")
     is_online = models.BooleanField(verbose_name=u"是否上线")
     is_recommend = models.BooleanField(verbose_name=u"是否推荐")
...
@@ -81,6 +81,9 @@ class TypeInfo(object):
     def bulk_get_data(self, instance_iterable):
         data_list = []
+        # separate index for 4/5-star posts
+        topic_data_high_star_list = list()
+
         if self.batch_get_data_func:
             _pk_list = [getattr(instance, 'pk', None) for instance in instance_iterable]
             not_found_pk_list = []
@@ -137,7 +140,10 @@ class TypeInfo(object):
                     ))
                 else:
                     data_list.append(data)
-        return data_list
+                    if self.type=="topic" and instance.content_level and int(instance.content_level)>=4:
+                        topic_data_high_star_list.append(data)
+
+        return (data_list,topic_data_high_star_list)
 
     def elasticsearch_bulk_insert_data(self, sub_index_name, data_list, es=None):
@@ -162,10 +168,10 @@ class TypeInfo(object):
         #     traceback.print_exc()
         #     es_result = 'error'
 
-        return ESPerform.es_helpers_bulk(es, data_list, sub_index_name, True)
+        return ESPerform.es_helpers_bulk(es, data_list, sub_index_name)
 
     def elasticsearch_bulk_insert(self, sub_index_name, instance_iterable, es=None):
-        data_list = self.bulk_get_data(instance_iterable)
+        data_list, topic_data_high_star_list = self.bulk_get_data(instance_iterable)
         return self.elasticsearch_bulk_insert_data(
             sub_index_name=sub_index_name,
             data_list=data_list,
@@ -188,7 +194,7 @@ class TypeInfo(object):
         time1=end-begin
 
         begin = time.time()
-        data_list = self.bulk_get_data(instance_list)
+        data_list, topic_data_high_star_list = self.bulk_get_data(instance_list)
         end = time.time()
         time2=end-begin
@@ -201,6 +207,14 @@ class TypeInfo(object):
             data_list=data_list,
             es=es,
         )
+
+        # also write posts rated 4 stars and above
+        if len(topic_data_high_star_list)>0:
+            self.elasticsearch_bulk_insert_data(
+                sub_index_name="topic-high-star",
+                data_list=topic_data_high_star_list,
+                es=es,
+            )
+
         end = time.time()
         time3=end-begin
         logging.info("duan add,insert_table_by_pk_list time cost:%ds,%ds,%ds,%ds" % (time0,time1,time2,time3))
@@ -215,7 +229,7 @@ class TypeInfo(object):
         stage_1_time = time.time()
 
-        data_list = self.bulk_get_data(instance_list)
+        data_list, topic_data_high_star_list = self.bulk_get_data(instance_list)
 
         stage_2_time = time.time()
@@ -260,7 +274,26 @@ def get_type_info_map():
         return _get_type_info_map_result
 
     type_info_list = [
+        TypeInfo(
+            name='topic-star-routing',
+            type='topic-star-routing',
+            model=topic.Topic,
+            query_deferred=lambda: topic.Topic.objects.all().query,
+            get_data_func=TopicTransfer.get_topic_data,
+            bulk_insert_chunk_size=100,
+            round_insert_chunk_size=5,
+            round_insert_period=2,
+        ),
+        TypeInfo(
+            name='topic-high-star',  # diaries rated 4 stars and above
+            type='topic-high-star',
+            model=topic.Topic,
+            query_deferred=lambda: topic.Topic.objects.all().query,
+            get_data_func=TopicTransfer.get_topic_data,
+            bulk_insert_chunk_size=100,
+            round_insert_chunk_size=5,
+            round_insert_period=2,
+        ),
         TypeInfo(
             name='topic',  # diary
             type='topic',
...
@@ -33,7 +33,7 @@ class GroupTransfer(object):
             update_time = instance.update_time
             tzlc_udpate_time = tzlc(update_time)
             res["update_time"] = tzlc_udpate_time
-            # res["high_quality_topic_num"] = instance.get_high_quality_topic_num()
+            res["high_quality_topic_num"] = instance.get_high_quality_topic_num()
 
             return res
         except:
...
@@ -8,7 +8,7 @@ from libs.tools import tzlc
 import time
 import re
 import datetime
+from trans2es.models.user import User
 
 
 class TopicTransfer(object):
@@ -27,12 +27,13 @@ class TopicTransfer(object):
             res["content"] = instance.content
             res["content_level"] = instance.content_level
             res["user_id"] = instance.user_id
+            res["user_nick_name"] = User.get_user_nick_name(instance.user_id)
 
-            # if instance.group:
-            #     res["group_id"] = instance.group.id
-            # else:
-            #     res["group_id"] = -1
+            if instance.group:
+                res["group_id"] = instance.group.id
+            else:
+                res["group_id"] = -1
 
+            res["share_num"] = instance.share_num
             res["pictorial_id"] = instance.get_pictorial_id()
@@ -82,6 +83,20 @@ class TopicTransfer(object):
             # else:
             #     res["language_type"] = instance.language_type
 
+            res["is_shadow"] = instance.is_shadow
+            res["is_recommend"] = True if instance.is_recommend else False
+            res["is_complaint"] = instance.is_complaint
+            res["virtual_content_level"] = instance.virtual_content_level
+            res["like_num_crawl"] = instance.like_num_crawl
+            res["comment_num_crawl"]= instance.comment_num_crawl
+            res["is_crawl"] = instance.is_crawl
+            res["platform"] = instance.platform
+            res["platform_id"] = instance.platform_id
+            res["drop_score"] = instance.drop_score
+            res["sort_score"] = instance.sort_score
+
             create_time = instance.create_time
             tzlc_create_time = tzlc(create_time)
@@ -91,11 +106,13 @@ class TopicTransfer(object):
             update_time = instance.update_time
             tzlc_update_time = tzlc(update_time)
-            # res["update_time"] = tzlc_update_time
+            res["update_time"] = tzlc_update_time
             res["update_time_val"] = int(time.mktime(tzlc_update_time.timetuple()))
 
+            res["total_vote_num"] = instance.get_virtual_vote_num() + instance.vote_num
+
             logging.info("test topic transfer time cost,time0:%d,time1:%d,time2:%d,time3:%d,time4:%d" % (time0,time1,time2,time3,time4))
             return res
         except:
             logging.error("catch exception,err_msg:%s" % traceback.format_exc())
             return None
\ No newline at end of file