Commit 4d30f085 authored by zhanglu

Merge branch 'master' into 'test'

Master

See merge request !206
parents 2c6284cc 590c40fc
@@ -58,6 +58,7 @@ docs/_build/
 # config
 fabfile.py
 settings.online.py
+/gaia/settings.py
 settings_local.py
 media/
 log/
@@ -69,3 +70,16 @@ Vagrantfile
 *.DS_Store
 dump.rdb
+# .gitignore for yangchuncheng
+api/management/commands/ycc*
+settings_override*
+.script/
+.tmp.sql
+.env
+*.pem
+/gaia/hospital_list_settings.py
+coverage_html/
+gaia/rpcd.json
+*.swp
+dbmw_deploy/config.dir/
@@ -21,7 +21,6 @@ def write_to_es(es_type, pk_list, use_batch_query_set=False):
    type_info = type_info_map[es_type]
    logging.info("duan add,es_type:%s" % str(es_type))
-    logging.info("get es_type:%s"%es_type)
    type_info.insert_table_by_pk_list(
        sub_index_name=es_type,
        pk_list=pk_list,
...
@@ -111,10 +111,6 @@ class ESPerform(object):
                return False
            mapping_dict = cls.__load_mapping(sub_index_name)
-            logging.info("get write_alias_name:%s"%write_alias_name)
-            logging.info("get mapping_dict:%s"%mapping_dict)
-            logging.info("get mapping_type:%s"%mapping_type)
            es_cli.indices.put_mapping(index=write_alias_name,body=mapping_dict,doc_type=mapping_type)
            return True
@@ -226,17 +222,22 @@ class ESPerform(object):
            for item in es_nodes_info_list:
                try:
                    item_list = item.split(" ")
-                    if len(item_list)>4:
+                    if len(item_list)==11:
+                        cpu_load = item_list[4]
+                    elif len(item_list)==10:
                        cpu_load = item_list[3]
-                    if int(cpu_load) > 60:
-                        high_num += 1
-                    es_nodes_list.append(int(cpu_load))
+                    else:
+                        continue
+                    int_cpu_load = int(cpu_load)
+                    if int_cpu_load > 60:
+                        high_num += 1
+                    es_nodes_list.append(int_cpu_load)
                except:
                    logging.error("catch exception,item:%s,err_msg:%s" % (str(item),traceback.format_exc()))
                    return True

            if high_num > 3:
-                logging.info("check es_nodes_load high,cpu load:%s" % str(es_nodes_info_list))
+                logging.info("check es_nodes_load high,cpu load:%s,ori_cpu_info:%s" % (str(es_nodes_list), str(es_nodes_info_list)))
                return True
            else:
                return False
...
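Note on the revised if_es_node_load_high logic above: it assumes each node row is a whitespace-separated string whose CPU column sits at index 3 for 10-field rows and index 4 for 11-field rows. A self-contained sketch of that parsing; the helper name count_loaded_nodes and the sample rows are illustrative, not part of the project:

    # Illustrative sketch of the column handling in if_es_node_load_high.
    # Sample rows and the threshold default are made up for demonstration.
    def count_loaded_nodes(rows, threshold=60):
        high_num = 0
        loads = []
        for row in rows:
            fields = row.split(" ")
            if len(fields) == 11:
                cpu_load = fields[4]
            elif len(fields) == 10:
                cpu_load = fields[3]
            else:
                continue
            load = int(cpu_load)
            if load > threshold:
                high_num += 1
            loads.append(load)
        return high_num, loads

    print(count_loaded_nodes([
        "10.0.0.1 35 99 72 1.2 1.1 1.0 mdi - node-1",     # 10 fields, cpu at index 3
        "10.0.0.2 40 98 12 65 1.4 1.2 1.1 mdi - node-2",  # 11 fields, cpu at index 4
    ]))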
@@ -98,9 +98,10 @@ class CollectData(object):
                    logging.info("consume topic_id:%s,device_id:%s" % (str(topic_id), str(device_id)))
                    tag_list = list()
-                    sql_query_results = TopicTag.objects.filter(is_online=True, topic_id=topic_id)
-                    for sql_item in sql_query_results:
-                        tag_list.append(sql_item.tag_id)
+                    click_sql_query_results = TopicTag.objects.using(settings.SLAVE_DB_NAME).filter(topic_id=topic_id).values_list("tag_id","is_online")
+                    for tag_id,is_online in click_sql_query_results:
+                        if is_online:
+                            tag_list.append(tag_id)
                    is_click = 1
                    is_vote = 0
@@ -130,17 +131,19 @@ class CollectData(object):
                        exposure_topic_id = item["card_id"]
                        logging.info(
                            "consume exposure topic_id:%s,device_id:%s" % (str(exposure_topic_id), str(device_id)))
-                        exposure_topic_id_list.append(exposure_topic_id)
+                        if exposure_topic_id:
+                            exposure_topic_id_list.append(exposure_topic_id)
                    topic_tag_id_dict = dict()
                    tag_list = list()
-                    sql_query_results = TopicTag.objects.filter(is_online=True, topic_id__in=exposure_topic_id_list)
-                    for sql_item in sql_query_results:
-                        tag_list.append(sql_item.tag_id)
-                        if sql_item.topic_id not in topic_tag_id_dict:
-                            topic_tag_id_dict[sql_item.topic_id] = list()
-                        topic_tag_id_dict[sql_item.topic_id].append(sql_item.tag_id)
+                    exposure_sql_query_results = TopicTag.objects.using(settings.SLAVE_DB_NAME).filter(topic_id__in=exposure_topic_id_list).values_list("topic_id","tag_id","is_online")
+                    for topic_id,tag_id,is_online in exposure_sql_query_results:
+                        if is_online:
+                            tag_list.append(tag_id)
+                        if topic_id not in topic_tag_id_dict:
+                            topic_tag_id_dict[topic_id] = list()
+                        topic_tag_id_dict[topic_id].append(tag_id)
                    is_click = 0
                    is_vote = 0
...
@@ -9,6 +9,8 @@ import logging
import traceback
import json
import pickle
+from django.conf import settings

class LinUCB:
    d = 2
@@ -22,12 +24,9 @@ class LinUCB:
        try:
            if len(cls.default_tag_list) == 0:
-                query_item_results = Tag.objects.filter(is_online=True)
-                for item in query_item_results:
-                    cls.default_tag_list.append(item.id)
+                cls.default_tag_list = Tag.objects.using(settings.SLAVE_DB_NAME).filter(is_online=True,collection=1).values_list("id",flat=True)[0:100]

-            return cls.default_tag_list[:20]
+            return cls.default_tag_list
        except:
            logging.error("catch exception,err_msg:%s" % traceback.format_exc())
            return list()
@@ -87,6 +86,8 @@ class LinUCB:
            for top_score in sorted_np_score_list:
                for top_score_index in np_score_dict[top_score]:
                    top_tag_set.add(str(tag_list[top_score_index], encoding="utf-8"))
+                    if len(top_tag_set) >= 10:
+                        break
                if len(top_tag_set) >= 10:
                    break
...
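The two-level break added above caps the LinUCB candidate set at 10 tags as soon as the highest score buckets fill up. A standalone sketch of the same walk, using plain strings instead of the module's byte-encoded tag ids; the function name and sample data are illustrative:

    # Sketch of the capped score-bucket walk; np_score_dict maps a score to the
    # indices of tags carrying that score, highest score first.
    def pick_top_tags(np_score_dict, tag_list, limit=10):
        top_tag_set = set()
        for score in sorted(np_score_dict, reverse=True):
            for idx in np_score_dict[score]:
                top_tag_set.add(tag_list[idx])
                if len(top_tag_set) >= limit:
                    break
            if len(top_tag_set) >= limit:
                break
        return top_tag_set

    print(pick_top_tags({0.9: [0, 1], 0.4: [2]}, ["lips", "eyes", "nose"], limit=2))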
@@ -34,8 +34,6 @@ class TopicUtils(object):
        q["_source"] = {
            "include": ["tag_list", "attention_user_id_list", "pick_user_id_list", "same_pictorial_user_id_list"]
        }
-        logging.info("get query-------:%s"%q)
        result_dict = ESPerform.get_search_results(ESPerform.get_cli(), "user", q, offset, size)
        return result_dict
@@ -122,10 +120,9 @@ class TopicUtils(object):
        return {}

    @classmethod
-    def get_recommend_topic_ids(cls, user_id, tag_id, offset, size, single_size, query=None,
-                                query_type=TopicPageType.FIND_PAGE,
-                                filter_topic_id_list=[], test_score=False, must_topic_id_list=[], recommend_tag_list=[],
-                                user_similar_score_list=[]):
+    def get_recommend_topic_ids(cls,user_id,tag_id,offset,size,single_size,query=None,query_type=TopicPageType.FIND_PAGE,
+                                filter_topic_id_list=[],test_score=False,must_topic_id_list=[],recommend_tag_list=[],
+                                user_similar_score_list=[],index_type="topic"):
        """
        :需增加打散逻辑
        :remark:获取首页推荐帖子列表
@@ -139,7 +136,6 @@ class TopicUtils(object):
        attention_user_id_list = list()
        pick_user_id_list = list()
        # same_group_id_list = list()
-        same_pictorial_id_list = list()
        user_tag_list = list()
        result_dict = TopicUtils.get_related_user_info(user_id, 0, 1)
@@ -152,19 +148,13 @@ class TopicUtils(object):
            pick_user_info_list = result_dict["hits"][0]["_source"]["pick_user_id_list"]
            pick_user_id_list = [item["user_id"] for item in pick_user_info_list]
-            logging.info("get result_dict_hits :%s" % result_dict["hits"])
-            # same_group_user_info_list = result_dict["hits"][0]["_source"]["same_group_user_id_list"]
-            # same_group_id_list = [item["user_id"] for item in same_group_user_info_list]
-            # same_group_id_list = same_group_id_list[:100]
-            same_pictorial_user_info_list = result_dict["hits"][0]["_source"]["same_pictorial_user_id_list"]
-            logging.info("get same_pictorial_user_info_list :%s" % same_pictorial_user_info_list)
-            same_pictorial_id_list = [item["user_id"] for item in same_pictorial_user_info_list]
-            same_pictorial_id_list = same_pictorial_id_list[:100]
+            # same_pictorial_user_info_list = result_dict["hits"][0]["_source"]["same_pictorial_user_id_list"]
+            #
+            # same_pictorial_id_list = [item["user_id"] for item in same_pictorial_user_info_list]
+            # same_pictorial_id_list = same_pictorial_id_list[:100]
            user_tag_list = result_dict["hits"][0]["_source"]["tag_list"]
-            logging.info("get same_pictorial_id_list :%s" % same_pictorial_id_list)

        q = dict()
        q["query"] = dict()
@@ -215,22 +205,15 @@ class TopicUtils(object):
                    "weight": 2
                }
            )
-        # if len(same_group_id_list)>0:
+        # if len(same_pictorial_id_list) > 0:
        #     functions_list.append(
        #         {
        #             "filter": {"bool": {
-        #                 "should": {"terms":{"user_id":same_group_id_list}}}},
+        #                 "should": {"terms": {"user_id": same_pictorial_id_list}}}},
        #             "weight": 1
        #         }
        #     )
-        if len(same_pictorial_id_list) > 0:
-            functions_list.append(
-                {
-                    "filter": {"bool": {
-                        "should": {"terms": {"user_id": same_pictorial_id_list}}}},
-                    "weight": 1
-                }
-            )

        # query_tag_term_list = cls.___get_should_term_list(user_tag_list)
        if len(user_tag_list) > 0:
@@ -255,8 +238,8 @@ class TopicUtils(object):
            "query": {
                "bool": {
                    "filter": [
-                        {"range": {"content_level": {"gte": low_content_level, "lte": 5}}},
-                        {"term": {"has_image": True}},
+                        # {"range": {"content_level": {"gte": low_content_level, "lte": 5}}},
+                        {"term": {"has_image":True}},
                        {"term": {"is_online": True}},
                        {"term": {"is_deleted": False}}
                    ],
@@ -336,14 +319,13 @@ class TopicUtils(object):
                    },
                    "order": "desc"
                }
-            },
-            "_score"
+            }
        ]

-        result_dict = ESPerform.get_search_results(ESPerform.get_cli(), sub_index_name="topic", query_body=q,
+        result_dict = ESPerform.get_search_results(ESPerform.get_cli(), sub_index_name=index_type, query_body=q,
                                                   offset=offset, size=size)

        topic_id_list = list()
-        same_pictorial_id_set = set()
+        same_group_id_set = set()
        same_user_id_set = set()
        for item in result_dict["hits"]:
@@ -381,7 +363,7 @@ class TopicUtils(object):
    @classmethod
    def get_topic_detail_recommend_list(cls, user_id, topic_id, topic_tag_list, topic_pictorial_id, topic_user_id,
-                                        filter_topic_user_id, have_read_topic_list, offset, size, es_cli_obj=None):
+                                        filter_topic_user_id, have_read_topic_list, offset, size, es_cli_obj=None,index_type="topic"):
        """
        :remark 帖子详情页推荐列表,缺少按时间衰减
        :param user_id:
@@ -428,7 +410,7 @@ class TopicUtils(object):
            "query": {
                "bool": {
                    "must": [
-                        {"range": {"content_level": {"gte": 3, "lte": 5}}},
+                        # {"range": {"content_level": {"gte": 3, "lte": 5}}},
                        {"term": {"is_online": True}},
                        {"term": {"is_deleted": False}}
                    ],
@@ -457,7 +439,7 @@ class TopicUtils(object):
            "includes": ["id", "pictorial_id", "user_id", "_score"]
        }
-        result_dict = ESPerform.get_search_results(es_cli_obj, sub_index_name="topic", query_body=q,
+        result_dict = ESPerform.get_search_results(es_cli_obj, sub_index_name=index_type, query_body=q,
                                                   offset=offset, size=size)
        return result_dict["hits"]
@@ -598,10 +580,22 @@ class TopicUtils(object):
            {"term": {"is_deleted": False}},
        ]
-        logging.info("get filters:%s"%filters)
        if not filters:
            return f

        for k, v in filters.items():
+            if k == "is_complaint":
+                f.append({
+                    "term": {k: v},
+                })
+
+            if k == "is_complaint":
+                f.append({
+                    "term": {k: v},
+                })
+
            if not v:
                continue
@@ -726,14 +720,74 @@ class TopicUtils(object):
            if sorts:
                q["sort"] = sorts
-        logging.info("get query:%s" % q)
+        try:
+            result_dict = ESPerform.get_search_results(
+                ESPerform.get_cli(), sub_index_name=index_name,
+                query_body=q, offset=offset, size=size
+            )
+            return {
+                "hits": result_dict["hits"],
+                "total_count": result_dict["total_count"]
+            }
+        except:
+            logging.error("catch exception,err_msg:%s" % traceback.format_exc())
+            return {
+                "hits": [],
+                "total_count": 0
+            }
+
+    @classmethod
+    def business_topic_ids(cls, filters, nfilters, sorts_by, offset=0, size=10, index_name="topic", filter_online=True):
+        must = cls.process_filters(filters, filter_online=filter_online)
+        query = ''
+        for k, v in filters.items():
+            if k == "content":
+                query = filters[k]
+        q = {}
+        q["query"] = {
+            "function_score": {
+                "functions": [{
+                    "filter": {
+                        "bool": {
+                            "must": must,
+                            "must_not": cls.process_nfilters(nfilters),
+                        }
+                    },
+                    "weight": 1
+                }],
+                "query": {
+                    "multi_match": {
+                        "fields":["content"],
+                        "type": "cross_fields",
+                        "operator": "and",
+                        "query": query
+                    }
+                }
+            }
+        }
+        if query == '':
+            q["query"] = {
+                "bool": {
+                    "must": must,
+                    "must_not": cls.process_nfilters(nfilters),
+                }
+            }
+        if sorts_by:
+            sorts = cls.process_sort(sorts_by)
+            if sorts:
+                q["sort"] = sorts
        try:
            result_dict = ESPerform.get_search_results(
                ESPerform.get_cli(), sub_index_name=index_name,
                query_body=q, offset=offset, size=size
            )
-            logging.info("get result_dicts:%s" % result_dict)
            return {
                "hits": result_dict["hits"],
...
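For reference, when the filters passed to the new business_topic_ids carry a content term, the query body it assembles has roughly the shape below; the must/must_not values are placeholders for whatever process_filters and process_nfilters return, and the search text is a made-up sample:

    # Approximate shape of the query built by business_topic_ids for a content search.
    sample_query_body = {
        "query": {
            "function_score": {
                "functions": [{
                    "filter": {
                        "bool": {
                            "must": [{"term": {"is_online": True}}],  # placeholder for process_filters(...)
                            "must_not": [],                           # placeholder for process_nfilters(...)
                        }
                    },
                    "weight": 1
                }],
                "query": {
                    "multi_match": {
                        "fields": ["content"],
                        "type": "cross_fields",
                        "operator": "and",
                        "query": "some search text"   # filters["content"]
                    }
                }
            }
        }
    }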
@@ -18,11 +18,14 @@ def business_topic_search(filters, nfilters=None, sorts_by=None, offset=0, size=
    """帖子搜索。"""
    try:
-        result_list = TopicUtils.list_topic_ids(
+        result_list = TopicUtils.business_topic_ids(
            filters=filters, nfilters=nfilters, sorts_by=sorts_by,
            offset=offset, size=size, filter_online=False,
            index_name="topic"
        )
+        logging.info("get result_lsit:%s"%result_list)
        topic_ids = [item["_source"]["id"] for item in result_list["hits"]]
        return {"topic_ids": topic_ids, "total_count": result_list["total_count"]}
    except:
...
@@ -13,7 +13,7 @@ from libs.es import ESPerform
@bind("physical/search/query_pictorial")
-def query_group(query="", offset=0, size=10):
+def query_pictorial(query="", offset=0, size=10):
    """
    :remark:小组搜索排序策略,缺少排序策略
    :param query:
@@ -105,7 +105,7 @@ def pictorial_topic(topic_id=-1, offset=0, size=10):
            }
        }
        q["_source"] = {
-            "include": ["id", "pictorial_id", "tag_list"]
+            "includes": ["id", "pictorial_id", "tag_list"]
        }
        result_dict = ESPerform.get_search_results(es_cli_obj, "topic", q, offset, size)
        logging.info("get result_dict:%s" % result_dict)
@@ -142,7 +142,7 @@ def pictorial_topic(topic_id=-1, offset=0, size=10):
            }
            q["_source"] = {
-                "include": ["id", "update_time"]
+                "includes": ["id", "update_time"]
            }
            q["sort"] = {
                'update_time': {
@@ -155,8 +155,6 @@ def pictorial_topic(topic_id=-1, offset=0, size=10):
            for item in result_dict["hits"]:
                pictorial_id = item["_source"]["id"]
                pictorial_id_list.append(pictorial_id)
-            logging.info("get pictorial_id_list:%s" % pictorial_id_list)
-            logging.info("get topic_tag_list:%s" % topic_tag_list)

        if len(pictorial_id_list) < 10:
            num = 10 - len(pictorial_id_list)
@@ -177,7 +175,7 @@ def pictorial_topic(topic_id=-1, offset=0, size=10):
                }
            }
            q["_source"] = {
-                "include": ["id", "tag_id"]}
+                "includes": ["id", "tag_id"]}
            q["sort"] = {
                'update_time': {
                    'order': 'desc'
@@ -194,9 +192,6 @@ def pictorial_topic(topic_id=-1, offset=0, size=10):
                if id not in pictorial_id_list:
                    pictorial_id_list.append(id)  #
-                    logging.info("get result_dict tag:%s" % result_dict)
-                    logging.info("get pictorial_id_list tag:%s" % pictorial_id_list)

        pictorial_list = pictorial_id_list if len(pictorial_id_list) < 10 else pictorial_id_list[:10]
        return {"pictorial_ids_list": pictorial_list}
...
@@ -27,7 +27,7 @@ def get_discover_page_topic_ids(user_id, device_id, size, query_type=TopicPageTy
        recommend_topic_ids = TopicUtils.get_recommend_topic_ids(user_id=user_id, tag_id=0, offset=0, size=size,single_size=size,
                                                                 query_type=query_type,
-                                                                 filter_topic_id_list=have_read_topic_id_list)
+                                                                 filter_topic_id_list=have_read_topic_id_list,index_type="topic-high-star")

        have_read_topic_id_list.extend(recommend_topic_ids)
@@ -46,18 +46,11 @@ def get_home_recommend_topic_ids(user_id, device_id, tag_id, offset, size, query
                                 query_type=TopicPageType.HOME_RECOMMEND):
    try:
        if query is None:
-            if user_id == -1:
-                redis_key = "physical:home_recommend" + ":user_id:" + str(
-                    user_id) + ":device_id:" + device_id + ":query_type:" + str(query_type)
-            else:
-                redis_key = "physical:home_recommend" + ":user_id:" + str(user_id) + ":query_type:" + str(query_type)
+            redis_key = "physical:home_recommend" + ":user_id:" + str(
+                user_id) + ":device_id:" + device_id + ":query_type:" + str(query_type)
        else:
-            if user_id == -1:
-                redis_key = "physical:home_query" + ":user_id:" + str(
-                    user_id) + ":device_id:" + device_id + ":query:" + str(query) + ":query_type:" + str(query_type)
-            else:
-                redis_key = "physical:home_query" + ":user_id:" + str(user_id) + ":query:" + str(
-                    query) + ":query_type:" + str(query_type)
+            redis_key = "physical:home_query" + ":user_id:" + str(
+                user_id) + ":device_id:" + device_id + ":query:" + str(query) + ":query_type:" + str(query_type)

        redis_field_list = [b'have_read_topic_list']
        redis_field_val_list = redis_client.hmget(redis_key, redis_field_list)
@@ -72,8 +65,12 @@ def get_home_recommend_topic_ids(user_id, device_id, tag_id, offset, size, query
        recommend_topic_ids = []
        have_read_topic_id_list = list()
-        if redis_field_val_list[0] and query is None:
-            have_read_topic_id_list = list(json.loads(redis_field_val_list[0]))
+        if redis_field_val_list[0]:
+            if query is None:
+                have_read_topic_id_list = list(json.loads(redis_field_val_list[0]))
+            else:
+                if offset>0:
+                    have_read_topic_id_list = list(json.loads(redis_field_val_list[0]))

        user_similar_score_redis_key = "physical:user_similar_score:user_id:" + str(user_id)
        redis_user_similar_score_redis_val = redis_client.get(user_similar_score_redis_key)
@@ -84,7 +81,7 @@ def get_home_recommend_topic_ids(user_id, device_id, tag_id, offset, size, query
                                                            single_size=size,query=query, query_type=query_type,
                                                            filter_topic_id_list=have_read_topic_id_list,
                                                            recommend_tag_list=recommend_tag_list,
-                                                            user_similar_score_list=user_similar_score_redis_list)
+                                                            user_similar_score_list=user_similar_score_redis_list,index_type="topic-high-star")

        have_read_group_id_set = set()
        have_read_user_id_set = set()
        unread_topic_id_dict = dict()
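The simplification above means device_id is now always baked into the have-read cache key, whether or not user_id is -1. A minimal sketch of the resulting key scheme; the helper name is illustrative and only mirrors the string concatenation shown in the hunk:

    # Sketch of the cache-key scheme after the change; nothing project-specific
    # beyond the prefixes copied from the code above.
    def home_feed_redis_key(user_id, device_id, query, query_type):
        if query is None:
            return "physical:home_recommend" + ":user_id:" + str(user_id) + \
                   ":device_id:" + device_id + ":query_type:" + str(query_type)
        return "physical:home_query" + ":user_id:" + str(user_id) + \
               ":device_id:" + device_id + ":query:" + str(query) + \
               ":query_type:" + str(query_type)

    print(home_feed_redis_key(-1, "dev-abc", None, 1))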
@@ -170,11 +167,11 @@ def home_recommend(device_id="", user_id=-1, offset=0, size=10, query_type=Topic
    recommend_topic_ids = list()
    es_node_load_high_flag = False
-    try:
-        es_node_load_high_flag = ESPerform.if_es_node_load_high(ESPerform.get_cli())
-    except:
-        logging.error("catch exception,err_msg:%s" % traceback.format_exc())
-        es_node_load_high_flag = True
+    # try:
+    #     es_node_load_high_flag = ESPerform.if_es_node_load_high(ESPerform.get_cli())
+    # except:
+    #     logging.error("catch exception,err_msg:%s" % traceback.format_exc())
+    #     es_node_load_high_flag = True

    if es_node_load_high_flag:
        temp_downgrading_key = "physical:home_recommend:user_id:241407656:query_type:1"
@@ -273,7 +270,7 @@ def topic_detail_page_recommend(device_id="", user_id=-1, topic_id=-1, topic_pic
        result_list = TopicUtils.get_topic_detail_recommend_list(user_id, topic_id, topic_tag_list, topic_pictorial_id,
                                                                 topic_user_id, filter_topic_user_id,
-                                                                 have_read_topic_list, offset, size, es_cli_obj)
+                                                                 have_read_topic_list, offset, size, es_cli_obj,index_type="topic-high-star")
        recommend_topic_ids_list = list()
        if len(result_list) > 0:
            recommend_topic_ids_list = [item["_source"]["id"] for item in result_list]
@@ -348,7 +345,7 @@ def query_topic_by_user_similarity(topic_similarity_score_dict, offset=0, size=1
        must_topic_id_list = list(topic_similarity_score_dict.keys())
        topic_id_list = TopicUtils.get_recommend_topic_ids(tag_id=0, user_id=-1, offset=offset, size=size,single_size=size,
-                                                           must_topic_id_list=must_topic_id_list)
+                                                           must_topic_id_list=must_topic_id_list,index_type="topic-high-star")
        return {"recommend_topic_ids": topic_id_list}
    except:
...
@@ -49,6 +49,7 @@ class Job(object):
    def __call__(self):
        type_info = get_type_info_map()[self._type_name]
        assert isinstance(type_info, TypeInfo)
+        result = type_info.insert_table_chunk(
            sub_index_name=self._sub_index_name,
            table_chunk=self._chunk,
...
{
"dynamic":"strict",
"properties": {
"id":{"type":"long"},
"is_online":{"type":"boolean"},//上线
"is_deleted":{"type":"boolean"},
"vote_num":{"type":"long"},
"reply_num":{"type":"long"},
"name":{"type":"text","analyzer":"gm_default_index","search_analyzer":"gm_default_index"},
"description":{"type":"text","analyzer":"gm_default_index","search_analyzer":"gm_default_index"},
"content":{"type":"text","analyzer":"gm_default_index","search_analyzer":"gm_default_index"},
"content_level":{"type":"text"},
"user_id":{"type":"long"},
"group_id":{"type":"long"}, //所在组ID
"tag_list":{"type":"long"},//标签属性
"edit_tag_list":{"type":"long"},//编辑标签
"tag_name_list":{"type":"text","analyzer":"gm_default_index","search_analyzer":"gm_default_index"},
"share_num":{"type":"long"},
"pick_id_list":{"type":"long"},
"offline_score":{"type":"double"},//离线算分
"manual_score":{"type":"double"},//人工赋分
"has_image":{"type":"boolean"},//是否有图
"has_video":{"type":"boolean"},//是否是视频
"create_time":{"type":"date", "format":"date_time_no_millis"},
"update_time":{"type":"date", "format":"date_time_no_millis"},
"create_time_val":{"type":"long"},
"update_time_val":{"type":"long"},
"language_type":{"type":"long"},
"is_shadow": {"type": "boolean"},
"is_recommend": {"type": "boolean"},
"is_complaint": {"type": "boolean"}, // 是否被举报
"virtual_content_level":{"type": "text"},
"like_num_crawl": {"type": "long"}, // 爬取点赞数
"comment_num_crawl": {"type": "long"}, // 爬取评论数
"is_crawl": {"type": "boolean"},
"platform": {"type": "long"},
"platform_id": {"type": "long"},
"drop_score":{"type": "double"}, // 人工降分
"sort_score":{"type": "double"}, // 排序分
"pictorial_id":{"type": "long"}, //所在组ID
"pictorial_name":{ // 所在组名称
"type": "text",
"analyzer": "gm_default_index",
"search_analyzer": "gm_default_index"
}
}
}
\ No newline at end of file
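The mapping above is not strict JSON because of the // comments, so whatever loads it presumably strips them before parsing (the project's __load_mapping is not shown in this diff). A hedged sketch of loading and registering such a mapping with the same put_mapping call used in libs/es.py; the file path, the write alias name, and the comment-stripping regex are assumptions:

    # Sketch only: strip // comments, parse the mapping, and register it.
    # "topic-high-star.json" and the write alias below are assumed names.
    import json
    import re

    from elasticsearch import Elasticsearch

    def load_mapping(path):
        with open(path, encoding="utf-8") as f:
            text = f.read()
        return json.loads(re.sub(r"//[^\n\"]*", "", text))  # drop trailing // comments

    es_cli = Elasticsearch(["127.0.0.1:9200"])
    es_cli.indices.put_mapping(index="topic-high-star-write",   # assumed write alias
                               doc_type="topic-high-star",
                               body=load_mapping("topic-high-star.json"))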
@@ -27,15 +27,15 @@ class Group(models.Model):
    create_time = models.DateTimeField(verbose_name=u'创建时间',default=datetime.datetime.fromtimestamp(0))
    update_time = models.DateTimeField(verbose_name=u'更新时间', default=datetime.datetime.fromtimestamp(0))

-    # #获取前一天4,5星发帖数
-    # def get_high_quality_topic_num(self):
-    #     yesterday = datetime.datetime.now()-datetime.timedelta(days=1)
-    #     yesterday_begin_time = "%s-%s-%s 00:00:00" % (yesterday.year, yesterday.month, yesterday.day)
-    #     yesterday_end_time = "%s-%s-%s 23:59:59" % (yesterday.year, yesterday.month, yesterday.day)
-    #
-    #     topic_num = self.group_topics.filter(content_level__in=("4","5"),create_time__gte=yesterday_begin_time,create_time__lte=yesterday_end_time).count()
-    #
-    #     return topic_num
+    #获取前一天4,5星发帖数
+    def get_high_quality_topic_num(self):
+        yesterday = datetime.datetime.now()-datetime.timedelta(days=1)
+        yesterday_begin_time = "%s-%s-%s 00:00:00" % (yesterday.year, yesterday.month, yesterday.day)
+        yesterday_end_time = "%s-%s-%s 23:59:59" % (yesterday.year, yesterday.month, yesterday.day)
+
+        topic_num = self.group_topics.filter(content_level__in=("4","5"),create_time__gte=yesterday_begin_time,create_time__lte=yesterday_end_time).count()
+
+        return topic_num

    def detail(self):
        result = {
...
+from django.db import models
import datetime
-from django.db import models
import logging
import traceback
@@ -81,13 +81,9 @@ class Pictorial(models.Model):
            topic_id_list = PictorialTopics.objects.filter(pictorial_id=self.id).values_list("topic_id", flat=True)
-            logging.info("get topic_id_list:%s" % topic_id_list)

            topic_num = Topic.objects.filter(content_level__in=["4", "5"], create_time__gte=yesterday_begin_time,
                                             create_time__lte=yesterday_end_time, id__in=topic_id_list).count()
-            logging.info("get topic_num:%s" % topic_num)

            return topic_num
        except:
@@ -98,9 +94,7 @@ class Pictorial(models.Model):
        try:
            tag_id_list = []
            tags = PictorialTag.objects.filter(pictorial_id=self.id, is_online=True).values_list("tag_id", flat=True)
-            logging.info("get tags:%s" % tags)
            for i in tags:
-                logging.info("get tags i:%s" % i)
                tag_id_list.append(i)
            return tag_id_list
@@ -112,13 +106,9 @@ class Pictorial(models.Model):
    def get_tag_by_name(self, tag_id):
        try:
            tag_name_list = []
-            logging.info("get tag_id:%s" % tag_id)
            tags = Tag.objects.filter(id__in=tag_id, is_online=True).values_list("name", flat=True)
-            logging.info("get tags name :%s" % tags)
            for i in tags:
                tag_name_list.append(i)
-                logging.info("get tags name i:%s" % i)
            return tag_name_list
...
@@ -52,9 +52,9 @@ class Topic(models.Model):
    id = models.IntegerField(verbose_name=u'日记ID', primary_key=True)
    name = models.CharField(verbose_name=u'日记名称', max_length=100)
    # group_id = models.IntegerField(verbose_name='用户所在组ID',default=-1)
-    # group = models.ForeignKey(
-    #     Group, verbose_name=u"关联的小组", related_name=u"group_topics", null=True, blank=True, default=None,
-    #     on_delete=models.CASCADE)
+    group = models.ForeignKey(
+        Group, verbose_name=u"关联的小组", related_name=u"group_topics", null=True, blank=True, default=None,
+        on_delete=models.CASCADE)
    user_id = models.IntegerField(verbose_name=u'用户ID')
    has_video = models.BooleanField(verbose_name=u'是否是视频日记')
    drop_score = models.IntegerField(verbose_name=u'人工赋分', default=0)
@@ -106,8 +106,7 @@ class Topic(models.Model):
        try:
            has_image = False
-            query_list = TopicImage.objects.using(settings.SLAVE_DB_NAME).filter(topic_id=self.id, is_deleted=False,
-                                                                                 is_online=True)
+            query_list = TopicImage.objects.using(settings.SLAVE_DB_NAME).filter(topic_id=self.id, is_deleted=False, is_online=True)
            if len(query_list) > 0:
                has_image = True
@@ -133,13 +132,12 @@ class Topic(models.Model):
            topic_tag_id_list = list()
            edit_tag_id_list = list()
-            tag_id_list = TopicTag.objects.using(settings.SLAVE_DB_NAME).filter(topic_id=self.id).values_list("tag_id",
-                                                                                                              flat=True)
+            tag_id_list = TopicTag.objects.using(settings.SLAVE_DB_NAME).filter(topic_id=self.id).values_list("tag_id", flat=True)
            tag_query_results = Tag.objects.using(settings.SLAVE_DB_NAME).filter(id__in=tag_id_list)
            for tag_item in tag_query_results:
-                is_online = tag_item.is_online
-                is_deleted = tag_item.is_deleted
-                collection = tag_item.collection
+                is_online=tag_item.is_online
+                is_deleted=tag_item.is_deleted
+                collection=tag_item.collection
                if is_online and not is_deleted:
                    topic_tag_id_list.append(tag_item.id)
@@ -149,7 +147,7 @@ class Topic(models.Model):
            return (topic_tag_id_list, edit_tag_id_list)
        except:
            logging.error("catch exception,err_msg:%s" % traceback.format_exc())
-            return ([], [])
+            return ([],[])

    def get_tag_name_list(self, tag_id_list):
        try:
@@ -188,12 +186,9 @@ class Topic(models.Model):
            elif self.content_level == '3':
                offline_score += 2.0

-            exposure_count = ActionSumAboutTopic.objects.using(settings.SLAVE_DB_NAME).filter(topic_id=self.id,
-                                                                                              data_type=1).count()
-            click_count = ActionSumAboutTopic.objects.using(settings.SLAVE_DB_NAME).filter(topic_id=self.id,
-                                                                                           data_type=2).count()
-            uv_num = ActionSumAboutTopic.objects.using(settings.SLAVE_DB_NAME).filter(topic_id=self.id,
-                                                                                      data_type=3).count()
+            exposure_count = ActionSumAboutTopic.objects.using(settings.SLAVE_DB_NAME).filter(topic_id=self.id, data_type=1).count()
+            click_count = ActionSumAboutTopic.objects.using(settings.SLAVE_DB_NAME).filter(topic_id=self.id, data_type=2).count()
+            uv_num = ActionSumAboutTopic.objects.using(settings.SLAVE_DB_NAME).filter(topic_id=self.id, data_type=3).count()

            if exposure_count > 0:
                offline_score += click_count / exposure_count
@@ -224,8 +219,7 @@ class TopicComplaint(models.Model):
    id = models.IntegerField(verbose_name='日记图片ID', primary_key=True)
    user_id = models.BigIntegerField(verbose_name=u'用户ID', db_index=True)
    topic = models.ForeignKey(
-        Topic, verbose_name=u"关联的帖子", null=True, blank=True, default=None, on_delete=models.CASCADE,
-        related_name='complaints')
+        Topic, verbose_name=u"关联的帖子", null=True, blank=True, default=None, on_delete=models.CASCADE, related_name='complaints')
    is_online = models.BooleanField(verbose_name=u"是否有效", default=True)
...
@@ -81,6 +81,9 @@ class TypeInfo(object):
    def bulk_get_data(self, instance_iterable):
        data_list = []

+        # 4,5星帖子单独索引
+        topic_data_high_star_list = list()
+
        if self.batch_get_data_func:
            _pk_list = [getattr(instance, 'pk', None) for instance in instance_iterable]
            not_found_pk_list = []
@@ -137,7 +140,10 @@ class TypeInfo(object):
                    ))
                else:
                    data_list.append(data)
-        return data_list
+                    if self.type=="topic" and instance.content_level and int(instance.content_level)>=4:
+                        topic_data_high_star_list.append(data)
+
+        return (data_list,topic_data_high_star_list)

    def elasticsearch_bulk_insert_data(self, sub_index_name, data_list, es=None):
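The routing condition added to bulk_get_data above only diverts topic documents with content_level 4 or 5 into topic_data_high_star_list. Stated as a standalone predicate (the function name is illustrative, not part of the module):

    # Sketch of the high-star routing rule used in bulk_get_data.
    def is_high_star(type_name, content_level):
        return type_name == "topic" and bool(content_level) and int(content_level) >= 4

    assert is_high_star("topic", "5")
    assert is_high_star("topic", 4)
    assert not is_high_star("topic", "3")
    assert not is_high_star("pictorial", "5")
    assert not is_high_star("topic", None)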
@@ -162,10 +168,10 @@ class TypeInfo(object):
        #     traceback.print_exc()
        #     es_result = 'error'

-        return ESPerform.es_helpers_bulk(es, data_list, sub_index_name, True)
+        return ESPerform.es_helpers_bulk(es, data_list, sub_index_name)

    def elasticsearch_bulk_insert(self, sub_index_name, instance_iterable, es=None):
-        data_list = self.bulk_get_data(instance_iterable)
+        data_list, topic_data_high_star_list = self.bulk_get_data(instance_iterable)
        return self.elasticsearch_bulk_insert_data(
            sub_index_name=sub_index_name,
            data_list=data_list,
@@ -188,7 +194,7 @@ class TypeInfo(object):
        time1=end-begin

        begin = time.time()
-        data_list = self.bulk_get_data(instance_list)
+        data_list, topic_data_high_star_list = self.bulk_get_data(instance_list)
        end = time.time()
        time2=end-begin
@@ -201,6 +207,14 @@ class TypeInfo(object):
            data_list=data_list,
            es=es,
        )
+
+        # 同时写4星及以上的帖子
+        if len(topic_data_high_star_list)>0:
+            self.elasticsearch_bulk_insert_data(
+                sub_index_name="topic-high-star",
+                data_list=topic_data_high_star_list,
+                es=es,
+            )
+
        end = time.time()
        time3=end-begin
        logging.info("duan add,insert_table_by_pk_list time cost:%ds,%ds,%ds,%ds" % (time0,time1,time2,time3))
@@ -215,7 +229,7 @@ class TypeInfo(object):
        stage_1_time = time.time()

-        data_list = self.bulk_get_data(instance_list)
+        data_list, topic_data_high_star_list = self.bulk_get_data(instance_list)

        stage_2_time = time.time()
@@ -260,7 +274,16 @@ def get_type_info_map():
        return _get_type_info_map_result

    type_info_list = [
+        TypeInfo(
+            name='topic-high-star', # >=4星日记
+            type='topic-high-star',
+            model=topic.Topic,
+            query_deferred=lambda: topic.Topic.objects.all().query,
+            get_data_func=TopicTransfer.get_topic_data,
+            bulk_insert_chunk_size=100,
+            round_insert_chunk_size=5,
+            round_insert_period=2,
+        ),
        TypeInfo(
            name='topic', # 日记
            type='topic',
...
@@ -33,7 +33,7 @@ class GroupTransfer(object):
            update_time = instance.update_time
            tzlc_udpate_time = tzlc(update_time)
            res["update_time"] = tzlc_udpate_time
-            # res["high_quality_topic_num"] = instance.get_high_quality_topic_num()
+            res["high_quality_topic_num"] = instance.get_high_quality_topic_num()

            return res
        except:
...
@@ -28,11 +28,11 @@ class TopicTransfer(object):
            res["content_level"] = instance.content_level
            res["user_id"] = instance.user_id
-            # if instance.group:
-            #     res["group_id"] = instance.group.id
-            # else:
-            #     res["group_id"] = -1
+            if instance.group:
+                res["group_id"] = instance.group.id
+            else:
+                res["group_id"] = -1
+            res["share_num"] = instance.share_num

            res["pictorial_id"] = instance.get_pictorial_id()
@@ -82,6 +82,20 @@ class TopicTransfer(object):
            # else:
            #     res["language_type"] = instance.language_type

+            res["is_shadow"] = instance.is_shadow
+            res["is_recommend"] = True if instance.is_recommend else False
+            res["is_complaint"] = instance.is_complaint
+            res["virtual_content_level"] = instance.virtual_content_level
+            res["like_num_crawl"] = instance.like_num_crawl
+            res["comment_num_crawl"]= instance.comment_num_crawl
+            res["is_crawl"] = instance.is_crawl
+            res["platform"] = instance.platform
+            res["platform_id"] = instance.platform_id
+            res["drop_score"] = instance.drop_score
+            res["sort_score"] = instance.sort_score
+
            create_time = instance.create_time
            tzlc_create_time = tzlc(create_time)
@@ -91,11 +105,11 @@ class TopicTransfer(object):
            update_time = instance.update_time
            tzlc_update_time = tzlc(update_time)
-            # res["update_time"] = tzlc_update_time
+            res["update_time"] = tzlc_update_time
            res["update_time_val"] = int(time.mktime(tzlc_update_time.timetuple()))

            logging.info("test topic transfer time cost,time0:%d,time1:%d,time2:%d,time3:%d,time4:%d" % (time0,time1,time2,time3,time4))
            return res
        except:
            logging.error("catch exception,err_msg:%s" % traceback.format_exc())
            return None
\ No newline at end of file