Commit 826968cf authored by 段英荣's avatar 段英荣

Merge branch 'test' into 'master'

# Conflicts:
#   search/utils/topic.py
parents 7d856d9a 2bc3df2e
This diff is collapsed.
......@@ -21,6 +21,7 @@ def write_to_es(es_type, pk_list, use_batch_query_set=False):
type_info = type_info_map[es_type]
logging.info("duan add,es_type:%s" % str(es_type))
logging.info("get es_type:%s"%es_type)
type_info.insert_table_by_pk_list(
sub_index_name=es_type,
pk_list=pk_list,
......
......@@ -111,6 +111,10 @@ class ESPerform(object):
return False
mapping_dict = cls.__load_mapping(sub_index_name)
logging.info("get write_alias_name:%s"%write_alias_name)
logging.info("get mapping_dict:%s"%mapping_dict)
logging.info("get mapping_type:%s"%mapping_type)
es_cli.indices.put_mapping(index=write_alias_name,body=mapping_dict,doc_type=mapping_type)
return True
......
......@@ -47,7 +47,7 @@ class GroupUtils(object):
return {"total_count":0, "hits":[]}
@classmethod
def get_hot_group_recommend_result_list(cls,offset,size,es_cli_obj=None):
def get_hot_pictorial_recommend_result_list(cls,offset,size,es_cli_obj=None):
try:
if not es_cli_obj:
es_cli_obj = ESPerform.get_cli()
......@@ -68,19 +68,19 @@ class GroupUtils(object):
"includes":["id"]
}
result_dict = ESPerform.get_search_results(es_cli_obj,"group",q,offset,size)
result_dict = ESPerform.get_search_results(es_cli_obj,"pictorial",q,offset,size)
group_ids_list = []
pictorial_ids_list = []
if len(result_dict["hits"]) > 0:
group_ids_list = [item["_source"]["id"] for item in result_dict["hits"]]
pictorial_ids_list = [item["_source"]["id"] for item in result_dict["hits"]]
return group_ids_list
return pictorial_ids_list
except:
logging.error("catch exception,err_msg:%s" % traceback.format_exc())
return []
@classmethod
def get_user_attention_group_list(cls,user_id,offset=0,size=10,es_cli_obj=None):
def get_user_attention_pictorial_list(cls,user_id,offset=0,size=10,es_cli_obj=None):
"""
:remark: 获取用户关注小组列表
:return:
......@@ -100,12 +100,12 @@ class GroupUtils(object):
}
}
q["_source"] = {
"includes":["attention_group_id_list"]
"includes":["attention_pictorial_id_list"]
}
result_dict = ESPerform.get_search_results(es_cli_obj,"user",q,offset,size)
if len(result_dict["hits"])>0:
return result_dict["hits"][0]["_source"]["attention_group_id_list"]
return result_dict["hits"][0]["_source"]["attention_pictorial_id_list"]
else:
return []
except:
......@@ -113,7 +113,7 @@ class GroupUtils(object):
return []
@classmethod
def get_group_ids_by_aggs(cls,group_id_list,es_cli_obj=None):
def get_pictorial_ids_by_aggs(cls,pictorial_ids_list,es_cli_obj=None):
"""
:remark:聚合查询获取小组列表
:param group_id_list:
......@@ -127,13 +127,13 @@ class GroupUtils(object):
q["size"]=0
q["query"] = {
"terms":{
"group_id":group_id_list
"pictorial_id":pictorial_ids_list
}
}
q["aggs"] = {
"group_ids":{
"pictorial_ids":{
"terms":{
"field":"group_id"
"field":"pictorial_id"
},
"aggs":{
"max_date":{
......@@ -146,12 +146,12 @@ class GroupUtils(object):
}
result_dict = ESPerform.get_search_results(es_cli_obj,"topic",q,aggregations_query=True)
buckets_list = result_dict["aggregations"]["group_ids"]["buckets"]
buckets_list = result_dict["aggregations"]["pictorial_ids"]["buckets"]
sorted_buckets_list = sorted(buckets_list,key=lambda item:item["max_date"]["value"],reverse=True)
sorted_group_id_list = [item["key"] for item in sorted_buckets_list]
sorted_pictorial_id_list = [item["key"] for item in sorted_buckets_list]
return sorted_group_id_list
return sorted_pictorial_id_list
except:
logging.error("catch exception,err_msg:%s" % traceback.format_exc())
return []
\ No newline at end of file
......@@ -32,7 +32,7 @@ class TopicUtils(object):
}
}
q["_source"] = ["tag_list","attention_user_id_list", "pick_user_id_list", "same_group_user_id_list"]
q["_source"] = ["tag_list","attention_user_id_list", "pick_user_id_list", "same_pictorial_user_id_list"]
result_dict = ESPerform.get_search_results(ESPerform.get_cli(), "user", q, offset, size)
......@@ -101,7 +101,7 @@ class TopicUtils(object):
}
},
"_source":{
"includes": ["id", "group_id", "offline_score", "user_id", "edit_tag_list"]
"includes": ["id", "pictorial_id", "offline_score", "user_id", "edit_tag_list"]
}
}
result_dict = ESPerform.get_search_results(ESPerform.get_cli(), sub_index_name="topic", query_body=q,
......@@ -136,6 +136,7 @@ class TopicUtils(object):
attention_user_id_list = list()
pick_user_id_list = list()
# same_group_id_list = list()
same_pictorial_id_list = list()
user_tag_list = list()
result_dict = TopicUtils.get_related_user_info(user_id, 0, 1)
......@@ -151,9 +152,13 @@ class TopicUtils(object):
# same_group_user_info_list = result_dict["hits"][0]["_source"]["same_group_user_id_list"]
# same_group_id_list = [item["user_id"] for item in same_group_user_info_list]
# same_group_id_list = same_group_id_list[:100]
same_pictorial_user_info_list = result_dict["hits"][0]["_source"]["same_pictorial_user_id_list"]
same_pictorial_id_list = [item["user_id"] for item in same_pictorial_user_info_list]
same_pictorial_id_list = same_pictorial_id_list[:100]
user_tag_list = result_dict["hits"][0]["_source"]["tag_list"]
logging.info("get same_pictorial_id_list :%s"%same_pictorial_id_list)
q = dict()
q["query"] = dict()
......@@ -212,6 +217,14 @@ class TopicUtils(object):
# "weight": 1
# }
# )
if len(same_pictorial_id_list)>0:
functions_list.append(
{
"filter": {"bool": {
"should": {"terms":{"user_id":same_pictorial_id_list}}}},
"weight": 1
}
)
# query_tag_term_list = cls.___get_should_term_list(user_tag_list)
if len(user_tag_list)>0:
......@@ -304,7 +317,7 @@ class TopicUtils(object):
"field": "user_id"
}
q["_source"] = {
"includes":["id","group_id","offline_score","user_id","edit_tag_list"]
"includes":["id","pictorial_id","offline_score","user_id","edit_tag_list"]
}
q["sort"] = [
{
......@@ -325,7 +338,7 @@ class TopicUtils(object):
offset=offset, size=size)
topic_id_list = list()
same_group_id_set = set()
same_pictorial_id_set = set()
same_user_id_set = set()
for item in result_dict["hits"]:
......@@ -362,7 +375,7 @@ class TopicUtils(object):
return list()
@classmethod
def get_topic_detail_recommend_list(cls,user_id,topic_id,topic_tag_list,topic_group_id,topic_user_id,filter_topic_user_id,have_read_topic_list,offset,size,es_cli_obj=None):
def get_topic_detail_recommend_list(cls,user_id,topic_id,topic_tag_list,topic_pictorial_id,topic_user_id,filter_topic_user_id,have_read_topic_list,offset,size,es_cli_obj=None):
"""
:remark 帖子详情页推荐列表,缺少按时间衰减
:param user_id:
......@@ -395,11 +408,11 @@ class TopicUtils(object):
}
}
]
if isinstance(topic_group_id,int) and topic_group_id > 0:
if isinstance(topic_pictorial_id,int) and topic_pictorial_id > 0:
functions_list.append(
{
"filter": {"term": {
"group_id": topic_group_id}},
"pictorial_id": topic_pictorial_id}},
"weight": 1,
}
)
......@@ -435,7 +448,7 @@ class TopicUtils(object):
}
q["query"]["function_score"] = query_function_score
q["_source"] = {
"includes":["id","group_id","user_id","_score"]
"includes":["id","pictorial_id","user_id","_score"]
}
result_dict = ESPerform.get_search_results(es_cli_obj, sub_index_name="topic", query_body=q,
......@@ -547,7 +560,7 @@ class TopicUtils(object):
"function_score":query_function_score
}
q["_source"] = {
"includes":["id","group_id","user_id","_score","offline_score","manual_score"]
"includes":["id","pictorial_id","user_id","_score","offline_score","manual_score"]
}
q["sort"] = [
{
......
......@@ -12,7 +12,7 @@ from search.utils.common import GroupSortTypes
from libs.es import ESPerform
@bind("physical/search/query_group")
@bind("physical/search/query_pictorial")
def query_group(query="",offset=0,size=10):
"""
:remark:小组搜索排序策略,缺少排序策略
......@@ -33,8 +33,8 @@ def query_group(query="",offset=0,size=10):
logging.error("catch exception,err_msg:%s" % traceback.format_exc())
return {"group_ids":[]}
@bind("physical/search/group_sort")
def group_sort(user_id=-1,sort_type=GroupSortTypes.HOT_RECOMMEND,offset=0,size=10):
@bind("physical/search/pictorial_sort")
def pictorial_sort(user_id=-1,sort_type=GroupSortTypes.HOT_RECOMMEND,offset=0,size=10):
"""
:remark 小组排序,缺少:前1天发评论人数*x
:param user_id:
......@@ -51,27 +51,27 @@ def group_sort(user_id=-1,sort_type=GroupSortTypes.HOT_RECOMMEND,offset=0,size=1
es_cli_obj = ESPerform.get_cli()
if sort_type==GroupSortTypes.HOT_RECOMMEND:
group_ids_list = GroupUtils.get_hot_group_recommend_result_list(offset,size,es_cli_obj)
pictorial_ids_list = GroupUtils.get_hot_pictorial_recommend_result_list(offset,size,es_cli_obj)
return {"group_recommend_ids":group_ids_list}
return {"pictorial_recommend_ids":pictorial_ids_list}
elif sort_type==GroupSortTypes.ATTENTION_RECOMMEND:
attention_group_list = GroupUtils.get_user_attention_group_list(user_id,offset=0,size=1,es_cli_obj=es_cli_obj)
if len(attention_group_list)==0:
return {"group_recommend_ids": []}
attention_pictorial_list = GroupUtils.get_user_attention_pictorial_list(user_id,offset=0,size=1,es_cli_obj=es_cli_obj)
if len(attention_pictorial_list)==0:
return {"pictorial_recommend_ids": []}
else:
attention_group_id_list = [item["group_id"] for item in attention_group_list]
sorted_group_ids_list = GroupUtils.get_group_ids_by_aggs(attention_group_id_list,es_cli_obj)
attention_pictorial_id_list = [item["pictorial_id"] for item in attention_pictorial_list]
sorted_pictorial_ids_list = GroupUtils.get_pictorial_ids_by_aggs(attention_pictorial_id_list,es_cli_obj)
group_recommend_ids_list = sorted_group_ids_list
pictorial_recommend_ids_list = sorted_pictorial_ids_list
#if len(group_recommend_ids_list) < size and len(group_recommend_ids_list)<len(attention_group_list):
sorted_attention_group_list = sorted(attention_group_list,key=lambda item:item["update_time_val"],reverse=True)
for item in sorted_attention_group_list:
if item["group_id"] not in group_recommend_ids_list:
group_recommend_ids_list.append(item["group_id"])
sorted_attention_pictorial_list = sorted(attention_pictorial_list,key=lambda item:item["update_time_val"],reverse=True)
for item in sorted_attention_pictorial_list:
if item["pictorial_id"] not in pictorial_recommend_ids_list:
pictorial_recommend_ids_list.append(item["pictorial_id"])
return {"group_recommend_ids": group_recommend_ids_list[offset:(offset+size)]}
return {"pictorial_recommend_ids": pictorial_recommend_ids_list[offset:(offset+size)]}
except:
logging.error("catch exception,err_msg:%s" % traceback.format_exc())
return {"group_recommend_ids":[]}
return {"pictorial_recommend_ids":[]}
......@@ -247,7 +247,7 @@ def home_query(device_id="", tag_id=-1, user_id=-1, query="", offset=0, size=10)
@bind("physical/search/topic_detail_page_recommend")
def topic_detail_page_recommend(device_id="", user_id=-1, topic_id=-1, topic_group_id=-1, topic_user_id=-1,
def topic_detail_page_recommend(device_id="", user_id=-1, topic_id=-1, topic_pictorial_id=-1, topic_user_id=-1,
filter_topic_user_id=False, offset=0, size=10):
"""
:remark:帖子详情页推荐策略,缺少第一个卡片策略
......@@ -271,7 +271,7 @@ def topic_detail_page_recommend(device_id="", user_id=-1, topic_id=-1, topic_gro
# 获取帖子标签列表
topic_tag_list = TopicUtils.get_topic_tag_id_list(topic_id, es_cli_obj)
result_list = TopicUtils.get_topic_detail_recommend_list(user_id, topic_id, topic_tag_list, topic_group_id,
result_list = TopicUtils.get_topic_detail_recommend_list(user_id, topic_id, topic_tag_list, topic_pictorial_id,
topic_user_id, filter_topic_user_id,
have_read_topic_list, offset, size, es_cli_obj)
recommend_topic_ids_list = list()
......
......@@ -49,7 +49,6 @@ class Job(object):
def __call__(self):
type_info = get_type_info_map()[self._type_name]
assert isinstance(type_info, TypeInfo)
result = type_info.insert_table_chunk(
sub_index_name=self._sub_index_name,
table_chunk=self._chunk,
......
{
    "dynamic":"strict",
    "properties": {
        "id":{"type":"long"},
        "is_online":{"type":"boolean"},// whether the record is online
        "is_deleted":{"type":"boolean"},
        "is_recommend":{"type":"boolean"},
        "name":{"type":"text","analyzer":"gm_default_index","search_analyzer":"gm_default_index"},
        "description":{"type":"text","analyzer":"gm_default_index","search_analyzer":"gm_default_index"},
        "topic_num":{"type":"long"},
        "creator_id":{"type":"long"},
        "icon":{"type":"text"},
        "high_quality_topic_num":{"type":"long"},// number of 4- and 5-star topics posted in this group during the previous day
        "create_time":{"type":"date", "format":"date_time_no_millis"},
        "update_time":{"type":"date", "format":"date_time_no_millis"}
    }
}
\ No newline at end of file
{
    "dynamic":"strict",
    "properties": {
        "id":{"type":"long"},
        "is_online":{"type":"boolean"},// whether the record is online
        "is_deleted":{"type":"boolean"},
        "vote_num":{"type":"long"},
        "reply_num":{"type":"long"},
        "name":{"type":"text","analyzer":"gm_default_index","search_analyzer":"gm_default_index"},
        "description":{"type":"text","analyzer":"gm_default_index","search_analyzer":"gm_default_index"},
        "content":{"type":"text","analyzer":"gm_default_index","search_analyzer":"gm_default_index"},
        "content_level":{"type":"text"},
        "user_id":{"type":"long"},
        "group_id":{"type":"long"}, // ID of the group the topic belongs to
        "tag_list":{"type":"long"},// tag attributes
        "edit_tag_list":{"type":"long"},// editor-assigned tags
        "tag_name_list":{"type":"text","analyzer":"gm_default_index","search_analyzer":"gm_default_index"},
        "share_num":{"type":"long"},
        "pick_id_list":{"type":"long"},
        "offline_score":{"type":"double"},// score computed offline
        "manual_score":{"type":"double"},// manually assigned score
        "has_image":{"type":"boolean"},// whether the topic has images
        "has_video":{"type":"boolean"},// whether the topic is a video
        "create_time":{"type":"date", "format":"date_time_no_millis"},
        "update_time":{"type":"date", "format":"date_time_no_millis"},
        "create_time_val":{"type":"long"},
        "update_time_val":{"type":"long"},
        "language_type":{"type":"long"},
        "is_shadow": {"type": "boolean"},
        "is_recommend": {"type": "boolean"},
        "is_complaint": {"type": "boolean"}, // whether the topic has been reported
        "virtual_content_level":{"type": "text"},
        "like_num_crawl": {"type": "long"}, // crawled like count
        "comment_num_crawl": {"type": "long"}, // crawled comment count
        "is_crawl": {"type": "boolean"},
        "platform": {"type": "long"},
        "platform_id": {"type": "long"},
        "drop_score":{"type": "double"}, // manual score reduction
        "sort_score":{"type": "double"}, // score used for sorting
        "pictorial_id":{"type": "long"}, // ID(s) of the pictorial(s) the topic belongs to
        "pictorial_name":{ // name of the pictorial the topic belongs to
            "type": "text",
            "analyzer": "gm_default_index",
            "search_analyzer": "gm_default_index"
        }
    }
}
......@@ -50,4 +50,4 @@
}
}
}
\ No newline at end of file
}
......@@ -28,17 +28,31 @@
"country_id":{"type":"text"}
}
},
"same_group_user_id_list":{//同组用户列表
// "same_group_user_id_list":{//同组用户列表
// "type":"nested",
// "properties":{
// "user_id":{"type":"long"},
// "country_id":{"type":"text"}
// }
// },
// "attention_group_id_list":{//关注小组列表
// "type":"nested",
// "properties":{
// "group_id":{"type":"long"},
// "update_time_val":{"type":"long"}
// }
// },
"same_pictorial_user_id_list":{//同画报用户列表
"type":"nested",
"properties":{
"user_id":{"type":"long"},
"country_id":{"type":"text"}
}
},
"attention_group_id_list":{//关注小组列表
"attention_pictorial_id_list":{//关注画报列表
"type":"nested",
"properties":{
"group_id":{"type":"long"},
"pictorial_id":{"type":"long"},
"update_time_val":{"type":"long"}
}
},
......
......@@ -27,15 +27,15 @@ class Group(models.Model):
create_time = models.DateTimeField(verbose_name=u'创建时间',default=datetime.datetime.fromtimestamp(0))
update_time = models.DateTimeField(verbose_name=u'更新时间', default=datetime.datetime.fromtimestamp(0))
#获取前一天4,5星发帖数
def get_high_quality_topic_num(self):
yesterday = datetime.datetime.now()-datetime.timedelta(days=1)
yesterday_begin_time = "%s-%s-%s 00:00:00" % (yesterday.year, yesterday.month, yesterday.day)
yesterday_end_time = "%s-%s-%s 23:59:59" % (yesterday.year, yesterday.month, yesterday.day)
topic_num = self.group_topics.filter(content_level__in=("4","5"),create_time__gte=yesterday_begin_time,create_time__lte=yesterday_end_time).count()
return topic_num
# #获取前一天4,5星发帖数
# def get_high_quality_topic_num(self):
# yesterday = datetime.datetime.now()-datetime.timedelta(days=1)
# yesterday_begin_time = "%s-%s-%s 00:00:00" % (yesterday.year, yesterday.month, yesterday.day)
# yesterday_end_time = "%s-%s-%s 23:59:59" % (yesterday.year, yesterday.month, yesterday.day)
#
# topic_num = self.group_topics.filter(content_level__in=("4","5"),create_time__gte=yesterday_begin_time,create_time__lte=yesterday_end_time).count()
#
# return topic_num
def detail(self):
result = {
......
# from django.db import models
from django.db import models
import datetime
import logging
import traceback
# from .topic import Topic
class PictorialTopic(models.Model):
"""画报帖子关系"""
# class PictorialTopic(models.Model):
# """画报帖子关系"""
class Meta:
verbose_name = u'画报帖子关系'
app_label = 'community'
db_table = 'community_pictorial_topic'
# class Meta:
# verbose_name = u'画报帖子关系'
# app_label = 'community'
# db_table = 'community_pictorial_topic'
id = models.IntegerField(verbose_name=u'日记ID', primary_key=True)
pictorial_id = models.BigIntegerField(verbose_name=u'画报ID')
topic_id = models.BigIntegerField(verbose_name=u'帖子ID')
is_online = models.BooleanField(verbose_name=u"是否有效", default=True)
is_online = models.BooleanField(verbose_name=u'是否上线')
is_deleted = models.BooleanField(verbose_name=u'是否删除')
# id = models.IntegerField(verbose_name=u'日记ID', primary_key=True)
# pictorial_id = models.BigIntegerField(verbose_name=u'画报ID')
# topic_id = models.BigIntegerField(verbose_name=u'帖子ID')
# user_id = models.BigIntegerField(verbose_name=u'用户ID')
# is_online = models.BooleanField(verbose_name=u"是否有效", default=True)
# is_online = models.BooleanField(verbose_name=u'是否上线')
# is_deleted = models.BooleanField(verbose_name=u'是否删除')
class PictorialFollow(models.Model):
    """Follow relation between a user and a pictorial.

    Each row links one ``user_id`` to one ``pictorial_id`` and is stored in
    the ``community_pictorial_follow`` table.
    """

    class Meta:
        verbose_name = u"画报用户关系"
        app_label = "community"
        db_table = "community_pictorial_follow"

    # Primary key of the follow row.
    id = models.IntegerField(verbose_name=u'关注ID', primary_key=True)
    # NOTE(review): ``fromtimestamp(0)`` is called once, when the class body
    # is executed, and yields the Unix epoch as a naive local-time datetime;
    # that fixed value is the default for both timestamps below.
    create_time = models.DateTimeField(verbose_name=u'创建时间', default=datetime.datetime.fromtimestamp(0))
    update_time = models.DateTimeField(verbose_name=u'更新时间', default=datetime.datetime.fromtimestamp(0))
    # Online / soft-delete flags of the follow row.
    is_online = models.BooleanField(verbose_name=u'是否上线')
    is_deleted = models.BooleanField(verbose_name=u'是否删除')
    # The followed pictorial and the following user (plain IDs, no foreign keys).
    pictorial_id = models.BigIntegerField(verbose_name=u'画报ID')
    user_id = models.BigIntegerField(verbose_name=u'用户ID')
class Pictorial(models.Model):
    """A pictorial record stored in the ``community_pictorial`` table."""

    class Meta:
        verbose_name = u"画报"
        app_label = "community"
        db_table = "community_pictorial"

    # NOTE(review): several verbose_name values below look copy-pasted
    # (e.g. ``id`` and ``icon``); they are left untouched because they are
    # runtime strings.
    id = models.IntegerField(verbose_name=u'关注ID', primary_key=True)
    create_time = models.DateTimeField(verbose_name=u'创建时间', default=datetime.datetime.fromtimestamp(0))
    update_time = models.DateTimeField(verbose_name=u'更新时间', default=datetime.datetime.fromtimestamp(0))
    is_online = models.BooleanField(verbose_name=u'是否上线')
    is_deleted = models.BooleanField(verbose_name=u'是否删除')
    is_recommend = models.BooleanField(verbose_name=u'推荐')
    name = models.CharField(verbose_name=u'画报名称', max_length=100)
    description = models.CharField(verbose_name=u'画报描述', max_length=200)
    creator_id = models.BigIntegerField(verbose_name=u'画报用户ID')
    icon = models.CharField(verbose_name=u'画报名称', max_length=255)
    topic_num = models.IntegerField(verbose_name=u'次数')

    def get_high_quality_topic_num(self):
        """Count this pictorial's 4- and 5-star topics created yesterday.

        The topic IDs linked to this pictorial are read from
        ``PictorialTopic``; the topics among them whose ``content_level`` is
        ``"4"`` or ``"5"`` and whose ``create_time`` falls within the
        previous calendar day (server local time) are counted.

        Returns:
            int: the number of matching topics. ``0`` is returned when the
            pictorial has no topics or when the lookup fails, so the value
            always fits the numeric ``high_quality_topic_num`` index field.
        """
        try:
            # Imported lazily: the topic module imports this module at load
            # time, so a module-level import here would be circular.
            from .topic import Topic

            today_start = datetime.datetime.now().replace(hour=0, minute=0, second=0, microsecond=0)
            yesterday_start = today_start - datetime.timedelta(days=1)

            # Materialise once so the ID list is fetched with a single query.
            topic_id_list = list(
                PictorialTopic.objects.filter(pictorial_id=self.id).values_list("topic_id", flat=True)
            )
            if not topic_id_list:
                return 0

            # Half-open interval [yesterday 00:00, today 00:00) covers the
            # whole previous day, including its last second.
            topic_num = Topic.objects.filter(
                id__in=topic_id_list,
                content_level__in=("4", "5"),
                create_time__gte=yesterday_start,
                create_time__lt=today_start,
            ).count()
            logging.info("get topic_num:%s" % topic_num)
            return topic_num
        except Exception:
            logging.error("catch exception,err_msg:%s" % traceback.format_exc())
            return 0
......@@ -16,6 +16,7 @@ from .pick_topic import PickTopic
from .tag import TopicTag, Tag
from .user_extra import UserExtra
from .group import Group
from .pictorial import PictorialTopic
class ActionSumAboutTopic(models.Model):
......@@ -53,9 +54,9 @@ class Topic(models.Model):
id = models.IntegerField(verbose_name=u'日记ID', primary_key=True)
name = models.CharField(verbose_name=u'日记名称', max_length=100)
# group_id = models.IntegerField(verbose_name='用户所在组ID',default=-1)
group = models.ForeignKey(
Group, verbose_name=u"关联的小组", related_name=u"group_topics", null=True, blank=True, default=None,
on_delete=models.CASCADE)
# group = models.ForeignKey(
# Group, verbose_name=u"关联的小组", related_name=u"group_topics", null=True, blank=True, default=None,
# on_delete=models.CASCADE)
user_id = models.IntegerField(verbose_name=u'用户ID')
has_video = models.BooleanField(verbose_name=u'是否是视频日记')
drop_score = models.IntegerField(verbose_name=u'人工赋分', default=0)
......@@ -83,6 +84,21 @@ class Topic(models.Model):
platform = models.IntegerField(verbose_name=u'平台来源', choices=GRAP_PLATFORM, default=GRAP_PLATFORM.ALPHA)
platform_id = models.BigIntegerField(verbose_name='用平台ID', null=True)
def get_pictorial_id(self):
    """Return the IDs of all pictorials that contain this topic.

    Returns:
        list: the ``pictorial_id`` values of every ``PictorialTopic`` row
        whose ``topic_id`` equals this topic's ``id``. An empty list is
        returned when there are no such rows or when the query fails.
    """
    try:
        pictorial_id_list = list(
            PictorialTopic.objects.filter(topic_id=self.id).values_list("pictorial_id", flat=True)
        )
        if pictorial_id_list:
            # ``self.id`` is the topic's primary key, so label it as such.
            logging.info("get topic id:%s,pictorial id list:%s" % (self.id, pictorial_id_list))
        return pictorial_id_list
    except Exception:
        logging.error("catch exception,err_msg:%s" % traceback.format_exc())
        return []
@property
def is_complaint(self):
"""是否被举报"""
......@@ -96,7 +112,8 @@ class Topic(models.Model):
try:
has_image = False
query_list = TopicImage.objects.using(settings.SLAVE_DB_NAME).filter(topic_id=self.id, is_deleted=False, is_online=True)
query_list = TopicImage.objects.using(settings.SLAVE_DB_NAME).filter(topic_id=self.id, is_deleted=False,
is_online=True)
if len(query_list) > 0:
has_image = True
......@@ -122,12 +139,13 @@ class Topic(models.Model):
topic_tag_id_list = list()
edit_tag_id_list = list()
tag_id_list = TopicTag.objects.using(settings.SLAVE_DB_NAME).filter(topic_id=self.id).values_list("tag_id", flat=True)
tag_id_list = TopicTag.objects.using(settings.SLAVE_DB_NAME).filter(topic_id=self.id).values_list("tag_id",
flat=True)
tag_query_results = Tag.objects.using(settings.SLAVE_DB_NAME).filter(id__in=tag_id_list)
for tag_item in tag_query_results:
is_online=tag_item.is_online
is_deleted=tag_item.is_deleted
collection=tag_item.collection
is_online = tag_item.is_online
is_deleted = tag_item.is_deleted
collection = tag_item.collection
if is_online and not is_deleted:
topic_tag_id_list.append(tag_item.id)
......@@ -137,7 +155,7 @@ class Topic(models.Model):
return (topic_tag_id_list, edit_tag_id_list)
except:
logging.error("catch exception,err_msg:%s" % traceback.format_exc())
return ([],[])
return ([], [])
def get_tag_name_list(self, tag_id_list):
try:
......@@ -177,9 +195,12 @@ class Topic(models.Model):
elif self.content_level == '3':
offline_score += 2.0
exposure_count = ActionSumAboutTopic.objects.using(settings.SLAVE_DB_NAME).filter(topic_id=self.id, data_type=1).count()
click_count = ActionSumAboutTopic.objects.using(settings.SLAVE_DB_NAME).filter(topic_id=self.id, data_type=2).count()
uv_num = ActionSumAboutTopic.objects.using(settings.SLAVE_DB_NAME).filter(topic_id=self.id, data_type=3).count()
exposure_count = ActionSumAboutTopic.objects.using(settings.SLAVE_DB_NAME).filter(topic_id=self.id,
data_type=1).count()
click_count = ActionSumAboutTopic.objects.using(settings.SLAVE_DB_NAME).filter(topic_id=self.id,
data_type=2).count()
uv_num = ActionSumAboutTopic.objects.using(settings.SLAVE_DB_NAME).filter(topic_id=self.id,
data_type=3).count()
if exposure_count > 0:
offline_score += click_count / exposure_count
......@@ -210,6 +231,7 @@ class TopicComplaint(models.Model):
id = models.IntegerField(verbose_name='日记图片ID', primary_key=True)
user_id = models.BigIntegerField(verbose_name=u'用户ID', db_index=True)
topic = models.ForeignKey(
Topic, verbose_name=u"关联的帖子", null=True, blank=True, default=None, on_delete=models.CASCADE, related_name='complaints')
Topic, verbose_name=u"关联的帖子", null=True, blank=True, default=None, on_delete=models.CASCADE,
related_name='complaints')
is_online = models.BooleanField(verbose_name=u"是否有效", default=True)
\ No newline at end of file
is_online = models.BooleanField(verbose_name=u"是否有效", default=True)
......@@ -15,7 +15,7 @@ from .group_user_role import GroupUserRole
from .tag import AccountUserTag
from .topic import Topic
from .user_extra import UserExtra
from .pictorial import PictorialFollow
class User(models.Model):
class Meta:
......@@ -91,6 +91,26 @@ class User(models.Model):
logging.error("catch exception,err_msg:%s" % traceback.format_exc())
return []
def get_attention_pictorial_id_list(self):
    """Return the pictorials this user follows, with follow update times.

    Reads the online ``PictorialFollow`` rows of ``self.user_id`` from the
    database named by ``settings.SLAVE_DB_NAME``.

    Returns:
        list[dict]: one ``{"pictorial_id": ..., "update_time_val": ...}``
        entry per follow row, where ``update_time_val`` is an integer Unix
        timestamp in seconds derived from the row's ``update_time``. An
        empty list is returned when the lookup fails.
    """
    try:
        query_results = PictorialFollow.objects.using(settings.SLAVE_DB_NAME).filter(
            is_online=True, user_id=self.user_id)

        # The queryset itself is deliberately not logged: formatting a lazy
        # queryset would run an additional query just for the log line.
        attention_pictorial_id_list = [
            {
                "pictorial_id": item.pictorial_id,
                # presumably tzlc localizes the datetime; verify in libs.tools.
                # Cast to int because the index maps this field as a long.
                "update_time_val": int(time.mktime(tzlc(item.update_time).timetuple())),
            }
            for item in query_results
        ]

        logging.info("get user_id:%s" % self.user_id)
        logging.info("get attention_pictorial_id_list:%s" % attention_pictorial_id_list)
        return attention_pictorial_id_list
    except Exception:
        logging.error("catch exception,err_msg:%s" % traceback.format_exc())
        return []
def get_pick_user_id_list(self):
pick_topic_id_list = list()
user_picks = self.user_pick.using(settings.SLAVE_DB_NAME).filter(is_deleted=False, is_pick=True)
......@@ -117,32 +137,35 @@ class User(models.Model):
return pick_user_detail_list
def get_same_group_user_id_list(self):
def get_same_pictorial_user_id_list(self):
#todo 有的同组数据过大,导致celery cpu过高,暂时限制同组的数据大小,后续可能会去掉同组的数据
same_group_user_id_list = list()
group_items_list = list(GroupUserRole.objects.using(settings.SLAVE_DB_NAME).filter(user_id=self.user_id).values_list("group_id",flat=True))
for group_id in group_items_list:
user_items_list = list(GroupUserRole.objects.using(settings.SLAVE_DB_NAME).filter(group_id=group_id).values_list("user_id",flat=True))
same_pictorial_user_id_list = list()
pictorial_items_list = list(PictorialFollow.objects.using(settings.SLAVE_DB_NAME).filter(user_id=self.user_id).values_list("pictorial_id",flat=True))
for pictorial_id in pictorial_items_list:
user_items_list = list(PictorialFollow.objects.using(settings.SLAVE_DB_NAME).filter(pictorial_id=pictorial_id).values_list("user_id",flat=True))
for user_id in user_items_list:
same_group_user_id_list.append(user_id)
if len(same_group_user_id_list)>=100:
same_pictorial_user_id_list.append(user_id)
if len(same_pictorial_user_id_list)>=100:
break
if len(same_group_user_id_list)>=100:
if len(same_pictorial_user_id_list)>=100:
break
logging.info("get same user_id:%s"%self.user_id)
logging.info("get same_pictorial_user_id_list:%s"%same_pictorial_user_id_list)
same_group_detail_list = list()
for i in range(0, len(same_group_user_id_list), 200):
sql_data_list = User.objects.using(settings.SLAVE_DB_NAME).filter(user_id__in=same_group_user_id_list[i:i + 1000])
same_pictorial_detail_list = list()
for i in range(0, len(same_pictorial_user_id_list), 200):
sql_data_list = User.objects.using(settings.SLAVE_DB_NAME).filter(user_id__in=same_pictorial_user_id_list[i:i + 1000])
for detail_data in sql_data_list:
item = {
"user_id": detail_data.user_id,
"country_id": detail_data.country_id
}
same_group_detail_list.append(item)
same_pictorial_detail_list.append(item)
return same_group_detail_list
return same_pictorial_detail_list
def get_user_tag_id_list(self):
try:
......
......@@ -12,11 +12,12 @@ import elasticsearch
import elasticsearch.helpers
import sys
from trans2es.models import topic, user, pick_celebrity, group, celebrity, tag, contrast_similar
from trans2es.models import topic, user, pick_celebrity, group, celebrity, tag, contrast_similar,pictorial
from trans2es.utils.user_transfer import UserTransfer
from trans2es.utils.pick_celebrity_transfer import PickCelebrityTransfer
from trans2es.utils.group_transfer import GroupTransfer
from trans2es.utils.topic_transfer import TopicTransfer
from trans2es.utils.pictorial_transfer import PictorialTransfer
from trans2es.utils.celebrity_transfer import CelebrityTransfer
from trans2es.utils.tag_transfer import TagTransfer
from trans2es.utils.contrast_similar_transfer import Contrast_Similar_Transfer
......@@ -192,6 +193,9 @@ class TypeInfo(object):
time2=end-begin
begin = time.time()
logging.info("get sub_index_name:%s"%sub_index_name)
logging.info("get data_list:%s"%data_list)
self.elasticsearch_bulk_insert_data(
sub_index_name=sub_index_name,
data_list=data_list,
......@@ -256,6 +260,7 @@ def get_type_info_map():
return _get_type_info_map_result
type_info_list = [
TypeInfo(
name='topic', # 日记
type='topic',
......@@ -266,6 +271,16 @@ def get_type_info_map():
round_insert_chunk_size=5,
round_insert_period=2,
),
TypeInfo(
name='topic-v1', # 日记
type='topic-v1',
model=topic.Topic,
query_deferred=lambda: topic.Topic.objects.all().query,
get_data_func=TopicTransfer.get_topic_data,
bulk_insert_chunk_size=100,
round_insert_chunk_size=5,
round_insert_period=2,
),
TypeInfo(
name="user", # 用户
type="user",
......@@ -330,7 +345,18 @@ def get_type_info_map():
bulk_insert_chunk_size=100,
round_insert_chunk_size=5,
round_insert_period=2
),
TypeInfo(
name="pictorial", # 画报
type="pictorial",
model=pictorial.Pictorial,
query_deferred=lambda: pictorial.Pictorial.objects.all().query,
get_data_func=PictorialTransfer.get_poctorial_data,
bulk_insert_chunk_size=100,
round_insert_chunk_size=5,
round_insert_period=2,
)
]
type_info_map = {
......@@ -340,3 +366,4 @@ def get_type_info_map():
_get_type_info_map_result = type_info_map
return type_info_map
......@@ -33,7 +33,7 @@ class GroupTransfer(object):
update_time = instance.update_time
tzlc_udpate_time = tzlc(update_time)
res["update_time"] = tzlc_udpate_time
res["high_quality_topic_num"] = instance.get_high_quality_topic_num()
# res["high_quality_topic_num"] = instance.get_high_quality_topic_num()
return res
except:
......
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import os
import sys
import logging
import traceback
from libs.tools import tzlc
class PictorialTransfer(object):
    """Builds index documents from pictorial model instances."""

    def __init__(self):
        pass

    @classmethod
    def get_poctorial_data(cls, instance):
        """Convert one pictorial instance into a plain dict document.

        :param instance: object exposing the pictorial attributes read below
            and a ``get_high_quality_topic_num()`` method.
        :return: dict holding the copied fields, both timestamps passed
            through ``tzlc`` and the high-quality topic count; an empty dict
            if anything raises while building it.
        """
        try:
            document = {
                "id": instance.id,
                "is_online": instance.is_online,
                "is_deleted": instance.is_deleted,
                "is_recommend": instance.is_recommend,
                "name": instance.name,
                "description": instance.description,
                "topic_num": instance.topic_num,
                "creator_id": instance.creator_id,
                "icon": instance.icon,
            }

            # Timestamps go through tzlc before being stored.
            document["create_time"] = tzlc(instance.create_time)
            document["update_time"] = tzlc(instance.update_time)

            document["high_quality_topic_num"] = instance.get_high_quality_topic_num()
            return document
        except:
            logging.error("catch exception,err_msg:%s" % traceback.format_exc())
            return dict()
\ No newline at end of file
......@@ -28,10 +28,15 @@ class TopicTransfer(object):
res["content_level"] = instance.content_level
res["user_id"] = instance.user_id
if instance.group:
res["group_id"] = instance.group.id
else:
res["group_id"] = -1
# if instance.group:
# res["group_id"] = instance.group.id
# else:
# res["group_id"] = -1
res["pictorial_id"] = instance.get_pictorial_id()
logging.info("get res['pictorial_id']:%s"%res["pictorial_id"])
res["share_num"] = instance.share_num
begin = time.time()
......@@ -87,7 +92,7 @@ class TopicTransfer(object):
update_time = instance.update_time
tzlc_update_time = tzlc(update_time)
res["update_time"] = tzlc_update_time
# res["update_time"] = tzlc_update_time
res["update_time_val"] = int(time.mktime(tzlc_update_time.timetuple()))
logging.info("test topic transfer time cost,time0:%d,time1:%d,time2:%d,time3:%d,time4:%d" % (time0,time1,time2,time3,time4))
......
......@@ -74,16 +74,26 @@ class UserTransfer(object):
try:
res["tag_list"] = instance.get_user_tag_id_list()
res["attention_user_id_list"] = cls.get_follow_user_id_list(userInstance=instance)
res["attention_group_id_list"] = instance.get_attention_group_id_list()
# res["attention_group_id_list"] = instance.get_attention_group_id_list()
res["pick_user_id_list"] = instance.get_pick_user_id_list()
res["same_group_user_id_list"] = instance.get_same_group_user_id_list()
res["attention_pictorial_id_list"] = instance.get_attention_pictorial_id_list()
# res["same_group_user_id_list"] = instance.get_same_group_user_id_list()
res["same_pictorial_user_id_list"] = instance.get_same_pictorial_user_id_list()
except:
logging.error("catch exception,err_msg:%s" % traceback.format_exc())
res["tag_list"] = []
res["attention_user_id_list"] = []
res["attention_group_id_list"] = []
# res["attention_group_id_list"] = []
res["pick_user_id_list"] = []
res["same_group_user_id_list"] = []
# res["same_group_user_id_list"] = []
res["attention_pictorial_id_list"] = []
res["same_pictorial_user_id_list"] = []
return res
except:
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment