Commit d10f2829 authored by haowang

Merge branch 'master' of http://git.wanmeizhensuo.com/alpha/physical into haow/dev

parents c10eb54e 10ced1c0
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="ProjectRootManager" version="2" project-jdk-name="Python 3.6 (venv)" project-jdk-type="Python SDK" />
<component name="ProjectRootManager" version="2" project-jdk-name="Python 3.6 (physical1)" project-jdk-type="Python SDK" />
<component name="PyCharmProfessionalAdvertiser">
<option name="shown" value="true" />
</component>
......
......@@ -2,7 +2,7 @@
<module type="PYTHON_MODULE" version="4">
<component name="NewModuleRootManager">
<content url="file://$MODULE_DIR$" />
<orderEntry type="inheritedJdk" />
<orderEntry type="jdk" jdkName="Python 3.6 (physical1)" jdkType="Python SDK" />
<orderEntry type="sourceFolder" forTests="false" />
</component>
<component name="TestRunnerService">
......
This diff is collapsed.
......@@ -11,22 +11,26 @@ import traceback
from libs.cache import redis_client
from trans2es.models.face_user_contrast_similar import FaceUserContrastSimilar,UserSimilarScore
import json
from linucb.utils.register_user_tag import RegisterUserTag
@shared_task
def write_to_es(es_type, pk_list, use_batch_query_set=False):
try:
pk_list = list(frozenset(pk_list))
type_info_map = get_type_info_map()
type_info = type_info_map[es_type]
logging.info("duan add,es_type:%s" % str(es_type))
type_info.insert_table_by_pk_list(
sub_index_name=es_type,
pk_list=pk_list,
use_batch_query_set=use_batch_query_set,
es=ESPerform.get_cli()
)
if es_type == "register_user_tag":
RegisterUserTag.get_register_user_tag(pk_list)
else:
type_info_map = get_type_info_map()
type_info = type_info_map[es_type]
logging.info("consume es_type:%s" % str(es_type))
type_info.insert_table_by_pk_list(
sub_index_name=es_type,
pk_list=pk_list,
use_batch_query_set=use_batch_query_set,
es=ESPerform.get_cli()
)
except:
logging.error("catch exception,err_msg:%s" % traceback.format_exc())
......@@ -37,7 +41,7 @@ def sync_face_similar_data_to_redis():
result_items = FaceUserContrastSimilar.objects.filter(is_online=True, is_deleted=False).distinct().values(
"participant_user_id").values_list("participant_user_id", flat=True)
logging.info("duan add,begin sync_face_similar_data_to_redis!")
logging.info("begin sync_face_similar_data_to_redis!")
redis_key_prefix = "physical:user_similar:participant_user_id:"
for participant_user_id in result_items:
......@@ -58,7 +62,7 @@ def sync_face_similar_data_to_redis():
)
redis_client.set(redis_key, json.dumps(item_list))
logging.info("duan add,participant_user_id:%d set data done!" % participant_user_id)
logging.info("participant_user_id:%d set data done!" % participant_user_id)
except:
logging.error("catch exception,err_msg:%s" % traceback.format_exc())
......@@ -68,7 +72,7 @@ def sync_user_similar_score():
results_items = UserSimilarScore.objects.filter(is_deleted=False).distinct().values("user_id").values_list("user_id",flat=True)
redis_key_prefix = "physical:user_similar_score:user_id:"
logging.info("duan add,begin sync user_similar_score!")
logging.info("begin sync user_similar_score!")
for user_id in results_items:
redis_key = redis_key_prefix + str(user_id)
similar_results_items = UserSimilarScore.objects.filter(is_deleted=False,user_id=user_id).order_by("-score")
......
......@@ -155,25 +155,27 @@ class ESPerform(object):
bulk_actions = []
if sub_index_name=="topic":
if sub_index_name=="topic" or sub_index_name=="topic-star-routing":
for data in data_list:
bulk_actions.append({
'_op_type': 'index',
'_index': official_index_name,
'_type': doc_type,
'_id': data['id'],
'_source': data,
'routing': data["content_level"]
})
if data:
bulk_actions.append({
'_op_type': 'index',
'_index': official_index_name,
'_type': doc_type,
'_id': data['id'],
'_source': data,
'routing': data["content_level"]
})
else:
for data in data_list:
bulk_actions.append({
'_op_type': 'index',
'_index': official_index_name,
'_type': doc_type,
'_id': data['id'],
'_source': data,
})
if data:
bulk_actions.append({
'_op_type': 'index',
'_index': official_index_name,
'_type': doc_type,
'_id': data['id'],
'_source': data,
})
elasticsearch.helpers.bulk(es_cli,bulk_actions)
return True
......@@ -262,30 +264,54 @@ class ESPerform(object):
return True
@classmethod
def get_tag_topic_list(cls,tag_id):
def get_tag_topic_list(cls,tag_id,have_read_topic_id_list):
try:
functions_list = list()
for id in tag_id:
functions_list.append(
{
"filter": {"term": {"tag_list": id}},
"weight": 1
}
)
q = {
"query":{
"bool":{
"must":[
{"term":{"is_online": True}},
{"term":{"is_deleted": False}},
{"term":{"tag_list":tag_id}}
]
"function_score":{
"query": {
"bool": {
"must": [
{"range": {"content_level": {"gte": 3, "lte": 5}}},
{"term": {"is_online": True}},
{"term": {"is_deleted": False}},
{"terms": {"tag_list": tag_id}}
]
}
},
"boost_mode": "sum",
"score_mode": "sum",
"functions": functions_list
}
},
"_source":{
"include":["id"]
},
"sort":[
{"_score": {"order": "desc"}},
{"create_time_val":{"order":"desc"}},
{"language_type":{"order":"asc"}},
]
}
if len(have_read_topic_id_list)>0:
q["query"]["function_score"]["query"]["bool"]["must_not"] = {
"terms":{
"id":have_read_topic_id_list
}
}
result_dict = ESPerform.get_search_results(ESPerform.get_cli(), sub_index_name="topic", query_body=q,
offset=0, size=5000,routing="4,5")
offset=0, size=100,routing="3,4,5")
topic_id_list = [item["_source"]["id"] for item in result_dict["hits"]]
logging.info("topic_id_list:%s"%str(topic_id_list))
return topic_id_list
except:
logging.error("catch exception,err_msg:%s" % traceback.format_exc())
......
......@@ -28,54 +28,62 @@ class RegisterUserTag(object):
linucb_device_id_register_tag_topic_id_prefix = "physical:linucb:register_tag_topic_recommend:device_id:"
linucb_user_id_register_tag_topic_id_prefix = "physical:linucb:register_tag_topic_recommend:user_id:"
linucb_register_user_tag_key = "physical:linucb:register_user_tag_info"
@classmethod
def get_register_user_tag(cls,pk_list):
try:
user_id_set = set()
# user_id_set = set()
user_id_dict = dict()
query_results = AccountUserTag.objects.filter(pk__in=pk_list)
for item in query_results:
tag_id = item.tag_id
user_id = item.user
user_tag_list = AccountUserTag.objects.filter(user=user_id).values_list("tag_id", flat=True)
user_id_dict[user_id] = user_tag_list
if user_id not in user_id_set:
user_id_set.add(user_id)
user_tag_list = AccountUserTag.objects.filter(user=user_id).values_list("tag_id",flat=True)
have_read_topic_id_list = Tools.get_have_read_topic_id_list(-1, user_id,
TopicPageType.HOME_RECOMMEND)
recommend_topic_id_list = list()
cycle_num = int(10000/len(user_tag_list))
for index in range(0,cycle_num):
for tag_id in user_tag_list:
redis_tag_id_key = cls.tag_topic_id_redis_prefix + str(tag_id)
redis_tag_id_data = redis_client.get(redis_tag_id_key)
tag_topic_id_list = json.loads(redis_tag_id_data) if redis_tag_id_data else []
if not redis_tag_id_data:
tag_topic_id_list = ESPerform.get_tag_topic_list(tag_id)
redis_client.set(redis_tag_id_key,json.dumps(tag_topic_id_list))
redis_client.expire(redis_tag_id_key,1*24*60*60)
if len(tag_topic_id_list)>index:
for topic_id in tag_topic_id_list[index:]:
if topic_id not in have_read_topic_id_list and topic_id not in recommend_topic_id_list:
recommend_topic_id_list.append(topic_id)
break
redis_register_tag_topic_data = {
"data": json.dumps(recommend_topic_id_list),
"cursor": 0
}
redis_client.hmset(cls.linucb_user_id_register_tag_topic_id_prefix,redis_register_tag_topic_data)
redis_client.expire(cls.linucb_user_id_register_tag_topic_id_prefix,30*24*60*60)
topic_recommend_redis_key = cls.linucb_user_id_recommend_topic_id_prefix + str(user_id)
redis_data_dict = {
"data": json.dumps(recommend_topic_id_list),
"cursor":0
}
redis_client.hmset(topic_recommend_redis_key,redis_data_dict)
redis_client.expire(topic_recommend_redis_key,30*24*60*60)
for user_id in user_id_dict:
redis_client.hset(cls.linucb_register_user_tag_key, user_id, json.dumps(list(user_id_dict[user_id])))
# if user_id not in user_id_set:
# user_id_set.add(user_id)
#
# user_tag_list = AccountUserTag.objects.filter(user=user_id).values_list("tag_id",flat=True)
#
# have_read_topic_id_list = Tools.get_have_read_topic_id_list(-1, user_id,
# TopicPageType.HOME_RECOMMEND)
# recommend_topic_id_list = list()
# cycle_num = int(10000/len(user_tag_list))
# for index in range(0,cycle_num):
# for tag_id in user_tag_list:
# redis_tag_id_key = cls.tag_topic_id_redis_prefix + str(tag_id)
# redis_tag_id_data = redis_client.get(redis_tag_id_key)
# tag_topic_id_list = json.loads(redis_tag_id_data) if redis_tag_id_data else []
# if not redis_tag_id_data:
# tag_topic_id_list = ESPerform.get_tag_topic_list(tag_id)
# redis_client.set(redis_tag_id_key,json.dumps(tag_topic_id_list))
# redis_client.expire(redis_tag_id_key,1*24*60*60)
#
# if len(tag_topic_id_list)>index:
# for topic_id in tag_topic_id_list[index:]:
# if topic_id not in have_read_topic_id_list and topic_id not in recommend_topic_id_list:
# recommend_topic_id_list.append(topic_id)
# break
#
# redis_register_tag_topic_data = {
# "data": json.dumps(recommend_topic_id_list),
# "cursor": 0
# }
# redis_client.hmset(cls.linucb_user_id_register_tag_topic_id_prefix,redis_register_tag_topic_data)
# redis_client.expire(cls.linucb_user_id_register_tag_topic_id_prefix,30*24*60*60)
#
# topic_recommend_redis_key = cls.linucb_user_id_recommend_topic_id_prefix + str(user_id)
# redis_data_dict = {
# "data": json.dumps(recommend_topic_id_list),
# "cursor":0
# }
# redis_client.hmset(topic_recommend_redis_key,redis_data_dict)
# redis_client.expire(topic_recommend_redis_key,30*24*60*60)
#
except:
logging.error("catch exception,err_msg:%s" % traceback.format_exc())
This diff is collapsed.
......@@ -20,11 +20,16 @@ class LinUCB:
default_tag_list = list()
@classmethod
def get_default_tag_list(cls):
def get_default_tag_list(cls,user_id):
try:
if len(cls.default_tag_list) == 0:
cls.default_tag_list = Tag.objects.using(settings.SLAVE_DB_NAME).filter(is_online=True,collection=1).values_list("id",flat=True)[0:100]
if user_id:
redis_tag_data = redis_client.hget("physical:linucb:register_user_tag_info", user_id)
cls.default_tag_list = json.loads(redis_tag_data) if redis_tag_data else []
if len(cls.default_tag_list) == 0:
cls.default_tag_list = Tag.objects.using(settings.SLAVE_DB_NAME).filter(is_online=True,collection=1).values_list("id",flat=True)[0:100]
return cls.default_tag_list
except:
......
......@@ -9,7 +9,7 @@ from libs.es import ESPerform
class GroupUtils(object):
@classmethod
def get_group_query_result(cls,query,offset,size):
def get_group_query_result(cls, query, offset, size):
try:
q = dict()
......@@ -38,13 +38,13 @@ class GroupUtils(object):
}
}
q["_source"] = {
"includes":["id"]
"includes": ["id"]
}
return ESPerform.get_search_results(ESPerform.get_cli(), "group", q, offset, size)
except:
logging.error("catch exception,err_msg:%s" % traceback.format_exc())
return {"total_count":0, "hits":[]}
return {"total_count": 0, "hits": []}
@classmethod
def get_hot_pictorial_recommend_result_list(cls,offset,size,es_cli_obj=None):
......@@ -57,12 +57,14 @@ class GroupUtils(object):
"bool":{
"must":[
{"term": {"is_online": True}},
{"term":{"is_deleted": False}}
{"term":{"is_deleted": False}},
{"term": {"effective": True}}
]
}
}
q["sort"] = [
{"high_quality_topic_num":{"order":"desc"}}
{"high_quality_topic_num":{"order":"desc"}},
{"create_time":{"order":"desc"}}
]
q["_source"] = {
"includes":["id"]
......@@ -80,7 +82,7 @@ class GroupUtils(object):
return []
@classmethod
def get_user_attention_pictorial_list(cls,user_id,offset=0,size=10,es_cli_obj=None):
def get_user_attention_pictorial_list(cls, user_id, offset=0, size=10, es_cli_obj=None):
"""
:remark: 获取用户关注小组列表
:return:
......@@ -91,29 +93,32 @@ class GroupUtils(object):
q = dict()
q["query"] = {
"bool":{
"must":[
{"term":{"is_online": True}},
{"term":{"user_id":user_id}},
{"term":{"is_deleted":False}}
"bool": {
"must": [
{"term": {"is_online": True}},
{"term": {"user_id": user_id}},
{"term": {"is_deleted": False}},
{"term":{"effective":True}}
]
}
}
q["_source"] = {
"includes":["attention_pictorial_id_list"]
"includes": ["attention_pictorial_id_list"]
}
result_dict = ESPerform.get_search_results(es_cli_obj,"user",q,offset,size)
if len(result_dict["hits"])>0:
result_dict = ESPerform.get_search_results(es_cli_obj, "user", q, offset, size)
if len(result_dict["hits"]) > 0:
return result_dict["hits"][0]["_source"]["attention_pictorial_id_list"]
else:
return []
except:
logging.error("catch exception,err_msg:%s" % traceback.format_exc())
return []
@classmethod
def get_pictorial_ids_by_aggs(cls,pictorial_ids_list,es_cli_obj=None):
def get_pictorial_ids_by_aggs(cls, pictorial_ids_list, es_cli_obj=None):
"""
:remark:聚合查询获取小组列表
:param group_id_list:
......@@ -124,34 +129,34 @@ class GroupUtils(object):
es_cli_obj = ESPerform.get_cli()
q = dict()
q["size"]=0
q["size"] = 0
q["query"] = {
"terms":{
"pictorial_id":pictorial_ids_list
"terms": {
"pictorial_id": pictorial_ids_list
}
}
q["aggs"] = {
"pictorial_ids":{
"terms":{
"field":"pictorial_id"
"pictorial_ids": {
"terms": {
"field": "pictorial_id"
},
"aggs":{
"max_date":{
"max":{
"field":"update_time_val"
"aggs": {
"max_date": {
"max": {
"field": "update_time_val"
}
}
}
}
}
result_dict = ESPerform.get_search_results(es_cli_obj,"topic",q,aggregations_query=True)
result_dict = ESPerform.get_search_results(es_cli_obj, "topic", q, aggregations_query=True)
buckets_list = result_dict["aggregations"]["pictorial_ids"]["buckets"]
sorted_buckets_list = sorted(buckets_list,key=lambda item:item["max_date"]["value"],reverse=True)
sorted_buckets_list = sorted(buckets_list, key=lambda item: item["max_date"]["value"], reverse=True)
sorted_pictorial_id_list = [item["key"] for item in sorted_buckets_list]
return sorted_pictorial_id_list
except:
logging.error("catch exception,err_msg:%s" % traceback.format_exc())
return []
\ No newline at end of file
return []
......@@ -122,7 +122,7 @@ class TopicUtils(object):
@classmethod
def get_recommend_topic_ids(cls,user_id,tag_id,offset,size,single_size,query=None,query_type=TopicPageType.FIND_PAGE,
filter_topic_id_list=[],test_score=False,must_topic_id_list=[],recommend_tag_list=[],
user_similar_score_list=[],index_type="topic",routing=None):
user_similar_score_list=[],index_type="topic",routing=None,attention_tag_list=[]):
"""
:需增加打散逻辑
:remark:获取首页推荐帖子列表
......@@ -165,7 +165,7 @@ class TopicUtils(object):
"language_type": 1
}
},
"weight": 6
"weight": 60
},
{
"gauss": {
......@@ -174,7 +174,7 @@ class TopicUtils(object):
"decay": 0.99
}
},
"weight": 5
"weight": 60
}
]
......@@ -194,7 +194,7 @@ class TopicUtils(object):
{
"filter": {"bool": {
"should": {"terms": {"user_id": attention_user_id_list}}}},
"weight": 3,
"weight": 30,
}
)
# if len(pick_user_id_list) > 0:
......@@ -216,12 +216,12 @@ class TopicUtils(object):
# )
# query_tag_term_list = cls.___get_should_term_list(user_tag_list)
if len(user_tag_list) > 0:
if len(attention_tag_list) > 0:
functions_list.append(
{
"filter": {"bool": {
"should": {"terms": {"tag_list": user_tag_list}}}},
"weight": 1
"should": {"terms": {"tag_list": attention_tag_list}}}},
"weight": 1000000
}
)
# if len(recommend_tag_list)>0:
......@@ -257,7 +257,7 @@ class TopicUtils(object):
"query": {
"bool": {
"filter": [
# {"range": {"content_level": {"gte": low_content_level, "lte": 5}}},
{"range": {"content_level": {"gte": 4, "lte": 5}}},
# {"term": {"has_image":True}},
{"term": {"is_online": True}},
{"term": {"is_deleted": False}}
......@@ -380,7 +380,7 @@ class TopicUtils(object):
# topic_id_list.append(topic_id)
# if len(topic_id_list)>=single_size:
# break
logging.info("topic_id_list:%s,attention_tag_list%s" % (str(topic_id_list),str(attention_tag_list)))
return topic_id_list
except:
logging.error("catch exception,err_msg:%s" % traceback.format_exc())
......@@ -435,7 +435,7 @@ class TopicUtils(object):
"query": {
"bool": {
"must": [
# {"range": {"content_level": {"gte": 3, "lte": 5}}},
{"range": {"content_level": {"gte": 4, "lte": 5}}},
{"term": {"is_online": True}},
{"term": {"is_deleted": False}}
],
......@@ -621,8 +621,7 @@ class TopicUtils(object):
"term": {k: v},
})
if not v:
if v in (None, '', []):
continue
if k in ["create_time_gte", "create_time_lte"]:
......
......@@ -53,7 +53,6 @@ def pictorial_sort(user_id=-1, sort_type=GroupSortTypes.HOT_RECOMMEND, offset=0,
if sort_type == GroupSortTypes.HOT_RECOMMEND:
pictorial_ids_list = GroupUtils.get_hot_pictorial_recommend_result_list(offset, size, es_cli_obj)
return {"pictorial_recommend_ids": pictorial_ids_list}
elif sort_type == GroupSortTypes.ATTENTION_RECOMMEND:
......@@ -61,6 +60,7 @@ def pictorial_sort(user_id=-1, sort_type=GroupSortTypes.HOT_RECOMMEND, offset=0,
es_cli_obj=es_cli_obj)
if len(attention_pictorial_list) == 0:
return {"pictorial_recommend_ids": []}
else:
attention_pictorial_id_list = [item["pictorial_id"] for item in attention_pictorial_list]
sorted_pictorial_ids_list = GroupUtils.get_pictorial_ids_by_aggs(attention_pictorial_id_list,
......@@ -73,7 +73,6 @@ def pictorial_sort(user_id=-1, sort_type=GroupSortTypes.HOT_RECOMMEND, offset=0,
for item in sorted_attention_pictorial_list:
if item["pictorial_id"] not in pictorial_recommend_ids_list:
pictorial_recommend_ids_list.append(item["pictorial_id"])
return {"pictorial_recommend_ids": pictorial_recommend_ids_list[offset:(offset + size)]}
except:
......@@ -106,7 +105,6 @@ def pictorial_topic(topic_id=-1, offset=0, size=10):
"includes": ["id", "pictorial_id", "tag_list"]
}
result_dict = ESPerform.get_search_results(es_cli_obj, "topic", q, offset, size)
logging.info("get result_dict:%s" % result_dict)
pict_pictorial_ids_list = []
topic_tag_list = []
pictorial_id_list = []
......@@ -165,7 +163,7 @@ def pictorial_topic(topic_id=-1, offset=0, size=10):
"term": {
"is_online": True
}
},{
}, {
"term": {
"is_deleted": False
}
......@@ -200,3 +198,49 @@ def pictorial_topic(topic_id=-1, offset=0, size=10):
except:
logging.error("catch exception,err_msg:%s" % traceback.format_exc())
return {"pictorial_ids_list": []}
@bind("physical/search/pictorial_topic_sort")
def pictorial_topic_sort(pictorial_id=-1, offset=0, size=10):
    """Return the ids of a pictorial's topics, most-voted first.

    Searches the "topic" index for documents whose ``pictorial_id`` equals
    the given id and that are online and not deleted, sorted by
    ``total_vote_num`` in descending order.

    NOTE: the original remark on this endpoint says the ranking is still
    missing a "number of users who commented during the previous day * x"
    term.

    :param pictorial_id: id of the pictorial; a value that is not an ``int``
        is converted with ``int()`` and replaced by -1 when that fails.
    :param offset: paging offset handed to ``ESPerform.get_search_results``.
    :param size: page size handed to ``ESPerform.get_search_results``.
    :return: ``{"pict_pictorial_ids_list": [...]}``; the list is empty when
        any exception is raised while querying.
    """
    try:
        # The original guard assigned an unused ``user_id`` here, so the
        # argument was never normalised. Coerce it instead, keeping numeric
        # strings usable and mapping anything else to the -1 sentinel.
        if not isinstance(pictorial_id, int):
            try:
                pictorial_id = int(pictorial_id)
            except (TypeError, ValueError):
                pictorial_id = -1

        q = {
            "query": {
                "bool": {
                    "must": [
                        {"term": {"pictorial_id": pictorial_id}},
                        {"term": {"is_online": True}},
                        {"term": {"is_deleted": False}},
                    ]
                }
            },
            "sort": {
                "total_vote_num": {"order": "desc"}
            }
        }

        # Obtain the Elasticsearch client object.
        es_cli_obj = ESPerform.get_cli()
        result_dict = ESPerform.get_search_results(es_cli_obj, "topic", q, offset, size)
        logging.info("get pictorial_topic_sort res:%s" % result_dict)

        pict_pictorial_ids_list = [item["_source"]["id"] for item in result_dict["hits"]]
        return {"pict_pictorial_ids_list": pict_pictorial_ids_list}
    except Exception:
        logging.error("catch exception,err_msg:%s" % traceback.format_exc())
        return {"pict_pictorial_ids_list": []}
......@@ -24,34 +24,6 @@ def get_highlight(fields=[]):
@bind("physical/search/query_tag")
def query_tag(query,offset,size):
try:
"""
q = {
"query":{
"bool":{
"must":[
{"term":{"is_online":True}},
{"term": {"is_deleted": False}}
],
"should":[
{"multi_match":{
"query": query,
"fields":["name"],
"operator":"and"}}
],
"minimum_should_match":1
}
},
"sort":[
{"near_new_topic_num":{"order":"desc"}},
{'_score': {"order": "desc"}}
],
"_source": {
"includes": ["id", "name"]
}
}
q["highlight"] = get_highlight(["name"])
"""
q = {
"suggest":{
"tips-suggest":{
......@@ -121,3 +93,11 @@ def query_by_tag_type(tag_type_id,offset,size):
logging.error("catch exception,err_msg:%s" % traceback.format_exc())
return {"tag_list": []}
@bind("physical/search/choice_push_tag")
def choice_push_tag(device_id, user_id):
    """Placeholder endpoint registered as ``physical/search/choice_push_tag``.

    No selection logic is implemented yet. Both the normal path and the
    error path return an empty tag list, so callers always receive the same
    response shape (the original fell through and returned ``None`` on the
    normal path).

    :param device_id: currently unused.
    :param user_id: currently unused.
    :return: ``{"tag_list": []}``
    """
    try:
        # TODO: implement the actual tag selection here.
        return {"tag_list": []}
    except Exception:
        logging.error("catch exception,err_msg:%s" % traceback.format_exc())
        return {"tag_list": []}
......@@ -59,6 +59,7 @@ def get_home_recommend_topic_ids(user_id, device_id, tag_id, offset, size, query
redis_field_val_list = redis_client.hmget(redis_key, redis_field_list)
topic_recommend_redis_key = "physical:linucb:topic_recommend:device_id:" + str(device_id)
# click_recommend_redis_key = "physical:click_recommend:device_id:" + str(device_id)
# recommend_tag_dict = dict()
......@@ -76,6 +77,20 @@ def get_home_recommend_topic_ids(user_id, device_id, tag_id, offset, size, query
recommend_topic_list = recommend_topic_id_list[cursor:newcursor]
redis_client.hset(topic_recommend_redis_key,"cursor",newcursor)
# click_recommend_topic_id_list = list()
# click_recommend_topic_list = list()
#
# click_recommend_topic_dict = redis_client.hgetall(click_recommend_redis_key)
# if b"data" in click_recommend_topic_dict:
# click_recommend_topic_id_list = json.loads(click_recommend_topic_dict[b"data"])
# cursor = int(str(click_recommend_topic_dict[b"cursor"], encoding="utf-8"))
# newcursor = cursor + 2
# if newcursor < 4 and len(click_recommend_topic_id_list) ==2:
# for i in range(0,2):
# click_recommend_topic_list.append(click_recommend_topic_id_list[i])
# redis_client.hset(click_recommend_redis_key, "cursor", newcursor)
# combine_recommend_topic_list_tmp = click_recommend_topic_list.extend(recommend_topic_list)
# combine_recommend_topic_list = combine_recommend_topic_list_tmp[0:5]
have_read_topic_id_list = list()
if redis_field_val_list[0]:
......@@ -90,15 +105,23 @@ def get_home_recommend_topic_ids(user_id, device_id, tag_id, offset, size, query
user_similar_score_redis_list = json.loads(
redis_user_similar_score_redis_val) if redis_user_similar_score_redis_val else []
size = size-len(recommend_topic_list)
redis_tag_data = redis_client.hget("physical:linucb:register_user_tag_info", user_id)
attention_tag_list = json.loads(redis_tag_data) if redis_tag_data else []
logging.info("attention_tag_list:%s"%(str(attention_tag_list)))
if len(recommend_topic_list)>0:
size = size-len(recommend_topic_list)
have_read_topic_id_list.extend(recommend_topic_list)
topic_id_list = TopicUtils.get_recommend_topic_ids(user_id=user_id, tag_id=tag_id, offset=offset, size=size,
single_size=size,query=query, query_type=query_type,
filter_topic_id_list=have_read_topic_id_list,
recommend_tag_list=recommend_topic_list,
user_similar_score_list=user_similar_score_redis_list,index_type="topic",routing="4,5")
user_similar_score_list=user_similar_score_redis_list,index_type="topic",routing="4,5",attention_tag_list=attention_tag_list)
have_read_group_id_set = set()
have_read_user_id_set = set()
unread_topic_id_dict = dict()
logging.info("attention_tag_list:%s"%(str(topic_id_list)))
# # 当前页小组数量
# cur_page_group_num = 0
......@@ -136,8 +159,9 @@ def get_home_recommend_topic_ids(user_id, device_id, tag_id, offset, size, query
# else:
# break
recommend_topic_list.extend(topic_id_list)
have_read_topic_id_list.extend(recommend_topic_list)
topic_id_list.extend(recommend_topic_list)
# recommend_topic_list.extend(topic_id_list)
have_read_topic_id_list.extend(topic_id_list)
if len(have_read_topic_id_list) > 30000:
cut_len = len(have_read_topic_id_list)-30000
have_read_topic_id_list = have_read_topic_id_list[cut_len:]
......@@ -148,7 +172,7 @@ def get_home_recommend_topic_ids(user_id, device_id, tag_id, offset, size, query
# 每个session key保存15分钟
redis_client.expire(redis_key, 60 * 60 * 24 * 30)
return recommend_topic_list
return topic_id_list
except:
logging.error("catch exception,err_msg:%s" % traceback.format_exc())
return []
......
......@@ -15,7 +15,7 @@
"update_time":{"type":"date", "format":"date_time_no_millis"},
"tag_id":{"type":"long"},
"tag_name":{"type":"text","analyzer":"gm_default_index","search_analyzer":"gm_default_index"},
"topic_id_list":{"type":"long"}
"topic_id_list":{"type":"long"},
"effective":{"type":"boolean"}
}
}
\ No newline at end of file
{
"dynamic":"strict",
"_routing": {"required": true},
"properties": {
"id":{"type":"long"},
"is_online":{"type":"boolean"},//上线
"is_deleted":{"type":"boolean"},
"vote_num":{"type":"long"},
"total_vote_num":{"type":"long"},
"reply_num":{"type":"long"},
"name":{"type":"text","analyzer":"gm_default_index","search_analyzer":"gm_default_index"},
"description":{"type":"text","analyzer":"gm_default_index","search_analyzer":"gm_default_index"},
......@@ -48,7 +50,7 @@
"type": "text",
"analyzer": "gm_default_index",
"search_analyzer": "gm_default_index"
}
},
"is_excellent":{"type": "long"}
}
}
{
"dynamic":"strict",
"_routing": {"required": true},
"properties": {
"id":{"type":"long"},
"is_online":{"type":"boolean"},//上线
"is_deleted":{"type":"boolean"},
"vote_num":{"type":"long"},
"total_vote_num":{"type":"long"},
"reply_num":{"type":"long"},
"name":{"type":"text","analyzer":"gm_default_index","search_analyzer":"gm_default_index"},
"description":{"type":"text","analyzer":"gm_default_index","search_analyzer":"gm_default_index"},
......@@ -48,7 +50,7 @@
"type": "text",
"analyzer": "gm_default_index",
"search_analyzer": "gm_default_index"
}
},
"is_excellent":{"type": "long"}
}
}
......@@ -5,12 +5,14 @@
"is_online":{"type":"boolean"},//上线
"is_deleted":{"type":"boolean"},
"vote_num":{"type":"long"},
"total_vote_num":{"type":"long","default":0},
"reply_num":{"type":"long"},
"name":{"type":"text","analyzer":"gm_default_index","search_analyzer":"gm_default_index"},
"description":{"type":"text","analyzer":"gm_default_index","search_analyzer":"gm_default_index"},
"content":{"type":"text","analyzer":"gm_default_index","search_analyzer":"gm_default_index"},
"content_level":{"type":"text"},
"user_id":{"type":"long"},
"user_nick_name":{"type":"text","analyzer":"gm_default_index","search_analyzer":"gm_default_index"},//帖子用户名
"group_id":{"type":"long"}, //所在组ID
"tag_list":{"type":"long"},//标签属性
"edit_tag_list":{"type":"long"},//编辑标签
......
{
"dynamic":"strict",
"_routing": {"required": true},
"properties": {
"id":{"type":"long"},
"is_online":{"type":"boolean"},//上线
......@@ -49,7 +50,7 @@
"type": "text",
"analyzer": "gm_default_index",
"search_analyzer": "gm_default_index"
}
},
"is_excellent":{"type": "long"}
}
}
......@@ -72,6 +72,26 @@ class Pictorial(models.Model):
logging.error("catch exception,err_msg:%s" % traceback.format_exc())
return False
def get_effective(self, topic_id_list):
    """Tell whether enough of the given topics count as "effective".

    A topic counts when it exists, is online, and its ``content_level``
    converts to one of 0, 3, 4 or 5. Topics are looked up one at a time and
    the scan stops as soon as five such topics have been found.

    :param topic_id_list: iterable of topic ids (each is passed to ``int``).
    :return: ``True`` once five effective topics were found, otherwise
        ``False``. Any exception raised during the scan is logged and also
        yields ``False``.
    """
    effective_levels = (0, 3, 4, 5)
    required_num = 5
    try:
        effective_num = 0
        for topic_id in topic_id_list:
            topic = Topic.objects.filter(id=int(topic_id)).first()
            if not topic or not topic.is_online:
                continue
            if int(topic.content_level) in effective_levels:
                effective_num += 1
                # Stop early: no need to query the remaining topics.
                if effective_num >= required_num:
                    return True
        return False
    except Exception:
        # Catch Exception rather than using a bare except, so that
        # SystemExit / KeyboardInterrupt are not swallowed.
        logging.error("catch exception,err_msg:%s" % traceback.format_exc())
        return False
# 获取前一天4,5星发帖数
def get_high_quality_topic_num(self):
try:
......
......@@ -19,6 +19,8 @@ class TopicTag(models.Model):
tag_id = models.IntegerField(verbose_name=u"标签ID")
topic_id = models.IntegerField(verbose_name=u"帖子ID")
is_online = models.BooleanField(verbose_name=u"是否在线")
is_collection = models.IntegerField(verbose_name=u"是否编辑标签")
class AccountUserTag(models.Model):
......
......@@ -85,9 +85,12 @@ class Topic(models.Model):
def get_virtual_vote_num(self):
try:
topic_extra = TopicExtra.object.get(topic_id=self.id)
return topic_extra.virtual_vote_num
topic_extra = TopicExtra.objects.filter(topic_id=self.id).first()
virtual_vote_num = topic_extra.virtual_vote_num if topic_extra else 0
return virtual_vote_num
except:
logging.error("catch exception,err_msg:%s" % traceback.format_exc())
return 0
def get_pictorial_id(self):
......@@ -104,11 +107,14 @@ class Topic(models.Model):
@property
def is_complaint(self):
"""是否被举报"""
try:
if TopicComplaint.objects.filter(topic_id=self.id, is_online=True).exists():
return True
if TopicComplaint.objects.filter(topic_id=self.id, is_online=True).exists():
return True
return False
return False
except:
logging.error("catch exception,err_msg:%s" % traceback.format_exc())
return False
def topic_has_image(self):
try:
......@@ -194,14 +200,14 @@ class Topic(models.Model):
elif self.content_level == '3':
offline_score += 2.0
exposure_count = ActionSumAboutTopic.objects.using(settings.SLAVE_DB_NAME).filter(topic_id=self.id, data_type=1).count()
click_count = ActionSumAboutTopic.objects.using(settings.SLAVE_DB_NAME).filter(topic_id=self.id, data_type=2).count()
uv_num = ActionSumAboutTopic.objects.using(settings.SLAVE_DB_NAME).filter(topic_id=self.id, data_type=3).count()
if exposure_count > 0:
offline_score += click_count / exposure_count
if uv_num > 0:
offline_score += (self.vote_num / uv_num + self.reply_num / uv_num)
# exposure_count = ActionSumAboutTopic.objects.using(settings.SLAVE_DB_NAME).filter(topic_id=self.id, data_type=1).count()
# click_count = ActionSumAboutTopic.objects.using(settings.SLAVE_DB_NAME).filter(topic_id=self.id, data_type=2).count()
# uv_num = ActionSumAboutTopic.objects.using(settings.SLAVE_DB_NAME).filter(topic_id=self.id, data_type=3).count()
#
# if exposure_count > 0:
# offline_score += click_count / exposure_count
# if uv_num > 0:
# offline_score += (self.vote_num / uv_num + self.reply_num / uv_num)
"""
1:马甲账号是否对总分降权?
......@@ -244,7 +250,6 @@ class PictorialTopic(models.Model):
pictorial_id = models.BigIntegerField(verbose_name=u'画报ID')
topic_id = models.BigIntegerField(verbose_name=u'帖子ID')
is_online = models.BooleanField(verbose_name=u"是否有效", default=True)
is_online = models.BooleanField(verbose_name=u'是否上线')
is_deleted = models.BooleanField(verbose_name=u'是否删除')
......@@ -254,8 +259,25 @@ class TopicExtra(models.Model):
class Meta:
verbose_name = '帖子额外信息'
app_label = 'community'
db_table = 'topic_extra'waq
db_table = 'topic_extra'
id = models.IntegerField(verbose_name=u'ID', primary_key=True)
topic_id = models.IntegerField(verbose_name=u"帖子ID",db_index=True)
virtual_vote_num = models.IntegerField(verbose_name="帖子虚拟点赞")
class ExcellentTopic(models.Model):
    """High-quality ("excellent") topic record, stored in table ``excellent_topic``."""

    class Meta:
        verbose_name = "优质帖子"
        db_table = "excellent_topic"
        # NOTE(review): the TopicExtra model's Meta in this file sets
        # app_label = 'community'; none is set here - confirm whether this
        # model needs one as well.

    # Explicit integer primary key.
    id = models.IntegerField(verbose_name=u'ID', primary_key=True)
    # Id of the related topic; a plain indexed integer, not a ForeignKey.
    topic_id = models.IntegerField(verbose_name=u"帖子ID",db_index=True)
    # Online flag and deletion flag.
    is_online = models.BooleanField(verbose_name=u'是否上线')
    is_deleted = models.BooleanField(verbose_name=u'是否删除')
    # Integer code for the kind of "excellent" marking (indexed).
    # presumably an enum defined elsewhere - TODO confirm the valid values.
    excellent_type = models.IntegerField(verbose_name=u"优质类型",db_index=True)
    # NOTE: datetime.datetime.fromtimestamp(0) is evaluated once, when the
    # class body runs, and yields a naive datetime in the process's local
    # timezone; every row therefore shares that fixed default value.
    create_time = models.DateTimeField(verbose_name=u'创建时间', default=datetime.datetime.fromtimestamp(0))
    update_time = models.DateTimeField(verbose_name=u'更新时间', default=datetime.datetime.fromtimestamp(0))
......@@ -11,17 +11,19 @@ from libs.es import ESPerform
import elasticsearch
import elasticsearch.helpers
import sys
from libs.cache import redis_client
import copy
from trans2es.models import topic, user, pick_celebrity, group, celebrity, tag, contrast_similar,pictorial
from trans2es.utils.user_transfer import UserTransfer
from trans2es.utils.pick_celebrity_transfer import PickCelebrityTransfer
from trans2es.utils.group_transfer import GroupTransfer
from trans2es.utils.topic_transfer import TopicTransfer
from trans2es.utils.excellect_topic_transfer import ExcellectTopicTransfer
from trans2es.utils.pictorial_transfer import PictorialTransfer
from trans2es.utils.celebrity_transfer import CelebrityTransfer
from trans2es.utils.tag_transfer import TagTransfer
from trans2es.utils.contrast_similar_transfer import Contrast_Similar_Transfer
from libs.es import ESPerform
__es = None
......@@ -62,6 +64,7 @@ class TypeInfo(object):
self.round_insert_chunk_size = round_insert_chunk_size
self.round_insert_period = round_insert_period
self.logic_database_id = logic_database_id
self.physical_topic_star = "physical:topic_star"
@property
def query(self):
......@@ -139,9 +142,25 @@ class TypeInfo(object):
pk,
))
else:
data_list.append(data)
if self.type=="topic" and instance.content_level and int(instance.content_level)>=4:
topic_data_high_star_list.append(data)
if data:
if self.type == "topic":
ori_topic_star = redis_client.hget(self.physical_topic_star, data["id"])
if not ori_topic_star:
redis_client.hset(self.physical_topic_star, data["id"], data["content_level"])
else:
int_ori_topic_star = int(str(ori_topic_star, encoding="utf-8"))
if int_ori_topic_star != data["content_level"]:
old_data = copy.deepcopy(data)
old_data["is_online"] = False
old_data["is_deleted"] = True
old_data["content_level"] = int_ori_topic_star
data_list.append(old_data)
redis_client.hset(self.physical_topic_star, data["id"], data["content_level"])
data_list.append(data)
# if self.type=="topic" and instance.content_level and int(instance.content_level)>=4:
# topic_data_high_star_list.append(data)
return (data_list,topic_data_high_star_list)
......@@ -199,22 +218,30 @@ class TypeInfo(object):
time2=end-begin
begin = time.time()
logging.info("get sub_index_name:%s"%sub_index_name)
logging.info("get data_list:%s"%data_list)
# logging.info("get sub_index_name:%s"%sub_index_name)
# logging.info("get data_list:%s"%data_list)
self.elasticsearch_bulk_insert_data(
sub_index_name=sub_index_name,
data_list=data_list,
es=es,
)
# 同时写4星及以上的帖子
if len(topic_data_high_star_list)>0:
if sub_index_name=="topic":
self.elasticsearch_bulk_insert_data(
sub_index_name="topic-high-star",
data_list=topic_data_high_star_list,
sub_index_name="topic-star-routing",
data_list=data_list,
es=es,
)
# # 同时写4星及以上的帖子
# if len(topic_data_high_star_list)>0:
# self.elasticsearch_bulk_insert_data(
# sub_index_name="topic-high-star",
# data_list=topic_data_high_star_list,
# es=es,
# )
end = time.time()
time3=end-begin
logging.info("duan add,insert_table_by_pk_list time cost:%ds,%ds,%ds,%ds" % (time0,time1,time2,time3))
......@@ -294,6 +321,16 @@ def get_type_info_map():
round_insert_chunk_size=5,
round_insert_period=2,
),
TypeInfo(
name='excellect-topic', # 优质帖子
type='excellect-topic',
model=topic.ExcellentTopic,
query_deferred=lambda: topic.ExcellentTopic.objects.all().query,
get_data_func=ExcellectTopicTransfer.get_excellect_topic_data,
bulk_insert_chunk_size=100,
round_insert_chunk_size=5,
round_insert_period=2,
),
TypeInfo(
name='topic', # 日记
type='topic',
......
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import os
import sys
import logging
import traceback
from libs.tools import tzlc
import time
import re
import datetime
from trans2es.models.user import User
from trans2es.models.topic import Topic
from trans2es.utils.topic_transfer import TopicTransfer
class ExcellectTopicTransfer(object):
    """Builds the topic document for an excellent-topic row."""

    @classmethod
    def get_excellect_topic_data(cls, instance):
        """Return the topic data for an excellent-topic row, or None.

        Args:
            instance: Row object exposing ``topic_id``, ``is_online`` and
                ``is_deleted``.

        Returns:
            The result of ``TopicTransfer.get_topic_data(topic,
            is_excellect=True)`` when the row is online, not deleted and a
            ``Topic`` with that id exists; otherwise None. None is also
            returned, after the traceback is logged, when an ``Exception``
            is raised while building the data.
        """
        try:
            topic_id = instance.topic_id
            is_online = instance.is_online
            is_deleted = instance.is_deleted

            # Offline or deleted rows produce no document.
            if not is_online or is_deleted:
                return None

            topic_ins = Topic.objects.filter(id=topic_id).first()
            if not topic_ins:
                return None

            return TopicTransfer.get_topic_data(topic_ins, is_excellect=True)
        except Exception:
            # Best-effort: log and report "no data". Unlike a bare ``except:``,
            # this lets KeyboardInterrupt and SystemExit propagate.
            logging.error("catch exception,err_msg:%s" % traceback.format_exc())
            return None
......@@ -37,6 +37,7 @@ class PictorialTransfer(object):
res["tag_id"] = tag_id
res["tag_name"] = instance.get_tag_by_name(tag_id)
res["topic_id_list"] =instance.get_topic_id()
res["effective"] = instance.get_effective(res["topic_id_list"])
return res
except:
......
......@@ -9,11 +9,12 @@ import time
import re
import datetime
from trans2es.models.user import User
from trans2es.models.topic import ExcellentTopic
class TopicTransfer(object):
@classmethod
def get_topic_data(cls,instance):
def get_topic_data(cls,instance,is_excellect=False):
try:
res = dict()
......@@ -111,6 +112,15 @@ class TopicTransfer(object):
res["total_vote_num"] = instance.get_virtual_vote_num() + instance.vote_num
if is_excellect:
res["is_excellent"] = 1
else:
excelllect_object = ExcellentTopic.objects.filter(topic_id=instance.id).first()
if excelllect_object and excelllect_object.is_online and not excelllect_object.is_deleted:
res["is_excellent"] = 1
else:
res["is_excellent"] = 0
logging.info("test topic transfer time cost,time0:%d,time1:%d,time2:%d,time3:%d,time4:%d" % (time0,time1,time2,time3,time4))
return res
except:
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment