Commit 62ab70c8 authored by lixiaofang's avatar lixiaofang

Merge branch 'test' of git.wanmeizhensuo.com:alpha/physical into test

parents 67997353 c90bab1a
......@@ -24,29 +24,5 @@
<option name="ITERATION_ELEMENTS_WRAPPING" value="chop_down_if_not_single" />
</formatting-settings>
</DBN-SQL>
<DBN-PSQL>
<case-options enabled="false">
<option name="KEYWORD_CASE" value="lower" />
<option name="FUNCTION_CASE" value="lower" />
<option name="PARAMETER_CASE" value="lower" />
<option name="DATATYPE_CASE" value="lower" />
<option name="OBJECT_CASE" value="preserve" />
</case-options>
<formatting-settings enabled="false" />
</DBN-PSQL>
<DBN-SQL>
<case-options enabled="false">
<option name="KEYWORD_CASE" value="lower" />
<option name="FUNCTION_CASE" value="lower" />
<option name="PARAMETER_CASE" value="lower" />
<option name="DATATYPE_CASE" value="lower" />
<option name="OBJECT_CASE" value="preserve" />
</case-options>
<formatting-settings enabled="false">
<option name="STATEMENT_SPACING" value="one_line" />
<option name="CLAUSE_CHOP_DOWN" value="chop_down_if_statement_long" />
<option name="ITERATION_ELEMENTS_WRAPPING" value="chop_down_if_not_single" />
</formatting-settings>
</DBN-SQL>
</code_scheme>
</component>
\ No newline at end of file
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="ProjectRootManager" version="2" project-jdk-name="Python 3.6 (venv)" project-jdk-type="Python SDK" />
<component name="ProjectRootManager" version="2" project-jdk-name="Python 3.6 (physical1)" project-jdk-type="Python SDK" />
<component name="PyCharmProfessionalAdvertiser">
<option name="shown" value="true" />
</component>
......
......@@ -2,7 +2,7 @@
<module type="PYTHON_MODULE" version="4">
<component name="NewModuleRootManager">
<content url="file://$MODULE_DIR$" />
<orderEntry type="jdk" jdkName="Python 3.6 (venv)" jdkType="Python SDK" />
<orderEntry type="jdk" jdkName="Python 3.6 (physical1)" jdkType="Python SDK" />
<orderEntry type="sourceFolder" forTests="false" />
</component>
<component name="TestRunnerService">
......
......@@ -286,6 +286,89 @@ class ESPerform(object):
@classmethod
def get_tag_topic_list(cls, tag_id, have_read_topic_id_list, size=100):
try:
functions_list = list()
# for id in tag_id:
# functions_list.append(
# {
# "filter": {"term": {"tag_list": id}},
# "weight": 1
# }
# )
functions_list += [
{
"filter": {
"constant_score":{
"filter":{
"term": {"content_level": 6}}
}
},
"weight": 60
},
{
"filter": {
"constant_score":{
"filter":{
"term": {"content_level": 5}}
}
},
"weight": 50
},
{
"filter": {
"constant_score":{
"filter":{
"term": {"content_level": 4}}
}
},
"weight": 40
}
]
q = {
"query": {
"function_score": {
"query": {
"bool": {
"must": [
{"range": {"content_level": {"gte": 4, "lte": 6}}},
{"term": {"is_online": True}},
{"term": {"is_deleted": False}},
{"terms": {"tag_list": tag_id}}
]
}
},
"boost_mode": "sum",
"score_mode": "sum",
"functions": functions_list
}
},
"_source": {
"include": ["id"]
},
"sort": [
{"_score": {"order": "desc"}},
{"create_time_val": {"order": "desc"}},
# {"language_type": {"order": "asc"}},
]
}
if len(have_read_topic_id_list) > 0:
q["query"]["function_score"]["query"]["bool"]["must_not"] = {
"terms": {
"id": have_read_topic_id_list
}
}
result_dict = ESPerform.get_search_results(ESPerform.get_cli(), sub_index_name="topic", query_body=q,
offset=0, size=size, routing="4,5,6")
topic_id_list = [item["_source"]["id"] for item in result_dict["hits"]]
logging.info("topic_id_list:%s" % str(topic_id_list))
return topic_id_list
except:
logging.error("catch exception,err_msg:%s" % traceback.format_exc())
return list()
@classmethod
def get_tag_topic_list_dict(cls, tag_id, have_read_topic_id_list, size=100):
try:
functions_list = list()
for id in tag_id:
......@@ -328,7 +411,7 @@ class ESPerform(object):
}
},
"_source": {
"include": ["id"]
"include": ["id","user_id"]
},
"sort": [
{"_score": {"order": "desc"}},
......@@ -346,8 +429,15 @@ class ESPerform(object):
offset=0, size=size, routing="4,5,6")
topic_id_list = [item["_source"]["id"] for item in result_dict["hits"]]
logging.info("topic_id_list:%s" % str(topic_id_list))
return topic_id_list
# logging.info("topic_id_list:%s" % str(topic_id_list))
# topic_id_dict = [{str(item["_source"]["id"]):item["_source"]["user_id"]} for item in result_dict["hits"]]
topic_id_dict = dict()
for item in result_dict["hits"]:
topic_id_dict[str(item["_source"]["id"])] = item["_source"]["user_id"]
logging.info("topic_id_list:%s" % str(topic_id_dict))
return topic_id_list,topic_id_dict
except:
logging.error("catch exception,err_msg:%s" % traceback.format_exc())
return list()
......@@ -74,18 +74,23 @@ class CollectData(object):
have_read_topic_id_list.extend(promote_recommend_topic_id_list)
recommend_topic_id_list = list()
recommend_topic_id_list_dict = dict()
recommend_topic_id_list_click = list()
recommend_topic_id_list_click_dict = dict()
if click_topic_tag_list:
if len(click_topic_tag_list)>0:
recommend_topic_id_list_click = ESPerform.get_tag_topic_list(click_topic_tag_list,
recommend_topic_id_list_click,recommend_topic_id_list_click_dict = ESPerform.get_tag_topic_list_dict(click_topic_tag_list,
have_read_topic_id_list,size=2)
if len(recommend_topic_id_list_click) > 0:
recommend_topic_id_list.extend(recommend_topic_id_list_click)
have_read_topic_id_list.extend(recommend_topic_id_list)
recommend_topic_id_list_dict.update(recommend_topic_id_list_click_dict)
have_read_topic_id_list.extend(recommend_topic_id_list_click)
click_recommend_redis_key = self.click_recommend_redis_key_prefix + str(device_id)
click_redis_data_dict = {
"data": json.dumps(recommend_topic_id_list),
"datadict":json.dumps(recommend_topic_id_list_dict),
"cursor": 0
}
redis_client.hmset(click_recommend_redis_key, click_redis_data_dict)
......@@ -101,28 +106,32 @@ class CollectData(object):
b"data"] else []
cursor = int(str(redis_topic_data_dict[b"cursor"], encoding="utf-8"))
if len(recommend_topic_id_list)==0 and cursor==0 and len(redis_topic_list)>0:
have_read_topic_id_list.extend(redis_topic_list[:2])
if len(tag_id_list) > 0:
if len(new_user_click_tag_list)>0:
tag_topic_id_list = ESPerform.get_tag_topic_list(new_user_click_tag_list, have_read_topic_id_list)
logging.warning("tag_topic_id_list:%s" % str(new_user_click_tag_list))
logging.warning("tag_id_list:%s" % str(tag_id_list))
else:
tag_topic_id_list = ESPerform.get_tag_topic_list(tag_id_list,have_read_topic_id_list)
tag_topic_dict = dict()
if len(new_user_click_tag_list)>0:
tag_topic_id_list,tag_topic_dict = ESPerform.get_tag_topic_list_dict(new_user_click_tag_list, have_read_topic_id_list)
else:
tag_topic_id_list,tag_topic_dict = ESPerform.get_tag_topic_list_dict(tag_id_list,have_read_topic_id_list)
if len(recommend_topic_id_list)>0 or len(new_user_click_tag_list) > 0:
tag_topic_id_list = recommend_topic_id_list + tag_topic_id_list
tag_topic_dict = recommend_topic_id_list_dict.update(tag_topic_dict)
logging.warning("catch exception,err_msg:%s" % str(tag_topic_dict))
redis_data_dict = {
"data": json.dumps(tag_topic_id_list),
"datadict":json.dumps(tag_topic_dict),
"cursor":0
}
redis_client.hmset(topic_recommend_redis_key,redis_data_dict)
else:
if cursor<=0 and len(redis_topic_list)>0:
tag_topic_id_list = redis_topic_list[:2] + tag_topic_id_list
tag_topic_id_list = list(set(tag_topic_id_list))
tag_topic_dict = list()
tag_topic_dict = redis_topic_list[:2]
tag_topic_dict = list(set(tag_topic_dict))
return True
except:
......@@ -251,9 +260,7 @@ class CollectData(object):
else:
tagid_list = list()
logging.warning("unknown type msg:%s" % raw_val_dict.get("type", "missing type"))
logging.info(
"consume click topic_id:%s,device_id:%s" % (
str(tagid_list), str(device_id)))
device_id = raw_val_dict["device"]["device_id"]
user_id = raw_val_dict["user_id"] if "user_id" in raw_val_dict else None
......
......@@ -122,7 +122,7 @@ class TopicUtils(object):
@classmethod
def get_recommend_topic_ids(cls,user_id,tag_id,offset,size,single_size,query=None,query_type=TopicPageType.FIND_PAGE,
filter_topic_id_list=[],test_score=False,must_topic_id_list=[],recommend_tag_list=[],
user_similar_score_list=[],index_type="topic",routing=None,attention_tag_list=[],current_topic_id =-1,topic_tag_list =[],topic_user_id =-1):
user_similar_score_list=[],index_type="topic",routing=None,attention_tag_list=[],linucb_user_id_list = []):
"""
:remark:获取首页推荐帖子列表
:param user_id:
......@@ -177,32 +177,28 @@ class TopicUtils(object):
},
{
"filter": {
"term": {
"content_level": 6
}
},
"weight": 1000
},
{
"filter": {
"term": {
"is_excellent": 1
"constant_score":{
"filter":{
"term": {
"content_level": 6
}
}
}
},
"weight": 500
"weight": 600
}
]
if len(user_similar_score_list) > 0:
for item in user_similar_score_list[:100]:
score_item = 2 + item[1]
functions_list.append(
{
"filter": {"bool": {
"should": {"term": {"user_id": item[0]}}}},
"weight": score_item,
}
)
# if len(user_similar_score_list) > 0:
# for item in user_similar_score_list[:100]:
# score_item = 2 + item[1]
# functions_list.append(
# {
# "filter": {"bool": {
# "should": {"term": {"user_id": item[0]}}}},
# "weight": score_item,
# }
# )
if len(attention_user_id_list) > 0:
functions_list.append(
......@@ -220,25 +216,7 @@ class TopicUtils(object):
"weight": 100
}
)
if current_topic_id != -1:
if len(topic_tag_list) > 0:
functions_list.append(
{
"filter": {"bool": {
"should": {"terms": {"tag_list": topic_tag_list}}}},
"weight": 2000
}
)
if topic_user_id != -1:
functions_list.append(
{
"filter": {"bool": {
"should": {"term": {"user_id": topic_user_id}}}},
"weight": 1500
}
)
query_function_score = {
"query": {
"bool": {
......@@ -281,12 +259,14 @@ class TopicUtils(object):
query_function_score["query"]["bool"]["must_not"] = [
{"terms":{"id":filter_topic_id_list}}
]
if current_topic_id != -1:
query_function_score["query"]["bool"]["must_not"] = [{
"term": {
"id": current_topic_id
}
}]
if "must_not" in query_function_score["query"]["bool"]:
query_function_score["query"]["bool"]["must_not"] += [
{"terms": {"user_id": linucb_user_id_list}}
]
else:
query_function_score["query"]["bool"]["must_not"] = [
{"term": {"user_id": linucb_user_id_list}}
]
if query is not None: # 搜索帖子
multi_fields = {
'description': 200,
......
......@@ -155,7 +155,7 @@ def choice_push_tag(device_id, user_id):
@bind("physical/search/identity_tag_name")
def identity_tag_name(topic_content):
try:
ret_tag_list = list()
ret_tag_set = set()
redis_key_name = "physical:tag_name_set"
body = {
......@@ -171,9 +171,9 @@ def identity_tag_name(topic_content):
token_word = item["token"]
is_member = redis_client.sismember(redis_key_name, token_word)
if is_member:
ret_tag_list.append(token_word)
ret_tag_set.add(token_word)
return {"tag_name_list": ret_tag_list}
return {"tag_name_list": list(ret_tag_set)}
except:
logging.error("catch exception,err_msg:%s" % traceback.format_exc())
return {"tag_name_list": []}
......
......@@ -73,10 +73,10 @@ def get_home_recommend_topic_ids(user_id, device_id, tag_id, offset, size, query
if offset>0: # 首次搜索时不需要过滤已读
have_read_topic_id_list = list(json.loads(redis_field_val_list[0]))
user_similar_score_redis_key = "physical:user_similar_score:user_id:" + str(user_id)
redis_user_similar_score_redis_val = redis_client.get(user_similar_score_redis_key)
user_similar_score_redis_list = json.loads(
redis_user_similar_score_redis_val) if redis_user_similar_score_redis_val else []
# user_similar_score_redis_key = "physical:user_similar_score:user_id:" + str(user_id)
# redis_user_similar_score_redis_val = redis_client.get(user_similar_score_redis_key)
# user_similar_score_redis_list = json.loads(
# redis_user_similar_score_redis_val) if redis_user_similar_score_redis_val else []
attention_tag_list = list()
recommend_topic_list = list()
......@@ -87,6 +87,8 @@ def get_home_recommend_topic_ids(user_id, device_id, tag_id, offset, size, query
recommend_topic_dict = redis_client.hgetall(topic_recommend_redis_key)
if b"data" in recommend_topic_dict:
recommend_topic_id_list = json.loads(recommend_topic_dict[b"data"])
# 推荐帖子是强插的,要保证推荐帖子不在已读里
recommend_topic_id_list = list(set(recommend_topic_id_list) - set(have_read_topic_id_list))
cursor = int(str(recommend_topic_dict[b"cursor"], encoding="utf-8"))
......@@ -95,6 +97,12 @@ def get_home_recommend_topic_ids(user_id, device_id, tag_id, offset, size, query
recommend_topic_list = recommend_topic_id_list[cursor:newcursor]
redis_client.hset(topic_recommend_redis_key, "cursor", newcursor)
recommend_topic_user_list = list()
if b"datadict" in recommend_topic_dict:
recommend_topic_id_dict = json.loads(recommend_topic_dict[b"datadict"])
if len(recommend_topic_list) == 6:
for i in recommend_topic_list:
recommend_topic_user_list.append(recommend_topic_id_dict[str(i)])
# 用户关注标签
redis_tag_data = redis_client.hget("physical:linucb:register_user_tag_info", user_id)
attention_tag_list = json.loads(redis_tag_data) if redis_tag_data else []
......@@ -114,7 +122,7 @@ def get_home_recommend_topic_ids(user_id, device_id, tag_id, offset, size, query
rank_topic_id_list = TopicUtils.get_recommend_topic_ids(user_id=user_id, tag_id=tag_id, offset=0, size=size,
single_size=size,query=query, query_type=query_type,
filter_topic_id_list=have_read_topic_id_list,
user_similar_score_list=user_similar_score_redis_list,index_type="topic-high-star",routing="4,5,6",attention_tag_list=attention_tag_list)
index_type="topic-high-star",routing="4,5,6",attention_tag_list=attention_tag_list,linucb_user_id_list=recommend_topic_user_list)
if len(recommend_topic_list) == 6 and query is None:
if (size < 11):
......
......@@ -209,7 +209,7 @@ class Topic(models.Model):
user_query_results = UserExtra.objects.using(settings.SLAVE_DB_NAME).filter(user_id=self.user_id)
if user_query_results.count() > 0:
if user_query_results[0].is_recommend:
offline_score += 2.0
offline_score += 2.0 *10
elif user_query_results[0].is_shadow:
user_is_shadow = True
......@@ -219,18 +219,18 @@ class Topic(models.Model):
# 帖子等级
if self.content_level == '5':
offline_score += 6.0
offline_score += 100.0 *3
elif self.content_level == '4':
offline_score += 5.0
offline_score += 60.0 *3
elif self.content_level == '6':
offline_score += 400.0
offline_score += 200.0 *3
is_excellent = self.judge_if_excellent_topic(self.id)
if is_excellent:
offline_score += 200.0
# is_excellent = self.judge_if_excellent_topic(self.id)
# if is_excellent:
# offline_score += 200.0
if self.language_type == 1:
offline_score += 100.0
offline_score += 60.0
# exposure_count = ActionSumAboutTopic.objects.using(settings.SLAVE_DB_NAME).filter(topic_id=self.id, data_type=1).count()
# click_count = ActionSumAboutTopic.objects.using(settings.SLAVE_DB_NAME).filter(topic_id=self.id, data_type=2).count()
# uv_num = ActionSumAboutTopic.objects.using(settings.SLAVE_DB_NAME).filter(topic_id=self.id, data_type=3).count()
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment