Commit 62ab70c8 authored by lixiaofang

Merge branch 'test' of git.wanmeizhensuo.com:alpha/physical into test

parents 67997353 c90bab1a
...@@ -24,29 +24,5 @@ ...@@ -24,29 +24,5 @@
<option name="ITERATION_ELEMENTS_WRAPPING" value="chop_down_if_not_single" /> <option name="ITERATION_ELEMENTS_WRAPPING" value="chop_down_if_not_single" />
</formatting-settings> </formatting-settings>
</DBN-SQL> </DBN-SQL>
<DBN-PSQL>
<case-options enabled="false">
<option name="KEYWORD_CASE" value="lower" />
<option name="FUNCTION_CASE" value="lower" />
<option name="PARAMETER_CASE" value="lower" />
<option name="DATATYPE_CASE" value="lower" />
<option name="OBJECT_CASE" value="preserve" />
</case-options>
<formatting-settings enabled="false" />
</DBN-PSQL>
<DBN-SQL>
<case-options enabled="false">
<option name="KEYWORD_CASE" value="lower" />
<option name="FUNCTION_CASE" value="lower" />
<option name="PARAMETER_CASE" value="lower" />
<option name="DATATYPE_CASE" value="lower" />
<option name="OBJECT_CASE" value="preserve" />
</case-options>
<formatting-settings enabled="false">
<option name="STATEMENT_SPACING" value="one_line" />
<option name="CLAUSE_CHOP_DOWN" value="chop_down_if_statement_long" />
<option name="ITERATION_ELEMENTS_WRAPPING" value="chop_down_if_not_single" />
</formatting-settings>
</DBN-SQL>
</code_scheme> </code_scheme>
</component> </component>
\ No newline at end of file
<?xml version="1.0" encoding="UTF-8"?> <?xml version="1.0" encoding="UTF-8"?>
<project version="4"> <project version="4">
<component name="ProjectRootManager" version="2" project-jdk-name="Python 3.6 (venv)" project-jdk-type="Python SDK" /> <component name="ProjectRootManager" version="2" project-jdk-name="Python 3.6 (physical1)" project-jdk-type="Python SDK" />
<component name="PyCharmProfessionalAdvertiser"> <component name="PyCharmProfessionalAdvertiser">
<option name="shown" value="true" /> <option name="shown" value="true" />
</component> </component>
......
...@@ -2,7 +2,7 @@ ...@@ -2,7 +2,7 @@
<module type="PYTHON_MODULE" version="4"> <module type="PYTHON_MODULE" version="4">
<component name="NewModuleRootManager"> <component name="NewModuleRootManager">
<content url="file://$MODULE_DIR$" /> <content url="file://$MODULE_DIR$" />
<orderEntry type="jdk" jdkName="Python 3.6 (venv)" jdkType="Python SDK" /> <orderEntry type="jdk" jdkName="Python 3.6 (physical1)" jdkType="Python SDK" />
<orderEntry type="sourceFolder" forTests="false" /> <orderEntry type="sourceFolder" forTests="false" />
</component> </component>
<component name="TestRunnerService"> <component name="TestRunnerService">
......
...@@ -286,6 +286,89 @@ class ESPerform(object): ...@@ -286,6 +286,89 @@ class ESPerform(object):
@classmethod
def get_tag_topic_list(cls, tag_id, have_read_topic_id_list, size=100):
    """Return the ids of topics that carry any of the given tags.

    Builds a ``function_score`` query that only matches topics which are
    online, not deleted, have ``content_level`` between 4 and 6 and whose
    ``tag_list`` contains one of ``tag_id``.  Higher content levels are
    ranked first (level 6 adds 60, level 5 adds 50, level 4 adds 40 to the
    score); ties are broken by ``create_time_val`` descending.

    :param tag_id: tag ids passed to a ``terms`` filter on ``tag_list``
        (presumably a list of ids -- verify against callers).
    :param have_read_topic_id_list: topic ids to exclude from the result;
        when empty no exclusion clause is added.
    :param size: maximum number of hits requested from the search.
    :return: list of topic ids taken from ``_source.id`` of each hit, or
        an empty list if anything goes wrong (the traceback is logged).
    """
    try:
        # One scoring function per content level; weight is level * 10,
        # listed from the highest level down.
        functions_list = [
            {
                "filter": {
                    "constant_score": {
                        "filter": {
                            "term": {"content_level": level}
                        }
                    }
                },
                "weight": level * 10
            }
            for level in (6, 5, 4)
        ]

        q = {
            "query": {
                "function_score": {
                    "query": {
                        "bool": {
                            "must": [
                                {"range": {"content_level": {"gte": 4, "lte": 6}}},
                                {"term": {"is_online": True}},
                                {"term": {"is_deleted": False}},
                                {"terms": {"tag_list": tag_id}}
                            ]
                        }
                    },
                    "boost_mode": "sum",
                    "score_mode": "sum",
                    "functions": functions_list
                }
            },
            "_source": {
                "include": ["id"]
            },
            "sort": [
                {"_score": {"order": "desc"}},
                {"create_time_val": {"order": "desc"}},
            ]
        }

        # Only add the exclusion clause when there is something to exclude.
        if len(have_read_topic_id_list) > 0:
            q["query"]["function_score"]["query"]["bool"]["must_not"] = {
                "terms": {
                    "id": have_read_topic_id_list
                }
            }

        result_dict = ESPerform.get_search_results(
            ESPerform.get_cli(), sub_index_name="topic", query_body=q,
            offset=0, size=size, routing="4,5,6")

        topic_id_list = [item["_source"]["id"] for item in result_dict["hits"]]
        logging.info("topic_id_list:%s" % str(topic_id_list))
        return topic_id_list
    except Exception:
        # Best-effort lookup: log the failure and fall back to "no topics".
        # ``Exception`` (not a bare ``except``) so that KeyboardInterrupt
        # and SystemExit still propagate.
        logging.error("catch exception,err_msg:%s" % traceback.format_exc())
        return list()
@classmethod
def get_tag_topic_list_dict(cls, tag_id, have_read_topic_id_list, size=100):
try: try:
functions_list = list() functions_list = list()
for id in tag_id: for id in tag_id:
...@@ -328,7 +411,7 @@ class ESPerform(object): ...@@ -328,7 +411,7 @@ class ESPerform(object):
} }
}, },
"_source": { "_source": {
"include": ["id"] "include": ["id","user_id"]
}, },
"sort": [ "sort": [
{"_score": {"order": "desc"}}, {"_score": {"order": "desc"}},
...@@ -346,8 +429,15 @@ class ESPerform(object): ...@@ -346,8 +429,15 @@ class ESPerform(object):
offset=0, size=size, routing="4,5,6") offset=0, size=size, routing="4,5,6")
topic_id_list = [item["_source"]["id"] for item in result_dict["hits"]] topic_id_list = [item["_source"]["id"] for item in result_dict["hits"]]
logging.info("topic_id_list:%s" % str(topic_id_list)) # logging.info("topic_id_list:%s" % str(topic_id_list))
return topic_id_list # topic_id_dict = [{str(item["_source"]["id"]):item["_source"]["user_id"]} for item in result_dict["hits"]]
topic_id_dict = dict()
for item in result_dict["hits"]:
topic_id_dict[str(item["_source"]["id"])] = item["_source"]["user_id"]
logging.info("topic_id_list:%s" % str(topic_id_dict))
return topic_id_list,topic_id_dict
except: except:
logging.error("catch exception,err_msg:%s" % traceback.format_exc()) logging.error("catch exception,err_msg:%s" % traceback.format_exc())
return list() return list()
...@@ -74,18 +74,23 @@ class CollectData(object): ...@@ -74,18 +74,23 @@ class CollectData(object):
have_read_topic_id_list.extend(promote_recommend_topic_id_list) have_read_topic_id_list.extend(promote_recommend_topic_id_list)
recommend_topic_id_list = list() recommend_topic_id_list = list()
recommend_topic_id_list_dict = dict()
recommend_topic_id_list_click = list() recommend_topic_id_list_click = list()
recommend_topic_id_list_click_dict = dict()
if click_topic_tag_list: if click_topic_tag_list:
if len(click_topic_tag_list)>0: if len(click_topic_tag_list)>0:
recommend_topic_id_list_click = ESPerform.get_tag_topic_list(click_topic_tag_list, recommend_topic_id_list_click,recommend_topic_id_list_click_dict = ESPerform.get_tag_topic_list_dict(click_topic_tag_list,
have_read_topic_id_list,size=2) have_read_topic_id_list,size=2)
if len(recommend_topic_id_list_click) > 0: if len(recommend_topic_id_list_click) > 0:
recommend_topic_id_list.extend(recommend_topic_id_list_click) recommend_topic_id_list.extend(recommend_topic_id_list_click)
have_read_topic_id_list.extend(recommend_topic_id_list) recommend_topic_id_list_dict.update(recommend_topic_id_list_click_dict)
have_read_topic_id_list.extend(recommend_topic_id_list_click)
click_recommend_redis_key = self.click_recommend_redis_key_prefix + str(device_id) click_recommend_redis_key = self.click_recommend_redis_key_prefix + str(device_id)
click_redis_data_dict = { click_redis_data_dict = {
"data": json.dumps(recommend_topic_id_list), "data": json.dumps(recommend_topic_id_list),
"datadict":json.dumps(recommend_topic_id_list_dict),
"cursor": 0 "cursor": 0
} }
redis_client.hmset(click_recommend_redis_key, click_redis_data_dict) redis_client.hmset(click_recommend_redis_key, click_redis_data_dict)
...@@ -101,28 +106,32 @@ class CollectData(object): ...@@ -101,28 +106,32 @@ class CollectData(object):
b"data"] else [] b"data"] else []
cursor = int(str(redis_topic_data_dict[b"cursor"], encoding="utf-8")) cursor = int(str(redis_topic_data_dict[b"cursor"], encoding="utf-8"))
if len(recommend_topic_id_list)==0 and cursor==0 and len(redis_topic_list)>0: if len(recommend_topic_id_list)==0 and cursor==0 and len(redis_topic_list)>0:
have_read_topic_id_list.extend(redis_topic_list[:2]) have_read_topic_id_list.extend(redis_topic_list[:2])
if len(tag_id_list) > 0: tag_topic_dict = dict()
if len(new_user_click_tag_list)>0: if len(new_user_click_tag_list)>0:
tag_topic_id_list = ESPerform.get_tag_topic_list(new_user_click_tag_list, have_read_topic_id_list) tag_topic_id_list,tag_topic_dict = ESPerform.get_tag_topic_list_dict(new_user_click_tag_list, have_read_topic_id_list)
logging.warning("tag_topic_id_list:%s" % str(new_user_click_tag_list))
logging.warning("tag_id_list:%s" % str(tag_id_list))
else: else:
tag_topic_id_list = ESPerform.get_tag_topic_list(tag_id_list,have_read_topic_id_list) tag_topic_id_list,tag_topic_dict = ESPerform.get_tag_topic_list_dict(tag_id_list,have_read_topic_id_list)
if len(recommend_topic_id_list)>0 or len(new_user_click_tag_list) > 0: if len(recommend_topic_id_list)>0 or len(new_user_click_tag_list) > 0:
tag_topic_id_list = recommend_topic_id_list + tag_topic_id_list tag_topic_id_list = recommend_topic_id_list + tag_topic_id_list
tag_topic_dict = recommend_topic_id_list_dict.update(tag_topic_dict)
logging.warning("catch exception,err_msg:%s" % str(tag_topic_dict))
redis_data_dict = { redis_data_dict = {
"data": json.dumps(tag_topic_id_list), "data": json.dumps(tag_topic_id_list),
"datadict":json.dumps(tag_topic_dict),
"cursor":0 "cursor":0
} }
redis_client.hmset(topic_recommend_redis_key,redis_data_dict) redis_client.hmset(topic_recommend_redis_key,redis_data_dict)
else: else:
if cursor<=0 and len(redis_topic_list)>0: if cursor<=0 and len(redis_topic_list)>0:
tag_topic_id_list = redis_topic_list[:2] + tag_topic_id_list tag_topic_dict = list()
tag_topic_id_list = list(set(tag_topic_id_list)) tag_topic_dict = redis_topic_list[:2]
tag_topic_dict = list(set(tag_topic_dict))
return True return True
except: except:
...@@ -251,9 +260,7 @@ class CollectData(object): ...@@ -251,9 +260,7 @@ class CollectData(object):
else: else:
tagid_list = list() tagid_list = list()
logging.warning("unknown type msg:%s" % raw_val_dict.get("type", "missing type")) logging.warning("unknown type msg:%s" % raw_val_dict.get("type", "missing type"))
logging.info(
"consume click topic_id:%s,device_id:%s" % (
str(tagid_list), str(device_id)))
device_id = raw_val_dict["device"]["device_id"] device_id = raw_val_dict["device"]["device_id"]
user_id = raw_val_dict["user_id"] if "user_id" in raw_val_dict else None user_id = raw_val_dict["user_id"] if "user_id" in raw_val_dict else None
......
...@@ -122,7 +122,7 @@ class TopicUtils(object): ...@@ -122,7 +122,7 @@ class TopicUtils(object):
@classmethod @classmethod
def get_recommend_topic_ids(cls,user_id,tag_id,offset,size,single_size,query=None,query_type=TopicPageType.FIND_PAGE, def get_recommend_topic_ids(cls,user_id,tag_id,offset,size,single_size,query=None,query_type=TopicPageType.FIND_PAGE,
filter_topic_id_list=[],test_score=False,must_topic_id_list=[],recommend_tag_list=[], filter_topic_id_list=[],test_score=False,must_topic_id_list=[],recommend_tag_list=[],
user_similar_score_list=[],index_type="topic",routing=None,attention_tag_list=[],current_topic_id =-1,topic_tag_list =[],topic_user_id =-1): user_similar_score_list=[],index_type="topic",routing=None,attention_tag_list=[],linucb_user_id_list = []):
""" """
:remark:获取首页推荐帖子列表 :remark:获取首页推荐帖子列表
:param user_id: :param user_id:
...@@ -177,32 +177,28 @@ class TopicUtils(object): ...@@ -177,32 +177,28 @@ class TopicUtils(object):
}, },
{ {
"filter": { "filter": {
"constant_score":{
"filter":{
"term": { "term": {
"content_level": 6 "content_level": 6
} }
}, }
"weight": 1000
},
{
"filter": {
"term": {
"is_excellent": 1
} }
}, },
"weight": 500 "weight": 600
} }
] ]
if len(user_similar_score_list) > 0: # if len(user_similar_score_list) > 0:
for item in user_similar_score_list[:100]: # for item in user_similar_score_list[:100]:
score_item = 2 + item[1] # score_item = 2 + item[1]
functions_list.append( # functions_list.append(
{ # {
"filter": {"bool": { # "filter": {"bool": {
"should": {"term": {"user_id": item[0]}}}}, # "should": {"term": {"user_id": item[0]}}}},
"weight": score_item, # "weight": score_item,
} # }
) # )
if len(attention_user_id_list) > 0: if len(attention_user_id_list) > 0:
functions_list.append( functions_list.append(
...@@ -220,25 +216,7 @@ class TopicUtils(object): ...@@ -220,25 +216,7 @@ class TopicUtils(object):
"weight": 100 "weight": 100
} }
) )
if current_topic_id != -1:
if len(topic_tag_list) > 0:
functions_list.append(
{
"filter": {"bool": {
"should": {"terms": {"tag_list": topic_tag_list}}}},
"weight": 2000
}
)
if topic_user_id != -1:
functions_list.append(
{
"filter": {"bool": {
"should": {"term": {"user_id": topic_user_id}}}},
"weight": 1500
}
)
query_function_score = { query_function_score = {
"query": { "query": {
"bool": { "bool": {
...@@ -281,12 +259,14 @@ class TopicUtils(object): ...@@ -281,12 +259,14 @@ class TopicUtils(object):
query_function_score["query"]["bool"]["must_not"] = [ query_function_score["query"]["bool"]["must_not"] = [
{"terms":{"id":filter_topic_id_list}} {"terms":{"id":filter_topic_id_list}}
] ]
if current_topic_id != -1: if "must_not" in query_function_score["query"]["bool"]:
query_function_score["query"]["bool"]["must_not"] = [{ query_function_score["query"]["bool"]["must_not"] += [
"term": { {"terms": {"user_id": linucb_user_id_list}}
"id": current_topic_id ]
} else:
}] query_function_score["query"]["bool"]["must_not"] = [
{"term": {"user_id": linucb_user_id_list}}
]
if query is not None: # 搜索帖子 if query is not None: # 搜索帖子
multi_fields = { multi_fields = {
'description': 200, 'description': 200,
......
...@@ -155,7 +155,7 @@ def choice_push_tag(device_id, user_id): ...@@ -155,7 +155,7 @@ def choice_push_tag(device_id, user_id):
@bind("physical/search/identity_tag_name") @bind("physical/search/identity_tag_name")
def identity_tag_name(topic_content): def identity_tag_name(topic_content):
try: try:
ret_tag_list = list() ret_tag_set = set()
redis_key_name = "physical:tag_name_set" redis_key_name = "physical:tag_name_set"
body = { body = {
...@@ -171,9 +171,9 @@ def identity_tag_name(topic_content): ...@@ -171,9 +171,9 @@ def identity_tag_name(topic_content):
token_word = item["token"] token_word = item["token"]
is_member = redis_client.sismember(redis_key_name, token_word) is_member = redis_client.sismember(redis_key_name, token_word)
if is_member: if is_member:
ret_tag_list.append(token_word) ret_tag_set.add(token_word)
return {"tag_name_list": ret_tag_list} return {"tag_name_list": list(ret_tag_set)}
except: except:
logging.error("catch exception,err_msg:%s" % traceback.format_exc()) logging.error("catch exception,err_msg:%s" % traceback.format_exc())
return {"tag_name_list": []} return {"tag_name_list": []}
......
...@@ -73,10 +73,10 @@ def get_home_recommend_topic_ids(user_id, device_id, tag_id, offset, size, query ...@@ -73,10 +73,10 @@ def get_home_recommend_topic_ids(user_id, device_id, tag_id, offset, size, query
if offset>0: # 首次搜索时不需要过滤已读 if offset>0: # 首次搜索时不需要过滤已读
have_read_topic_id_list = list(json.loads(redis_field_val_list[0])) have_read_topic_id_list = list(json.loads(redis_field_val_list[0]))
user_similar_score_redis_key = "physical:user_similar_score:user_id:" + str(user_id) # user_similar_score_redis_key = "physical:user_similar_score:user_id:" + str(user_id)
redis_user_similar_score_redis_val = redis_client.get(user_similar_score_redis_key) # redis_user_similar_score_redis_val = redis_client.get(user_similar_score_redis_key)
user_similar_score_redis_list = json.loads( # user_similar_score_redis_list = json.loads(
redis_user_similar_score_redis_val) if redis_user_similar_score_redis_val else [] # redis_user_similar_score_redis_val) if redis_user_similar_score_redis_val else []
attention_tag_list = list() attention_tag_list = list()
recommend_topic_list = list() recommend_topic_list = list()
...@@ -87,6 +87,8 @@ def get_home_recommend_topic_ids(user_id, device_id, tag_id, offset, size, query ...@@ -87,6 +87,8 @@ def get_home_recommend_topic_ids(user_id, device_id, tag_id, offset, size, query
recommend_topic_dict = redis_client.hgetall(topic_recommend_redis_key) recommend_topic_dict = redis_client.hgetall(topic_recommend_redis_key)
if b"data" in recommend_topic_dict: if b"data" in recommend_topic_dict:
recommend_topic_id_list = json.loads(recommend_topic_dict[b"data"]) recommend_topic_id_list = json.loads(recommend_topic_dict[b"data"])
# 推荐帖子是强插的,要保证推荐帖子不在已读里 # 推荐帖子是强插的,要保证推荐帖子不在已读里
recommend_topic_id_list = list(set(recommend_topic_id_list) - set(have_read_topic_id_list)) recommend_topic_id_list = list(set(recommend_topic_id_list) - set(have_read_topic_id_list))
cursor = int(str(recommend_topic_dict[b"cursor"], encoding="utf-8")) cursor = int(str(recommend_topic_dict[b"cursor"], encoding="utf-8"))
...@@ -95,6 +97,12 @@ def get_home_recommend_topic_ids(user_id, device_id, tag_id, offset, size, query ...@@ -95,6 +97,12 @@ def get_home_recommend_topic_ids(user_id, device_id, tag_id, offset, size, query
recommend_topic_list = recommend_topic_id_list[cursor:newcursor] recommend_topic_list = recommend_topic_id_list[cursor:newcursor]
redis_client.hset(topic_recommend_redis_key, "cursor", newcursor) redis_client.hset(topic_recommend_redis_key, "cursor", newcursor)
recommend_topic_user_list = list()
if b"datadict" in recommend_topic_dict:
recommend_topic_id_dict = json.loads(recommend_topic_dict[b"datadict"])
if len(recommend_topic_list) == 6:
for i in recommend_topic_list:
recommend_topic_user_list.append(recommend_topic_id_dict[str(i)])
# 用户关注标签 # 用户关注标签
redis_tag_data = redis_client.hget("physical:linucb:register_user_tag_info", user_id) redis_tag_data = redis_client.hget("physical:linucb:register_user_tag_info", user_id)
attention_tag_list = json.loads(redis_tag_data) if redis_tag_data else [] attention_tag_list = json.loads(redis_tag_data) if redis_tag_data else []
...@@ -114,7 +122,7 @@ def get_home_recommend_topic_ids(user_id, device_id, tag_id, offset, size, query ...@@ -114,7 +122,7 @@ def get_home_recommend_topic_ids(user_id, device_id, tag_id, offset, size, query
rank_topic_id_list = TopicUtils.get_recommend_topic_ids(user_id=user_id, tag_id=tag_id, offset=0, size=size, rank_topic_id_list = TopicUtils.get_recommend_topic_ids(user_id=user_id, tag_id=tag_id, offset=0, size=size,
single_size=size,query=query, query_type=query_type, single_size=size,query=query, query_type=query_type,
filter_topic_id_list=have_read_topic_id_list, filter_topic_id_list=have_read_topic_id_list,
user_similar_score_list=user_similar_score_redis_list,index_type="topic-high-star",routing="4,5,6",attention_tag_list=attention_tag_list) index_type="topic-high-star",routing="4,5,6",attention_tag_list=attention_tag_list,linucb_user_id_list=recommend_topic_user_list)
if len(recommend_topic_list) == 6 and query is None: if len(recommend_topic_list) == 6 and query is None:
if (size < 11): if (size < 11):
......
...@@ -209,7 +209,7 @@ class Topic(models.Model): ...@@ -209,7 +209,7 @@ class Topic(models.Model):
user_query_results = UserExtra.objects.using(settings.SLAVE_DB_NAME).filter(user_id=self.user_id) user_query_results = UserExtra.objects.using(settings.SLAVE_DB_NAME).filter(user_id=self.user_id)
if user_query_results.count() > 0: if user_query_results.count() > 0:
if user_query_results[0].is_recommend: if user_query_results[0].is_recommend:
offline_score += 2.0 offline_score += 2.0 *10
elif user_query_results[0].is_shadow: elif user_query_results[0].is_shadow:
user_is_shadow = True user_is_shadow = True
...@@ -219,18 +219,18 @@ class Topic(models.Model): ...@@ -219,18 +219,18 @@ class Topic(models.Model):
# 帖子等级 # 帖子等级
if self.content_level == '5': if self.content_level == '5':
offline_score += 6.0 offline_score += 100.0 *3
elif self.content_level == '4': elif self.content_level == '4':
offline_score += 5.0 offline_score += 60.0 *3
elif self.content_level == '6': elif self.content_level == '6':
offline_score += 400.0 offline_score += 200.0 *3
is_excellent = self.judge_if_excellent_topic(self.id) # is_excellent = self.judge_if_excellent_topic(self.id)
if is_excellent: # if is_excellent:
offline_score += 200.0 # offline_score += 200.0
if self.language_type == 1: if self.language_type == 1:
offline_score += 100.0 offline_score += 60.0
# exposure_count = ActionSumAboutTopic.objects.using(settings.SLAVE_DB_NAME).filter(topic_id=self.id, data_type=1).count() # exposure_count = ActionSumAboutTopic.objects.using(settings.SLAVE_DB_NAME).filter(topic_id=self.id, data_type=1).count()
# click_count = ActionSumAboutTopic.objects.using(settings.SLAVE_DB_NAME).filter(topic_id=self.id, data_type=2).count() # click_count = ActionSumAboutTopic.objects.using(settings.SLAVE_DB_NAME).filter(topic_id=self.id, data_type=2).count()
# uv_num = ActionSumAboutTopic.objects.using(settings.SLAVE_DB_NAME).filter(topic_id=self.id, data_type=3).count() # uv_num = ActionSumAboutTopic.objects.using(settings.SLAVE_DB_NAME).filter(topic_id=self.id, data_type=3).count()
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment