Commit 7be67b71 authored by 段英荣

Merge branch 'hk_now' into 'master'

revise linucb recommend topic

See merge request alpha/physical!233
parents a1699140 e0dc1c54
...@@ -264,29 +264,43 @@ class ESPerform(object): ...@@ -264,29 +264,43 @@ class ESPerform(object):
return True return True
@classmethod @classmethod
def get_tag_topic_list(cls,tag_id): def get_tag_topic_list(cls,tag_id,have_read_topic_id_list):
try: try:
functions_list = list()
for id in tag_id:
functions_list.append(
{
"filter": {"term": {"tag_list": id}},
"weight": 1
}
)
q = { q = {
"query":{ "query":{
"bool":{ "bool":{
"must":[ "must":[
{"range": {"content_level": {"gte": 4, "lte": 5}}}, {"range": {"content_level": {"gte": 3, "lte": 5}}},
{"term":{"is_online": True}}, {"term":{"is_online": True}},
{"term":{"is_deleted": False}}, {"term":{"is_deleted": False}},
{"term":{"tag_list":tag_id}} {"terms":{"tag_list":tag_id}}
] ]
} }
}, },
"boost_mode": "sum",
"score_mode": "sum",
"functions": functions_list,
"_source":{ "_source":{
"include":["id"] "include":["id"]
}, },
"sort":[ "sort":[
{"_score": {"order": "desc"}},
{"create_time_val":{"order":"desc"}}, {"create_time_val":{"order":"desc"}},
{"language_type":{"order":"asc"}}, {"language_type":{"order":"asc"}},
] ]
} }
if len(have_read_topic_id_list)>0:
q["query"]["bool"]["must_not"] = have_read_topic_id_list
result_dict = ESPerform.get_search_results(ESPerform.get_cli(), sub_index_name="topic", query_body=q, result_dict = ESPerform.get_search_results(ESPerform.get_cli(), sub_index_name="topic", query_body=q,
offset=0, size=5000,routing="4,5") offset=0, size=100,routing="3,4,5")
topic_id_list = [item["_source"]["id"] for item in result_dict["hits"]] topic_id_list = [item["_source"]["id"] for item in result_dict["hits"]]
return topic_id_list return topic_id_list
......
...@@ -7,7 +7,7 @@ from libs.cache import redis_client ...@@ -7,7 +7,7 @@ from libs.cache import redis_client
import logging import logging
from linucb.views.linucb import LinUCB from linucb.views.linucb import LinUCB
import json import json
from trans2es.models.tag import TopicTag from trans2es.models.tag import TopicTag,Tag
import traceback import traceback
from django.conf import settings from django.conf import settings
from libs.es import ESPerform from libs.es import ESPerform
...@@ -74,28 +74,37 @@ class CollectData(object): ...@@ -74,28 +74,37 @@ class CollectData(object):
have_read_topic_id_list = Tools.get_have_read_topic_id_list(device_id,-1,TopicPageType.HOME_RECOMMEND) have_read_topic_id_list = Tools.get_have_read_topic_id_list(device_id,-1,TopicPageType.HOME_RECOMMEND)
recommend_topic_id_list = list() recommend_topic_id_list = list()
for index in range(0,100): tag_id_list = recommend_tag_list[0:100]
recommend_topic_id_list_random = list() tag_topic_id_list = ESPerform.get_tag_topic_list(tag_id_list,have_read_topic_id_list)
for tag_id in recommend_tag_list[0:100]: # for index in range(0,100):
redis_tag_id_key = self.tag_topic_id_redis_prefix + str(tag_id) # recommend_topic_id_list_es = list()
redis_tag_id_data = redis_client.get(redis_tag_id_key) # for tag_id in recommend_tag_list[0:100]:
tag_topic_id_list = json.loads(redis_tag_id_data) if redis_tag_id_data else [] # redis_tag_id_key = self.tag_topic_id_redis_prefix + str(tag_id)
if not redis_tag_id_data: # redis_tag_id_data = redis_client.get(redis_tag_id_key)
tag_topic_id_list = ESPerform.get_tag_topic_list(tag_id) # tag_topic_id_list = json.loads(redis_tag_id_data) if redis_tag_id_data else []
redis_client.set(redis_tag_id_key,json.dumps(tag_topic_id_list)) # if not redis_tag_id_data:
redis_client.expire(redis_tag_id_key,1*24*60*60) # tag_topic_id_list = ESPerform.get_tag_topic_list(tag_id)
# redis_client.set(redis_tag_id_key,json.dumps(tag_topic_id_list))
if len(tag_topic_id_list)>index: # redis_client.expire(redis_tag_id_key,1*24*60*60)
for topic_id in tag_topic_id_list[index:]: #
if topic_id not in have_read_topic_id_list and topic_id not in recommend_topic_id_list_random and topic_id not in recommend_topic_id_list: # if len(tag_topic_id_list)>index:
recommend_topic_id_list_random.append(topic_id) # for topic_id in tag_topic_id_list[index:]:
break # if topic_id not in have_read_topic_id_list and topic_id not in recommend_topic_id_list_random and topic_id not in recommend_topic_id_list:
random.shuffle(recommend_topic_id_list_random) # recommend_topic_id_list_random.append(topic_id)
recommend_topic_id_list.extend(recommend_topic_id_list_random) # break
# tag_id = recommend_tag_list[0:100]
# tag_topic_id_list = ESPerform.get_tag_topic_list(tag_id)
# if len(tag_topic_id_list) > index:
# for topic_id in tag_topic_id_list[index:]:
# if topic_id not in have_read_topic_id_list and topic_id not in recommend_topic_id_list_es and topic_id not in recommend_topic_id_list:
# recommend_topic_id_list_es.append(topic_id)
# if len(recommend_topic_id_list_es) == 100:
# break
# # random.shuffle(recommend_topic_id_list_random)
# recommend_topic_id_list.extend(recommend_topic_id_list_es)
topic_recommend_redis_key = self.linucb_recommend_topic_id_prefix + str(device_id) topic_recommend_redis_key = self.linucb_recommend_topic_id_prefix + str(device_id)
redis_data_dict = { redis_data_dict = {
"data": json.dumps(recommend_topic_id_list), "data": json.dumps(tag_topic_id_list),
"cursor":0 "cursor":0
} }
redis_client.hmset(topic_recommend_redis_key,redis_data_dict) redis_client.hmset(topic_recommend_redis_key,redis_data_dict)
...@@ -138,6 +147,9 @@ class CollectData(object): ...@@ -138,6 +147,9 @@ class CollectData(object):
click_sql_query_results = TopicTag.objects.using(settings.SLAVE_DB_NAME).filter(topic_id=topic_id).values_list("tag_id","is_online") click_sql_query_results = TopicTag.objects.using(settings.SLAVE_DB_NAME).filter(topic_id=topic_id).values_list("tag_id","is_online")
for tag_id,is_online in click_sql_query_results: for tag_id,is_online in click_sql_query_results:
if is_online: if is_online:
# tag_sql_query_results = Tag.objects.using(settings.SLAVE_DB_NAME).filter(id=tag_id).values_list("id","collection","is_ai")
# for id,collection,is_ai in tag_sql_query_results:
# if collection == 1 or is_ai == 1:
tag_list.append(tag_id) tag_list.append(tag_id)
is_click = 1 is_click = 1
...@@ -176,6 +188,10 @@ class CollectData(object): ...@@ -176,6 +188,10 @@ class CollectData(object):
exposure_sql_query_results = TopicTag.objects.using(settings.SLAVE_DB_NAME).filter(topic_id__in=exposure_topic_id_list).values_list("topic_id","tag_id","is_online") exposure_sql_query_results = TopicTag.objects.using(settings.SLAVE_DB_NAME).filter(topic_id__in=exposure_topic_id_list).values_list("topic_id","tag_id","is_online")
for topic_id,tag_id,is_online in exposure_sql_query_results: for topic_id,tag_id,is_online in exposure_sql_query_results:
if is_online: if is_online:
# tag_sql_query_results = Tag.objects.using(settings.SLAVE_DB_NAME).filter(
# id=tag_id).values_list("id", "collection", "is_ai")
# for id, collection, is_ai in tag_sql_query_results:
# if collection == 1 or is_ai == 1:
tag_list.append(tag_id) tag_list.append(tag_id)
if topic_id not in topic_tag_id_dict: if topic_id not in topic_tag_id_dict:
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment