Commit 236b7237 authored by 段英荣's avatar 段英荣

Merge branch 'similar_sort' into 'master'

modify

See merge request !152
parents 91891810 d964c416
......@@ -59,9 +59,17 @@ class LinUCB:
AaI_tmp = np.array(Aa_list)
theta_tmp = np.array(theta_list)
art_max = tag_list[np.argmax(np.dot(xaT, theta_tmp) + cls.alpha * np.sqrt(np.dot(np.dot(xaT, AaI_tmp), xa)))]
top_tag_list_len = len(tag_list)/3
top_np_ind = np.argpartition(np.dot(xaT, theta_tmp) + cls.alpha * np.sqrt(np.dot(np.dot(xaT, AaI_tmp), xa)), -top_tag_list_len)[-top_tag_list_len:]
return [int(art_max)]
top_tag_list = list()
top_np_list = top_np_ind.tolist()
for tag_id in top_np_list:
top_tag_list.append(tag_id)
#art_max = tag_list[np.argmax(np.dot(xaT, theta_tmp) + cls.alpha * np.sqrt(np.dot(np.dot(xaT, AaI_tmp), xa)))]
return top_tag_list
except:
logging.error("catch exception,err_msg:%s" % traceback.format_exc())
return []
......
......@@ -153,10 +153,6 @@ class TopicUtils(object):
user_tag_list = result_dict["hits"][0]["_source"]["tag_list"]
# attention_user_id_term_list = cls.___get_should_term_list(attention_user_id_list,field_name="user_id")
# pick_user_id_term_list = cls.___get_should_term_list(pick_user_id_list,field_name="user_id")
# same_group_user_id_term_list = cls.___get_should_term_list(same_group_id_list,field_name="user_id")
q = dict()
q["query"] = dict()
......@@ -244,7 +240,24 @@ class TopicUtils(object):
{"term": {"has_image":True}},
{"term": {"is_online": True}},
{"term": {"is_deleted": False}}
]
],
"should": [
{
"bool":{
"must":[
{"term":{"has_image":True}},
{"term": {"has_video": False}}
]
}
},{
"bool":{
"must":{
"term":{"has_video":True}
}
}
}
],
"minimum_should_match":1
}
},
"score_mode": "sum",
......
......@@ -130,92 +130,6 @@ def get_home_recommend_topic_ids(user_id,device_id,offset,size,query=None,query_
logging.error("catch exception,err_msg:%s" % traceback.format_exc())
return []
"""
def get_home_recommend_topic_ids(user_id,device_id,offset,size,query=None,query_type=TopicPageType.HOME_RECOMMEND):
try:
if query is None:
if user_id == -1:
redis_key = "physical:home_recommend" + ":user_id:" + str(user_id) + ":device_id:" + device_id + ":query_type:" + str(query_type)
else:
redis_key = "physical:home_recommend" + ":user_id:" + str(user_id) + ":query_type:" + str(query_type)
else:
if user_id == -1:
redis_key = "physical:home_query" + ":user_id:" + str(user_id) + ":device_id:" + device_id + ":query:" + str(query) + ":query_type:" + str(query_type)
else:
redis_key = "physical:home_query" + ":user_id:" + str(user_id) + ":query:" + str(query) + ":query_type:" + str(query_type)
redis_field_list = [b'last_offset_num', b'unread_topic_id']
for page_id in range(0,offset,size):
redis_field_list.append(str(page_id))
redis_field_val_list = redis_client.hmget(redis_key,redis_field_list)
last_offset_num = int(redis_field_val_list[0]) if redis_field_val_list[0] else -1
recommend_topic_ids = []
topic_id_dict = TopicUtils.get_recommend_topic_ids(user_id, offset, size*size,query,query_type=query_type)
have_read_group_id_set = set()
unread_topic_id_dict = dict()
have_read_topic_id_set = set()
if redis_field_val_list[1] and offset>0:
if (user_id>0 and offset==last_offset_num) or user_id==-1:
ori_unread_topic_id_dict = json.loads(redis_field_val_list[1])
if len(ori_unread_topic_id_dict) > 0:
topic_id_dict.update(ori_unread_topic_id_dict)
for have_read_item in redis_field_val_list[2:]:
if have_read_item:
have_read_topic_id_set=have_read_topic_id_set.union(json.loads(have_read_item))
# 当前页小组数量
cur_page_group_num = 0
# 当前页用户数量
cur_page_user_num = 0
for topic_id in topic_id_dict:
if topic_id_dict[topic_id] in have_read_group_id_set:
unread_topic_id_dict[topic_id] = topic_id_dict[topic_id]
else:
if topic_id not in have_read_topic_id_set:
if isinstance(topic_id_dict[topic_id],int) and topic_id_dict[topic_id]>0 and cur_page_group_num<(size*0.9):
have_read_group_id_set.add(topic_id_dict[topic_id])
have_read_topic_id_set.add(topic_id)
cur_page_group_num += 1
recommend_topic_ids.append(topic_id)
elif topic_id_dict[topic_id] and cur_page_user_num<(size*0.1):
cur_page_user_num += 1
recommend_topic_ids.append(topic_id)
else:
unread_topic_id_dict[topic_id] = topic_id_dict[topic_id]
if len(recommend_topic_ids) >= size:
break
offi_unread_topic_id_dict = dict()
if len(recommend_topic_ids) < size and len(unread_topic_id_dict)>0:
for unread_topic_id in unread_topic_id_dict:
if len(recommend_topic_ids)<size:
recommend_topic_ids.append(unread_topic_id)
else:
offi_unread_topic_id_dict[unread_topic_id] = unread_topic_id_dict[unread_topic_id]
redis_dict = {
"unread_topic_id":json.dumps(offi_unread_topic_id_dict),
"last_offset_num":offset+size,
offset: json.dumps(recommend_topic_ids)
}
redis_client.hmset(redis_key,redis_dict)
# 每个session key保存15分钟
redis_client.expire(redis_key,15*60)
return recommend_topic_ids
except:
logging.error("catch exception,err_msg:%s" % traceback.format_exc())
return []
"""
@bind("physical/search/query_tag_id_by_topic")
def query_tag_id_by_topic(offset=0,size=10,topic_id_list=[],user_id=-1):
try:
......
......@@ -14,13 +14,13 @@
"group_id":{"type":"long"}, //所在组ID
"tag_list":{"type":"long"},//标签属性
"edit_tag_list":{"type":"long"},//编辑标签
"tag_name_list":{"type":"text","analyzer":"gm_default_index","search_analyzer":"gm_default_index"},
"share_num":{"type":"long"},
"pick_id_list":{"type":"long"},
"offline_score":{"type":"double"},//离线算分
"manual_score":{"type":"double"},//人工赋分
"has_image":{"type":"boolean"},//是否有图
"has_video":{"type":"boolean"},//是否是视频
"create_time":{"type":"date", "format":"date_time_no_millis"},
"update_time":{"type":"date", "format":"date_time_no_millis"},
"create_time_val":{"type":"long"},
......
......@@ -54,6 +54,7 @@ class Topic(models.Model):
Group, verbose_name=u"关联的小组", related_name=u"group_topics", null=True, blank=True, default=None,
on_delete=models.CASCADE)
user_id = models.IntegerField(verbose_name=u'用户ID')
has_video = models.IntegerField(verbose_name=u'是否是视频日记')
drop_score = models.IntegerField(verbose_name=u'人工赋分', default=0)
description = models.CharField(verbose_name=u'日记本描述', max_length=200)
content = models.CharField(verbose_name=u'日记本内容', max_length=1000)
......
......@@ -185,43 +185,48 @@ class TypeInfo(object):
)
def insert_table_chunk(self, sub_index_name, table_chunk, es=None):
start_clock = time.clock()
start_time = time.time()
instance_list = list(table_chunk)
stage_1_time = time.time()
data_list = self.bulk_get_data(instance_list)
stage_2_time = time.time()
es_result = ESPerform.es_helpers_bulk(
es_cli=es,
data_list=data_list,
sub_index_name=sub_index_name,
auto_create_index=True
)
stage_3_time = time.time()
end_clock = time.clock()
return ('{datetime} {index_prefix} {type_name:10s} {pk_start:>15s} {pk_stop:>15s} {count:5d} '
'{stage_1_duration:6.3f} {stage_2_duration:6.3f} {stage_3_duration:6.3f} {clock_duration:6.3f} '
'{response}').format(
datetime=datetime.datetime.now().strftime('%Y-%m-%dT%H:%M:%S.%f'),
index_prefix=sub_index_name,
type_name=self.name,
pk_start=repr(table_chunk.get_pk_start()),
pk_stop=repr(table_chunk.get_pk_stop()),
count=len(instance_list),
stage_1_duration=stage_1_time - start_time,
stage_2_duration=stage_2_time - stage_1_time,
stage_3_duration=stage_3_time - stage_2_time,
clock_duration=end_clock - start_clock,
response=es_result,
)
try:
start_clock = time.clock()
start_time = time.time()
instance_list = list(table_chunk)
stage_1_time = time.time()
data_list = self.bulk_get_data(instance_list)
stage_2_time = time.time()
es_result = ESPerform.es_helpers_bulk(
es_cli=es,
data_list=data_list,
sub_index_name=sub_index_name,
auto_create_index=True
)
logging.info("es_helpers_bulk,sub_index_name:%s,data_list len:%d" % (sub_index_name,len(data_list)))
stage_3_time = time.time()
end_clock = time.clock()
return ('{datetime} {index_prefix} {type_name:10s} {pk_start:>15s} {pk_stop:>15s} {count:5d} '
'{stage_1_duration:6.3f} {stage_2_duration:6.3f} {stage_3_duration:6.3f} {clock_duration:6.3f} '
'{response}').format(
datetime=datetime.datetime.now().strftime('%Y-%m-%dT%H:%M:%S.%f'),
index_prefix=sub_index_name,
type_name=self.name,
pk_start=repr(table_chunk.get_pk_start()),
pk_stop=repr(table_chunk.get_pk_stop()),
count=len(instance_list),
stage_1_duration=stage_1_time - start_time,
stage_2_duration=stage_2_time - stage_1_time,
stage_3_duration=stage_3_time - stage_2_time,
clock_duration=end_clock - start_clock,
response=es_result,
)
except:
logging.error("catch exception,err_msg:%s" % traceback.format_exc())
return None
_get_type_info_map_result = None
......
......@@ -57,6 +57,7 @@ class TopicTransfer(object):
res["manual_score"] = instance.drop_score
res["has_image"] = instance.topic_has_image()
res["has_video"] = instance.has_video
res["language_type"] = instance.language_type
# # 片假名
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment