Commit 4f420ce0 authored by lixiaofang

Merge branch 'master' into vote_topic

parents 230e12fb 7be80ee9
......@@ -15,7 +15,7 @@ from libs.es import ESPerform
from libs.cache import redis_client
from trans2es.models.face_user_contrast_similar import FaceUserContrastSimilar, UserSimilarScore
from linucb.utils.register_user_tag import RegisterUserTag
from trans2es.models.tag import RegisterShowTag, Tag
from trans2es.models.tag import SettingsConfig, Tag
@shared_task
......@@ -113,16 +113,22 @@ def sync_user_similar_score():
def get_tag_count():
    """Cache the ids of the online core tags in redis for the hot-word search.

    Queries ``Tag`` for rows with ``is_online=True``, ``is_deleted=False`` and
    ``collection=1``, de-duplicates their ids and stores the result under the
    redis key ``physical:search_hotword:results_tag``.

    The ids are written as a JSON array because the consumer decodes the value
    with ``json.loads``. Handing a raw ``list`` to ``redis_client.set`` only
    works when the client stringifies it implicitly; redis-py 3.x rejects such
    values with ``DataError``.

    The recommended hot-word key
    (``physical:search_hotword:results_registr_tag``) is intentionally not
    written here: its write is commented out in this revision.

    Returns:
        None. Any exception is logged together with its traceback and
        swallowed, so the caller never sees a failure.
    """
    # Local import: the module-level import block is not fully visible here.
    import json

    redis_tag = "physical:search_hotword:results_tag"
    try:
        # Distinct ids of the tags that qualify as core words.
        results_tag = list(set(
            Tag.objects.filter(is_online=True, is_deleted=False, collection=1)
            .values_list("id", flat=True)))
        redis_client.set(redis_tag, json.dumps(results_tag))
    except Exception:
        logging.error("catch exception,err_msg:%s" % traceback.format_exc())
......@@ -164,10 +164,23 @@ class CollectData(object):
user_id = raw_val_dict["user_id"] if "user_id" in raw_val_dict else None
logging.info("consume topic_id:%s,device_id:%s" % (str(topic_id), str(device_id)))
topic_tag_list = list(TopicTag.objects.using(settings.SLAVE_DB_NAME).filter(topic_id=topic_id,is_online=True).values_list("tag_id",flat=True))
tag_query_results = Tag.objects.using(settings.SLAVE_DB_NAME).filter(id__in=topic_tag_list,is_online=True,is_deleted=False).values_list("id","collection","is_ai")
for id,collection,is_ai in tag_query_results:
if collection and is_ai:
# topic_tag_list = list(TopicTag.objects.using(settings.SLAVE_DB_NAME).filter(topic_id=topic_id,is_online=True).values_list("tag_id",flat=True))
# tag_query_results = Tag.objects.using(settings.SLAVE_DB_NAME).filter(id__in=topic_tag_list,is_online=True,is_deleted=False).values_list("id","collection","is_ai")
# for id,collection,is_ai in tag_query_results:
# if collection and is_ai:
# click_topic_tag_list.append(id)
topic_tag_list = list()
click_results = TopicTag.objects.using(settings.SLAVE_DB_NAME).filter(
topic_id=topic_id, is_online=True).values_list("tag_id", "is_collection")
for tag_id, is_collection in click_results:
topic_tag_list.append(tag_id)
if is_collection:
click_topic_tag_list.append(tag_id)
tag_query_results = Tag.objects.using(settings.SLAVE_DB_NAME).filter(
id__in=topic_tag_list, is_online=True, is_deleted=False).values_list("id",
"is_ai")
for id, is_ai in tag_query_results:
if is_ai:
click_topic_tag_list.append(id)
logging.info("positive tag_list,device_id:%s,topic_id:%s,tag_list:%s" % (
......@@ -197,58 +210,58 @@ class CollectData(object):
# 更新该用户的推荐tag数据,放在 更新完成user tag行为信息之后
if len(click_topic_tag_list)>0:
self.update_recommend_tag_list(device_id, user_feature, user_id,click_topic_tag_list=click_topic_tag_list)
elif "type" in raw_val_dict and "page_precise_exposure" == raw_val_dict["type"]:
if isinstance(raw_val_dict["params"]["exposure_cards"],str):
exposure_cards_list = json.loads(raw_val_dict["params"]["exposure_cards"])
elif isinstance(raw_val_dict["params"]["exposure_cards"],list):
exposure_cards_list = raw_val_dict["params"]["exposure_cards"]
else:
exposure_cards_list = list()
device_id = raw_val_dict["device"]["device_id"]
user_id = raw_val_dict["user_id"] if "user_id" in raw_val_dict else None
logging.warning("type msg:%s" % raw_val_dict.get("type"))
exposure_topic_id_list = list()
for item in exposure_cards_list:
if "card_id" not in item:
continue
exposure_topic_id = item["card_id"]
logging.info(
"consume exposure topic_id:%s,device_id:%s" % (str(exposure_topic_id), str(device_id)))
if exposure_topic_id:
exposure_topic_id_list.append(exposure_topic_id)
topic_tag_id_dict = dict()
tag_list = list()
exposure_sql_query_results = TopicTag.objects.using(settings.SLAVE_DB_NAME).\
filter(topic_id__in=exposure_topic_id_list).\
values_list("topic_id","tag_id","is_online","is_collection")
# if len(exposure_sql_query_results)>0:
for topic_id,tag_id,is_online,is_collection in exposure_sql_query_results:
if is_online and is_collection == 1:
tag_list.append(tag_id)
if is_online:
tag_sql_query_results = Tag.objects.using(settings.SLAVE_DB_NAME).filter(
id=tag_id).values_list("id", "collection", "is_ai")
for id, collection, is_ai in tag_sql_query_results:
if (is_ai == 1) and id not in tag_list:
tag_list.append(id)
if topic_id not in topic_tag_id_dict:
topic_tag_id_dict[topic_id] = list()
topic_tag_id_dict[topic_id].append(tag_id)
is_click = 0
is_vote = 0
reward = 1 if is_click or is_vote else 0
logging.info("negative tag_list,device_id:%s,topic_tag_id_dict:%s" % (
str(device_id), str(topic_tag_id_dict)))
for tag_id in tag_list:
self.update_user_linucb_tag_info(reward, device_id, tag_id, user_feature)
# 更新该用户的推荐tag数据,放在 更新完成user tag行为信息之后
self.update_recommend_tag_list(device_id, user_feature, user_id)
# elif "type" in raw_val_dict and "page_precise_exposure" == raw_val_dict["type"]:
# if isinstance(raw_val_dict["params"]["exposure_cards"],str):
# exposure_cards_list = json.loads(raw_val_dict["params"]["exposure_cards"])
# elif isinstance(raw_val_dict["params"]["exposure_cards"],list):
# exposure_cards_list = raw_val_dict["params"]["exposure_cards"]
# else:
# exposure_cards_list = list()
# device_id = raw_val_dict["device"]["device_id"]
# user_id = raw_val_dict["user_id"] if "user_id" in raw_val_dict else None
# logging.warning("type msg:%s" % raw_val_dict.get("type"))
# exposure_topic_id_list = list()
# for item in exposure_cards_list:
# if "card_id" not in item:
# continue
# exposure_topic_id = item["card_id"]
# logging.info(
# "consume exposure topic_id:%s,device_id:%s" % (str(exposure_topic_id), str(device_id)))
# if exposure_topic_id:
# exposure_topic_id_list.append(exposure_topic_id)
#
# topic_tag_id_dict = dict()
# tag_list = list()
# exposure_sql_query_results = TopicTag.objects.using(settings.SLAVE_DB_NAME).\
# filter(topic_id__in=exposure_topic_id_list).\
# values_list("topic_id","tag_id","is_online","is_collection")
# # if len(exposure_sql_query_results)>0:
# for topic_id,tag_id,is_online,is_collection in exposure_sql_query_results:
# if is_online and is_collection == 1:
# tag_list.append(tag_id)
# if is_online:
# tag_sql_query_results = Tag.objects.using(settings.SLAVE_DB_NAME).filter(
# id=tag_id).values_list("id", "collection", "is_ai")
# for id, collection, is_ai in tag_sql_query_results:
# if (is_ai == 1) and id not in tag_list:
# tag_list.append(id)
#
# if topic_id not in topic_tag_id_dict:
# topic_tag_id_dict[topic_id] = list()
# topic_tag_id_dict[topic_id].append(tag_id)
#
# is_click = 0
# is_vote = 0
#
# reward = 1 if is_click or is_vote else 0
#
# logging.info("negative tag_list,device_id:%s,topic_tag_id_dict:%s" % (
# str(device_id), str(topic_tag_id_dict)))
# for tag_id in tag_list:
# self.update_user_linucb_tag_info(reward, device_id, tag_id, user_feature)
#
# # 更新该用户的推荐tag数据,放在 更新完成user tag行为信息之后
# self.update_recommend_tag_list(device_id, user_feature, user_id)
elif "type" in raw_val_dict and "interest_choice_click_next" == raw_val_dict["type"]:
if isinstance(raw_val_dict["params"]["tagid_list"],str):
tagid_list = json.loads(raw_val_dict["params"]["tagid_list"])
......
......@@ -12,7 +12,7 @@ from search.utils.group import GroupUtils
from search.utils.common import GroupSortTypes
from libs.es import ESPerform
from trans2es.models.pictorial import PictorialTopics
from trans2es.models.tag import RegisterShowTag, Tag
from trans2es.models.tag import SettingsConfig, Tag
from libs.cache import redis_client
......@@ -33,25 +33,20 @@ def search_hotword(device_id=-1):
"""
try:
all_tag_name_list = set()
results_registr_tag = json.loads(redis_client.get("physical:search_hotword:results_registr_tag"))
# results_registr_tag = json.loads(redis_client.get("physical:search_hotword:results_registr_tag"))
results_tag = json.loads(redis_client.get("physical:search_hotword:results_tag"))
# 先获取搜索推荐热词
for num in range(0, len(results_registr_tag) - 1):
tag_id = random.randint(0, len(results_registr_tag) - 1)
results_tag_chose = list(
set(RegisterShowTag.objects.filter(tag_id=results_registr_tag[tag_id], is_online=True).values_list(
"tag_id", flat=True)))
if results_tag_chose:
results_tag_recommend = list(
set(Tag.objects.filter(id=results_tag_chose[0], is_online=True).values_list("name",
flat=True)))
if results_tag_recommend:
all_tag_name_list.add(results_tag_recommend[0])
if len(all_tag_name_list) == 6 or num == results_tag:
break
results_registr_tag = list(set(SettingsConfig.objects.filter(is_deleted=False,key=1).values_list("val", flat=True)))
tag_val_list = set()
for item in results_registr_tag:
for word in item.split():
tag_val_list.add(word)
tag_id_list = random.sample(range(0, len(tag_val_list)), 6)
for tag_id in tag_id_list:
tag_val = list(tag_val_list)[tag_id]
all_tag_name_list.add(tag_val)
logging.info("get all_tag_name_list:%s" % all_tag_name_list)
# 获取个性化标签
linucb_recommend_redis_prefix = "physical:linucb:tag_recommend:device_id:"
tag_recommend_redis_key = linucb_recommend_redis_prefix + str(device_id)
......@@ -84,3 +79,4 @@ def search_hotword(device_id=-1):
except:
logging.error("catch exception,err_msg:%s" % traceback.format_exc())
return {"recommend_tag_name": []}
-- Deploy flipr:sl_user_login_status to mysql
--
-- Creates the `sl_user_login_status` table: one row per user (primary key
-- `user_id`) with a secondary index on `last_visit_day`.
-- English gloss of the column COMMENT strings:
--   user_id          user id
--   is_shadow        whether the user is a shadow ("sock-puppet") account
--   first_visit_day  first visit date
--   last_visit_day   last login date
--   day_id           data accounting period

BEGIN;

CREATE TABLE `sl_user_login_status` (
    `user_id`         VARCHAR(100) NOT NULL COMMENT '用户ID',
    `is_shadow`       TINYINT(1)   NOT NULL COMMENT '是否是马甲用户',
    `first_visit_day` DATE                  COMMENT '首次日期',
    `last_visit_day`  DATE                  COMMENT '最后一次登陆日期',
    `day_id`          VARCHAR(10)  NOT NULL COMMENT '数据账期',
    PRIMARY KEY (`user_id`),
    KEY `lv_day` (`last_visit_day`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COMMENT='数据仓库推送表用户登录状态表';

COMMIT;
-- Revert flipr:sl_user_login_status from mysql
--
-- Undoes the deploy step by dropping the table.

BEGIN;

DROP TABLE `sl_user_login_status`;

COMMIT;
[core]
engine = mysql
# plan_file = sqitch.plan
# top_dir = .
# [engine "mysql"]
# target = db:mysql:
# registry = sqitch
# client = /usr/local/mysql/bin/mysql
%syntax-version=1.0.0
%project=flipr
%uri=https://github.com/sqitchers/sqitch-mysql-intro/
sl_user_login_status 2019-06-25T11:06:15Z Lxrent <lxrent@lxrentdeMacBook-Pro.local> # 数据仓库推送表用户登录状态表
-- Verify flipr:sl_user_login_status on mysql
--
-- Fails if the table or any of the deployed columns is missing. Every column
-- is named so that a partially-created table is detected, and `WHERE 0`
-- keeps the check from reading any rows.

BEGIN;

SELECT `user_id`, `is_shadow`, `first_visit_day`, `last_visit_day`, `day_id`
  FROM `sl_user_login_status`
 WHERE 0;

ROLLBACK;
......@@ -5,8 +5,8 @@
"is_online":{"type":"boolean"},//上线
"is_deleted":{"type":"boolean"},
"is_recommend":{"type":"boolean"},
"name":{"type":"text","analyzer":"gm_default_index","search_analyzer":"gm_default_index"},
"description":{"type":"text","analyzer":"gm_default_index","search_analyzer":"gm_default_index"},
"name":{"type":"text","analyzer":"gm_default_index","search_analyzer":"gm_default_search"},
"description":{"type":"text","analyzer":"gm_default_index","search_analyzer":"gm_default_search"},
"topic_num":{"type":"long"},
"creator_id":{"type":"long"},
"icon":{"type":"text"},
......@@ -14,7 +14,7 @@
"create_time":{"type":"date", "format":"date_time_no_millis"},
"update_time":{"type":"date", "format":"date_time_no_millis"},
"tag_id":{"type":"long"},
"tag_name":{"type":"text","analyzer":"gm_default_index","search_analyzer":"gm_default_index"},
"tag_name":{"type":"text","analyzer":"gm_default_index","search_analyzer":"gm_default_search"},
"topic_id_list":{"type":"long"},
"effective":{"type":"boolean"},
"offline_score":{"type":"long"},
......
......@@ -78,7 +78,7 @@ class Pictorial(models.Model):
for topic_id in topic_id_list:
topic_id_object = Topic.objects.filter(id=int(topic_id)).first()
if topic_id_object and topic_id_object.is_online and int(topic_id_object.content_level) in [0, 3, 4, 5]:
if topic_id_object and topic_id_object.is_online and int(topic_id_object.content_level) in [0, 3, 4, 5,6]:
effective_num += 1
if effective_num >= 5:
ret = True
......
......@@ -96,13 +96,14 @@ class CommunityTagType(models.Model):
update_time = models.DateTimeField(verbose_name=u'更新时间', default=datetime.datetime.fromtimestamp(0))
class SettingsConfig(models.Model):
    """Row of the ``settingsconfig`` table.

    ``search_hotword`` reads ``val`` from rows with ``is_deleted=False`` and
    ``key=1`` and splits each value on whitespace to build its hot-word pool.
    """

    class Meta:
        verbose_name = "搜索热词"
        db_table = "settingsconfig"

    id = models.IntegerField(primary_key=True, verbose_name=u"主键ID")
    is_deleted = models.IntegerField(verbose_name=u"是否删除")
    is_online = models.IntegerField(verbose_name=u"是否在线")
    key = models.IntegerField(verbose_name=u"值")
    # Declared as text rather than integer: consumers call ``.split()`` on the
    # value, which only works for strings. TextField is used because the
    # column length is not visible here -- TODO confirm against the schema.
    val = models.TextField(verbose_name="标签内容")
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment