Commit 44e1b0df authored by 高雅喆

画像V3,冷启动和画像存储tag的name

parent 1187ed7d
......@@ -40,7 +40,7 @@ def get_hot_search_words_tag():
return []
def get_user_service_portrait(cl_id, all_word_tags, all_tag_tag_type, all_3tag_2tag, size=10):
def get_user_service_portrait(cl_id, all_word_tags, all_tag_tag_type, all_3tag_2tag, all_tags_name, size=10):
try:
db_jerry_test = pymysql.connect(host='172.16.40.158', port=4000, user='root', passwd='3SYz54LS9#^9sBvC',
db='jerry_test', charset='utf8')
......@@ -136,6 +136,13 @@ def get_user_service_portrait(cl_id, all_word_tags, all_tag_tag_type, all_3tag_2
redis_client.hmset(cl_id_portrait_key2, gmkv_tag_score2_sum_dict)
redis_client.expire(cl_id_portrait_key2, time=30 * 24 * 60 * 60)
# 标签name写redis
cl_id_portrait_key3 = "user:service_portrait_tags3:cl_id:" + str(cl_id)
gmkv_tag_score3_sum_dict = {all_tags_name[i]: gmkv_tag_score2_sum_dict[i] for i in gmkv_tag_score2_sum_dict}
redis_client.delete(cl_id_portrait_key3)
redis_client.hmset(cl_id_portrait_key3, gmkv_tag_score3_sum_dict)
redis_client.expire(cl_id_portrait_key3, time=30 * 24 * 60 * 60)
# 写tidb,gmkv同步
stat_date = datetime.datetime.today().strftime('%Y-%m-%d')
replace_sql = """replace into user_service_portrait_tags (stat_date, cl_id, tag_list) values("{stat_date}","{cl_id}","{tag_list}")"""\
......@@ -182,6 +189,9 @@ if __name__ == '__main__':
# 3级tag对应的2级tag
all_3tag_2tag = get_all_3tag_2tag()
# 标签id对应的中文名称
all_tags_name = get_all_tags_name()
# 画像冷启动
hot_search_words = get_hot_search_words_tag()
hot_search_words_portrait = list()
......@@ -203,8 +213,15 @@ if __name__ == '__main__':
hot_search_words_portrait_portrait_key2 = "user:service_coldstart_tags2"
hot_search_words_portrait_dict = {i["id"]: 0.2 for i in hot_search_words}
gm_kv_cli.hmset(hot_search_words_portrait_portrait_key2, hot_search_words_portrait_dict)
redis_client.delete(hot_search_words_portrait_portrait_key2)
redis_client.hmset(hot_search_words_portrait_portrait_key2, hot_search_words_portrait_dict)
hot_search_words_portrait_portrait_key3 = "user:service_coldstart_tags3"
hot_search_words_portrait3_dict = {i["keywords"]: 0.2 for i in hot_search_words}
redis_client.delete(hot_search_words_portrait_portrait_key3)
redis_client.hmset(hot_search_words_portrait_portrait_key3, hot_search_words_portrait3_dict)
# 搜索词tag
search_words_synonym_tags_key = "search:words:synonym:tags"
search_words_synonym_tags_json = json.dumps(all_word_tags)
......@@ -224,7 +241,7 @@ if __name__ == '__main__':
spark.sparkContext.setLogLevel("WARN")
spark.sparkContext.addPyFile("/srv/apps/ffm-baseline_git/eda/smart_rank/tool.py")
device_ids_lst_rdd = spark.sparkContext.parallelize(device_ids_lst)
result = device_ids_lst_rdd.repartition(100).map(lambda x: get_user_service_portrait(x, all_word_tags, all_tag_tag_type, all_3tag_2tag))
result = device_ids_lst_rdd.repartition(100).map(lambda x: get_user_service_portrait(x, all_word_tags, all_tag_tag_type, all_3tag_2tag, all_tags_name))
result.collect()
except Exception as e:
......
......@@ -174,6 +174,18 @@ def get_all_3tag_2tag():
print(e)
def get_all_tags_name():
    """Return a mapping of tag id to tag name read from the ``api_tag`` table.

    The query selects ``id`` and ``name`` for rows with ``is_online=1`` whose
    ``tag_type`` is below 4 (the ``+0`` on both sides is presumably there to
    force a numeric rather than string comparison -- confirm against the
    column type).

    Returns:
        dict: ``{id: name}`` for every row returned by the query.  When the
        query or the row processing raises, the error is printed and an empty
        dict is returned, so callers that subscript or iterate the result
        always receive a dict instead of ``None``.
    """
    try:
        sql = "select id, name from api_tag where tag_type+0<'4'+0 and is_online=1"
        # NOTE(review): connection parameters are hard-coded here; consider
        # moving them to configuration.
        mysql_results = get_data_by_mysql('172.16.30.141', 3306, 'work', 'BJQaT9VzDcuPBqkd', 'zhengxing', sql)
        # Rows are indexed by column name ('id', 'name').
        return {data['id']: data['name'] for data in mysql_results}
    except Exception as e:
        # Best-effort lookup: report the failure and fall back to an empty
        # mapping so the return type stays a dict.
        print(e)
        return {}
def get_tag2_from_tag3(tag3, all_3tag_2tag, user_log_df_tag2_list):
try:
tag2s = []
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment