Commit dfd16540 authored by 高雅喆

用户画像冷启动

parent a046825a
......@@ -20,6 +20,26 @@ from pyspark.sql.functions import concat_ws
from tool import *
def get_hot_search_words_tag():
    """Fetch the hot-search keywords that correspond to an online tag.

    Joins ``api_hot_search_words`` to ``api_tag`` on keyword == tag name and
    keeps non-deleted keywords whose tag is online and whose ``tag_type`` is
    numerically below 4, ordered by ``sorted`` descending and capped at 10
    rows. The query is executed through ``get_data_by_mysql``.

    Returns:
        The result of ``get_data_by_mysql`` for the query, or an empty list
        if any exception is raised (the exception is printed, not re-raised).
    """
    # The WHERE filters on b.* reject rows where the join found no tag, so
    # the LEFT JOIN effectively behaves like an inner join here.
    # The SQL text is kept verbatim.
    query = """
SELECT a.keywords,
b.id,
b.tag_type
FROM api_hot_search_words a
LEFT JOIN api_tag b ON a.keywords=b.name
WHERE a.is_delete=0
AND b.tag_type+0<'4'+0
AND b.is_online=1
ORDER BY a.sorted DESC LIMIT 10
"""
    try:
        # NOTE(review): connection credentials are hard-coded in source;
        # consider loading them from configuration instead.
        return get_data_by_mysql('172.16.30.141', 3306, 'work', 'BJQaT9VzDcuPBqkd', 'zhengxing', query)
    except Exception as err:
        # Best-effort: report the failure and let the caller see "no rows".
        print(err)
        return []
def get_user_service_portrait(cl_id, all_word_tags, all_tag_tag_type, all_3tag_2tag, size=10):
try:
db_jerry_test = pymysql.connect(host='172.16.40.158', port=4000, user='root', passwd='3SYz54LS9#^9sBvC',
......@@ -140,6 +160,21 @@ if __name__ == '__main__':
# Mapping from level-3 tags to their level-2 tags.
all_3tag_2tag = get_all_3tag_2tag()
# Portrait cold start: turn the current hot-search tags into a default
# portrait (fixed score and weight of 10 per tag) and publish it to Redis.
hot_search_words = get_hot_search_words_tag()
hot_search_words_portrait = [
    {"tag_score": 10, "weight": 10, "tag2": tag_info["id"]}
    for tag_info in hot_search_words
]
# NOTE(review): redis-py documents socket_timeout in seconds, so 2000 is
# roughly 33 minutes -- confirm whether 2 (seconds) was intended.
gm_kv_cli = redis.Redis(host="172.16.40.135", port=5379, db=2, socket_timeout=2000)
hot_search_words_portrait_portrait_key = "user:service_coldstart_tags:cl_id:"
hot_search_words_portrait_json = json.dumps(hot_search_words_portrait)
# get_hot_search_words_tag() returns [] when its query fails; skip the write
# in that case so a transient MySQL error does not replace the previously
# stored cold-start portrait with an empty list.
if hot_search_words_portrait:
    # Set value and 30-day TTL in one command so the key can never be left
    # without an expiry if the process dies between two separate calls.
    gm_kv_cli.set(
        hot_search_words_portrait_portrait_key,
        hot_search_words_portrait_json,
        ex=30 * 24 * 60 * 60,
    )
# rdd
sparkConf = SparkConf().set("spark.hive.mapred.supports.subdirectories", "true") \
.set("spark.hadoop.mapreduce.input.fileinputformat.input.dir.recursive", "true") \
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment