Commit 22e7eace authored by 高雅喆

add gm_tag_cf

parent 84e6abb5
import redis
import json
import numpy as np
import pandas as pd
from sklearn.metrics import pairwise_distances
......@@ -49,20 +51,28 @@ if __name__ == '__main__':
# 所有标签的协同标签
all_tags_cf_tags = dict()
for tag in all_tags_name:
if tag in cf_tags:
if tag in sorted_tag_ids:
tag_cf_tags = get_similary_tags(tag, cf_tags, ratings_matrix)
else:
tag_cf_tags = []
continue
# 过滤父级标签
parent_tags = all_tag_parent_tags[tag]
parent_tags = all_tag_parent_tags.get(tag, []) # 1级标签没有父级标签
for parent_tag in parent_tags:
if parent_tag in tag_cf_tags:
tag_cf_tags.remove(parent_tag)
# 过滤标签的同义词
tag_name = all_tags_name[tag]
tag_cf_tags_names = [all_tags_name[i] for i in tag_cf_tags]
tag_synonym_names = all_word_synonym_words[tag_name]
tag_synonym_names = all_word_synonym_words.get(tag_name, []) # 部分标签名称没有同义词
for tag_synonym_name in tag_synonym_names:
if tag_synonym_name in tag_cf_tags_names:
tag_cf_tags_names.remove(tag_synonym_name)
all_tags_cf_tags[tag_name] = tag_cf_tags_names
all_tags_cf_tags[tag_name] = json.dumps(tag_cf_tags_names[:10])
redis_client = redis.StrictRedis.from_url('redis://:ReDis!GmTx*0aN9@172.16.40.173:6379')
gm_tag_cf_tags_key = "gm:tag:cf:tags"
redis_client.hmset(gm_tag_cf_tags_key, all_tags_cf_tags)
# doris上执行
# data = redis_client.hgetall(gm_tag_cf_tags_key)
# data_dict = {str(i, 'utf-8'): json.loads(data[i]) for i in data}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment