Commit 6bacb3a6 authored by 赵威

get tags info

parent df5ddaaf
......@@ -44,13 +44,14 @@ def get_new_user_tractate_info():
return es_scan("tractate", q)
def get_tractate_vector(sentence_lst, all_keywords_set, model):
def get_tractate_vector(sentence_lst, all_keywords_set, all_tags_lst, model):
all_tags_set = set(all_tags_lst)
word_list = []
for s in sentence_lst:
tmp_lst = []
for i in jieba.lcut(s):
s = i.encode("utf-8")
if s in all_keywords_set:
if s in all_tags_set:
tmp_lst.append(s)
word_list.append(tmp_lst)
word_list = word_list[:15]
......@@ -70,7 +71,7 @@ def get_tractate_vector(sentence_lst, all_keywords_set, model):
return res
def save_tractate_vector_to_redis(all_keywords_set, model):
def save_tractate_vector_to_redis(all_keywords_set, all_tags_lst, model):
es_result = get_new_user_tractate_info()
count = 0
for i in es_result:
......@@ -78,11 +79,11 @@ def save_tractate_vector_to_redis(all_keywords_set, model):
source = i["_source"]
sentences = source["keynote_sentence"]
id = source["id"]
vec = get_tractate_vector(sentences, all_keywords_set, model)
vec = get_tractate_vector(sentences, all_keywords_set, all_tags_lst, model)
redis_key = "rims:tractate:sentense:vector:" + str(id)
if vec:
print(count, id, len(vec))
redis_client5.set(redis_key, json.dumps(vec))
# redis_client5.set(redis_key, json.dumps(vec))
# redis_client5.expire(redis_key, 60 * 60 * 24 * 3)
......@@ -120,9 +121,9 @@ if __name__ == "__main__":
for word in all_keywords_set:
jieba.add_word(word, freq=1000, tag="user_defined")
save_tractate_vector_to_redis(all_keywords_set, model)
all_tags_lst = [i.encode("utf-8") for i in get_all_business_tags()]
print("all tags: " + str(len(all_tags_lst)))
print(all_tags_lst[:5])
save_tag_vector_to_redis(all_tags_lst, model)
save_tractate_vector_to_redis(all_keywords_set, all_tags_lst, model)
# save_tag_vector_to_redis(all_tags_lst, model)
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment