Commit 3be11306 authored by 赵威

get tractate info

parent 65941175
# coding: utf-8
# source /srv/envs/SyntaxNet/bin/activate
import re
import sys
import jieba
import numpy as np
from gensim import models as word2vec
from utils.es import es_scan
# Python 2 only: site.py removes sys.setdefaultencoding at interpreter start-up,
# so the module is reloaded to get the function back, and the process-wide
# default codec for implicit str <-> unicode conversion is then set to UTF-8.
reload(sys)
sys.setdefaultencoding("utf8")
def get_new_user_tractate_info():
    """Run the fixed tractate query through ``es_scan`` and return its result.

    The query is a ``bool``/``must`` filter with three clauses:

    * ``is_online`` equals ``"true"``
    * ``content_level`` is one of 6, 5, 4, 3.5, 3
    * ``operators_add_tags`` contains 3315 or 14288

    Only the ``id``, ``portrait_tag_name``, ``tags_v3`` and
    ``keynote_sentence`` fields are requested in ``_source``.

    Returns:
        Whatever ``es_scan("tractate", query)`` returns.
    """
    online_clause = {"term": {"is_online": "true"}}
    level_clause = {"terms": {"content_level": [6, 5, 4, 3.5, 3]}}
    tag_clause = {"terms": {"operators_add_tags": [3315, 14288]}}

    wanted_fields = ["id", "portrait_tag_name", "tags_v3", "keynote_sentence"]

    query = {
        "query": {"bool": {"must": [online_clause, level_clause, tag_clause]}},
        "_source": {"includes": wanted_fields},
    }
    return es_scan("tractate", query)
if __name__ == "__main__":
    # Hard-coded locations of the saved word2vec model and the keyword list.
    w2v_model_file = "/data/log/word2vec/app/ipynb_garbage_files/test_w2v_model_4"
    keyword_file = "/data/log/word2vec/all_key_word.txt"

    # Loaded up front; in this file only the commented-out code below reads it.
    model = word2vec.Word2Vec.load(w2v_model_file)

    # One keyword per line; only the trailing newline is stripped, and
    # duplicates collapse because the keywords are collected in a set.
    with open(keyword_file, "r") as handle:
        all_keywords_set = {line.rstrip("\n") for line in handle.readlines()}
    print("keyword: " + str(len(all_keywords_set)))

    # Register every keyword with jieba as a user-defined word.
    for keyword in all_keywords_set:
        jieba.add_word(keyword, freq=1000, tag="user_defined")

    # Walk the scan results, printing a 1-based running count and each hit's id.
    for count, hit in enumerate(get_new_user_tractate_info(), 1):
        source = hit["_source"]
        print(count, source["id"])
# sentences = [
# "是不是肋软骨钙化就只能做异体骨修复鼻子?",
# "",
# "",
# "随着肋骨隆鼻的流行,想做肋骨隆鼻的人越来越多,但是又听说肋骨钙化就不能做这项手术了,因此想要了解清楚这究竟是什么情况?",
# "是不是不能做鼻综合手术了?",
# "",
# "",
# "肋骨包括硬骨和软骨,肋软骨位于肋骨的前端,为透明软骨,具有一定的弹性,参与胸廓的构成。",
# "鼻综合手术时一般选择的是第6、7肋的软骨。",
# "未钙化的肋软骨呈玉白色,有一定的柔软性,易取易雕刻。",
# "随着年龄增长,人的肋软骨会逐渐变成硬骨,这就是“肋软骨钙化”。",
# "",
# "",
# "而未完全钙化的软骨,钙化部分呈现淡黄色,易碎易折;",
# "重度钙化的骨性部分还因为缺乏骨膜等软组织的血液供养,抗感染性变差,加之细节的雕刻和缝合困难,常常导致手术效果不理想。",
# "",
# "",
# "一般肋软骨的钙化从25岁开始,年龄越大,钙化的几率越高。",
# "此外,肋软骨的钙化跟个人饮食、环境的改变可能也有关系。",
# "而且随着现在的环境的变化、人们饮食结构的变化,钙化的情也越来越多了,所以如果想做肋骨隆鼻还是要近早手术。",
# ]
# word_list = []
# for s in sentences:
# tmp_lst = []
# for i in jieba.lcut(s):
# s = i.encode("utf-8")
# if s in all_keywords_set:
# tmp_lst.append(s)
# word_list.append(tmp_lst)
# print("11111111111111")
# print(word_list)
# count = 0
# res = {}
# for lst in word_list:
# vecs = []
# for name in lst:
# try:
# vecs.append(model.wv.get_vector(name))
# except Exception as e:
# # print(e)
# pass
# if vecs:
# n = np.average(vecs, axis=0)
# res[count] = n
# count += 1
# print("222222222222")
# print(res)
# res2 = {}
# for name in ["软骨垫鼻基底", "肋软骨隆鼻", "软骨隆鼻", "鼻综合", "肋软骨", "隆鼻", "肋骨隆鼻"]:
# try:
# vec = model.wv.get_vector(name)
# res2[name] = vec
# except Exception as e:
# print(e)
# print("33333333333")
# print(res2)
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment