get tags from db

c7ed5c29 · 赵威 · 5ab7d3ea · c7ed5c29 · c7ed5c29
Commit c7ed5c29 authored Jan 14, 2021 by 赵威
Hide whitespace changes
Inline Side-by-side

Showing with 65 additions and 72 deletions

tractate_sentence_similary.py tractate_sentence_similary.py +40 -72

db.py utils/db.py +25 -0

No files found.
--- a/tractate_sentence_similary.py
+++ b/tractate_sentence_similary.py
@@ -2,6 +2,7 @@

 # source /srv/envs/SyntaxNet/bin/activate

+import json
 import re
 import sys

@@ -10,6 +11,7 @@ import numpy as np
 from gensim import models as word2vec

 from utils.cache import redis_client5
+from utils.db import get_all_business_tags
 from utils.es import es_scan

 reload(sys)
@@ -62,11 +64,42 @@ def get_tractate_vector(sentence_lst, all_keywords_set, model):
                pass
        if vecs:
            n = np.average(vecs, axis=0)
-            res[count] = n
+            res[count] = n.tolist()
        count += 1
    return res


+def save_tractate_vector_to_redis(all_keywords_set, model):
+    """Cache the sentence vectors of every tractate returned by ES in Redis.
+
+    Each hit from ``get_new_user_tractate_info()`` has its ``keynote_sentence``
+    field passed to ``get_tractate_vector``. A non-empty result is stored as a
+    JSON string under ``rims:tractate:sentense:vector:<id>`` and given a
+    three-day expiry; empty results are not written.
+
+    Args:
+        all_keywords_set: keyword collection forwarded to ``get_tractate_vector``.
+        model: word-vector model forwarded to ``get_tractate_vector``.
+    """
+    ttl_seconds = 60 * 60 * 24 * 3
+    for seen, hit in enumerate(get_new_user_tractate_info(), 1):
+        doc = hit["_source"]
+        keynote_sentences = doc["keynote_sentence"]
+        tractate_id = doc["id"]
+        sentence_vectors = get_tractate_vector(keynote_sentences, all_keywords_set, model)
+        cache_key = "rims:tractate:sentense:vector:" + str(tractate_id)
+        if not sentence_vectors:
+            continue
+        # Progress output: running hit counter and number of vectorized sentences.
+        print(seen, len(sentence_vectors))
+        redis_client5.set(cache_key, json.dumps(sentence_vectors))
+        # TODO remove (original author's note; presumably refers to this expiry - confirm)
+        redis_client5.expire(cache_key, ttl_seconds)
+
+
+def save_tag_vector_to_redis(all_tags_lst, model):
+    """Store the word vector of every tag in a single Redis hash.
+
+    Each tag name in ``all_tags_lst`` is looked up with
+    ``model.wv.get_vector``. Tags whose lookup raises are skipped. The
+    remaining vectors are written to the hash ``rims:tractate:tags:vector``
+    (field = tag name, value = JSON-encoded list of floats) and the hash is
+    given a three-day expiry. Nothing is written when no tag has a vector.
+
+    Args:
+        all_tags_lst: iterable of tag names.
+        model: object exposing ``wv.get_vector(name)`` that returns an array
+            with a ``tolist()`` method.
+    """
+    tag_vectors = {}
+    for name in all_tags_lst:
+        try:
+            vec = model.wv.get_vector(name).tolist()
+        except Exception:
+            # Best effort: a tag the model cannot vectorize is simply left out.
+            continue
+        # Hash values must be flat strings, so each vector is JSON-encoded on
+        # its own rather than dumping the whole dict into one string.
+        tag_vectors[name] = json.dumps(vec)
+    redis_key = "rims:tractate:tags:vector"
+    if tag_vectors:
+        # hmset takes a field -> value mapping, not a serialized string.
+        redis_client5.hmset(redis_key, tag_vectors)
+        redis_client5.expire(redis_key, 60 * 60 * 24 * 3)
+
+
 if __name__ == "__main__":
    model_path = "/data/log/word2vec/app/ipynb_garbage_files/test_w2v_model_4"
    keyword_txt = "/data/log/word2vec/all_key_word.txt"
@@ -84,74 +117,9 @@ if __name__ == "__main__":
    for word in all_keywords_set:
        jieba.add_word(word, freq=1000, tag="user_defined")

-    tractate_vector_dict = {}
-    es_result = get_new_user_tractate_info()
-    # count = 0
-    for i in es_result:
-        # count += 1
-        source = i["_source"]
-        sentences = source["keynote_sentence"]
-        id = source["id"]
-        res = get_tractate_vector(sentences, all_keywords_set, model)
-        print(res)
-
-    # sentences = [
-    #     "是不是肋软骨钙化就只能做异体骨修复鼻子？",
-    #     "",
-    #     "",
-    #     "随着肋骨隆鼻的流行，想做肋骨隆鼻的人越来越多，但是又听说肋骨钙化就不能做这项手术了，因此想要了解清楚这究竟是什么情况？",
-    #     "是不是不能做鼻综合手术了？",
-    #     "",
-    #     "",
-    #     "肋骨包括硬骨和软骨，肋软骨位于肋骨的前端，为透明软骨，具有一定的弹性，参与胸廓的构成。",
-    #     "鼻综合手术时一般选择的是第6、7肋的软骨。",
-    #     "未钙化的肋软骨呈玉白色，有一定的柔软性，易取易雕刻。",
-    #     "随着年龄增长，人的肋软骨会逐渐变成硬骨，这就是“肋软骨钙化”。",
-    #     "",
-    #     "",
-    #     "而未完全钙化的软骨，钙化部分呈现淡黄色，易碎易折；",
-    #     "重度钙化的骨性部分还因为缺乏骨膜等软组织的血液供养，抗感染性变差，加之细节的雕刻和缝合困难，常常导致手术效果不理想。",
-    #     "",
-    #     "",
-    #     "一般肋软骨的钙化从25岁开始，年龄越大，钙化的几率越高。",
-    #     "此外，肋软骨的钙化跟个人饮食、环境的改变可能也有关系。",
-    #     "而且随着现在的环境的变化、人们饮食结构的变化，钙化的情也越来越多了，所以如果想做肋骨隆鼻还是要近早手术。",
-    # ]
-
-    # word_list = []
-    # for s in sentences:
-    #     tmp_lst = []
-    #     for i in jieba.lcut(s):
-    #         s = i.encode("utf-8")
-    #         if s in all_keywords_set:
-    #             tmp_lst.append(s)
-    #     word_list.append(tmp_lst)
-    # print("11111111111111")
-    # print(word_list)
-
-    # count = 0
-    # res = {}
-    # for lst in word_list:
-    #     vecs = []
-    #     for name in lst:
-    #         try:
-    #             vecs.append(model.wv.get_vector(name))
-    #         except Exception as e:
-    #             # print(e)
-    #             pass
-    #     if vecs:
-    #         n = np.average(vecs, axis=0)
-    #         res[count] = n
-    #     count += 1
-    # print("222222222222")
-    # print(res)
-
-    # res2 = {}
-    # for name in ["软骨垫鼻基底", "肋软骨隆鼻", "软骨隆鼻", "鼻综合", "肋软骨", "隆鼻", "肋骨隆鼻"]:
-    #     try:
-    #         vec = model.wv.get_vector(name)
-    #         res2[name] = vec
-    #     except Exception as e:
-    #         print(e)
-    # print("33333333333")
-    # print(res2)
+    # save_tractate_vector_to_redis(all_keywords_set, model)
+
+    all_tags_lst = get_all_business_tags()
+    print("all tags: " + str(len(all_tags_lst)))
+    print(all_tags_lst[:5])
+    # save_tag_vector_to_redis(all_tags_lst, model)
--- a/utils/db.py
+++ b/utils/db.py
@@ -22,6 +22,25 @@ def get_data_from_jerry_test(sql):
        return traceback.format_exc()


+def get_data_from_zhengxing(sql):
+    """Run ``sql`` against the ``zhengxing`` MySQL database and fetch all rows.
+
+    The query runs on a ``pymysql`` connection that uses ``DictCursor``.
+
+    Args:
+        sql: SQL statement passed unchanged to ``cursor.execute``.
+
+    Returns:
+        The result of ``cursor.fetchall()`` on success. If anything raises,
+        the formatted traceback is printed and returned as a string instead,
+        so callers must be prepared for a non-row result.
+    """
+    # NOTE(review): host and credentials are hard-coded here; consider moving
+    # them to configuration that is kept out of version control.
+    try:
+        db = pymysql.connect(host="172.16.30.141",
+                             port=3306,
+                             user="zx_str",
+                             passwd="ZXueX58pStrage",
+                             db="zhengxing",
+                             charset="utf8",
+                             cursorclass=pymysql.cursors.DictCursor)
+        try:
+            cursor = db.cursor()
+            cursor.execute(sql)
+            return cursor.fetchall()
+        finally:
+            # Always release the connection, even when the query raises.
+            db.close()
+    except Exception:
+        error_text = traceback.format_exc()
+        print(error_text)
+        return error_text
+
+
 def get_device_click_tractate_ids():
    sql = "select device, tractate_ids from device_click_tractate"
    data = get_data_from_jerry_test(sql)
@@ -30,3 +49,9 @@ def get_device_click_tractate_ids():
        for i in data:
            res[i["device"]] = i["tractate_ids"].rstrip("\n").split(",")
    return res
+
+
+def get_all_business_tags():
+    """Return the ``name`` of every row selected from ``api_tag``.
+
+    The query keeps rows where ``tag_type`` compares numerically below 4 and
+    ``is_online = 1``.
+
+    Returns:
+        A list of tag names. The list is empty when the query yields no rows
+        or when ``get_data_from_zhengxing`` reports a failure.
+    """
+    sql = "select name from api_tag where tag_type+0<'4'+0 and is_online = 1"
+    data = get_data_from_zhengxing(sql)
+    # On failure the query helper returns a traceback string rather than rows;
+    # treat anything that is not a row sequence as "no tags".
+    if not isinstance(data, (list, tuple)):
+        return []
+    return [row["name"] for row in data]