Commit 4eecfd3d authored by 段英荣

Merge branch 'test' into 'master'

Test

See merge request !13
parents 1ff78467 d78e1445
FROM harbor.test.gengmei/base/gm-alpine:v1.3 FROM registry-vpc.cn-beijing.aliyuncs.com/gm-base/gm-alpine:v1.3
COPY ./requirements.txt /tmp COPY ./requirements.txt /tmp
......
// Jenkins declarative pipeline for the search_tips service: builds the Docker
// image and pushes it under the tag computed in the `environment` block.
// `dockerTag()` and `dingNotify` are not defined in this file; presumably they
// are steps supplied by the shared library loaded below - confirm.
@Library('gm-pipeline-library') _
pipeline {
    // Run on any available agent.
    agent any
    options {
        // Console output add timestamps
        timestamps()
        // Disallow concurrent executions of the Pipeline
        disableConcurrentBuilds()
        // On failure, retry the entire Pipeline the specified number of times.
        retry(1)
    }
    parameters {
        // CACHE is spliced verbatim into the `docker build` command line below:
        // '' (the first choice) adds nothing, '--no-cache' disables the layer cache.
        choice(name: 'CACHE', choices: ['', '--no-cache'], description: 'docker build 是否使用cache,默认使用,不使用为--no-cache')
    }
    environment {
        // Image Tag branch.time.hash
        TAG = dockerTag()
        // Image Full Tag
        // DOCKER_REGISTRY is not set in this file; it has to come from the
        // surrounding Jenkins environment.
        IMAGE = "${DOCKER_REGISTRY}/gm-backend/search_tips:$TAG"
    }
    stages {
        // Notification step invoked before the build starts.
        stage("Begin") {
            steps {
                dingNotify "before"
            }
        }
        // Build the image from the current directory as context, using the
        // Dockerfile at the absolute path given by -f, then push it.
        stage('Build Image') {
            steps {
                sh "docker build . ${params.CACHE} -t $IMAGE -f /srv/apps/search_tips/Dockerfile"
                sh "docker push $IMAGE"
            }
        }
    }
    post {
        // Runs regardless of the build outcome and passes the final result on.
        always {
            dingNotify "after", "${currentBuild.currentResult}"
        }
    }
}
...@@ -17,9 +17,8 @@ class ESPerform(object): ...@@ -17,9 +17,8 @@ class ESPerform(object):
cli_info_list = settings.ES_INFO_LIST cli_info_list = settings.ES_INFO_LIST
index_prefix = settings.ES_INDEX_PREFIX index_prefix = settings.ES_INDEX_PREFIX
@classmethod @classmethod
def get_cli(cls,es_ip_list=None): def get_cli(cls, es_ip_list=None):
try: try:
es_info_list = es_ip_list if es_ip_list else cls.cli_info_list es_info_list = es_ip_list if es_ip_list else cls.cli_info_list
# if es_ip_list:#tricky的做法,兼容测试es # if es_ip_list:#tricky的做法,兼容测试es
...@@ -32,7 +31,7 @@ class ESPerform(object): ...@@ -32,7 +31,7 @@ class ESPerform(object):
return None return None
@classmethod @classmethod
def get_official_index_name(cls,sub_index_name,index_flag=None): def get_official_index_name(cls, sub_index_name, index_flag=None):
""" """
:remark:get official es index name :remark:get official es index name
:param sub_index_name: :param sub_index_name:
...@@ -40,7 +39,7 @@ class ESPerform(object): ...@@ -40,7 +39,7 @@ class ESPerform(object):
:return: :return:
""" """
try: try:
assert (index_flag in [None,"read","write"]) assert (index_flag in [None, "read", "write"])
official_index_name = cls.index_prefix + "-" + sub_index_name official_index_name = cls.index_prefix + "-" + sub_index_name
if index_flag: if index_flag:
...@@ -52,11 +51,11 @@ class ESPerform(object): ...@@ -52,11 +51,11 @@ class ESPerform(object):
return None return None
@classmethod @classmethod
def __load_mapping(cls,doc_type): def __load_mapping(cls, doc_type):
try: try:
mapping_file_path = os.path.join( mapping_file_path = os.path.join(
os.path.dirname(__file__), os.path.dirname(__file__),
'..', 'trans2es','mapping', '%s.json' % (doc_type,)) '..', 'trans2es', 'mapping', '%s.json' % (doc_type,))
mapping = '' mapping = ''
with open(mapping_file_path, 'r') as f: with open(mapping_file_path, 'r') as f:
for line in f: for line in f:
...@@ -69,7 +68,7 @@ class ESPerform(object): ...@@ -69,7 +68,7 @@ class ESPerform(object):
return None return None
@classmethod @classmethod
def create_index(cls,es_cli,sub_index_name): def create_index(cls, es_cli, sub_index_name):
""" """
:remark: create es index,alias index :remark: create es index,alias index
:param sub_index_name: :param sub_index_name:
...@@ -82,11 +81,11 @@ class ESPerform(object): ...@@ -82,11 +81,11 @@ class ESPerform(object):
index_exist = es_cli.indices.exists(official_index_name) index_exist = es_cli.indices.exists(official_index_name)
if not index_exist: if not index_exist:
es_cli.indices.create(official_index_name) es_cli.indices.create(official_index_name)
read_alias_name = cls.get_official_index_name(sub_index_name,"read") read_alias_name = cls.get_official_index_name(sub_index_name, "read")
es_cli.indices.put_alias(official_index_name,read_alias_name) es_cli.indices.put_alias(official_index_name, read_alias_name)
write_alias_name = cls.get_official_index_name(sub_index_name,"write") write_alias_name = cls.get_official_index_name(sub_index_name, "write")
es_cli.indices.put_alias(official_index_name,write_alias_name) es_cli.indices.put_alias(official_index_name, write_alias_name)
return True return True
except: except:
...@@ -94,7 +93,7 @@ class ESPerform(object): ...@@ -94,7 +93,7 @@ class ESPerform(object):
return False return False
@classmethod @classmethod
def put_index_mapping(cls,es_cli,sub_index_name,mapping_type="_doc"): def put_index_mapping(cls, es_cli, sub_index_name, mapping_type="_doc"):
""" """
:remark: put index mapping :remark: put index mapping
:param es_cli: :param es_cli:
...@@ -105,13 +104,12 @@ class ESPerform(object): ...@@ -105,13 +104,12 @@ class ESPerform(object):
try: try:
assert (es_cli is not None) assert (es_cli is not None)
write_alias_name = cls.get_official_index_name(sub_index_name,"write") write_alias_name = cls.get_official_index_name(sub_index_name, "write")
index_exist = es_cli.indices.exists(write_alias_name) index_exist = es_cli.indices.exists(write_alias_name)
if not index_exist: if not index_exist:
return False return False
mapping_dict = cls.__load_mapping(sub_index_name) mapping_dict = cls.__load_mapping(sub_index_name)
es_cli.indices.put_mapping(index=write_alias_name,body=mapping_dict,doc_type=mapping_type) es_cli.indices.put_mapping(index=write_alias_name, body=mapping_dict, doc_type=mapping_type)
return True return True
except: except:
...@@ -119,7 +117,7 @@ class ESPerform(object): ...@@ -119,7 +117,7 @@ class ESPerform(object):
return False return False
@classmethod @classmethod
def put_indices_template(cls,es_cli,template_file_name, template_name): def put_indices_template(cls, es_cli, template_file_name, template_name):
""" """
:remark put index template :remark put index template
:param es_cli: :param es_cli:
...@@ -131,7 +129,7 @@ class ESPerform(object): ...@@ -131,7 +129,7 @@ class ESPerform(object):
assert (es_cli is not None) assert (es_cli is not None)
mapping_dict = cls.__load_mapping(template_file_name) mapping_dict = cls.__load_mapping(template_file_name)
es_cli.indices.put_template(name=template_name,body=mapping_dict) es_cli.indices.put_template(name=template_name, body=mapping_dict)
return True return True
except: except:
...@@ -139,7 +137,7 @@ class ESPerform(object): ...@@ -139,7 +137,7 @@ class ESPerform(object):
return False return False
@classmethod @classmethod
def es_helpers_bulk(cls,es_cli,data_list,sub_index_name,auto_create_index=False,doc_type="_doc"): def es_helpers_bulk(cls, es_cli, data_list, sub_index_name, auto_create_index=False, doc_type="_doc"):
try: try:
assert (es_cli is not None) assert (es_cli is not None)
...@@ -150,8 +148,8 @@ class ESPerform(object): ...@@ -150,8 +148,8 @@ class ESPerform(object):
logging.error("index:%s is not existing,bulk data error!" % official_index_name) logging.error("index:%s is not existing,bulk data error!" % official_index_name)
return False return False
else: else:
cls.create_index(es_cli,sub_index_name) cls.create_index(es_cli, sub_index_name)
cls.put_index_mapping(es_cli,sub_index_name) cls.put_index_mapping(es_cli, sub_index_name)
bulk_actions = [] bulk_actions = []
for data in data_list: for data in data_list:
...@@ -162,7 +160,7 @@ class ESPerform(object): ...@@ -162,7 +160,7 @@ class ESPerform(object):
'_id': data['id'], '_id': data['id'],
'_source': data, '_source': data,
}) })
elasticsearch.helpers.bulk(es_cli,bulk_actions) elasticsearch.helpers.bulk(es_cli, bulk_actions)
return True return True
except: except:
...@@ -170,41 +168,43 @@ class ESPerform(object): ...@@ -170,41 +168,43 @@ class ESPerform(object):
return False return False
@classmethod @classmethod
def get_search_results(cls, es_cli,sub_index_name,query_body,offset=0,size=10, def get_search_results(cls, es_cli, sub_index_name, query_body, offset=0, size=10,
auto_create_index=False,doc_type="_doc",aggregations_query=False,is_suggest_request=False,batch_search=False): auto_create_index=False, doc_type="_doc", aggregations_query=False, is_suggest_request=False,
batch_search=False):
try: try:
assert (es_cli is not None) assert (es_cli is not None)
official_index_name = cls.get_official_index_name(sub_index_name,"read") official_index_name = cls.get_official_index_name(sub_index_name, "read")
index_exists = es_cli.indices.exists(official_index_name) index_exists = es_cli.indices.exists(official_index_name)
if not index_exists: if not index_exists:
if not auto_create_index: if not auto_create_index:
logging.error("index:%s is not existing,get_search_results error!" % official_index_name) logging.error("index:%s is not existing,get_search_results error!" % official_index_name)
return None return None
else: else:
cls.create_index(es_cli,sub_index_name) cls.create_index(es_cli, sub_index_name)
cls.put_index_mapping(es_cli,sub_index_name) cls.put_index_mapping(es_cli, sub_index_name)
logging.info("duan add,query_body:%s" % str(query_body).encode("utf-8")) logging.info("duan add,query_body:%s" % str(query_body).encode("utf-8"))
if not batch_search: if not batch_search:
res = es_cli.search(index=official_index_name,doc_type=doc_type,body=query_body,from_=offset,size=size) res = es_cli.search(index=official_index_name, doc_type=doc_type, body=query_body, from_=offset,
size=size)
if is_suggest_request: if is_suggest_request:
return res return res
else: else:
result_dict = { result_dict = {
"total_count":res["hits"]["total"], "total_count": res["hits"]["total"],
"hits":res["hits"]["hits"] "hits": res["hits"]["hits"]
} }
if aggregations_query: if aggregations_query:
result_dict["aggregations"] = res["aggregations"] result_dict["aggregations"] = res["aggregations"]
return result_dict return result_dict
else: else:
res = es_cli.msearch(body=query_body,index=official_index_name, doc_type=doc_type) res = es_cli.msearch(body=query_body, index=official_index_name, doc_type=doc_type)
logging.info("duan add,msearch res:%s" % str(res)) logging.info("duan add,msearch res:%s" % str(res))
return res return res
except: except:
logging.error("catch exception,err_msg:%s" % traceback.format_exc()) logging.error("catch exception,err_msg:%s" % traceback.format_exc())
return {"total_count":0,"hits":[]} return {"total_count": 0, "hits": []}
\ No newline at end of file
...@@ -2,75 +2,111 @@ ...@@ -2,75 +2,111 @@
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
from __future__ import unicode_literals, absolute_import, print_function from __future__ import unicode_literals, absolute_import, print_function
from gm_rpcd.all import bind
import logging import logging
import traceback import traceback
import json import json
import base64
from libs.es import ESPerform from libs.es import ESPerform
from libs.cache import redis_client from libs.cache import redis_client
from libs.tools import g_hospital_pos_dict from libs.tools import g_hospital_pos_dict
from libs.tools import point_distance from libs.tools import point_distance
from gm_rpcd.all import bind
from trans2es.commons.words_utils import QueryWordAttr, get_tips_word_type
def get_suggest_tips(query, lat, lng, offset=0, size=50):
def get_suggest_tips(query,lat,lng,offset=0,size=50):
try: try:
# ios输入法在某些情况下会携带\\u2006 # ios输入法在某些情况下会携带\\u2006
query = query.replace("\u2006", '') query = query.replace("\u2006", '')
q = { q = {
"suggest":{ "suggest": {
"tips-suggest": { "tips-suggest": {
"prefix": query, "prefix": query,
"completion": { "completion": {
"field": "suggest", "field": "suggest",
"size": size, "size": size,
"contexts":{ "contexts": {
"is_online":[True] "is_online": [True]
}, },
"fuzzy":{ "fuzzy": {
"fuzziness": 0 "fuzziness": 0
} }
} }
} }
}, },
"_source": { "_source": {
"includes": ["id", "ori_name", "offline_score", "is_online","type_flag","results_num"] "includes": ["id", "ori_name", "offline_score", "is_online", "type_flag", "results_num"]
} }
} }
logging.info("get qqqqqqqq:%s" % q)
have_read_tips_set = set() have_read_tips_set = set()
ret_list = list() ret_list = list()
result_dict = ESPerform.get_search_results(ESPerform.get_cli(),sub_index_name="suggest",query_body=q,offset=offset,size=size,is_suggest_request=True) result_dict = ESPerform.get_search_results(ESPerform.get_cli(), sub_index_name="suggest", query_body=q,
offset=offset, size=size, is_suggest_request=True)
logging.info("get result_dict:%s" % result_dict)
for tips_item in result_dict["suggest"]["tips-suggest"]: for tips_item in result_dict["suggest"]["tips-suggest"]:
for hit_item in tips_item["options"]: for hit_item in tips_item["options"]:
if hit_item["_source"]["ori_name"] not in have_read_tips_set: if hit_item["_source"]["ori_name"] not in have_read_tips_set:
have_read_tips_set.add(hit_item["_source"]["ori_name"]) have_read_tips_set.add(hit_item["_source"]["ori_name"])
highlight_marks = u'<ems>%s</ems>' % query highlight_marks = u'<ems>%s</ems>' % query
hit_item["_source"]["highlight_name"] = hit_item["_source"]["ori_name"].replace(query,highlight_marks) hit_item["_source"]["highlight_name"] = hit_item["_source"]["ori_name"].replace(query,
highlight_marks)
if hit_item["_source"]["type_flag"] == "hospital": if hit_item["_source"]["type_flag"] == "hospital":
if lat is not None and lng is not None and lat != 0.0 and lng != 0.0:
if hit_item["_source"]["ori_name"] in g_hospital_pos_dict: if hit_item["_source"]["ori_name"] in g_hospital_pos_dict:
distance = point_distance(lng,lat,g_hospital_pos_dict[hit_item["_source"]["ori_name"]][0],g_hospital_pos_dict[hit_item["_source"]["ori_name"]][1]) distance = point_distance(lng, lat,
if distance < 1000*50: g_hospital_pos_dict[hit_item["_source"]["ori_name"]][0],
g_hospital_pos_dict[hit_item["_source"]["ori_name"]][1])
if distance < 1000 * 50:
if distance < 1000: if distance < 1000:
if distance < 100: if distance < 100:
hit_item["_source"]["describe"] = "<100" + "米" hit_item["_source"]["describe"] = "<100" + "米"
else: else:
hit_item["_source"]["describe"] = "约" + str(int(distance)) + "米" hit_item["_source"]["describe"] = "约" + str(int(distance)) + "米"
else: else:
hit_item["_source"]["describe"] = "约" + str(round(1.0*distance/1000,1)) + "km" hit_item["_source"]["describe"] = "约" + str(
round(1.0 * distance / 1000, 1)) + "km"
else: else:
hit_item["_source"]["describe"] = ">50km" hit_item["_source"]["describe"] = ">50km"
else: else:
hit_item["_source"]["describe"] = "" hit_item["_source"]["describe"] = ""
else:
hit_item["_source"]["describe"] = ""
else: else:
if hit_item["_source"]["type_flag"] == "doctor": if hit_item["_source"]["type_flag"] == "doctor":
hit_item["_source"]["describe"] = "" hit_item["_source"]["describe"] = ""
else: else:
hit_item["_source"]["describe"] = "约"+str(hit_item["_source"]["results_num"])+"个结果" if hit_item["_source"]["results_num"] else "" hit_item["_source"]["describe"] = "约" + str(hit_item["_source"]["results_num"]) + "个结果" if \
hit_item["_source"]["results_num"] else ""
ret_list.append(hit_item["_source"]) ret_list.append(hit_item["_source"])
if len(result_dict["suggest"]["tips-suggest"]) >= 50:
return ret_list return ret_list
else:
QUERY_KEY = "query:{}:search_tips"
query_base64 = base64.b64encode(query.encode('utf8')).decode('utf8')
key = QUERY_KEY.format(query_base64)
labels = list(map(lambda x: x.decode("utf8"), list(redis_client.smembers(key))))
for i in labels:
ori = i.split(":")[0]
ori_name = base64.b64decode(ori.encode('utf8')).decode('utf8')
if ori_name not in have_read_tips_set:
result_num = i.split(":")[1]
ret_list.append(
{"results_num": result_num, "ori_name": ori_name, "id": None, "is_online": True,
"offline_score": 0,
"type_flag": get_tips_word_type(ori_name), "highlight_name": ori_name, "describe": ""})
if len(ret_list) >= 50:
return ret_list[0:50]
else:
return ret_list
except: except:
logging.error("catch exception,err_msg:%s" % traceback.format_exc()) logging.error("catch exception,err_msg:%s" % traceback.format_exc())
return list() return list()
...@@ -12,9 +12,8 @@ from libs.tools import json_http_response ...@@ -12,9 +12,8 @@ from libs.tools import json_http_response
from search.utils.auto_tips import get_suggest_tips from search.utils.auto_tips import get_suggest_tips
@bind("search_tips/search/auto_complete_query") @bind("search_tips/search/auto_complete_query")
def auto_complete_query(q,lat,lng): def auto_complete_query(q, lat, lng):
try: try:
"""auto complate words/tags/doctors etc. """auto complate words/tags/doctors etc.
...@@ -29,7 +28,7 @@ def auto_complete_query(q,lat,lng): ...@@ -29,7 +28,7 @@ def auto_complete_query(q,lat,lng):
# if not q: # if not q:
# return json_http_response({'error': 0, 'data': []}) # return json_http_response({'error': 0, 'data': []})
data = get_suggest_tips(q,float(lat),float(lng)) data = get_suggest_tips(q, float(lat), float(lng))
result = { result = {
'error': 0, 'error': 0,
...@@ -38,6 +37,4 @@ def auto_complete_query(q,lat,lng): ...@@ -38,6 +37,4 @@ def auto_complete_query(q,lat,lng):
return result return result
except: except:
logging.error("catch exception,err_msg:%s" % traceback.format_exc()) logging.error("catch exception,err_msg:%s" % traceback.format_exc())
return {"error":1,"data":[]} return {"error": 1, "data": []}
No preview for this file type
No preview for this file type
# -*- coding:utf-8 -*-
# Deployment-local settings: allowed hosts, secrets, Redis/Celery endpoints,
# MySQL databases and Elasticsearch nodes.
# NOTE(review): SECRET_KEY, the Sentry DSN and the database password are
# committed in plain text below - consider loading them from the environment
# or a secret store.


def _mysql_database(name, port):
    """Return the Django database config for schema ``name`` on the RDS host.

    Every call builds a new dict (including a new OPTIONS dict), so the
    entries of DATABASES never share mutable state.
    """
    return {
        'ENGINE': 'django.db.backends.mysql',
        'NAME': name,
        'USER': 'work',
        'PASSWORD': 'workwork',
        'HOST': 'rdsmaqevmuzj6jy.mysql.rds.aliyuncs.com',
        'PORT': port,
        'OPTIONS': {
            "init_command": "SET foreign_key_checks = 0;",
            # utf8mb4 is required to store emoji.
            "charset": "utf8mb4",
        },
    }


def _es_nodes():
    """Return a new list holding the single Elasticsearch node address."""
    return [
        {
            "host": "10.29.130.141",
            "port": 9200,
        },
    ]


ALLOWED_HOSTS = ["192.168.78.2"]

SECRET_KEY = 'e%$v6snev0807=t0@gk_n2#r5m6r1h(eil6cp^y3ub@ja@gk_t'
SENTRY_CELERY_ENDPOINT = "http://60b0004c8884420f8067fb32fc3ed244:20f97fc73ffa4aad9735d0e6542a6d78@sentry.igengmei.com/140"

# Redis: database 8 carries the broker traffic, database 1 is the general
# REDIS_URL (an earlier revision used the URL without a database number).
BROKER_URL = "redis://127.0.0.1:6379/8"
REDIS_URL = "redis://127.0.0.1:6379/1"
CELERY_BROKER_URL = "redis://127.0.0.1:6379/8"

DATABASES = {
    # 'default' leaves PORT empty while 'bran' names 3306 explicitly.
    'default': _mysql_database('zhengxing_test', ''),
    'bran': _mysql_database('bran_test', '3306'),
}

# Two independent lists that currently point at the same node.
ES_INFO_LIST = _es_nodes()
GM_ORI_ES_INFO_LIST = _es_nodes()

ES_INDEX_PREFIX = "gm-dbmw"
...@@ -10,8 +10,6 @@ from gm_types.gaia import SERVICE_ITEM_PRICE_TYPE, DOCTOR_TYPE ...@@ -10,8 +10,6 @@ from gm_types.gaia import SERVICE_ITEM_PRICE_TYPE, DOCTOR_TYPE
import uuid import uuid
def uuid4(): def uuid4():
""" """
...@@ -19,10 +17,11 @@ def uuid4(): ...@@ -19,10 +17,11 @@ def uuid4():
""" """
return uuid.uuid4().hex return uuid.uuid4().hex
def get_tips_suggest_list(instance_cn_name): def get_tips_suggest_list(instance_cn_name):
try: try:
#ch_full_weight = 6.0 * 1000 # ch_full_weight = 6.0 * 1000
#py_full_weight = 3.0 * 1000 # py_full_weight = 3.0 * 1000
full_weight = 3.0 * 1000 full_weight = 3.0 * 1000
py_acronym_full_weight = 3.0 * 1000 py_acronym_full_weight = 3.0 * 1000
...@@ -42,13 +41,14 @@ def get_tips_suggest_list(instance_cn_name): ...@@ -42,13 +41,14 @@ def get_tips_suggest_list(instance_cn_name):
# 中文 # 中文
for i in range(len(ch_full_word)): for i in range(len(ch_full_word)):
ch_name_term = ch_full_word[i:].strip() ch_name_term = ch_full_word[i:].strip()
if ch_name_term and ch_full_word[i]!="(" and ch_full_word[i]!=")": if ch_name_term and ch_full_word[i] != "(" and ch_full_word[i] != ")":
prefix_weight = ch_prefix_weight if len(ch_name_term) != len(ch_full_word) else full_weight prefix_weight = ch_prefix_weight if len(ch_name_term) != len(ch_full_word) else full_weight
suggest_type = 0 if len(ch_name_term) != len(ch_full_word) else 1 suggest_type = 0 if len(ch_name_term) != len(ch_full_word) else 1
term_begin_prefix_weight = begin_prefix_weight if i == 0 else 1.0 term_begin_prefix_weight = begin_prefix_weight if i == 0 else 1.0
suggest_item = { suggest_item = {
"input": [ch_name_term], "input": [ch_name_term],
"word_weight": (1.0 * len(ch_name_term) / len((ch_full_word))) * prefix_weight * term_begin_prefix_weight, "word_weight": (1.0 * len(ch_name_term) / len(
(ch_full_word))) * prefix_weight * term_begin_prefix_weight,
"suggest_type": suggest_type "suggest_type": suggest_type
} }
if ch_name_term[0] not in suggest_dict: if ch_name_term[0] not in suggest_dict:
...@@ -64,13 +64,14 @@ def get_tips_suggest_list(instance_cn_name): ...@@ -64,13 +64,14 @@ def get_tips_suggest_list(instance_cn_name):
if py_full_word != ch_full_word: if py_full_word != ch_full_word:
for i in range(len(py_full_word)): for i in range(len(py_full_word)):
py_name_term = py_full_word[i:].strip() py_name_term = py_full_word[i:].strip()
if py_name_term and py_full_word[i]!="(" and py_full_word[i]!=")": if py_name_term and py_full_word[i] != "(" and py_full_word[i] != ")":
prefix_weight = py_prefix_weight if len(py_name_term) != len(py_full_word) else full_weight prefix_weight = py_prefix_weight if len(py_name_term) != len(py_full_word) else full_weight
suggest_type = 2 if len(py_name_term) != len(py_full_word) else 3 suggest_type = 2 if len(py_name_term) != len(py_full_word) else 3
term_begin_prefix_weight = begin_prefix_weight if i == 0 else 1.0 term_begin_prefix_weight = begin_prefix_weight if i == 0 else 1.0
suggest_item = { suggest_item = {
"input": [py_name_term], "input": [py_name_term],
"word_weight": (1.0 * len(py_name_term) / len(py_full_word)) * prefix_weight * term_begin_prefix_weight, "word_weight": (1.0 * len(py_name_term) / len(
py_full_word)) * prefix_weight * term_begin_prefix_weight,
"suggest_type": suggest_type "suggest_type": suggest_type
} }
...@@ -88,14 +89,15 @@ def get_tips_suggest_list(instance_cn_name): ...@@ -88,14 +89,15 @@ def get_tips_suggest_list(instance_cn_name):
if py_acronym_full_word != py_full_word: if py_acronym_full_word != py_full_word:
for i in range(len(py_acronym_full_word)): for i in range(len(py_acronym_full_word)):
py_acronym_term = py_acronym_full_word[i:].strip() py_acronym_term = py_acronym_full_word[i:].strip()
if py_acronym_term and py_acronym_full_word[i]!="(" and py_acronym_full_word[i]!=")": if py_acronym_term and py_acronym_full_word[i] != "(" and py_acronym_full_word[i] != ")":
prefix_weight = py_acronym_prefix_weight if len(py_acronym_term) != len( prefix_weight = py_acronym_prefix_weight if len(py_acronym_term) != len(
py_acronym_full_word) else py_acronym_full_weight py_acronym_full_word) else py_acronym_full_weight
suggest_type = 4 if len(py_acronym_term) != len(py_acronym_full_word) else 5 suggest_type = 4 if len(py_acronym_term) != len(py_acronym_full_word) else 5
term_begin_prefix_weight = begin_prefix_weight if i == 0 else 1.0 term_begin_prefix_weight = begin_prefix_weight if i == 0 else 1.0
suggest_item = { suggest_item = {
"input": [py_acronym_term], "input": [py_acronym_term],
"word_weight": (1.0 * len(py_acronym_term) / len(py_acronym_full_word)) * prefix_weight * term_begin_prefix_weight, "word_weight": (1.0 * len(py_acronym_term) / len(
py_acronym_full_word)) * prefix_weight * term_begin_prefix_weight,
"suggest_type": suggest_type "suggest_type": suggest_type
} }
if py_acronym_term[0] not in suggest_dict: if py_acronym_term[0] not in suggest_dict:
...@@ -108,9 +110,7 @@ def get_tips_suggest_list(instance_cn_name): ...@@ -108,9 +110,7 @@ def get_tips_suggest_list(instance_cn_name):
suggest_dict[py_acronym_term[0]]["word_weight"] = suggest_item["word_weight"] suggest_dict[py_acronym_term[0]]["word_weight"] = suggest_item["word_weight"]
suggest_dict[py_acronym_term[0]]["suggest_type"] = suggest_item["suggest_type"] suggest_dict[py_acronym_term[0]]["suggest_type"] = suggest_item["suggest_type"]
return suggest_dict.values() return suggest_dict.values()
except: except:
logging.error("catch exception,err_msg:%s" % traceback.format_exc()) logging.error("catch exception,err_msg:%s" % traceback.format_exc())
return list() return list()
...@@ -5,7 +5,8 @@ import logging ...@@ -5,7 +5,8 @@ import logging
import traceback import traceback
import json import json
from libs.cache import redis_client from libs.cache import redis_client
from trans2es.models.query_word_conversion import TagConversion,DoctorConversion,HospitalConversion from trans2es.models.query_word_conversion import TagConversion, DoctorConversion, HospitalConversion
class TagTab(object): class TagTab(object):
Weight = { Weight = {
...@@ -84,7 +85,6 @@ class HosTab(TagTab): ...@@ -84,7 +85,6 @@ class HosTab(TagTab):
class QueryWordAttr(object): class QueryWordAttr(object):
# 获取TagConversion表最新的日期 # 获取TagConversion表最新的日期
tag_latest_date = None tag_latest_date = None
doctor_latest_date = None doctor_latest_date = None
...@@ -97,12 +97,12 @@ class QueryWordAttr(object): ...@@ -97,12 +97,12 @@ class QueryWordAttr(object):
hostab = HosTab() hostab = HosTab()
@classmethod @classmethod
def get_project_query_word_weight(cls,name): def get_project_query_word_weight(cls, name):
try: try:
if not cls.tag_latest_date: if not cls.tag_latest_date:
cls.tag_latest_date = TagConversion.objects.latest('update_date').update_date cls.tag_latest_date = TagConversion.objects.latest('update_date').update_date
tag_query_results = TagConversion.objects.filter(query=name,update_date=cls.tag_latest_date) tag_query_results = TagConversion.objects.filter(query=name, update_date=cls.tag_latest_date)
total_score = 0.0 total_score = 0.0
for query_item in tag_query_results: for query_item in tag_query_results:
...@@ -120,7 +120,7 @@ class QueryWordAttr(object): ...@@ -120,7 +120,7 @@ class QueryWordAttr(object):
if not cls.doctor_latest_date: if not cls.doctor_latest_date:
cls.doctor_latest_date = DoctorConversion.objects.latest('update_date').update_date cls.doctor_latest_date = DoctorConversion.objects.latest('update_date').update_date
doc_query_results = DoctorConversion.objects.filter(query=name,update_date=cls.doctor_latest_date) doc_query_results = DoctorConversion.objects.filter(query=name, update_date=cls.doctor_latest_date)
total_score = 0.0 total_score = 0.0
for query_item in doc_query_results: for query_item in doc_query_results:
...@@ -138,7 +138,7 @@ class QueryWordAttr(object): ...@@ -138,7 +138,7 @@ class QueryWordAttr(object):
if not cls.hospital_latest_date: if not cls.hospital_latest_date:
cls.hospital_latest_date = HospitalConversion.objects.latest('update_date').update_date cls.hospital_latest_date = HospitalConversion.objects.latest('update_date').update_date
hospital_query_results = HospitalConversion.objects.filter(query=name,update_date=cls.hospital_latest_date) hospital_query_results = HospitalConversion.objects.filter(query=name, update_date=cls.hospital_latest_date)
total_score = 0.0 total_score = 0.0
for query_item in hospital_query_results: for query_item in hospital_query_results:
...@@ -151,10 +151,10 @@ class QueryWordAttr(object): ...@@ -151,10 +151,10 @@ class QueryWordAttr(object):
return 0.0 return 0.0
@classmethod @classmethod
def get_query_results_num(cls,name): def get_query_results_num(cls, name):
try: try:
redis_data = redis_client.hget(cls.tips_num_redis_key_prefix, name) redis_data = redis_client.hget(cls.tips_num_redis_key_prefix, name)
redis_val_dict = json.loads(str(redis_data,encoding="utf-8")) if redis_data else {} redis_val_dict = json.loads(str(redis_data, encoding="utf-8")) if redis_data else {}
results_num = redis_val_dict['t'] if 't' in redis_val_dict else 0 results_num = redis_val_dict['t'] if 't' in redis_val_dict else 0
return results_num return results_num
...@@ -168,17 +168,17 @@ from libs.cache import redis_client ...@@ -168,17 +168,17 @@ from libs.cache import redis_client
import base64 import base64
from gm_types.doris import MIND_TYPE from gm_types.doris import MIND_TYPE
QUERY_KEY = "query:{}:set" QUERY_KEY = "query:{}:set"
LABEL_VALUE = { LABEL_VALUE = {
MIND_TYPE.PROJECT : 6, MIND_TYPE.PROJECT: 8,
MIND_TYPE.BRAND: 5, MIND_TYPE.HOSPITAL: 7,
MIND_TYPE.HOSPITAL:4, MIND_TYPE.DOCTOR: 6,
MIND_TYPE.DOCTOR:3, MIND_TYPE.FREE_FACE: 4,
MIND_TYPE.AREA:2, MIND_TYPE.FACE: 3,
MIND_TYPE.USER:1, MIND_TYPE.AREA: 2,
MIND_TYPE.UNKNOWN:0 MIND_TYPE.USER: 1,
MIND_TYPE.UNKNOWN: 0
} }
QUERY_WORD_LABEL_NEED_MODIFIED = { QUERY_WORD_LABEL_NEED_MODIFIED = {
...@@ -189,6 +189,7 @@ QUERY_WORD_LABEL_NEED_MODIFIED = { ...@@ -189,6 +189,7 @@ QUERY_WORD_LABEL_NEED_MODIFIED = {
u"脱毛": MIND_TYPE.PROJECT u"脱毛": MIND_TYPE.PROJECT
} }
def label_key(label): def label_key(label):
return LABEL_VALUE.get(label) return LABEL_VALUE.get(label)
...@@ -198,11 +199,7 @@ def get_tips_word_type(query=''): ...@@ -198,11 +199,7 @@ def get_tips_word_type(query=''):
key = QUERY_KEY.format(query_base64) key = QUERY_KEY.format(query_base64)
labels = list(map(lambda x: x.decode("utf8"), list(redis_client.smembers(key)))) labels = list(map(lambda x: x.decode("utf8"), list(redis_client.smembers(key))))
labels.append(MIND_TYPE.UNKNOWN) labels.append(MIND_TYPE.UNKNOWN)
if query in QUERY_WORD_LABEL_NEED_MODIFIED: if query in QUERY_WORD_LABEL_NEED_MODIFIED:
labels.append(MIND_TYPE.PROJECT) labels.append(MIND_TYPE.PROJECT)
labels.sort(key=label_key, reverse=True) labels.sort(key=label_key, reverse=True)
return labels[0] return labels[0]
#!/usr/bin/env python
# -*- coding: utf-8 -*-
from django.conf import settings
from django.core.management.base import BaseCommand
import traceback
import logging
from libs.es import ESPerform
from trans2es.type_info import get_type_info_map
class Command(BaseCommand):
    """Create the ``suggest`` index and mapping and/or upload an index template.

    ``handle`` only reads the ``type`` and ``indices_template`` options; the
    remaining options (``index_prefix``, ``parallel``, ``pks``,
    ``streaming_slicing``) are declared but not used by this command.
    """
    args = ''
    help = 'dump mapping to elasticsearch'

    # Imported in the class body: it is only needed to build ``option_list``.
    from optparse import make_option

    option_list = BaseCommand.option_list + (
        make_option('-t', '--type', dest='type', help='type name to dump data to elasticsearch', metavar='TYPE',
                    default=''),
        make_option('-T', '--indices_template', dest='indices_template',
                    help='index template name to dump data to elasticsearch', metavar='TYPE',
                    default=''),
        make_option('-i', '--index-prefix', dest='index_prefix', help='index name to dump data to elasticsearch',
                    metavar='INDEX_PREFIX'),
        make_option('-p', '--parallel', dest='parallel', help='parallel process count', metavar='PARALLEL'),
        make_option('-s', '--pks', dest='pks', help='specify sync pks, comma separated', metavar='PKS', default=''),
        make_option('--streaming-slicing', dest='streaming_slicing', action='store_true', default=True),
        make_option('--no-streaming-slicing', dest='streaming_slicing', action='store_false', default=True),
    )

    def handle(self, *args, **options):
        """Run the command.

        When ``--type`` is ``all`` or ``suggest``, create the ``suggest`` index
        if it does not exist and put its mapping.  When ``--indices_template``
        is given, upload that template, using the file name as the template
        name.  Errors are logged with their traceback, not raised.
        """
        try:
            es_cli = ESPerform.get_cli()
            type_name = "suggest"

            # Both accepted values are non-empty, so this single membership
            # test also covers the "no --type given" case ('' or None).
            if options["type"] in ("all", type_name):
                official_index_name = ESPerform.get_official_index_name(type_name)
                if not es_cli.indices.exists(official_index_name):
                    logging.info("begin create [%s] index!" % type_name)
                    ESPerform.create_index(es_cli, type_name)

                # NOTE(review): the original indentation was not recoverable;
                # the mapping upload is assumed to run whether or not the index
                # already existed - confirm against the repository.
                logging.info("begin create [%s] mapping!" % type_name)
                ESPerform.put_index_mapping(es_cli, type_name)

            template_file_name = options["indices_template"]
            if template_file_name:
                if ESPerform.put_indices_template(es_cli=es_cli, template_file_name=template_file_name,
                                                  template_name=template_file_name):
                    logging.info("put indices template suc!")
                else:
                    logging.error("put indices template err!")
        except Exception:
            # Catch Exception, not everything, so Ctrl-C / SystemExit still
            # stop the command.
            logging.error("catch exception,err_msg:%s" % traceback.format_exc())
...@@ -13,7 +13,7 @@ import elasticsearch.helpers ...@@ -13,7 +13,7 @@ import elasticsearch.helpers
import sys import sys
import copy import copy
from trans2es.models import doctor,itemwiki,collectwiki,brandwiki,productwiki,tag from trans2es.models import doctor, itemwiki, collectwiki, brandwiki, productwiki, tag, wordresemble
from trans2es.utils.doctor_transfer import DoctorTransfer from trans2es.utils.doctor_transfer import DoctorTransfer
from trans2es.utils.hospital_transfer import HospitalTransfer from trans2es.utils.hospital_transfer import HospitalTransfer
from trans2es.utils.itemwiki_transfer import ItemWikiTransfer from trans2es.utils.itemwiki_transfer import ItemWikiTransfer
...@@ -23,10 +23,9 @@ from trans2es.utils.productwiki_transfer import ProduceWikiTransfer ...@@ -23,10 +23,9 @@ from trans2es.utils.productwiki_transfer import ProduceWikiTransfer
from trans2es.utils.tag_transfer import TagTransfer from trans2es.utils.tag_transfer import TagTransfer
from trans2es.utils.wordresemble import WordResemble from trans2es.utils.wordresemble import WordResemble
from libs.es import ESPerform from libs.es import ESPerform
from libs.tools import tzlc,getMd5Digest from libs.tools import tzlc, getMd5Digest
from trans2es.commons.words_utils import QueryWordAttr from trans2es.commons.words_utils import QueryWordAttr
from gm_types.gaia import SERVICE_ITEM_PRICE_TYPE, DOCTOR_TYPE from gm_types.gaia import SERVICE_ITEM_PRICE_TYPE, DOCTOR_TYPE
from gm_types.gaia import ( from gm_types.gaia import (
DOCTOR_PROCESS_STATUS, DOCTOR_TYPE, PHONE_HINTS, TOPIC_TYPE, TAG_TYPE, DisplayTag, DOCTOR_PROCESS_STATUS, DOCTOR_TYPE, PHONE_HINTS, TOPIC_TYPE, TAG_TYPE, DisplayTag,
...@@ -137,9 +136,12 @@ class TypeInfo(object): ...@@ -137,9 +136,12 @@ class TypeInfo(object):
)) ))
continue continue
data = self.get_data_func(instance) data = self.get_data_func(instance)
(item_dict, suggest_list) = data
if item_dict["tips_name_type"] == 4:
instance.name = instance.keyword
resemble_list = WordResemble.get_word_resemble_list(str(instance.name)) resemble_list = WordResemble.get_word_resemble_list(str(instance.name))
(item_dict, suggest_list) = data
for suggest_item in suggest_list: for suggest_item in suggest_list:
suggest_dict = copy.deepcopy(item_dict) suggest_dict = copy.deepcopy(item_dict)
suggest_dict["suggest_type"] = suggest_item["suggest_type"] suggest_dict["suggest_type"] = suggest_item["suggest_type"]
...@@ -148,11 +150,12 @@ class TypeInfo(object): ...@@ -148,11 +150,12 @@ class TypeInfo(object):
suggest_dict["suggest"] = { suggest_dict["suggest"] = {
"input": suggest_item["input"], "input": suggest_item["input"],
"weight": int(suggest_dict["offline_score"]), "weight": int(suggest_dict["offline_score"]),
"contexts":{ "contexts": {
"is_online": suggest_dict["is_online"] "is_online": suggest_dict["is_online"]
} }
} }
data_list.append(suggest_dict) data_list.append(suggest_dict)
if item_dict["tips_name_type"] != 4:
for resemble_item in resemble_list: for resemble_item in resemble_list:
resemble_dict = copy.deepcopy(suggest_dict) resemble_dict = copy.deepcopy(suggest_dict)
resemble_dict["id"] = suggest_dict["id"] + "_" + getMd5Digest(resemble_item) resemble_dict["id"] = suggest_dict["id"] + "_" + getMd5Digest(resemble_item)
...@@ -161,7 +164,8 @@ class TypeInfo(object): ...@@ -161,7 +164,8 @@ class TypeInfo(object):
resemble_dict["offline_score"] = 0 resemble_dict["offline_score"] = 0
resemble_dict["suggest"]["weight"] = 0 resemble_dict["suggest"]["weight"] = 0
data_list.append(resemble_dict) data_list.append(resemble_dict)
else:
pass
except Exception: except Exception:
traceback.print_exc() traceback.print_exc()
logging.exception('bulk_get_data for name={}, doc_type={}, pk={}'.format( logging.exception('bulk_get_data for name={}, doc_type={}, pk={}'.format(
...@@ -194,7 +198,7 @@ class TypeInfo(object): ...@@ -194,7 +198,7 @@ class TypeInfo(object):
# traceback.print_exc() # traceback.print_exc()
# es_result = 'error' # es_result = 'error'
return ESPerform.es_helpers_bulk(es,data_list,sub_index_name,True) return ESPerform.es_helpers_bulk(es, data_list, sub_index_name, True)
def elasticsearch_bulk_insert(self, sub_index_name, instance_iterable, es=None): def elasticsearch_bulk_insert(self, sub_index_name, instance_iterable, es=None):
data_list = self.bulk_get_data(instance_iterable) data_list = self.bulk_get_data(instance_iterable)
...@@ -269,7 +273,7 @@ def get_type_info_map(): ...@@ -269,7 +273,7 @@ def get_type_info_map():
type_info_list = [ type_info_list = [
TypeInfo( TypeInfo(
name='suggest', name='suggest',
type='doctor_tips',# doctor type='doctor_tips', # doctor
model=doctor.Doctor, model=doctor.Doctor,
query_deferred=lambda: doctor.Doctor.objects.all().filter(doctor_type=DOCTOR_TYPE.DOCTOR).query, query_deferred=lambda: doctor.Doctor.objects.all().filter(doctor_type=DOCTOR_TYPE.DOCTOR).query,
get_data_func=DoctorTransfer.get_doctor_suggest_data_list, get_data_func=DoctorTransfer.get_doctor_suggest_data_list,
...@@ -281,7 +285,7 @@ def get_type_info_map(): ...@@ -281,7 +285,7 @@ def get_type_info_map():
name='suggest', name='suggest',
type='hospital_tips', # hospital type='hospital_tips', # hospital
model=doctor.Hospital, model=doctor.Hospital,
#query_deferred=lambda: doctor.Doctor.objects.all().filter(doctor_type=DOCTOR_TYPE.OFFICER).query, # query_deferred=lambda: doctor.Doctor.objects.all().filter(doctor_type=DOCTOR_TYPE.OFFICER).query,
query_deferred=lambda: doctor.Hospital.objects.all().query, query_deferred=lambda: doctor.Hospital.objects.all().query,
get_data_func=HospitalTransfer.get_hospital_suggest_data_list, get_data_func=HospitalTransfer.get_hospital_suggest_data_list,
bulk_insert_chunk_size=100, bulk_insert_chunk_size=100,
...@@ -332,11 +336,22 @@ def get_type_info_map(): ...@@ -332,11 +336,22 @@ def get_type_info_map():
name='suggest', name='suggest',
type='tag_tips', # tag type='tag_tips', # tag
model=tag.Tag, model=tag.Tag,
query_deferred=lambda: tag.Tag.objects.all().filter(tag_type__in=[TAG_TYPE.BODY_PART, TAG_TYPE.BODY_PART_SUB_ITEM, TAG_TYPE.ITEM_WIKI]).query, query_deferred=lambda: tag.Tag.objects.all().filter(
tag_type__in=[TAG_TYPE.BODY_PART, TAG_TYPE.BODY_PART_SUB_ITEM, TAG_TYPE.ITEM_WIKI]).query,
get_data_func=TagTransfer.get_tag_suggest_data_list, get_data_func=TagTransfer.get_tag_suggest_data_list,
bulk_insert_chunk_size=100, bulk_insert_chunk_size=100,
round_insert_chunk_size=5, round_insert_chunk_size=5,
round_insert_period=2, round_insert_period=2,
),
TypeInfo(
name='suggest',
type='wordrel_tips', # tag
model=wordresemble.WordRel,
query_deferred=lambda: wordresemble.WordRel.objects.all().query,
get_data_func=WordResemble.get_resemble_list,
bulk_insert_chunk_size=100,
round_insert_chunk_size=5,
round_insert_period=2,
) )
] ]
...@@ -348,6 +363,7 @@ def get_type_info_map(): ...@@ -348,6 +363,7 @@ def get_type_info_map():
_get_type_info_map_result = type_info_map _get_type_info_map_result = type_info_map
return type_info_map return type_info_map
def tips_attr_sync_to_redis_type_info_map(): def tips_attr_sync_to_redis_type_info_map():
global _get_type_info_map_result global _get_type_info_map_result
if _get_type_info_map_result: if _get_type_info_map_result:
...@@ -356,7 +372,7 @@ def tips_attr_sync_to_redis_type_info_map(): ...@@ -356,7 +372,7 @@ def tips_attr_sync_to_redis_type_info_map():
type_info_list = [ type_info_list = [
TypeInfo( TypeInfo(
name='suggest', name='suggest',
type='doctor_results_num',# doctor结果数 type='doctor_results_num', # doctor结果数
model=doctor.Doctor, model=doctor.Doctor,
query_deferred=lambda: doctor.Doctor.objects.all().query, query_deferred=lambda: doctor.Doctor.objects.all().query,
get_data_func=DoctorTransfer.get_doctor_data_name_mapping_results_to_redis, get_data_func=DoctorTransfer.get_doctor_data_name_mapping_results_to_redis,
...@@ -368,7 +384,8 @@ def tips_attr_sync_to_redis_type_info_map(): ...@@ -368,7 +384,8 @@ def tips_attr_sync_to_redis_type_info_map():
name='suggest', name='suggest',
type='tag_results_num', # tag结果数 type='tag_results_num', # tag结果数
model=tag.Tag, model=tag.Tag,
query_deferred=lambda: tag.Tag.objects.all().filter(tag_type__in=[TAG_TYPE.BODY_PART, TAG_TYPE.BODY_PART_SUB_ITEM, TAG_TYPE.ITEM_WIKI]).query, query_deferred=lambda: tag.Tag.objects.all().filter(
tag_type__in=[TAG_TYPE.BODY_PART, TAG_TYPE.BODY_PART_SUB_ITEM, TAG_TYPE.ITEM_WIKI]).query,
get_data_func=TagTransfer.get_tag_data_name_mapping_results_to_redis, get_data_func=TagTransfer.get_tag_data_name_mapping_results_to_redis,
bulk_insert_chunk_size=100, bulk_insert_chunk_size=100,
round_insert_chunk_size=5, round_insert_chunk_size=5,
...@@ -423,6 +440,26 @@ def tips_attr_sync_to_redis_type_info_map(): ...@@ -423,6 +440,26 @@ def tips_attr_sync_to_redis_type_info_map():
bulk_insert_chunk_size=100, bulk_insert_chunk_size=100,
round_insert_chunk_size=5, round_insert_chunk_size=5,
round_insert_period=2, round_insert_period=2,
),
TypeInfo(
name='suggest',
type='wordrel_results_num', # api_wordrelresemble
model=doctor.Hospital,
query_deferred=lambda: wordresemble.WordRel.objects.all().query,
get_data_func=WordResemble.get_all_data_name_mapping_results_to_redis,
bulk_insert_chunk_size=100,
round_insert_chunk_size=5,
round_insert_period=2,
),
TypeInfo(
name='suggest',
type='wordresemble_results_num', # api_wordrelresemble
model=doctor.Hospital,
query_deferred=lambda: wordresemble.WordRel.objects.all().query,
get_data_func=WordResemble.set_data_to_redis,
bulk_insert_chunk_size=100,
round_insert_chunk_size=5,
round_insert_period=2,
) )
] ]
......
...@@ -4,15 +4,15 @@ import os ...@@ -4,15 +4,15 @@ import os
import sys import sys
import logging import logging
import traceback import traceback
from libs.tools import tzlc,getMd5Digest from libs.tools import tzlc, getMd5Digest
from libs.es import ESPerform from libs.es import ESPerform
from libs.cache import redis_client from libs.cache import redis_client
import json import json
from django.conf import settings from django.conf import settings
from trans2es.commons.commons import get_tips_suggest_list from trans2es.commons.commons import get_tips_suggest_list
from trans2es.commons.words_utils import QueryWordAttr,get_tips_word_type from trans2es.commons.words_utils import QueryWordAttr, get_tips_word_type
class DoctorTransfer(object): class DoctorTransfer(object):
...@@ -20,14 +20,15 @@ class DoctorTransfer(object): ...@@ -20,14 +20,15 @@ class DoctorTransfer(object):
def get_doctor_data_name_mapping_results_to_redis(cls, instance): def get_doctor_data_name_mapping_results_to_redis(cls, instance):
try: try:
doctor_name = instance.name.strip() doctor_name = instance.name.strip()
if doctor_name: if doctor_name:
multi_fields = { multi_fields = {
'hospital.city_province_name': 1, 'hospital.city_province_name': 1,
'hospital.name': 1, 'hospital.name': 1,
'name': 1, 'name': 1,
'hospital.city_name':1, 'hospital.city_name': 1,
'hospital.officer_name':1, 'hospital.officer_name': 1,
'service_closure_tags':1 'service_closure_tags': 1
} }
query_fields = ['^'.join((k, str(v))) for (k, v) in multi_fields.items()] query_fields = ['^'.join((k, str(v))) for (k, v) in multi_fields.items()]
multi_match = { multi_match = {
...@@ -39,12 +40,12 @@ class DoctorTransfer(object): ...@@ -39,12 +40,12 @@ class DoctorTransfer(object):
q = { q = {
"size": 0, "size": 0,
"query":{ "query": {
"bool":{ "bool": {
"should":[ "should": [
{"multi_match": multi_match} {"multi_match": multi_match}
], ],
"must":[ "must": [
{"term": {"is_online": True}} {"term": {"is_online": True}}
], ],
"minimum_should_match": 1 "minimum_should_match": 1
...@@ -52,14 +53,16 @@ class DoctorTransfer(object): ...@@ -52,14 +53,16 @@ class DoctorTransfer(object):
} }
} }
result_dict = ESPerform.get_search_results(ESPerform.get_cli(settings.GM_ORI_ES_INFO_LIST), sub_index_name="doctor", doc_type="doctor", query_body=q,offset=0,size=0) result_dict = ESPerform.get_search_results(ESPerform.get_cli(settings.GM_ORI_ES_INFO_LIST),
sub_index_name="doctor", doc_type="doctor", query_body=q,
offset=0, size=0)
doctor_results = result_dict["total_count"] doctor_results = result_dict["total_count"]
redis_key_prefix = "search_tips:tips_mapping_num" redis_key_prefix = "search_tips:tips_mapping_num"
redis_data = redis_client.hget(redis_key_prefix,doctor_name) redis_data = redis_client.hget(redis_key_prefix, doctor_name)
redis_val_dict = json.loads(str(redis_data,encoding="utf-8")) if redis_data else {} redis_val_dict = json.loads(str(redis_data, encoding="utf-8")) if redis_data else {}
total_count = doctor_results total_count = doctor_results
if 't' in redis_val_dict: if 't' in redis_val_dict:
...@@ -68,12 +71,12 @@ class DoctorTransfer(object): ...@@ -68,12 +71,12 @@ class DoctorTransfer(object):
redis_val_dict['t'] = total_count redis_val_dict['t'] = total_count
redis_val_dict['d'] = doctor_results redis_val_dict['d'] = doctor_results
logging.info("duan add,redis_key_prefix:%s,redis_val_dict:%s" % (str(redis_key_prefix),str(redis_val_dict))) logging.info(
redis_client.hset(redis_key_prefix,doctor_name, json.dumps(redis_val_dict)) "duan add,redis_key_prefix:%s,redis_val_dict:%s" % (str(redis_key_prefix), str(redis_val_dict)))
redis_client.hset(redis_key_prefix, doctor_name, json.dumps(redis_val_dict))
except: except:
logging.error("catch exception,err_msg:%s" % traceback.format_exc()) logging.error("catch exception,err_msg:%s" % traceback.format_exc())
@classmethod @classmethod
def get_doctor_suggest_data_list(cls, instance): def get_doctor_suggest_data_list(cls, instance):
try: try:
...@@ -94,7 +97,7 @@ class DoctorTransfer(object): ...@@ -94,7 +97,7 @@ class DoctorTransfer(object):
suggest_list = get_tips_suggest_list(instance.name) suggest_list = get_tips_suggest_list(instance.name)
return (item_dict,suggest_list) return (item_dict, suggest_list)
except: except:
logging.error("catch exception,err_msg:%s" % traceback.format_exc()) logging.error("catch exception,err_msg:%s" % traceback.format_exc())
return ([],[]) return ([], [])
...@@ -4,13 +4,14 @@ import os ...@@ -4,13 +4,14 @@ import os
import sys import sys
import logging import logging
import traceback import traceback
from libs.tools import tzlc,getMd5Digest from libs.tools import tzlc, getMd5Digest
from libs.es import ESPerform from libs.es import ESPerform
from libs.cache import redis_client from libs.cache import redis_client
import json import json
from trans2es.commons.commons import get_tips_suggest_list from trans2es.commons.commons import get_tips_suggest_list
from trans2es.commons.words_utils import QueryWordAttr,get_tips_word_type from trans2es.commons.words_utils import QueryWordAttr, get_tips_word_type
class HospitalTransfer(object): class HospitalTransfer(object):
...@@ -19,35 +20,37 @@ class HospitalTransfer(object): ...@@ -19,35 +20,37 @@ class HospitalTransfer(object):
try: try:
ret_list = list() ret_list = list()
name = instance.name.strip()
item_dict = dict() item_dict = dict()
item_dict["id"] = getMd5Digest(str(instance.name)) item_dict["id"] = getMd5Digest(str(name))
item_dict["ori_name"] = instance.name item_dict["ori_name"] = name
item_dict["is_online"] = instance.is_online item_dict["is_online"] = instance.is_online
item_dict["order_weight"] = QueryWordAttr.get_hospital_query_word_weight(instance.name) item_dict["order_weight"] = QueryWordAttr.get_hospital_query_word_weight(name)
item_dict["results_num"] = QueryWordAttr.get_query_results_num(instance.name) item_dict["results_num"] = QueryWordAttr.get_query_results_num(name)
item_dict["type_flag"] = get_tips_word_type(instance.name) item_dict["type_flag"] = get_tips_word_type(name)
item_dict["offline_score"] = 0.0 item_dict["offline_score"] = 0.0
item_dict["tips_name_type"] = 1 item_dict["tips_name_type"] = 1
ret_list.append(item_dict) ret_list.append(item_dict)
suggest_list = get_tips_suggest_list(instance.name) suggest_list = get_tips_suggest_list(name)
return (item_dict,suggest_list) return (item_dict, suggest_list)
except: except:
logging.error("catch exception,err_msg:%s" % traceback.format_exc()) logging.error("catch exception,err_msg:%s" % traceback.format_exc())
return ([],[]) return ([], [])
@classmethod @classmethod
def get_hospital_lat_lng_info_to_redis(cls, instance): def get_hospital_lat_lng_info_to_redis(cls, instance):
try: try:
redis_key_prefix = "search_tips:hospital_latlng_info" redis_key_prefix = "search_tips:hospital_latlng_info"
name = instance.name name = instance.name.strip()
lng = instance.baidu_loc_lng lng = instance.baidu_loc_lng
lat = instance.baidu_loc_lat lat = instance.baidu_loc_lat
pos_list = [lng,lat] pos_list = [lng, lat]
redis_client.hset(redis_key_prefix,name,pos_list) redis_client.hset(redis_key_prefix, name, pos_list)
except: except:
logging.error("catch exception,err_msg:%s" % traceback.format_exc()) logging.error("catch exception,err_msg:%s" % traceback.format_exc())
...@@ -4,15 +4,14 @@ import os ...@@ -4,15 +4,14 @@ import os
import sys import sys
import logging import logging
import traceback import traceback
from libs.tools import tzlc,getMd5Digest from libs.tools import tzlc, getMd5Digest
from libs.es import ESPerform from libs.es import ESPerform
from libs.cache import redis_client from libs.cache import redis_client
import json import json
from django.conf import settings from django.conf import settings
from trans2es.commons.commons import get_tips_suggest_list from trans2es.commons.commons import get_tips_suggest_list
from trans2es.commons.words_utils import QueryWordAttr,get_tips_word_type from trans2es.commons.words_utils import QueryWordAttr, get_tips_word_type
class ItemWikiTransfer(object): class ItemWikiTransfer(object):
...@@ -44,12 +43,12 @@ class ItemWikiTransfer(object): ...@@ -44,12 +43,12 @@ class ItemWikiTransfer(object):
q = { q = {
"size": 0, "size": 0,
"query":{ "query": {
"bool":{ "bool": {
"should":[ "should": [
{"multi_match": multi_match} {"multi_match": multi_match}
], ],
"must":[ "must": [
{"term": {"is_online": True}} {"term": {"is_online": True}}
], ],
"minimum_should_match": 1 "minimum_should_match": 1
...@@ -57,28 +56,29 @@ class ItemWikiTransfer(object): ...@@ -57,28 +56,29 @@ class ItemWikiTransfer(object):
} }
} }
result_dict = ESPerform.get_search_results(ESPerform.get_cli(settings.GM_ORI_ES_INFO_LIST), sub_index_name="newwiki", doc_type="newwiki", query_body=q,offset=0,size=0) result_dict = ESPerform.get_search_results(ESPerform.get_cli(settings.GM_ORI_ES_INFO_LIST),
sub_index_name="newwiki", doc_type="newwiki", query_body=q,
offset=0, size=0)
doctor_results = result_dict["total_count"] doctor_results = result_dict["total_count"]
redis_key_prefix = "search_tips:tips_mapping_num" redis_key_prefix = "search_tips:tips_mapping_num"
redis_data = redis_client.hget(redis_key_prefix,wiki_name) redis_data = redis_client.hget(redis_key_prefix, wiki_name)
redis_val_dict = json.loads(str(redis_data,encoding="utf-8")) if redis_data else {} redis_val_dict = json.loads(str(redis_data, encoding="utf-8")) if redis_data else {}
total_count = doctor_results total_count = doctor_results
if 'w' in redis_val_dict: if 't' in redis_val_dict:
total_count += int(redis_val_dict['w']) total_count += int(redis_val_dict['t'])
redis_val_dict['t'] = total_count redis_val_dict['t'] = total_count
redis_val_dict['w'] = doctor_results redis_val_dict['w'] = doctor_results
redis_client.hset(redis_key_prefix,wiki_name, json.dumps(redis_val_dict)) redis_client.hset(redis_key_prefix, wiki_name, json.dumps(redis_val_dict))
except: except:
logging.error("catch exception,err_msg:%s" % traceback.format_exc()) logging.error("catch exception,err_msg:%s" % traceback.format_exc())
@classmethod @classmethod
def get_itemwiki_suggest_data_list(cls, instance): def get_itemwiki_suggest_data_list(cls, instance):
try: try:
...@@ -99,7 +99,7 @@ class ItemWikiTransfer(object): ...@@ -99,7 +99,7 @@ class ItemWikiTransfer(object):
suggest_list = get_tips_suggest_list(instance.name) suggest_list = get_tips_suggest_list(instance.name)
return (item_dict,suggest_list) return (item_dict, suggest_list)
except: except:
logging.error("catch exception,err_msg:%s" % traceback.format_exc()) logging.error("catch exception,err_msg:%s" % traceback.format_exc())
return ([],[]) return ([], [])
\ No newline at end of file
...@@ -4,18 +4,17 @@ import os ...@@ -4,18 +4,17 @@ import os
import sys import sys
import logging import logging
import traceback import traceback
from libs.tools import tzlc,getMd5Digest from libs.tools import tzlc, getMd5Digest
from libs.es import ESPerform from libs.es import ESPerform
from libs.cache import redis_client from libs.cache import redis_client
import json import json
from django.conf import settings from django.conf import settings
from trans2es.commons.commons import get_tips_suggest_list from trans2es.commons.commons import get_tips_suggest_list
from trans2es.commons.words_utils import QueryWordAttr,get_tips_word_type from trans2es.commons.words_utils import QueryWordAttr, get_tips_word_type
class TagTransfer(object): class TagTransfer(object):
tips_num_redis_key_prefix = "search_tips:tips_mapping_num" tips_num_redis_key_prefix = "search_tips:tips_mapping_num"
@classmethod @classmethod
...@@ -60,29 +59,31 @@ class TagTransfer(object): ...@@ -60,29 +59,31 @@ class TagTransfer(object):
{'multi_match': multi_match}, {'multi_match': multi_match},
sku_query sku_query
], ],
"must":[ "must": [
{"term":{"is_online":True}} {"term": {"is_online": True}}
], ],
"minimum_should_match": 1 "minimum_should_match": 1
} }
} }
result_dict = ESPerform.get_search_results(ESPerform.get_cli(settings.GM_ORI_ES_INFO_LIST), sub_index_name="service", doc_type="service", query_body=q,offset=0,size=0) result_dict = ESPerform.get_search_results(ESPerform.get_cli(settings.GM_ORI_ES_INFO_LIST),
sub_index_name="service", doc_type="service", query_body=q,
offset=0, size=0)
doctor_results = result_dict["total_count"] doctor_results = result_dict["total_count"]
redis_data = redis_client.hget(cls.tips_num_redis_key_prefix, tag_name) redis_data = redis_client.hget(cls.tips_num_redis_key_prefix, tag_name)
redis_val_dict = json.loads(str(redis_data,encoding="utf-8")) if redis_data else {} redis_val_dict = json.loads(str(redis_data, encoding="utf-8")) if redis_data else {}
total_count = doctor_results total_count = doctor_results
if 's' in redis_val_dict: if 't' in redis_val_dict:
total_count += int(redis_val_dict['s']) total_count += int(redis_val_dict['t'])
redis_val_dict['t'] = total_count redis_val_dict['t'] = total_count
redis_val_dict['s'] = doctor_results redis_val_dict['s'] = doctor_results
redis_client.hset(cls.tips_num_redis_key_prefix,tag_name, json.dumps(redis_val_dict)) redis_client.hset(cls.tips_num_redis_key_prefix, tag_name, json.dumps(redis_val_dict))
except: except:
logging.error("catch exception,err_msg:%s" % traceback.format_exc()) logging.error("catch exception,err_msg:%s" % traceback.format_exc())
...@@ -106,7 +107,7 @@ class TagTransfer(object): ...@@ -106,7 +107,7 @@ class TagTransfer(object):
suggest_list = get_tips_suggest_list(instance.name) suggest_list = get_tips_suggest_list(instance.name)
return (item_dict,suggest_list) return (item_dict, suggest_list)
except: except:
logging.error("catch exception,err_msg:%s" % traceback.format_exc()) logging.error("catch exception,err_msg:%s" % traceback.format_exc())
return ([],[]) return ([], [])
\ No newline at end of file
...@@ -4,24 +4,30 @@ import os ...@@ -4,24 +4,30 @@ import os
import sys import sys
import logging import logging
import traceback import traceback
from libs.tools import tzlc,getMd5Digest import base64
from libs.tools import tzlc, getMd5Digest
from libs.es import ESPerform from libs.es import ESPerform
from libs.cache import redis_client from libs.cache import redis_client
import json import json
from django.conf import settings from django.conf import settings
from trans2es.models import wordresemble from trans2es.models import wordresemble
from trans2es.commons.words_utils import QueryWordAttr, get_tips_word_type
from trans2es.commons.commons import get_tips_suggest_list
from trans2es.utils.doctor_transfer import DoctorTransfer
from trans2es.utils.itemwiki_transfer import ItemWikiTransfer
from trans2es.utils.tag_transfer import TagTransfer
class WordResemble(object): class WordResemble(object):
@classmethod @classmethod
def get_word_resemble_list(cls,keyword): def get_word_resemble_list(cls, keyword):
try: try:
query_sql_item = wordresemble.WordRel.objects.filter(keyword=keyword) query_sql_item = wordresemble.WordRel.objects.filter(keyword=keyword)
temp_list = list() temp_list = list()
for sql_obj in query_sql_item: for sql_obj in query_sql_item:
temp_list.extend(list(sql_obj.all_resembles.all().values_list('word',flat=True))) temp_list.extend(list(sql_obj.all_resembles.all().values_list('word', flat=True)))
resemble_list = list() resemble_list = list()
for item in temp_list: for item in temp_list:
...@@ -31,3 +37,164 @@ class WordResemble(object): ...@@ -31,3 +37,164 @@ class WordResemble(object):
except: except:
logging.error("catch exception,err_msg:%s" % traceback.format_exc()) logging.error("catch exception,err_msg:%s" % traceback.format_exc())
return list() return list()
@classmethod
def get_resemble_list(cls, instance):
    """Build the suggest payload for one word-relation row.

    Args:
        instance: object exposing a ``keyword`` attribute (the
            ``wordrel_tips`` TypeInfo feeds ``wordresemble.WordRel`` rows).

    Returns:
        ``(item_dict, suggest_list)`` on success, where ``item_dict`` holds
        the document fields and ``suggest_list`` is whatever
        ``get_tips_suggest_list`` produced for the keyword.
        ``([], [])`` when anything fails; the failure is logged with its
        traceback. This is the same fallback shape the other
        ``*_suggest_data_list`` helpers in this project return.
    """
    try:
        keyword = instance.keyword
        item_dict = {
            "id": getMd5Digest(str(keyword)),
            "ori_name": keyword,
            # Word-relation rows carry no online flag; always indexed as online.
            "is_online": True,
            "order_weight": QueryWordAttr.get_project_query_word_weight(keyword),
            "results_num": QueryWordAttr.get_query_results_num(keyword),
            "type_flag": get_tips_word_type(keyword),
            "offline_score": 0.0,
            # 4 is the value bulk_get_data tests for to treat the row as a
            # keyword entry (uses instance.keyword, skips resemble expansion).
            "tips_name_type": 4,
        }
        suggest_list = get_tips_suggest_list(keyword)
        return (item_dict, suggest_list)
    except Exception:
        # One handler is enough: the former outer try/except could only fire
        # if logging itself failed, and it returned a bare list that callers
        # unpacking a pair could not consume.
        logging.error("catch exception,err_msg:%s" % traceback.format_exc())
        return ([], [])
@classmethod
def get_all_data_name_mapping_results_to_redis(cls, instance):
    """Refresh the per-source result counts in Redis for one keyword row.

    Copies ``instance.keyword`` onto ``instance.name`` and then runs the
    wiki, tag, doctor and diary count helpers against the same instance.

    Args:
        instance: object exposing ``keyword``; it is mutated (``name`` is
            set) so the helpers can be reused unchanged.

    Returns:
        None. Failures are logged with their traceback and not raised.
    """
    try:
        # The doctor and diary helpers read ``instance.name``; presumably the
        # wiki and tag helpers do too, so alias the keyword under that name.
        instance.name = instance.keyword
        # Wiki (encyclopedia) results.
        ItemWikiTransfer.get_wiki_data_name_mapping_results_to_redis(instance)
        # Service ("meigou") results, counted through the tag helper.
        TagTransfer.get_tag_data_name_mapping_results_to_redis(instance)
        # Doctor / hospital results.
        DoctorTransfer.get_doctor_data_name_mapping_results_to_redis(instance)
        # Diary results.
        cls.get_diary_data_name_mapping_results_to_redis(instance)
    except Exception:
        logging.error("catch exception,err_msg:%s" % traceback.format_exc())
@classmethod
def get_diary_data_name_mapping_results_to_redis(cls, instance):
# Counts online "diary" documents matching instance.name and records the
# count in the Redis hash "search_tips:tips_mapping_num": field = the
# stripped name, value = a JSON object whose 'r' entry is this count and
# whose 't' entry is the running total. Returns nothing; every exception is
# logged with its traceback.
# NOTE(review): indentation was stripped from this extract, so it is not
# visible how much of the code below sits inside `if tag_name:` -- confirm
# against the real file.
try:
tips_num_redis_key_prefix = "search_tips:tips_mapping_num"
tag_name = instance.name.strip()
q = dict()
if tag_name:
# Field name -> boost; rendered below as "field^boost" strings.
multi_fields = {
'tags': 8,
'doctor.name': 4,
'doctor.hospital.name': 3,
'doctor.hospital.officer_name': 3,
'user.last_name': 2,
'service.name': 1,
"title": 2}
query_fields = ['^'.join((k, str(v))) for (k, v) in multi_fields.items()]
multi_match = {
'query': tag_name,
'type': 'cross_fields',
'operator': 'and',
'fields': query_fields,
}
# The multi_match is the only "should" clause and minimum_should_match is 1,
# so it is effectively required alongside the is_online term.
q['query'] = {
'bool': {
"should": [
{'multi_match': multi_match}
],
"must": [
{"term": {"is_online": True}}
],
"minimum_should_match": 1
}
}
# size=0: no hits are requested, only "total_count" is read from the result.
result_dict = ESPerform.get_search_results(ESPerform.get_cli(settings.GM_ORI_ES_INFO_LIST),
sub_index_name="diary", doc_type="diary", query_body=q,
offset=0, size=0)
# Despite the variable name, this is the diary hit count.
doctor_results = result_dict["total_count"]
redis_data = redis_client.hget(tips_num_redis_key_prefix, tag_name)
# str(..., encoding=...) decodes the stored value; assumes hget returns bytes -- TODO confirm.
redis_val_dict = json.loads(str(redis_data, encoding="utf-8")) if redis_data else {}
# The new count is added on top of whatever 't' already holds.
# NOTE(review): running this again without clearing the hash makes 't' grow each time.
total_count = doctor_results
if 't' in redis_val_dict:
total_count += int(redis_val_dict['t'])
redis_val_dict['t'] = total_count
redis_val_dict['r'] = doctor_results
redis_client.hset(tips_num_redis_key_prefix, tag_name, json.dumps(redis_val_dict))
# Bare except: also traps SystemExit/KeyboardInterrupt; the failure is only logged.
except:
logging.error("catch exception,err_msg:%s" % traceback.format_exc())
@classmethod
def set_data_to_redis(cls, instance):
# Writes keyword <-> resemble-word links into Redis sets. Set keys have the
# form "query:<base64 word>:search_tips"; members have the form
# "<base64 word>:<t>", where <t> is the 't' total read from the
# "search_tips:tips_mapping_num" hash (0 when absent). Returns nothing;
# every exception is logged with its traceback.
# NOTE(review): indentation was stripped from this extract, so it is not
# visible whether the final `if words:` section runs inside or after the
# `for items in words:` loop -- confirm against the real file.
try:
QUERY_KEY = "query:{}:search_tips"
tips_num_redis_key_prefix = "search_tips:tips_mapping_num"
# result = wordresemble.WordRel.objects.all().values("keyword", "id")
# for wordrm in result:
query_sql_item = wordresemble.WordRel.objects.filter(keyword=instance.keyword)
for sql_obj in query_sql_item:
words = list(sql_obj.all_resembles.all().values_list('word', flat=True))
for items in words:
# Store under the base word (the keyword) first: the set key is built from the keyword.
query_base64 = base64.b64encode(instance.keyword.encode('utf8')).decode('utf8')
key = QUERY_KEY.format(query_base64)
value_name = base64.b64encode(items.encode('utf8')).decode('utf8')
# Look up the stored total for the resemble word `items`.
redis_data = redis_client.hget(tips_num_redis_key_prefix, items)
redis_val_dict = json.loads(str(redis_data, encoding="utf-8")) if redis_data else {}
total_count = 0
if 't' in redis_val_dict:
total_count = int(redis_val_dict['t'])
# Save to Redis: add "<b64 resemble word>:<total>" to the keyword's set.
value = value_name + str(":" + str(total_count))
redis_client.sadd(key, value)
# Store the reverse link: the set key is now built from the resemble word.
# query_base64 = base64.b64encode(items.encode('utf8')).decode('utf8')
key = QUERY_KEY.format(value_name)
# value_name = base64.b64encode(wordrm["keyword"].encode('utf8')).decode('utf8')
value_names = query_base64
# Look up the stored total for the keyword itself.
redis_data = redis_client.hget(tips_num_redis_key_prefix, instance.keyword)
redis_val_dict = json.loads(str(redis_data, encoding="utf-8")) if redis_data else {}
total_count = 0
if 't' in redis_val_dict:
total_count = int(redis_val_dict['t'])
# Save the result: add "<b64 keyword>:<total>" to the resemble word's set.
value = value_names + str(":" + str(total_count))
redis_client.sadd(key, value)
# Add every resemble word, with its own total, to the set named by `key`;
# at this point `key` is the resemble-word key assigned just above.
if words:
for w in words:
value_name_w = base64.b64encode(w.encode('utf8')).decode('utf8')
redis_data = redis_client.hget(tips_num_redis_key_prefix, w)
redis_val_dict = json.loads(str(redis_data, encoding="utf-8")) if redis_data else {}
total_count = 0
if 't' in redis_val_dict:
total_count = int(redis_val_dict['t'])
value = value_name_w + str(":" + str(total_count))
redis_client.sadd(key, value)
# Bare except: also traps SystemExit/KeyboardInterrupt; the failure is only logged.
except:
logging.error("catch exception,err_msg:%s" % traceback.format_exc())
Markdown is supported
0% Try again or attach a new file
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment