Commit a945b9fe authored by lixiaofang's avatar lixiaofang

添加近义词

parent 18c56ff7
...@@ -12,6 +12,10 @@ from libs.tools import g_hospital_pos_dict ...@@ -12,6 +12,10 @@ from libs.tools import g_hospital_pos_dict
from libs.tools import point_distance from libs.tools import point_distance
# lat: latitude (纬度)
# lng: longitude (经度)
def get_suggest_tips(query, lat, lng, offset=0, size=50): def get_suggest_tips(query, lat, lng, offset=0, size=50):
try: try:
# ios输入法在某些情况下会携带\\u2006 # ios输入法在某些情况下会携带\\u2006
...@@ -51,8 +55,11 @@ def get_suggest_tips(query, lat, lng, offset=0, size=50): ...@@ -51,8 +55,11 @@ def get_suggest_tips(query, lat, lng, offset=0, size=50):
highlight_marks) highlight_marks)
if hit_item["_source"]["type_flag"] == "hospital": if hit_item["_source"]["type_flag"] == "hospital":
if lat is not None and lng is not None:
if hit_item["_source"]["ori_name"] in g_hospital_pos_dict: if hit_item["_source"]["ori_name"] in g_hospital_pos_dict:
distance = point_distance(lng, lat, g_hospital_pos_dict[hit_item["_source"]["ori_name"]][0], distance = point_distance(lng, lat,
g_hospital_pos_dict[hit_item["_source"]["ori_name"]][0],
g_hospital_pos_dict[hit_item["_source"]["ori_name"]][1]) g_hospital_pos_dict[hit_item["_source"]["ori_name"]][1])
if distance < 1000 * 50: if distance < 1000 * 50:
if distance < 1000: if distance < 1000:
...@@ -61,11 +68,15 @@ def get_suggest_tips(query, lat, lng, offset=0, size=50): ...@@ -61,11 +68,15 @@ def get_suggest_tips(query, lat, lng, offset=0, size=50):
else: else:
hit_item["_source"]["describe"] = "约" + str(int(distance)) + "米" hit_item["_source"]["describe"] = "约" + str(int(distance)) + "米"
else: else:
hit_item["_source"]["describe"] = "约" + str(round(1.0 * distance / 1000, 1)) + "km" hit_item["_source"]["describe"] = "约" + str(
round(1.0 * distance / 1000, 1)) + "km"
else: else:
hit_item["_source"]["describe"] = ">50km" hit_item["_source"]["describe"] = ">50km"
else: else:
hit_item["_source"]["describe"] = "" hit_item["_source"]["describe"] = ""
else:
hit_item["_source"]["describe"] = ""
else: else:
if hit_item["_source"]["type_flag"] == "doctor": if hit_item["_source"]["type_flag"] == "doctor":
hit_item["_source"]["describe"] = "" hit_item["_source"]["describe"] = ""
......
...@@ -12,9 +12,8 @@ from libs.tools import json_http_response ...@@ -12,9 +12,8 @@ from libs.tools import json_http_response
from search.utils.auto_tips import get_suggest_tips from search.utils.auto_tips import get_suggest_tips
@bind("search_tips/search/auto_complete_query") @bind("search_tips/search/auto_complete_query")
def auto_complete_query(q,lat,lng): def auto_complete_query(q, lat, lng):
try: try:
"""auto complate words/tags/doctors etc. """auto complate words/tags/doctors etc.
...@@ -29,7 +28,7 @@ def auto_complete_query(q,lat,lng): ...@@ -29,7 +28,7 @@ def auto_complete_query(q,lat,lng):
# if not q: # if not q:
# return json_http_response({'error': 0, 'data': []}) # return json_http_response({'error': 0, 'data': []})
data = get_suggest_tips(q,float(lat),float(lng)) data = get_suggest_tips(q, float(lat), float(lng))
result = { result = {
'error': 0, 'error': 0,
...@@ -38,6 +37,4 @@ def auto_complete_query(q,lat,lng): ...@@ -38,6 +37,4 @@ def auto_complete_query(q,lat,lng):
return result return result
except: except:
logging.error("catch exception,err_msg:%s" % traceback.format_exc()) logging.error("catch exception,err_msg:%s" % traceback.format_exc())
return {"error":1,"data":[]} return {"error": 1, "data": []}
...@@ -41,7 +41,7 @@ DATABASES = { ...@@ -41,7 +41,7 @@ DATABASES = {
ES_INFO_LIST = [ ES_INFO_LIST = [
{ {
"host": "10.29.130.141", "host": "101.200.54.249",
"port": 9200 "port": 9200
} }
] ]
......
...@@ -10,8 +10,6 @@ from gm_types.gaia import SERVICE_ITEM_PRICE_TYPE, DOCTOR_TYPE ...@@ -10,8 +10,6 @@ from gm_types.gaia import SERVICE_ITEM_PRICE_TYPE, DOCTOR_TYPE
import uuid import uuid
def uuid4(): def uuid4():
""" """
...@@ -19,10 +17,11 @@ def uuid4(): ...@@ -19,10 +17,11 @@ def uuid4():
""" """
return uuid.uuid4().hex return uuid.uuid4().hex
def get_tips_suggest_list(instance_cn_name): def get_tips_suggest_list(instance_cn_name):
try: try:
#ch_full_weight = 6.0 * 1000 # ch_full_weight = 6.0 * 1000
#py_full_weight = 3.0 * 1000 # py_full_weight = 3.0 * 1000
full_weight = 3.0 * 1000 full_weight = 3.0 * 1000
py_acronym_full_weight = 3.0 * 1000 py_acronym_full_weight = 3.0 * 1000
...@@ -42,13 +41,14 @@ def get_tips_suggest_list(instance_cn_name): ...@@ -42,13 +41,14 @@ def get_tips_suggest_list(instance_cn_name):
# 中文 # 中文
for i in range(len(ch_full_word)): for i in range(len(ch_full_word)):
ch_name_term = ch_full_word[i:].strip() ch_name_term = ch_full_word[i:].strip()
if ch_name_term and ch_full_word[i]!="(" and ch_full_word[i]!=")": if ch_name_term and ch_full_word[i] != "(" and ch_full_word[i] != ")":
prefix_weight = ch_prefix_weight if len(ch_name_term) != len(ch_full_word) else full_weight prefix_weight = ch_prefix_weight if len(ch_name_term) != len(ch_full_word) else full_weight
suggest_type = 0 if len(ch_name_term) != len(ch_full_word) else 1 suggest_type = 0 if len(ch_name_term) != len(ch_full_word) else 1
term_begin_prefix_weight = begin_prefix_weight if i == 0 else 1.0 term_begin_prefix_weight = begin_prefix_weight if i == 0 else 1.0
suggest_item = { suggest_item = {
"input": [ch_name_term], "input": [ch_name_term],
"word_weight": (1.0 * len(ch_name_term) / len((ch_full_word))) * prefix_weight * term_begin_prefix_weight, "word_weight": (1.0 * len(ch_name_term) / len(
(ch_full_word))) * prefix_weight * term_begin_prefix_weight,
"suggest_type": suggest_type "suggest_type": suggest_type
} }
if ch_name_term[0] not in suggest_dict: if ch_name_term[0] not in suggest_dict:
...@@ -64,13 +64,14 @@ def get_tips_suggest_list(instance_cn_name): ...@@ -64,13 +64,14 @@ def get_tips_suggest_list(instance_cn_name):
if py_full_word != ch_full_word: if py_full_word != ch_full_word:
for i in range(len(py_full_word)): for i in range(len(py_full_word)):
py_name_term = py_full_word[i:].strip() py_name_term = py_full_word[i:].strip()
if py_name_term and py_full_word[i]!="(" and py_full_word[i]!=")": if py_name_term and py_full_word[i] != "(" and py_full_word[i] != ")":
prefix_weight = py_prefix_weight if len(py_name_term) != len(py_full_word) else full_weight prefix_weight = py_prefix_weight if len(py_name_term) != len(py_full_word) else full_weight
suggest_type = 2 if len(py_name_term) != len(py_full_word) else 3 suggest_type = 2 if len(py_name_term) != len(py_full_word) else 3
term_begin_prefix_weight = begin_prefix_weight if i == 0 else 1.0 term_begin_prefix_weight = begin_prefix_weight if i == 0 else 1.0
suggest_item = { suggest_item = {
"input": [py_name_term], "input": [py_name_term],
"word_weight": (1.0 * len(py_name_term) / len(py_full_word)) * prefix_weight * term_begin_prefix_weight, "word_weight": (1.0 * len(py_name_term) / len(
py_full_word)) * prefix_weight * term_begin_prefix_weight,
"suggest_type": suggest_type "suggest_type": suggest_type
} }
...@@ -88,14 +89,15 @@ def get_tips_suggest_list(instance_cn_name): ...@@ -88,14 +89,15 @@ def get_tips_suggest_list(instance_cn_name):
if py_acronym_full_word != py_full_word: if py_acronym_full_word != py_full_word:
for i in range(len(py_acronym_full_word)): for i in range(len(py_acronym_full_word)):
py_acronym_term = py_acronym_full_word[i:].strip() py_acronym_term = py_acronym_full_word[i:].strip()
if py_acronym_term and py_acronym_full_word[i]!="(" and py_acronym_full_word[i]!=")": if py_acronym_term and py_acronym_full_word[i] != "(" and py_acronym_full_word[i] != ")":
prefix_weight = py_acronym_prefix_weight if len(py_acronym_term) != len( prefix_weight = py_acronym_prefix_weight if len(py_acronym_term) != len(
py_acronym_full_word) else py_acronym_full_weight py_acronym_full_word) else py_acronym_full_weight
suggest_type = 4 if len(py_acronym_term) != len(py_acronym_full_word) else 5 suggest_type = 4 if len(py_acronym_term) != len(py_acronym_full_word) else 5
term_begin_prefix_weight = begin_prefix_weight if i == 0 else 1.0 term_begin_prefix_weight = begin_prefix_weight if i == 0 else 1.0
suggest_item = { suggest_item = {
"input": [py_acronym_term], "input": [py_acronym_term],
"word_weight": (1.0 * len(py_acronym_term) / len(py_acronym_full_word)) * prefix_weight * term_begin_prefix_weight, "word_weight": (1.0 * len(py_acronym_term) / len(
py_acronym_full_word)) * prefix_weight * term_begin_prefix_weight,
"suggest_type": suggest_type "suggest_type": suggest_type
} }
if py_acronym_term[0] not in suggest_dict: if py_acronym_term[0] not in suggest_dict:
...@@ -108,9 +110,7 @@ def get_tips_suggest_list(instance_cn_name): ...@@ -108,9 +110,7 @@ def get_tips_suggest_list(instance_cn_name):
suggest_dict[py_acronym_term[0]]["word_weight"] = suggest_item["word_weight"] suggest_dict[py_acronym_term[0]]["word_weight"] = suggest_item["word_weight"]
suggest_dict[py_acronym_term[0]]["suggest_type"] = suggest_item["suggest_type"] suggest_dict[py_acronym_term[0]]["suggest_type"] = suggest_item["suggest_type"]
return suggest_dict.values() return suggest_dict.values()
except: except:
logging.error("catch exception,err_msg:%s" % traceback.format_exc()) logging.error("catch exception,err_msg:%s" % traceback.format_exc())
return list() return list()
...@@ -5,7 +5,8 @@ import logging ...@@ -5,7 +5,8 @@ import logging
import traceback import traceback
import json import json
from libs.cache import redis_client from libs.cache import redis_client
from trans2es.models.query_word_conversion import TagConversion,DoctorConversion,HospitalConversion from trans2es.models.query_word_conversion import TagConversion, DoctorConversion, HospitalConversion
class TagTab(object): class TagTab(object):
Weight = { Weight = {
...@@ -84,7 +85,6 @@ class HosTab(TagTab): ...@@ -84,7 +85,6 @@ class HosTab(TagTab):
class QueryWordAttr(object): class QueryWordAttr(object):
# 获取TagConversion表最新的日期 # 获取TagConversion表最新的日期
tag_latest_date = None tag_latest_date = None
doctor_latest_date = None doctor_latest_date = None
...@@ -97,12 +97,12 @@ class QueryWordAttr(object): ...@@ -97,12 +97,12 @@ class QueryWordAttr(object):
hostab = HosTab() hostab = HosTab()
@classmethod @classmethod
def get_project_query_word_weight(cls,name): def get_project_query_word_weight(cls, name):
try: try:
if not cls.tag_latest_date: if not cls.tag_latest_date:
cls.tag_latest_date = TagConversion.objects.latest('update_date').update_date cls.tag_latest_date = TagConversion.objects.latest('update_date').update_date
tag_query_results = TagConversion.objects.filter(query=name,update_date=cls.tag_latest_date) tag_query_results = TagConversion.objects.filter(query=name, update_date=cls.tag_latest_date)
total_score = 0.0 total_score = 0.0
for query_item in tag_query_results: for query_item in tag_query_results:
...@@ -120,7 +120,7 @@ class QueryWordAttr(object): ...@@ -120,7 +120,7 @@ class QueryWordAttr(object):
if not cls.doctor_latest_date: if not cls.doctor_latest_date:
cls.doctor_latest_date = DoctorConversion.objects.latest('update_date').update_date cls.doctor_latest_date = DoctorConversion.objects.latest('update_date').update_date
doc_query_results = DoctorConversion.objects.filter(query=name,update_date=cls.doctor_latest_date) doc_query_results = DoctorConversion.objects.filter(query=name, update_date=cls.doctor_latest_date)
total_score = 0.0 total_score = 0.0
for query_item in doc_query_results: for query_item in doc_query_results:
...@@ -138,7 +138,7 @@ class QueryWordAttr(object): ...@@ -138,7 +138,7 @@ class QueryWordAttr(object):
if not cls.hospital_latest_date: if not cls.hospital_latest_date:
cls.hospital_latest_date = HospitalConversion.objects.latest('update_date').update_date cls.hospital_latest_date = HospitalConversion.objects.latest('update_date').update_date
hospital_query_results = HospitalConversion.objects.filter(query=name,update_date=cls.hospital_latest_date) hospital_query_results = HospitalConversion.objects.filter(query=name, update_date=cls.hospital_latest_date)
total_score = 0.0 total_score = 0.0
for query_item in hospital_query_results: for query_item in hospital_query_results:
...@@ -151,10 +151,10 @@ class QueryWordAttr(object): ...@@ -151,10 +151,10 @@ class QueryWordAttr(object):
return 0.0 return 0.0
@classmethod @classmethod
def get_query_results_num(cls,name): def get_query_results_num(cls, name):
try: try:
redis_data = redis_client.hget(cls.tips_num_redis_key_prefix, name) redis_data = redis_client.hget(cls.tips_num_redis_key_prefix, name)
redis_val_dict = json.loads(str(redis_data,encoding="utf-8")) if redis_data else {} redis_val_dict = json.loads(str(redis_data, encoding="utf-8")) if redis_data else {}
results_num = redis_val_dict['t'] if 't' in redis_val_dict else 0 results_num = redis_val_dict['t'] if 't' in redis_val_dict else 0
return results_num return results_num
...@@ -168,17 +168,16 @@ from libs.cache import redis_client ...@@ -168,17 +168,16 @@ from libs.cache import redis_client
import base64 import base64
from gm_types.doris import MIND_TYPE from gm_types.doris import MIND_TYPE
QUERY_KEY = "query:{}:set" QUERY_KEY = "query:{}:set"
LABEL_VALUE = { LABEL_VALUE = {
MIND_TYPE.PROJECT : 6, MIND_TYPE.PROJECT: 6,
MIND_TYPE.BRAND: 5, MIND_TYPE.BRAND: 5,
MIND_TYPE.HOSPITAL:4, MIND_TYPE.HOSPITAL: 4,
MIND_TYPE.DOCTOR:3, MIND_TYPE.DOCTOR: 3,
MIND_TYPE.AREA:2, MIND_TYPE.AREA: 2,
MIND_TYPE.USER:1, MIND_TYPE.USER: 1,
MIND_TYPE.UNKNOWN:0 MIND_TYPE.UNKNOWN: 0
} }
QUERY_WORD_LABEL_NEED_MODIFIED = { QUERY_WORD_LABEL_NEED_MODIFIED = {
...@@ -189,6 +188,7 @@ QUERY_WORD_LABEL_NEED_MODIFIED = { ...@@ -189,6 +188,7 @@ QUERY_WORD_LABEL_NEED_MODIFIED = {
u"脱毛": MIND_TYPE.PROJECT u"脱毛": MIND_TYPE.PROJECT
} }
def label_key(label): def label_key(label):
return LABEL_VALUE.get(label) return LABEL_VALUE.get(label)
...@@ -204,5 +204,3 @@ def get_tips_word_type(query=''): ...@@ -204,5 +204,3 @@ def get_tips_word_type(query=''):
labels.sort(key=label_key, reverse=True) labels.sort(key=label_key, reverse=True)
return labels[0] return labels[0]
...@@ -29,9 +29,23 @@ class WordRel(models.Model): ...@@ -29,9 +29,23 @@ class WordRel(models.Model):
class WordRelResemble(models.Model): class WordRelResemble(models.Model):
"""近义词数据表 """近义词数据表
""" """
class Meta: class Meta:
app_label = 'api' app_label = 'api'
db_table = 'api_wordrelresemble' db_table = 'api_wordrelresemble'
wordrel = models.ForeignKey(WordRel, related_name='all_resembles') wordrel = models.ForeignKey(WordRel, related_name='all_resembles')
word = models.CharField(u'近义词', max_length=50, db_index=True) word = models.CharField(u'近义词', max_length=50, db_index=True)
class WordRelSynonym(models.Model):
    """Synonym table: one row per synonym attached to a ``WordRel`` entry."""

    class Meta:
        app_label = "api"
        db_table = "api_wordrelsynonym"

    # The reverse accessor must differ from WordRelResemble.wordrel, which
    # already claims 'all_resembles' on WordRel; reusing it makes Django's
    # system checks fail with a reverse-accessor clash.
    wordrel = models.ForeignKey(WordRel, related_name='all_synonyms')
    # The synonym text itself; indexed for lookups by word.
    word = models.CharField(u'同义词', max_length=50, db_index=True)
...@@ -13,7 +13,7 @@ import elasticsearch.helpers ...@@ -13,7 +13,7 @@ import elasticsearch.helpers
import sys import sys
import copy import copy
from trans2es.models import doctor,itemwiki,collectwiki,brandwiki,productwiki,tag from trans2es.models import doctor, itemwiki, collectwiki, brandwiki, productwiki, tag, wordresemble
from trans2es.utils.doctor_transfer import DoctorTransfer from trans2es.utils.doctor_transfer import DoctorTransfer
from trans2es.utils.hospital_transfer import HospitalTransfer from trans2es.utils.hospital_transfer import HospitalTransfer
from trans2es.utils.itemwiki_transfer import ItemWikiTransfer from trans2es.utils.itemwiki_transfer import ItemWikiTransfer
...@@ -23,10 +23,9 @@ from trans2es.utils.productwiki_transfer import ProduceWikiTransfer ...@@ -23,10 +23,9 @@ from trans2es.utils.productwiki_transfer import ProduceWikiTransfer
from trans2es.utils.tag_transfer import TagTransfer from trans2es.utils.tag_transfer import TagTransfer
from trans2es.utils.wordresemble import WordResemble from trans2es.utils.wordresemble import WordResemble
from libs.es import ESPerform from libs.es import ESPerform
from libs.tools import tzlc,getMd5Digest from libs.tools import tzlc, getMd5Digest
from trans2es.commons.words_utils import QueryWordAttr from trans2es.commons.words_utils import QueryWordAttr
from gm_types.gaia import SERVICE_ITEM_PRICE_TYPE, DOCTOR_TYPE from gm_types.gaia import SERVICE_ITEM_PRICE_TYPE, DOCTOR_TYPE
from gm_types.gaia import ( from gm_types.gaia import (
DOCTOR_PROCESS_STATUS, DOCTOR_TYPE, PHONE_HINTS, TOPIC_TYPE, TAG_TYPE, DisplayTag, DOCTOR_PROCESS_STATUS, DOCTOR_TYPE, PHONE_HINTS, TOPIC_TYPE, TAG_TYPE, DisplayTag,
...@@ -137,6 +136,7 @@ class TypeInfo(object): ...@@ -137,6 +136,7 @@ class TypeInfo(object):
)) ))
continue continue
data = self.get_data_func(instance) data = self.get_data_func(instance)
resemble_list = WordResemble.get_word_resemble_list(str(instance.name)) resemble_list = WordResemble.get_word_resemble_list(str(instance.name))
(item_dict, suggest_list) = data (item_dict, suggest_list) = data
...@@ -145,14 +145,27 @@ class TypeInfo(object): ...@@ -145,14 +145,27 @@ class TypeInfo(object):
suggest_dict["suggest_type"] = suggest_item["suggest_type"] suggest_dict["suggest_type"] = suggest_item["suggest_type"]
suggest_dict["offline_score"] = suggest_item["word_weight"] + suggest_dict["order_weight"] suggest_dict["offline_score"] = suggest_item["word_weight"] + suggest_dict["order_weight"]
suggest_dict["id"] = str(suggest_dict["id"]) + "_" + str(suggest_item["cur_index"]) suggest_dict["id"] = str(suggest_dict["id"]) + "_" + str(suggest_item["cur_index"])
if suggest_item["suggest_type"] == 1 and item_dict["tips_name_type"] == 4:
for resemble in resemble_list:
suggest_item["input"].append(str(resemble))
suggest_dict["suggest"] = {
"input": suggest_item["input"],
"weight": int(suggest_dict["offline_score"]),
"contexts": {
"is_online": suggest_dict["is_online"]
}
}
else:
suggest_dict["suggest"] = { suggest_dict["suggest"] = {
"input": suggest_item["input"], "input": suggest_item["input"],
"weight": int(suggest_dict["offline_score"]), "weight": int(suggest_dict["offline_score"]),
"contexts":{ "contexts": {
"is_online": suggest_dict["is_online"] "is_online": suggest_dict["is_online"]
} }
} }
data_list.append(suggest_dict) data_list.append(suggest_dict)
if item_dict["tips_name_type"] != 4:
for resemble_item in resemble_list: for resemble_item in resemble_list:
resemble_dict = copy.deepcopy(suggest_dict) resemble_dict = copy.deepcopy(suggest_dict)
resemble_dict["id"] = suggest_dict["id"] + "_" + getMd5Digest(resemble_item) resemble_dict["id"] = suggest_dict["id"] + "_" + getMd5Digest(resemble_item)
...@@ -162,6 +175,9 @@ class TypeInfo(object): ...@@ -162,6 +175,9 @@ class TypeInfo(object):
resemble_dict["suggest"]["weight"] = 0 resemble_dict["suggest"]["weight"] = 0
data_list.append(resemble_dict) data_list.append(resemble_dict)
else:
pass
except Exception: except Exception:
traceback.print_exc() traceback.print_exc()
logging.exception('bulk_get_data for name={}, doc_type={}, pk={}'.format( logging.exception('bulk_get_data for name={}, doc_type={}, pk={}'.format(
...@@ -194,7 +210,7 @@ class TypeInfo(object): ...@@ -194,7 +210,7 @@ class TypeInfo(object):
# traceback.print_exc() # traceback.print_exc()
# es_result = 'error' # es_result = 'error'
return ESPerform.es_helpers_bulk(es,data_list,sub_index_name,True) return ESPerform.es_helpers_bulk(es, data_list, sub_index_name, True)
def elasticsearch_bulk_insert(self, sub_index_name, instance_iterable, es=None): def elasticsearch_bulk_insert(self, sub_index_name, instance_iterable, es=None):
data_list = self.bulk_get_data(instance_iterable) data_list = self.bulk_get_data(instance_iterable)
...@@ -269,7 +285,7 @@ def get_type_info_map(): ...@@ -269,7 +285,7 @@ def get_type_info_map():
type_info_list = [ type_info_list = [
TypeInfo( TypeInfo(
name='suggest', name='suggest',
type='doctor_tips',# doctor type='doctor_tips', # doctor
model=doctor.Doctor, model=doctor.Doctor,
query_deferred=lambda: doctor.Doctor.objects.all().filter(doctor_type=DOCTOR_TYPE.DOCTOR).query, query_deferred=lambda: doctor.Doctor.objects.all().filter(doctor_type=DOCTOR_TYPE.DOCTOR).query,
get_data_func=DoctorTransfer.get_doctor_suggest_data_list, get_data_func=DoctorTransfer.get_doctor_suggest_data_list,
...@@ -281,7 +297,7 @@ def get_type_info_map(): ...@@ -281,7 +297,7 @@ def get_type_info_map():
name='suggest', name='suggest',
type='hospital_tips', # hospital type='hospital_tips', # hospital
model=doctor.Hospital, model=doctor.Hospital,
#query_deferred=lambda: doctor.Doctor.objects.all().filter(doctor_type=DOCTOR_TYPE.OFFICER).query, # query_deferred=lambda: doctor.Doctor.objects.all().filter(doctor_type=DOCTOR_TYPE.OFFICER).query,
query_deferred=lambda: doctor.Hospital.objects.all().query, query_deferred=lambda: doctor.Hospital.objects.all().query,
get_data_func=HospitalTransfer.get_hospital_suggest_data_list, get_data_func=HospitalTransfer.get_hospital_suggest_data_list,
bulk_insert_chunk_size=100, bulk_insert_chunk_size=100,
...@@ -332,11 +348,22 @@ def get_type_info_map(): ...@@ -332,11 +348,22 @@ def get_type_info_map():
name='suggest', name='suggest',
type='tag_tips', # tag type='tag_tips', # tag
model=tag.Tag, model=tag.Tag,
query_deferred=lambda: tag.Tag.objects.all().filter(tag_type__in=[TAG_TYPE.BODY_PART, TAG_TYPE.BODY_PART_SUB_ITEM, TAG_TYPE.ITEM_WIKI]).query, query_deferred=lambda: tag.Tag.objects.all().filter(
tag_type__in=[TAG_TYPE.BODY_PART, TAG_TYPE.BODY_PART_SUB_ITEM, TAG_TYPE.ITEM_WIKI]).query,
get_data_func=TagTransfer.get_tag_suggest_data_list, get_data_func=TagTransfer.get_tag_suggest_data_list,
bulk_insert_chunk_size=100, bulk_insert_chunk_size=100,
round_insert_chunk_size=5, round_insert_chunk_size=5,
round_insert_period=2, round_insert_period=2,
),
TypeInfo(
name='suggest',
type='wordrel_tips', # tag
model=tag.Tag,
query_deferred=lambda: wordresemble.WordRel.objects.all().query,
get_data_func=WordResemble.get_resemble_list,
bulk_insert_chunk_size=100,
round_insert_chunk_size=5,
round_insert_period=2,
) )
] ]
...@@ -348,6 +375,7 @@ def get_type_info_map(): ...@@ -348,6 +375,7 @@ def get_type_info_map():
_get_type_info_map_result = type_info_map _get_type_info_map_result = type_info_map
return type_info_map return type_info_map
def tips_attr_sync_to_redis_type_info_map(): def tips_attr_sync_to_redis_type_info_map():
global _get_type_info_map_result global _get_type_info_map_result
if _get_type_info_map_result: if _get_type_info_map_result:
...@@ -356,7 +384,7 @@ def tips_attr_sync_to_redis_type_info_map(): ...@@ -356,7 +384,7 @@ def tips_attr_sync_to_redis_type_info_map():
type_info_list = [ type_info_list = [
TypeInfo( TypeInfo(
name='suggest', name='suggest',
type='doctor_results_num',# doctor结果数 type='doctor_results_num', # doctor结果数
model=doctor.Doctor, model=doctor.Doctor,
query_deferred=lambda: doctor.Doctor.objects.all().query, query_deferred=lambda: doctor.Doctor.objects.all().query,
get_data_func=DoctorTransfer.get_doctor_data_name_mapping_results_to_redis, get_data_func=DoctorTransfer.get_doctor_data_name_mapping_results_to_redis,
...@@ -368,7 +396,8 @@ def tips_attr_sync_to_redis_type_info_map(): ...@@ -368,7 +396,8 @@ def tips_attr_sync_to_redis_type_info_map():
name='suggest', name='suggest',
type='tag_results_num', # tag结果数 type='tag_results_num', # tag结果数
model=tag.Tag, model=tag.Tag,
query_deferred=lambda: tag.Tag.objects.all().filter(tag_type__in=[TAG_TYPE.BODY_PART, TAG_TYPE.BODY_PART_SUB_ITEM, TAG_TYPE.ITEM_WIKI]).query, query_deferred=lambda: tag.Tag.objects.all().filter(
tag_type__in=[TAG_TYPE.BODY_PART, TAG_TYPE.BODY_PART_SUB_ITEM, TAG_TYPE.ITEM_WIKI]).query,
get_data_func=TagTransfer.get_tag_data_name_mapping_results_to_redis, get_data_func=TagTransfer.get_tag_data_name_mapping_results_to_redis,
bulk_insert_chunk_size=100, bulk_insert_chunk_size=100,
round_insert_chunk_size=5, round_insert_chunk_size=5,
...@@ -423,6 +452,16 @@ def tips_attr_sync_to_redis_type_info_map(): ...@@ -423,6 +452,16 @@ def tips_attr_sync_to_redis_type_info_map():
bulk_insert_chunk_size=100, bulk_insert_chunk_size=100,
round_insert_chunk_size=5, round_insert_chunk_size=5,
round_insert_period=2, round_insert_period=2,
),
TypeInfo(
name='suggest',
type='wordrel_results_num', # api_wordrelresemble
model=doctor.Hospital,
query_deferred=lambda: wordresemble.WordRel.objects.all().query,
get_data_func=WordResemble.get_all_data_name_mapping_results_to_redis,
bulk_insert_chunk_size=100,
round_insert_chunk_size=5,
round_insert_period=2,
) )
] ]
......
...@@ -20,20 +20,22 @@ class HospitalTransfer(object): ...@@ -20,20 +20,22 @@ class HospitalTransfer(object):
try: try:
ret_list = list() ret_list = list()
name = instance.name.strip()
item_dict = dict() item_dict = dict()
item_dict["id"] = getMd5Digest(str(instance.name)) item_dict["id"] = getMd5Digest(str(name))
item_dict["ori_name"] = instance.name item_dict["ori_name"] = name
item_dict["is_online"] = True item_dict["is_online"] = True
item_dict["order_weight"] = QueryWordAttr.get_hospital_query_word_weight(instance.name) item_dict["order_weight"] = QueryWordAttr.get_hospital_query_word_weight(name)
item_dict["results_num"] = QueryWordAttr.get_query_results_num(instance.name) item_dict["results_num"] = QueryWordAttr.get_query_results_num(name)
item_dict["type_flag"] = get_tips_word_type(instance.name) item_dict["type_flag"] = get_tips_word_type(name)
item_dict["offline_score"] = 0.0 item_dict["offline_score"] = 0.0
item_dict["tips_name_type"] = 1 item_dict["tips_name_type"] = 1
ret_list.append(item_dict) ret_list.append(item_dict)
suggest_list = get_tips_suggest_list(instance.name) suggest_list = get_tips_suggest_list(name)
return (item_dict, suggest_list) return (item_dict, suggest_list)
except: except:
......
...@@ -4,18 +4,17 @@ import os ...@@ -4,18 +4,17 @@ import os
import sys import sys
import logging import logging
import traceback import traceback
from libs.tools import tzlc,getMd5Digest from libs.tools import tzlc, getMd5Digest
from libs.es import ESPerform from libs.es import ESPerform
from libs.cache import redis_client from libs.cache import redis_client
import json import json
from django.conf import settings from django.conf import settings
from trans2es.commons.commons import get_tips_suggest_list from trans2es.commons.commons import get_tips_suggest_list
from trans2es.commons.words_utils import QueryWordAttr,get_tips_word_type from trans2es.commons.words_utils import QueryWordAttr, get_tips_word_type
class TagTransfer(object): class TagTransfer(object):
tips_num_redis_key_prefix = "search_tips:tips_mapping_num" tips_num_redis_key_prefix = "search_tips:tips_mapping_num"
@classmethod @classmethod
...@@ -60,20 +59,22 @@ class TagTransfer(object): ...@@ -60,20 +59,22 @@ class TagTransfer(object):
{'multi_match': multi_match}, {'multi_match': multi_match},
sku_query sku_query
], ],
"must":[ "must": [
{"term":{"is_online":True}} {"term": {"is_online": True}}
], ],
"minimum_should_match": 1 "minimum_should_match": 1
} }
} }
result_dict = ESPerform.get_search_results(ESPerform.get_cli(settings.GM_ORI_ES_INFO_LIST), sub_index_name="service", doc_type="service", query_body=q,offset=0,size=0) result_dict = ESPerform.get_search_results(ESPerform.get_cli(settings.GM_ORI_ES_INFO_LIST),
sub_index_name="service", doc_type="service", query_body=q,
offset=0, size=0)
doctor_results = result_dict["total_count"] doctor_results = result_dict["total_count"]
redis_data = redis_client.hget(cls.tips_num_redis_key_prefix, tag_name) redis_data = redis_client.hget(cls.tips_num_redis_key_prefix, tag_name)
redis_val_dict = json.loads(str(redis_data,encoding="utf-8")) if redis_data else {} redis_val_dict = json.loads(str(redis_data, encoding="utf-8")) if redis_data else {}
total_count = doctor_results total_count = doctor_results
if 's' in redis_val_dict: if 's' in redis_val_dict:
...@@ -82,7 +83,7 @@ class TagTransfer(object): ...@@ -82,7 +83,7 @@ class TagTransfer(object):
redis_val_dict['t'] = total_count redis_val_dict['t'] = total_count
redis_val_dict['s'] = doctor_results redis_val_dict['s'] = doctor_results
redis_client.hset(cls.tips_num_redis_key_prefix,tag_name, json.dumps(redis_val_dict)) redis_client.hset(cls.tips_num_redis_key_prefix, tag_name, json.dumps(redis_val_dict))
except: except:
logging.error("catch exception,err_msg:%s" % traceback.format_exc()) logging.error("catch exception,err_msg:%s" % traceback.format_exc())
...@@ -106,7 +107,7 @@ class TagTransfer(object): ...@@ -106,7 +107,7 @@ class TagTransfer(object):
suggest_list = get_tips_suggest_list(instance.name) suggest_list = get_tips_suggest_list(instance.name)
return (item_dict,suggest_list) return (item_dict, suggest_list)
except: except:
logging.error("catch exception,err_msg:%s" % traceback.format_exc()) logging.error("catch exception,err_msg:%s" % traceback.format_exc())
return ([],[]) return ([], [])
\ No newline at end of file
...@@ -10,6 +10,11 @@ from libs.cache import redis_client ...@@ -10,6 +10,11 @@ from libs.cache import redis_client
import json import json
from django.conf import settings from django.conf import settings
from trans2es.models import wordresemble from trans2es.models import wordresemble
from trans2es.commons.words_utils import QueryWordAttr, get_tips_word_type
from trans2es.commons.commons import get_tips_suggest_list
from trans2es.utils.doctor_transfer import DoctorTransfer
from trans2es.utils.itemwiki_transfer import ItemWikiTransfer
from trans2es.utils.tag_transfer import TagTransfer
class WordResemble(object): class WordResemble(object):
...@@ -31,3 +36,104 @@ class WordResemble(object): ...@@ -31,3 +36,104 @@ class WordResemble(object):
except: except:
logging.error("catch exception,err_msg:%s" % traceback.format_exc()) logging.error("catch exception,err_msg:%s" % traceback.format_exc())
return list() return list()
@classmethod
def get_resemble_list(cls, instance):
    """Build the suggest payload for a single word-relation record.

    Args:
        instance: record exposing ``word`` and ``name`` attributes.

    Returns:
        A ``(item_dict, suggest_list)`` tuple. ``item_dict`` holds the base
        document fields and ``suggest_list`` is the result of
        ``get_tips_suggest_list(instance.name)``. On any failure the error
        is logged and ``([], [])`` is returned, so callers can always
        unpack two values.
    """
    try:
        # NOTE(review): id/ori_name are derived from instance.word while the
        # weight, result count, type flag and suggest terms are derived from
        # instance.name -- confirm the model exposes both and that mixing
        # them is intended.
        item_dict = {
            "id": getMd5Digest(str(instance.word)),
            "ori_name": instance.word,
            "is_online": True,
            "order_weight": QueryWordAttr.get_project_query_word_weight(instance.name),
            "results_num": QueryWordAttr.get_query_results_num(instance.name),
            "type_flag": get_tips_word_type(instance.name),
            "offline_score": 0.0,
            # Type 4 marks word-relation entries.
            "tips_name_type": 4,
        }
        suggest_list = get_tips_suggest_list(instance.name)
        return (item_dict, suggest_list)
    except Exception:
        logging.error("catch exception,err_msg:%s" % traceback.format_exc())
        return ([], [])
@classmethod
def get_all_data_name_mapping_results_to_redis(cls, instance):
    """Run every per-source result-count sync for ``instance``.

    The steps run in a fixed order -- wiki, tag, doctor, diary -- each one
    delegating to the matching ``*_results_to_redis`` helper. The first
    exception aborts the remaining steps and is logged; nothing is
    returned or re-raised.
    """
    try:
        sync_steps = (
            # wiki entries
            ItemWikiTransfer.get_wiki_data_name_mapping_results_to_redis,
            # beauty-service (meigou) listings
            TagTransfer.get_tag_data_name_mapping_results_to_redis,
            # doctors and hospitals
            DoctorTransfer.get_doctor_data_name_mapping_results_to_redis,
            # diaries
            WordResemble.get_diary_data_name_mapping_results_to_redis,
        )
        for sync_step in sync_steps:
            sync_step(instance)
    except:
        logging.error("catch exception,err_msg:%s" % traceback.format_exc())
@classmethod
def get_diary_data_name_mapping_results_to_redis(cls, instance):
    """Count online diaries matching ``instance.name`` and cache it in redis.

    Runs a size-0 cross-field search against the ``diary`` index and stores
    the hit count in the ``search_tips:tips_mapping_num`` hash under the
    stripped name, as a JSON object with keys ``s`` and ``t``.

    Blank names are skipped outright: there is nothing to match, and this
    avoids sending a query-less body to Elasticsearch and writing an entry
    under an empty hash field.

    Args:
        instance: record exposing a string ``name`` attribute.

    Returns:
        None. Failures are logged and swallowed (best-effort sync).
    """
    tips_num_redis_key_prefix = "search_tips:tips_mapping_num"
    # Per-field boosts for the cross-field match.
    multi_fields = {
        'tags': 8,
        'doctor.name': 4,
        'doctor.hospital.name': 3,
        'doctor.hospital.officer_name': 3,
        'user.last_name': 2,
        'service.name': 1,
        "title": 2}
    try:
        tag_name = instance.name.strip()
        if not tag_name:
            return

        # Render boosts in "field^boost" form.
        query_fields = ['^'.join((k, str(v))) for (k, v) in multi_fields.items()]
        multi_match = {
            'query': tag_name,
            'type': 'cross_fields',
            'operator': 'and',
            'fields': query_fields,
        }
        q = {
            'query': {
                'bool': {
                    "should": [
                        {'multi_match': multi_match}
                    ],
                    "must": [
                        {"term": {"is_online": True}}
                    ],
                    "minimum_should_match": 1
                }
            }
        }

        # size=0: only the total hit count is needed, not the documents.
        result_dict = ESPerform.get_search_results(ESPerform.get_cli(settings.GM_ORI_ES_INFO_LIST),
                                                   sub_index_name="diary", doc_type="diary", query_body=q,
                                                   offset=0, size=0)
        diary_count = result_dict["total_count"]

        redis_data = redis_client.hget(tips_num_redis_key_prefix, tag_name)
        redis_val_dict = json.loads(str(redis_data, encoding="utf-8")) if redis_data else {}

        # NOTE(review): 's' is read and then overwritten here with the diary
        # count; TagTransfer writes its own count to the same key in the same
        # hash -- confirm the diary sync is meant to share 's' rather than
        # use a dedicated key.
        total_count = diary_count
        if 's' in redis_val_dict:
            total_count += int(redis_val_dict['s'])

        redis_val_dict['t'] = total_count
        redis_val_dict['s'] = diary_count

        redis_client.hset(tips_num_redis_key_prefix, tag_name, json.dumps(redis_val_dict))
    except Exception:
        logging.error("catch exception,err_msg:%s" % traceback.format_exc())
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment