Commit a8df3524 authored by lixiaofang's avatar lixiaofang

add

parent ca099d92
...@@ -58,7 +58,7 @@ class ESPerform(object): ...@@ -58,7 +58,7 @@ class ESPerform(object):
os.path.dirname(__file__), os.path.dirname(__file__),
'..', 'trans2es', 'mapping', '%s.json' % (doc_type,)) '..', 'trans2es', 'mapping', '%s.json' % (doc_type,))
if doc_type == "associate_tag" or doc_type == "associate_tag_brand" or doc_type == "associate_tag_doctor" \ if doc_type == "associate_tag" or doc_type == "associate_tag_brand" or doc_type == "associate_tag_doctor" \
or doc_type == "associate_tag_instrument" or doc_type == "associate_tag_hospital" or doc_type == "associate_tag_project"\ or doc_type == "associate_tag_instrument" or doc_type == "associate_tag_hospital" or doc_type == "associate_tag_project" \
or doc_type == "associate_tag_position": or doc_type == "associate_tag_position":
mapping_file_path = os.path.join( mapping_file_path = os.path.join(
os.path.dirname(__file__), os.path.dirname(__file__),
...@@ -184,6 +184,7 @@ class ESPerform(object): ...@@ -184,6 +184,7 @@ class ESPerform(object):
official_index_name = cls.get_official_index_name(sub_index_name, "read") official_index_name = cls.get_official_index_name(sub_index_name, "read")
index_exists = es_cli.indices.exists(official_index_name) index_exists = es_cli.indices.exists(official_index_name)
print(index_exists)
if not index_exists: if not index_exists:
if not auto_create_index: if not auto_create_index:
logging.error("index:%s is not existing,get_search_results error!" % official_index_name) logging.error("index:%s is not existing,get_search_results error!" % official_index_name)
......
...@@ -16,9 +16,29 @@ from trans2es.commons.words_utils import QueryWordAttr, get_tips_word_type ...@@ -16,9 +16,29 @@ from trans2es.commons.words_utils import QueryWordAttr, get_tips_word_type
def get_suggest_tips(query, lat, lng, offset=0, size=50, device_id=None): def get_suggest_tips(query, lat, lng, offset=0, size=50, device_id=None):
"""
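There are currently three ranking strategies here, validated through two gray-release (canary) experiments:
50%: apply strong prefix weighting, then sort by tag conversion rate and word frequency; index "suggest", gray_number=1, last hex digit of the device-id MD5 in ["0", "1", "2", "3", "c", "d", "e", "f"]
25%: no strong prefix weighting, sort directly by tag conversion rate and word frequency; index "suggest-v1", gray_number=2, last hex digit in ["4", "5", "6", "a"]
25%: no strong prefix weighting, sort by tag conversion rate and word frequency, then re-order for display by prefix match; index "suggest-v1", gray_number=3, last hex digit in ["9", "8", "7", "b"]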
:param query:
:param lat:
:param lng:
:param offset:
:param size:
:param device_id:
:return:
"""
try: try:
###加两层灰度 ###在原来的逻辑上加两层灰度
gray_number = recommed_service_category_device_id(device_id)
if gray_number == 1:
sub_index_name = "suggest"
else:
sub_index_name = "suggest-v1"
###获取联想到的数据 ###获取联想到的数据
# ios输入法在某些情况下会携带\\u2006 # ios输入法在某些情况下会携带\\u2006
...@@ -49,7 +69,7 @@ def get_suggest_tips(query, lat, lng, offset=0, size=50, device_id=None): ...@@ -49,7 +69,7 @@ def get_suggest_tips(query, lat, lng, offset=0, size=50, device_id=None):
ret_list = list() ret_list = list()
doctor_hospital_equal_query = list() doctor_hospital_equal_query = list()
tag_equal_query = list() tag_equal_query = list()
result_dict = ESPerform.get_search_results(ESPerform.get_cli(), sub_index_name="suggest", query_body=q, result_dict = ESPerform.get_search_results(ESPerform.get_cli(), sub_index_name=sub_index_name, query_body=q,
offset=offset, size=size, is_suggest_request=True) offset=offset, size=size, is_suggest_request=True)
for tips_item in result_dict["suggest"]["tips-suggest"]: for tips_item in result_dict["suggest"]["tips-suggest"]:
...@@ -110,9 +130,41 @@ def get_suggest_tips(query, lat, lng, offset=0, size=50, device_id=None): ...@@ -110,9 +130,41 @@ def get_suggest_tips(query, lat, lng, offset=0, size=50, device_id=None):
ret_list.extend(get_tag_wiki_data) ret_list.extend(get_tag_wiki_data)
ret_list.extend(get_doctor_hospital_data) ret_list.extend(get_doctor_hospital_data)
if len(result_dict["suggest"]["tips-suggest"]) >= 50: if len(ret_list) >= 50:
return ret_list
if gray_number in (1, 2):
logging.info("get-----------------ret_list:%s" % ret_list)
return ret_list[:50]
else:
logging.info("get+++++++++++++++++ret_list:%s" % ret_list)
# 在去掉强加权的逻辑上根据词频和tag转化率排序后再前段强加权
front_data = []
end_data = []
equal_data = []
need_change_sort = ret_list[:30]
for item in need_change_sort:
ori_name = item.get("ori_name", None)
if query == ori_name:
equal_data.append(item)
elif query == ori_name[:len(query)]:
front_data.append(item)
else:
end_data.append(item)
logging.info("get+++++++++++++++++equal_data:%s" % equal_data)
logging.info("get+++++++++++++++++front_data:%s" % front_data)
logging.info("get+++++++++++++++++end_data:%s" % end_data)
logging.info("get+++++++++++++++++ret_list:%s" % ret_list)
equal_data.extend(front_data)
equal_data.extend(end_data)
equal_data.extend(ret_list[30:])
ret_list = equal_data
return ret_list
else: else:
query_ret_list = [] query_ret_list = []
wordresemble_ret_list = [] wordresemble_ret_list = []
...@@ -197,21 +249,26 @@ def set_highlihgt(query=None, ori_name=None): ...@@ -197,21 +249,26 @@ def set_highlihgt(query=None, ori_name=None):
return highlight_name return highlight_name
def recommed_service_category_device_id(device_id):
    """Return the gray-release (canary) bucket a device belongs to.

    The bucket is derived from the last hexadecimal digit of the MD5 digest
    of ``str(device_id)``:

    * ``4``, ``5``, ``6``, ``a`` -> bucket 2
    * ``7``, ``8``, ``9``, ``b`` -> bucket 3
    * every other digit          -> bucket 1

    :param device_id: device identifier; anything ``str()`` accepts.
    :return: ``1``, ``2`` or ``3``. A falsy ``device_id`` and any unexpected
        error both fall back to bucket ``1``.
    """
    bucket_two_digits = ("4", "5", "6", "a")
    bucket_three_digits = ("9", "8", "7", "b")
    try:
        if not device_id:
            return 1

        last_digit = hashlib.md5(str(device_id).encode()).hexdigest()[-1]
        if last_digit in bucket_two_digits:
            return 2
        if last_digit in bucket_three_digits:
            return 3
        return 1
    except Exception:
        # Still degrade to the default bucket, but leave a trace instead of
        # swallowing the failure silently.
        logging.error("catch exception,err_msg:%s" % traceback.format_exc())
        return 1
...@@ -120,11 +120,11 @@ def get_tips_suggest_list_v1(instance_cn_name): ...@@ -120,11 +120,11 @@ def get_tips_suggest_list_v1(instance_cn_name):
try: try:
# ch_full_weight = 6.0 * 1000 # ch_full_weight = 6.0 * 1000
# py_full_weight = 3.0 * 1000 # py_full_weight = 3.0 * 1000
full_weight = 3.0 * 1000 full_weight = 1
py_acronym_full_weight = 3.0 * 1000 py_acronym_full_weight = 1
py_acronym_prefix_weight = 2 py_acronym_prefix_weight = 1
ch_prefix_weight = 1.5 ch_prefix_weight = 1
py_prefix_weight = 1.0 py_prefix_weight = 1.0
# 命中开始部分加权 # 命中开始部分加权
......
...@@ -21,7 +21,7 @@ from trans2es.utils.collectwiki_transfer import CollectWikiTransfer, CollectWiki ...@@ -21,7 +21,7 @@ from trans2es.utils.collectwiki_transfer import CollectWikiTransfer, CollectWiki
from trans2es.utils.brandwiki_transfer import BrandWikiTransfer, BrandWikiTransferV1 from trans2es.utils.brandwiki_transfer import BrandWikiTransfer, BrandWikiTransferV1
from trans2es.utils.productwiki_transfer import ProduceWikiTransfer, ProduceWikiTransferV1 from trans2es.utils.productwiki_transfer import ProduceWikiTransfer, ProduceWikiTransferV1
from trans2es.utils.tag_transfer import TagTransfer, TagTransferV1 from trans2es.utils.tag_transfer import TagTransfer, TagTransferV1
from trans2es.utils.wordresemble import WordResemble from trans2es.utils.wordresemble import WordResemble, WordResembleV1
from libs.es import ESPerform from libs.es import ESPerform
from libs.tools import tzlc, getMd5Digest from libs.tools import tzlc, getMd5Digest
from trans2es.commons.words_utils import QueryWordAttr from trans2es.commons.words_utils import QueryWordAttr
...@@ -445,7 +445,7 @@ def get_type_info_map(): ...@@ -445,7 +445,7 @@ def get_type_info_map():
model=wordresemble.WordRel, model=wordresemble.WordRel,
query_deferred=lambda: wordresemble.WordRel.objects.filter( query_deferred=lambda: wordresemble.WordRel.objects.filter(
category__in=[13, 12, 11, 9, 1]).query, category__in=[13, 12, 11, 9, 1]).query,
get_data_func=WordResemble.get_resemble_list, get_data_func=WordResembleV1.get_resemble_list,
bulk_insert_chunk_size=100, bulk_insert_chunk_size=100,
round_insert_chunk_size=5, round_insert_chunk_size=5,
round_insert_period=2, round_insert_period=2,
......
...@@ -10,7 +10,7 @@ from libs.cache import redis_client ...@@ -10,7 +10,7 @@ from libs.cache import redis_client
import json import json
from django.conf import settings from django.conf import settings
from trans2es.commons.commons import get_tips_suggest_list,get_tips_suggest_list_v1 from trans2es.commons.commons import get_tips_suggest_list, get_tips_suggest_list_v1
from trans2es.commons.words_utils import QueryWordAttr, get_tips_word_type from trans2es.commons.words_utils import QueryWordAttr, get_tips_word_type
...@@ -97,8 +97,8 @@ class TagTransfer(object): ...@@ -97,8 +97,8 @@ class TagTransfer(object):
for i in cut_word: for i in cut_word:
if keyword.find(i) >= 0: if keyword.find(i) >= 0:
cut_bool = True cut_bool = True
item_dict = dict()
if cut_bool == False: if cut_bool == False:
item_dict = dict()
item_dict["id"] = getMd5Digest(str(instance.name)) item_dict["id"] = getMd5Digest(str(instance.name))
item_dict["ori_name"] = instance.name item_dict["ori_name"] = instance.name
item_dict["is_online"] = instance.is_online item_dict["is_online"] = instance.is_online
...@@ -202,8 +202,8 @@ class TagTransferV1(object): ...@@ -202,8 +202,8 @@ class TagTransferV1(object):
for i in cut_word: for i in cut_word:
if keyword.find(i) >= 0: if keyword.find(i) >= 0:
cut_bool = True cut_bool = True
item_dict = dict()
if cut_bool == False: if cut_bool == False:
item_dict = dict()
item_dict["id"] = getMd5Digest(str(instance.name)) item_dict["id"] = getMd5Digest(str(instance.name))
item_dict["ori_name"] = instance.name item_dict["ori_name"] = instance.name
item_dict["is_online"] = instance.is_online item_dict["is_online"] = instance.is_online
......
...@@ -12,7 +12,7 @@ import json ...@@ -12,7 +12,7 @@ import json
from django.conf import settings from django.conf import settings
from trans2es.models import wordresemble from trans2es.models import wordresemble
from trans2es.commons.words_utils import QueryWordAttr, get_tips_word_type from trans2es.commons.words_utils import QueryWordAttr, get_tips_word_type
from trans2es.commons.commons import get_tips_suggest_list from trans2es.commons.commons import get_tips_suggest_list, get_tips_suggest_list_v1
from trans2es.utils.doctor_transfer import DoctorTransfer from trans2es.utils.doctor_transfer import DoctorTransfer
from trans2es.utils.itemwiki_transfer import ItemWikiTransfer from trans2es.utils.itemwiki_transfer import ItemWikiTransfer
from trans2es.utils.tag_transfer import TagTransfer from trans2es.utils.tag_transfer import TagTransfer
...@@ -201,3 +201,188 @@ class WordResemble(object): ...@@ -201,3 +201,188 @@ class WordResemble(object):
redis_client.hset(QUERY_KEY, query_base64, json.dumps(keyword_value)) redis_client.hset(QUERY_KEY, query_base64, json.dumps(keyword_value))
except: except:
logging.error("catch exception,err_msg:%s" % traceback.format_exc()) logging.error("catch exception,err_msg:%s" % traceback.format_exc())
class WordResembleV1(object):
    """Search-tip data builders for ``WordRel`` keywords (V1 variant).

    Same role as ``WordResemble`` but the suggest entries are produced by
    ``get_tips_suggest_list_v1``.

    NOTE(review): this class was reconstructed from a listing that had lost
    its indentation; places where the nesting had to be inferred are marked
    with NOTE(review) comments below.
    """

    # Redis hash: word -> JSON dict; this class reads/writes the keys
    # 't' (accumulated total count) and 'r' (diary result count).
    _TIPS_NUM_REDIS_KEY = "search_tips:tips_mapping_num"
    # Redis hash: base64(word) -> JSON list of single-entry dicts
    # {base64(related word): total count}.
    _QUERY_KEY = "query:search_tip"
    # Keywords containing one of these markers (offline / disabled) are not
    # turned into a tips document.
    _CUT_WORDS = ["下线", "停用", "已经下线", "账号停用"]

    @staticmethod
    def _b64(text):
        """Return *text* UTF-8 encoded and base64 encoded, as ``str``."""
        return base64.b64encode(text.encode('utf8')).decode('utf8')

    @classmethod
    def _get_total_count(cls, name):
        """Return the total count ('t') cached in redis for *name*, or 0.

        The stored value is decoded as UTF-8 bytes containing JSON.
        """
        redis_data = redis_client.hget(cls._TIPS_NUM_REDIS_KEY, name)
        redis_val_dict = json.loads(str(redis_data, encoding="utf-8")) if redis_data else {}
        if 't' in redis_val_dict:
            return int(redis_val_dict['t'])
        return 0

    @classmethod
    def get_word_resemble_list(cls, keyword):
        """Return every resemble word configured for *keyword*.

        A related ``word`` value may contain several words joined by "、";
        those are split into separate entries.

        :param keyword: keyword to look up in ``WordRel``.
        :return: flat list of words; an empty list when the lookup fails.
        """
        try:
            resemble_list = list()
            for word_rel in wordresemble.WordRel.objects.filter(keyword=keyword):
                joined_values = word_rel.all_resembles.all().values_list('word', flat=True)
                for joined_words in joined_values:
                    resemble_list.extend(joined_words.split("、"))
            return resemble_list
        except Exception:
            logging.error("catch exception,err_msg:%s" % traceback.format_exc())
            return list()

    @classmethod
    def get_resemble_list(cls, instance):
        """Build the tips document and suggest entries for one keyword row.

        :param instance: object exposing a ``keyword`` string attribute.
        :return: ``(item_dict, suggest_list)``. ``item_dict`` is left empty
            when the keyword contains one of the offline/disabled markers.
            ``([], [])`` is returned when anything fails.
        """
        try:
            keyword = instance.keyword
            item_dict = dict()
            if not any(marker in keyword for marker in cls._CUT_WORDS):
                item_dict["id"] = getMd5Digest(str(keyword))
                item_dict["ori_name"] = keyword
                item_dict["is_online"] = True
                item_dict["order_weight"] = QueryWordAttr.get_project_query_word_weight(keyword)
                item_dict["results_num"] = QueryWordAttr.get_query_results_num(keyword)
                item_dict["type_flag"] = get_tips_word_type(keyword)
                item_dict["offline_score"] = 0.0
                item_dict["tips_name_type"] = 4

            # NOTE(review): assumed to run for every keyword, including the
            # filtered ones (the same commit hoists ``item_dict = dict()``
            # out of the equivalent ``if`` in TagTransfer so the return after
            # it always works) - confirm against the repository.
            suggest_list = get_tips_suggest_list_v1(keyword)
            return (item_dict, suggest_list)
        except Exception:
            logging.error("catch exception,err_msg:%s" % traceback.format_exc())
            return ([], [])

    @classmethod
    def get_all_data_name_mapping_results_to_redis(cls, instance):
        """Refresh the per-source result counts for ``instance.keyword``.

        Runs the wiki, tag, doctor and diary counters in that order.

        :param instance: object exposing ``keyword``; its ``name`` attribute
            is overwritten with the keyword.
        """
        try:
            # The diary counter reads ``instance.name`` (the other counters
            # presumably do as well), so mirror the keyword there.
            instance.name = instance.keyword

            # wiki entries
            ItemWikiTransfer.get_wiki_data_name_mapping_results_to_redis(instance)
            # service tags
            TagTransfer.get_tag_data_name_mapping_results_to_redis(instance)
            # doctors and hospitals
            DoctorTransfer.get_doctor_data_name_mapping_results_to_redis(instance)
            # diaries - use this class's own implementation
            cls.get_diary_data_name_mapping_results_to_redis(instance)
        except Exception:
            logging.error("catch exception,err_msg:%s" % traceback.format_exc())

    @classmethod
    def get_diary_data_name_mapping_results_to_redis(cls, instance):
        """Count online diaries matching ``instance.name`` and cache the count.

        The diary count is stored under 'r' and added to the running total
        under 't' in the redis entry of the name.

        :param instance: object exposing a ``name`` string attribute.
        """
        try:
            tag_name = instance.name.strip()
            # NOTE(review): the search and the redis update are assumed to be
            # skipped for an empty name - confirm against the repository.
            if not tag_name:
                return

            multi_fields = {
                'tags': 8,
                'doctor.name': 4,
                'doctor.hospital.name': 3,
                'doctor.hospital.officer_name': 3,
                'user.last_name': 2,
                'service.name': 1,
                "title": 2}
            # "field^boost" strings
            query_fields = ['^'.join((k, str(v))) for (k, v) in multi_fields.items()]
            multi_match = {
                'query': tag_name,
                'type': 'cross_fields',
                'operator': 'and',
                'fields': query_fields,
            }
            q = dict()
            q['query'] = {
                'bool': {
                    "should": [
                        {'multi_match': multi_match}
                    ],
                    "must": [
                        {"term": {"is_online": True}}
                    ],
                    "minimum_should_match": 1
                }
            }

            # size=0: only the hit count is used.
            result_dict = ESPerform.get_search_results(ESPerform.get_cli(settings.GM_ORI_ES_INFO_LIST),
                                                       sub_index_name="diary", doc_type="diary", query_body=q,
                                                       offset=0, size=0)
            diary_results = result_dict["total_count"]

            redis_data = redis_client.hget(cls._TIPS_NUM_REDIS_KEY, tag_name)
            redis_val_dict = json.loads(str(redis_data, encoding="utf-8")) if redis_data else {}
            total_count = diary_results
            if 't' in redis_val_dict:
                total_count += int(redis_val_dict['t'])
            redis_val_dict['t'] = total_count
            redis_val_dict['r'] = diary_results
            redis_client.hset(cls._TIPS_NUM_REDIS_KEY, tag_name, json.dumps(redis_val_dict))
        except Exception:
            logging.error("catch exception,err_msg:%s" % traceback.format_exc())

    @classmethod
    def set_data_to_redis(cls, instance):
        """Publish the keyword <-> resemble-word relations with their counts.

        For every resemble word, writes
        ``[{keyword: n}, {word1: n}, {word2: n}, ...]`` under the word's
        base64 name. For the keyword itself, writes the resemble words
        (with the keyword's own entry inserted after the first word of each
        row) under the keyword's base64 name.

        :param instance: object exposing a ``keyword`` string attribute.
        """
        try:
            keyword = instance.keyword
            query_base64 = cls._b64(keyword)
            keyword_value = []

            for word_rel in wordresemble.WordRel.objects.filter(keyword=keyword):
                words = list(word_rel.all_resembles.all().values_list('word', flat=True))

                # Read each count once per row instead of once per word pair.
                keyword_entry = {query_base64: cls._get_total_count(keyword)}
                word_names = [cls._b64(word) for word in words]
                word_entries = [{name: cls._get_total_count(word)}
                                for name, word in zip(word_names, words)]

                for position, (item_name, entry) in enumerate(zip(word_names, word_entries)):
                    keyword_value.append(entry)
                    if position == 0:
                        keyword_value.append(keyword_entry)
                    # The keyword goes first, followed by every resemble word
                    # (including the word itself).
                    wordresemble_value = [keyword_entry] + word_entries
                    redis_client.hset(cls._QUERY_KEY, item_name, json.dumps(wordresemble_value))

                # NOTE(review): written after every row; the list accumulates
                # across rows, so the last write holds the complete value.
                if keyword_value:
                    redis_client.hset(cls._QUERY_KEY, query_base64, json.dumps(keyword_value))
        except Exception:
            logging.error("catch exception,err_msg:%s" % traceback.format_exc())
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment