Commit a8df3524 authored by lixiaofang

add

parent ca099d92
......@@ -58,7 +58,7 @@ class ESPerform(object):
os.path.dirname(__file__),
'..', 'trans2es', 'mapping', '%s.json' % (doc_type,))
if doc_type == "associate_tag" or doc_type == "associate_tag_brand" or doc_type == "associate_tag_doctor" \
or doc_type == "associate_tag_instrument" or doc_type == "associate_tag_hospital" or doc_type == "associate_tag_project"\
or doc_type == "associate_tag_instrument" or doc_type == "associate_tag_hospital" or doc_type == "associate_tag_project" \
or doc_type == "associate_tag_position":
mapping_file_path = os.path.join(
os.path.dirname(__file__),
......@@ -184,6 +184,7 @@ class ESPerform(object):
official_index_name = cls.get_official_index_name(sub_index_name, "read")
index_exists = es_cli.indices.exists(official_index_name)
print(index_exists)
if not index_exists:
if not auto_create_index:
logging.error("index:%s is not existing,get_search_results error!" % official_index_name)
......
......@@ -16,9 +16,29 @@ from trans2es.commons.words_utils import QueryWordAttr, get_tips_word_type
def get_suggest_tips(query, lat, lng, offset=0, size=50, device_id=None):
"""
这个地方目前是有三个逻辑 分两个灰度验证
50%前段强加权后根据tab转化率和词频进行排序 索引 suggest gray_number=1 尾号["0", "1", "2", "3", "c", "d", "e", "f"]
25%去掉前段强加权后 直接根据tag转化率和词频进行排序 suggest-v1 gray_number=2 尾号["4", "5", "6", "a"]
25%去掉前段强加权后 根据tag转化率和词频进行排序 再根据前段加权展示排序 suggest-v1 gray_number=3 尾号["9", "8", "7", "b"]
:param query:
:param lat:
:param lng:
:param offset:
:param size:
:param device_id:
:return:
"""
try:
###加两层灰度
###在原来的逻辑上加两层灰度
gray_number = recommed_service_category_device_id(device_id)
if gray_number == 1:
sub_index_name = "suggest"
else:
sub_index_name = "suggest-v1"
###获取联想到的数据
# ios输入法在某些情况下会携带\\u2006
......@@ -49,7 +69,7 @@ def get_suggest_tips(query, lat, lng, offset=0, size=50, device_id=None):
ret_list = list()
doctor_hospital_equal_query = list()
tag_equal_query = list()
result_dict = ESPerform.get_search_results(ESPerform.get_cli(), sub_index_name="suggest", query_body=q,
result_dict = ESPerform.get_search_results(ESPerform.get_cli(), sub_index_name=sub_index_name, query_body=q,
offset=offset, size=size, is_suggest_request=True)
for tips_item in result_dict["suggest"]["tips-suggest"]:
......@@ -110,9 +130,41 @@ def get_suggest_tips(query, lat, lng, offset=0, size=50, device_id=None):
ret_list.extend(get_tag_wiki_data)
ret_list.extend(get_doctor_hospital_data)
if len(result_dict["suggest"]["tips-suggest"]) >= 50:
return ret_list
if len(ret_list) >= 50:
if gray_number in (1, 2):
logging.info("get-----------------ret_list:%s" % ret_list)
return ret_list[:50]
else:
logging.info("get+++++++++++++++++ret_list:%s" % ret_list)
# 在去掉强加权的逻辑上根据词频和tag转化率排序后再前段强加权
front_data = []
end_data = []
equal_data = []
need_change_sort = ret_list[:30]
for item in need_change_sort:
ori_name = item.get("ori_name", None)
if query == ori_name:
equal_data.append(item)
elif query == ori_name[:len(query)]:
front_data.append(item)
else:
end_data.append(item)
logging.info("get+++++++++++++++++equal_data:%s" % equal_data)
logging.info("get+++++++++++++++++front_data:%s" % front_data)
logging.info("get+++++++++++++++++end_data:%s" % end_data)
logging.info("get+++++++++++++++++ret_list:%s" % ret_list)
equal_data.extend(front_data)
equal_data.extend(end_data)
equal_data.extend(ret_list[30:])
ret_list = equal_data
return ret_list
else:
query_ret_list = []
wordresemble_ret_list = []
......@@ -197,21 +249,26 @@ def set_highlihgt(query=None, ori_name=None):
return highlight_name
def recommed_service_category_device_id(device_id, real_cary=False):
    """Map a device id to its gray-release bucket (1, 2 or 3).

    The bucket is derived from the last hex digit of ``md5(str(device_id))``:

    * ``"4"``, ``"5"``, ``"6"``, ``"a"``  -> bucket 2
    * ``"9"``, ``"8"``, ``"7"``, ``"b"``  -> bucket 3
    * any other digit                     -> bucket 1

    :param device_id: device identifier; it is stringified before hashing.
        A falsy value (``None``, ``""``, ``0``) always maps to bucket 1.
    :param real_cary: unused. Accepted only so callers written against the
        earlier two-argument signature keep working.
    :return: ``1``, ``2`` or ``3``. Bucket 1 is also the fallback when an
        unexpected error occurs, so the function never raises to the caller.
    """
    bucket_2_tails = ("4", "5", "6", "a")
    bucket_3_tails = ("9", "8", "7", "b")
    try:
        if not device_id:
            return 1
        tail = hashlib.md5(str(device_id).encode()).hexdigest()[-1]
        if tail in bucket_2_tails:
            return 2
        if tail in bucket_3_tails:
            return 3
        # Every remaining hex digit (0-3, c-f) belongs to the default bucket.
        return 1
    except Exception:
        logging.error("catch exception,err_msg:%s" % traceback.format_exc())
        return 1
......@@ -120,11 +120,11 @@ def get_tips_suggest_list_v1(instance_cn_name):
try:
# ch_full_weight = 6.0 * 1000
# py_full_weight = 3.0 * 1000
full_weight = 3.0 * 1000
py_acronym_full_weight = 3.0 * 1000
full_weight = 1
py_acronym_full_weight = 1
py_acronym_prefix_weight = 2
ch_prefix_weight = 1.5
py_acronym_prefix_weight = 1
ch_prefix_weight = 1
py_prefix_weight = 1.0
# 命中开始部分加权
......
......@@ -21,7 +21,7 @@ from trans2es.utils.collectwiki_transfer import CollectWikiTransfer, CollectWiki
from trans2es.utils.brandwiki_transfer import BrandWikiTransfer, BrandWikiTransferV1
from trans2es.utils.productwiki_transfer import ProduceWikiTransfer, ProduceWikiTransferV1
from trans2es.utils.tag_transfer import TagTransfer, TagTransferV1
from trans2es.utils.wordresemble import WordResemble
from trans2es.utils.wordresemble import WordResemble, WordResembleV1
from libs.es import ESPerform
from libs.tools import tzlc, getMd5Digest
from trans2es.commons.words_utils import QueryWordAttr
......@@ -445,7 +445,7 @@ def get_type_info_map():
model=wordresemble.WordRel,
query_deferred=lambda: wordresemble.WordRel.objects.filter(
category__in=[13, 12, 11, 9, 1]).query,
get_data_func=WordResemble.get_resemble_list,
get_data_func=WordResembleV1.get_resemble_list,
bulk_insert_chunk_size=100,
round_insert_chunk_size=5,
round_insert_period=2,
......
......@@ -10,7 +10,7 @@ from libs.cache import redis_client
import json
from django.conf import settings
from trans2es.commons.commons import get_tips_suggest_list,get_tips_suggest_list_v1
from trans2es.commons.commons import get_tips_suggest_list, get_tips_suggest_list_v1
from trans2es.commons.words_utils import QueryWordAttr, get_tips_word_type
......@@ -97,8 +97,8 @@ class TagTransfer(object):
for i in cut_word:
if keyword.find(i) >= 0:
cut_bool = True
item_dict = dict()
if cut_bool == False:
item_dict = dict()
item_dict["id"] = getMd5Digest(str(instance.name))
item_dict["ori_name"] = instance.name
item_dict["is_online"] = instance.is_online
......@@ -202,8 +202,8 @@ class TagTransferV1(object):
for i in cut_word:
if keyword.find(i) >= 0:
cut_bool = True
item_dict = dict()
if cut_bool == False:
item_dict = dict()
item_dict["id"] = getMd5Digest(str(instance.name))
item_dict["ori_name"] = instance.name
item_dict["is_online"] = instance.is_online
......
......@@ -12,7 +12,7 @@ import json
from django.conf import settings
from trans2es.models import wordresemble
from trans2es.commons.words_utils import QueryWordAttr, get_tips_word_type
from trans2es.commons.commons import get_tips_suggest_list
from trans2es.commons.commons import get_tips_suggest_list, get_tips_suggest_list_v1
from trans2es.utils.doctor_transfer import DoctorTransfer
from trans2es.utils.itemwiki_transfer import ItemWikiTransfer
from trans2es.utils.tag_transfer import TagTransfer
......@@ -201,3 +201,188 @@ class WordResemble(object):
redis_client.hset(QUERY_KEY, query_base64, json.dumps(keyword_value))
except:
logging.error("catch exception,err_msg:%s" % traceback.format_exc())
class WordResembleV1(object):
    """V1 variant of the synonym-keyword transfer helpers.

    Builds suggest documents with ``get_tips_suggest_list_v1`` and maintains
    two Redis hashes: per-name result counts and per-keyword synonym lists.
    Every public method logs and swallows errors instead of raising.
    """

    # Redis hash: name -> JSON object {"t": running total, "r": last diary count}.
    _TIPS_NUM_KEY = "search_tips:tips_mapping_num"
    # Redis hash: base64(word) -> JSON list of {base64(word): total} entries.
    _QUERY_KEY = "query:search_tip"
    # A keyword containing any of these markers gets no item document.
    _CUT_WORDS = ("下线", "停用", "已经下线", "账号停用")

    @staticmethod
    def _b64(text):
        """Return the UTF-8 base64 encoding of ``text`` as a ``str``."""
        return base64.b64encode(text.encode('utf8')).decode('utf8')

    @classmethod
    def _read_total_count(cls, name):
        """Return the cached total (``'t'``) stored for ``name``, or 0 if absent."""
        redis_data = redis_client.hget(cls._TIPS_NUM_KEY, name)
        logging.info("get redis_data:%s" % redis_data)
        redis_val_dict = json.loads(str(redis_data, encoding="utf-8")) if redis_data else {}
        return int(redis_val_dict['t']) if 't' in redis_val_dict else 0

    @classmethod
    def get_word_resemble_list(cls, keyword):
        """Return all synonym words recorded for ``keyword``.

        Each ``WordRel`` row matching the keyword contributes the ``word``
        values of its ``all_resembles`` relation; every value is split on
        the "、" separator.

        :return: flat list of words, or an empty list on any error.
        """
        try:
            resemble_list = list()
            for sql_obj in wordresemble.WordRel.objects.filter(keyword=keyword):
                for word in sql_obj.all_resembles.all().values_list('word', flat=True):
                    resemble_list.extend(word.split("、"))
            return resemble_list
        except Exception:
            logging.error("catch exception,err_msg:%s" % traceback.format_exc())
            return list()

    @classmethod
    def get_resemble_list(cls, instance):
        """Build the suggest document and suggest entries for one keyword row.

        :param instance: object exposing a ``keyword`` string attribute.
        :return: ``(item_dict, suggest_list)``. ``item_dict`` is left empty
            when the keyword contains one of the cut markers. On any error
            the fallback is ``([], [])``, so callers always get a 2-tuple.
        """
        try:
            keyword = instance.keyword
            item_dict = dict()
            if not any(marker in keyword for marker in cls._CUT_WORDS):
                item_dict["id"] = getMd5Digest(str(keyword))
                item_dict["ori_name"] = keyword
                item_dict["is_online"] = True
                item_dict["order_weight"] = QueryWordAttr.get_project_query_word_weight(keyword)
                item_dict["results_num"] = QueryWordAttr.get_query_results_num(keyword)
                item_dict["type_flag"] = get_tips_word_type(keyword)
                item_dict["offline_score"] = 0.0
                item_dict["tips_name_type"] = 4
            # NOTE(review): suggestions are still generated for cut keywords;
            # confirm that callers discard them when item_dict is empty.
            suggest_list = get_tips_suggest_list_v1(keyword)
            return (item_dict, suggest_list)
        except Exception:
            logging.error("catch exception,err_msg:%s" % traceback.format_exc())
            return ([], [])

    @classmethod
    def get_all_data_name_mapping_results_to_redis(cls, instance):
        """Refresh the cached result counts for ``instance.keyword``.

        Copies ``keyword`` onto ``instance.name`` (the attribute the helpers
        below read) and then runs the wiki, tag, doctor/hospital and diary
        counters in that order.
        """
        try:
            instance.name = instance.keyword
            # Wiki results.
            ItemWikiTransfer.get_wiki_data_name_mapping_results_to_redis(instance)
            # Service (tag) results.
            TagTransfer.get_tag_data_name_mapping_results_to_redis(instance)
            # Doctor and hospital results.
            DoctorTransfer.get_doctor_data_name_mapping_results_to_redis(instance)
            # Diary results.
            # NOTE(review): this calls WordResemble's diary counter rather
            # than this class's own copy; confirm that is intentional.
            WordResemble.get_diary_data_name_mapping_results_to_redis(instance)
        except Exception:
            logging.error("catch exception,err_msg:%s" % traceback.format_exc())

    @classmethod
    def get_diary_data_name_mapping_results_to_redis(cls, instance):
        """Count online diaries matching ``instance.name`` and cache the count.

        The stripped name is searched in the ``diary`` index with a
        ``cross_fields`` multi-match across tag, doctor, hospital, user,
        service and title fields, restricted to ``is_online`` documents.
        The hit count is stored under ``'r'`` and added to the existing
        ``'t'`` total of the name's entry in the tips-count hash.
        An empty name is skipped without querying.
        """
        try:
            tag_name = instance.name.strip()
            if not tag_name:
                return

            # Field name -> boost factor.
            multi_fields = {
                'tags': 8,
                'doctor.name': 4,
                'doctor.hospital.name': 3,
                'doctor.hospital.officer_name': 3,
                'user.last_name': 2,
                'service.name': 1,
                "title": 2}
            query_fields = ['^'.join((k, str(v))) for (k, v) in multi_fields.items()]
            multi_match = {
                'query': tag_name,
                'type': 'cross_fields',
                'operator': 'and',
                'fields': query_fields,
            }
            q = {
                'query': {
                    'bool': {
                        "should": [
                            {'multi_match': multi_match}
                        ],
                        "must": [
                            {"term": {"is_online": True}}
                        ],
                        "minimum_should_match": 1
                    }
                }
            }
            # size=0: only the total hit count is needed, not the documents.
            result_dict = ESPerform.get_search_results(ESPerform.get_cli(settings.GM_ORI_ES_INFO_LIST),
                                                       sub_index_name="diary", doc_type="diary", query_body=q,
                                                       offset=0, size=0)
            diary_hits = result_dict["total_count"]

            redis_data = redis_client.hget(cls._TIPS_NUM_KEY, tag_name)
            redis_val_dict = json.loads(str(redis_data, encoding="utf-8")) if redis_data else {}
            total_count = diary_hits
            if 't' in redis_val_dict:
                total_count += int(redis_val_dict['t'])
            redis_val_dict['t'] = total_count
            redis_val_dict['r'] = diary_hits
            redis_client.hset(cls._TIPS_NUM_KEY, tag_name, json.dumps(redis_val_dict))
        except Exception:
            logging.error("catch exception,err_msg:%s" % traceback.format_exc())

    @classmethod
    def set_data_to_redis(cls, instance):
        """Publish the keyword/synonym count lists to the query-tip hash.

        For every ``WordRel`` row matching ``instance.keyword``:

        * each synonym gets an entry ``base64(synonym)`` whose value is the
          JSON list ``[{keyword: total}, {synonym_1: total}, ...]``;
        * the keyword's own list collects ``{synonym: total}`` for every
          synonym, with ``{keyword: total}`` inserted after the first
          synonym of each row.

        All names inside the lists are base64-encoded and the totals come
        from the tips-count hash. Each total is read once per row instead of
        once per synonym. The keyword entry is written once after all rows
        are processed.
        """
        try:
            keyword = instance.keyword
            query_base64 = cls._b64(keyword)
            keyword_value = []

            for sql_obj in wordresemble.WordRel.objects.filter(keyword=keyword):
                words = list(sql_obj.all_resembles.all().values_list('word', flat=True))
                if not words:
                    continue

                keyword_entry = {query_base64: cls._read_total_count(keyword)}
                word_entries = [(cls._b64(w), cls._read_total_count(w)) for w in words]

                # The per-synonym list is identical for every synonym of this row.
                wordresemble_value = [keyword_entry]
                wordresemble_value.extend({name: total} for name, total in word_entries)
                serialized = json.dumps(wordresemble_value)

                for index, (item_name, total) in enumerate(word_entries):
                    keyword_value.append({item_name: total})
                    if index == 0:
                        keyword_value.append(keyword_entry)
                    redis_client.hset(cls._QUERY_KEY, item_name, serialized)

            if keyword_value:
                redis_client.hset(cls._QUERY_KEY, query_base64, json.dumps(keyword_value))
        except Exception:
            logging.error("catch exception,err_msg:%s" % traceback.format_exc())
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment