Commit 3bbe7357 authored by lixiaofang

Merge branch 'associate' into dev

parents dff7a143 c068fc92
# !/usr/bin/env python
# encoding=utf-8
from __future__ import absolute_import
import os
# set the default Django settings module for the 'celery' program.
# os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'settings')
os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'search_tips.settings')
import raven
from raven.contrib.celery import register_signal, register_logger_signal
from celery import Celery
from django.conf import settings
class Celery(Celery):
"""wrap for celery.Celery."""
def on_configure(self):
# check if sentry settings provided
if not settings.SENTRY_CELERY_ENDPOINT:
return
client = raven.Client(settings.SENTRY_CELERY_ENDPOINT)
# register a custom filter to filter out duplicate logs
register_logger_signal(client)
# hook into the Celery error handler
register_signal(client)
app = Celery('search_tips_tasks')
# Using a string here means the worker will not have to
# pickle the object when using Windows.
app.config_from_object('django.conf:settings')
app.autodiscover_tasks(lambda: settings.INSTALLED_APPS)
......@@ -9,5 +9,7 @@
<config name="initializer_list">
<element value="search_tips.django_init"/>
<element value="search.view.auto_tips"/>
<element value="associate.search.views.auto_tips"/>
<element value="associate.search.views.hotword_result"/>
</config>
</gm_rpcd_config>
from django.contrib import admin
# Register your models here.
from django.apps import AppConfig
class Trans2EsConfig(AppConfig):
name = 'associate'
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import os
import sys
import logging
import traceback
import pypinyin
from pypinyin import lazy_pinyin
from gm_types.gaia import SERVICE_ITEM_PRICE_TYPE, DOCTOR_TYPE
import uuid
def uuid4():
"""
:return:
"""
return uuid.uuid4().hex
def get_tips_suggest_list(instance_cn_name):
try:
# ch_full_weight = 6.0 * 1000
# py_full_weight = 3.0 * 1000
full_weight = 3.0 * 1000
py_acronym_full_weight = 3.0 * 1000
py_acronym_prefix_weight = 2
ch_prefix_weight = 1.5
py_prefix_weight = 1.0
# extra weight when the matched term starts at the beginning of the word
begin_prefix_weight = 1.2 * 1000
ch_full_word = instance_cn_name.strip()
py_full_word = ''.join(lazy_pinyin(ch_full_word))
py_acronym_full_word = ''.join(lazy_pinyin(ch_full_word, style=pypinyin.FIRST_LETTER))
suggest_dict = dict()
cur_index = 0
# Chinese characters (suffix terms of the original name)
for i in range(len(ch_full_word)):
ch_name_term = ch_full_word[i:].strip()
if ch_name_term and ch_full_word[i] != "(" and ch_full_word[i] != ")":
prefix_weight = ch_prefix_weight if len(ch_name_term) != len(ch_full_word) else full_weight
suggest_type = 0 if len(ch_name_term) != len(ch_full_word) else 1
term_begin_prefix_weight = begin_prefix_weight if i == 0 else 1.0
suggest_item = {
"input": [ch_name_term],
"word_weight": (1.0 * len(ch_name_term) / len(
(ch_full_word))) * prefix_weight * term_begin_prefix_weight,
"suggest_type": suggest_type
}
if ch_name_term[0] not in suggest_dict:
cur_index += 1
suggest_item["cur_index"] = cur_index
suggest_dict[ch_name_term[0]] = suggest_item
else:
suggest_dict[ch_name_term[0]]["input"].append(ch_name_term)
if suggest_item["word_weight"] > suggest_dict[ch_name_term[0]]["word_weight"]:
suggest_dict[ch_name_term[0]]["word_weight"] = suggest_item["word_weight"]
suggest_dict[ch_name_term[0]]["suggest_type"] = suggest_item["suggest_type"]
# full pinyin (suffix terms)
if py_full_word != ch_full_word:
for i in range(len(py_full_word)):
py_name_term = py_full_word[i:].strip()
if py_name_term and py_full_word[i] != "(" and py_full_word[i] != ")":
prefix_weight = py_prefix_weight if len(py_name_term) != len(py_full_word) else full_weight
suggest_type = 2 if len(py_name_term) != len(py_full_word) else 3
term_begin_prefix_weight = begin_prefix_weight if i == 0 else 1.0
suggest_item = {
"input": [py_name_term],
"word_weight": (1.0 * len(py_name_term) / len(
py_full_word)) * prefix_weight * term_begin_prefix_weight,
"suggest_type": suggest_type
}
if py_name_term[0] not in suggest_dict:
cur_index += 1
suggest_item["cur_index"] = cur_index
suggest_dict[py_name_term[0]] = suggest_item
else:
suggest_dict[py_name_term[0]]["input"].append(py_name_term)
if suggest_item["word_weight"] > suggest_dict[py_name_term[0]]["word_weight"]:
suggest_dict[py_name_term[0]]["word_weight"] = suggest_item["word_weight"]
suggest_dict[py_name_term[0]]["suggest_type"] = suggest_item["suggest_type"]
# pinyin acronym (first-letter abbreviations)
if py_acronym_full_word != py_full_word:
for i in range(len(py_acronym_full_word)):
py_acronym_term = py_acronym_full_word[i:].strip()
if py_acronym_term and py_acronym_full_word[i] != "(" and py_acronym_full_word[i] != ")":
prefix_weight = py_acronym_prefix_weight if len(py_acronym_term) != len(
py_acronym_full_word) else py_acronym_full_weight
suggest_type = 4 if len(py_acronym_term) != len(py_acronym_full_word) else 5
term_begin_prefix_weight = begin_prefix_weight if i == 0 else 1.0
suggest_item = {
"input": [py_acronym_term],
"word_weight": (1.0 * len(py_acronym_term) / len(
py_acronym_full_word)) * prefix_weight * term_begin_prefix_weight,
"suggest_type": suggest_type
}
if py_acronym_term[0] not in suggest_dict:
cur_index += 1
suggest_item["cur_index"] = cur_index
suggest_dict[py_acronym_term[0]] = suggest_item
else:
suggest_dict[py_acronym_term[0]]["input"].append(py_acronym_term)
if suggest_item["word_weight"] > suggest_dict[py_acronym_term[0]]["word_weight"]:
suggest_dict[py_acronym_term[0]]["word_weight"] = suggest_item["word_weight"]
suggest_dict[py_acronym_term[0]]["suggest_type"] = suggest_item["suggest_type"]
return suggest_dict.values()
except:
logging.error("catch exception,err_msg:%s" % traceback.format_exc())
return list()
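For orientation, a minimal usage sketch of the helper above (illustrative only, not part of this commit; the import path follows the associate.commons.commons module referenced later in this merge):

# illustrative sketch: exercise get_tips_suggest_list on a sample tag name
from associate.commons.commons import get_tips_suggest_list

for item in get_tips_suggest_list("双眼皮"):
    # each entry groups the suffix terms that share a leading character; full words and
    # terms starting at position 0 carry boosted weights, and suggest_type encodes
    # Chinese / pinyin / acronym and whether the term is the full word
    print(item["input"], item["suggest_type"], round(item["word_weight"], 2))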
# coding=utf8
from __future__ import unicode_literals, absolute_import, print_function
import logging
import traceback
import json
from libs.cache import redis_client
class TagTab(object):
Weight = {
'search': 0.4,
'trans': 0.6
}
SearchScore = {
0.7: 100,
0.1: 80,
0.05: 60,
0.01: 40,
0: 20,
}
TransScore = {
0.7: 100,
0.5: 80,
0.3: 60,
0.1: 40,
0: 20
}
def cal_score(self, search_rate, conversion_rate):
s1 = self._cal_score(search_rate, 'SearchScore') * self.Weight['search']
s2 = self._cal_score(conversion_rate, 'TransScore') * self.Weight['trans']
return s1 + s2
def _cal_score(self, item, type_):
item *= 100.0
scoreweight = getattr(self, type_)
for k in sorted(scoreweight.keys(), reverse=True):
if item >= k:
return scoreweight[k]
class DocTab(TagTab):
SearchScore = {
0.04: 100,
0.01: 80,
0.001: 60,
0.0002: 40,
0: 20,
}
TransScore = {
0.47: 100,
0.2: 80,
0.1: 60,
0.01: 40,
0: 20
}
class HosTab(TagTab):
SearchScore = {
0.47: 100,
0.2: 80,
0.1: 60,
0.01: 40,
0: 20
}
TransScore = {
1: 100,
0.45: 90,
0.27: 80,
0.21: 70,
0.15: 60,
0.12: 50,
0.09: 40,
0.06: 30,
0.04: 20,
0: 10,
}
class QueryWordAttr(object):
# latest date seen in the TagConversion table (and the doctor / hospital equivalents)
tag_latest_date = None
doctor_latest_date = None
hospital_latest_date = None
tips_num_redis_key_prefix = "query:associate_tip:tag_id:"
tagtab = TagTab()
doctab = DocTab()
hostab = HosTab()
@classmethod
def get_query_results_num(cls, id):
try:
key = cls.tips_num_redis_key_prefix + str(id)
results_num = redis_client.get(key)
# results_num = json.loads(str(redis_data, encoding="utf-8")) if redis_data else {}
return int(results_num)
except:
logging.error("catch exception,err_msg:%s" % traceback.format_exc())
return 0
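A short, illustrative sketch (not part of the commit) of how the score tables above combine; the import path mirrors the one used by the views in this merge:

from associate.commons.words_utils import TagTab

tab = TagTab()
score = tab.cal_score(search_rate=0.005, conversion_rate=0.004)
# rates are scaled to percent and bucketed against the tables:
# 0.5% -> SearchScore 80, 0.4% -> TransScore 60, so score = 80 * 0.4 + 60 * 0.6 = 68.0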
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import django.db.models
from django.conf import settings
from django.core.management.base import BaseCommand
import traceback
import logging
import six
import sys
from libs.es import ESPerform
from libs.table_scan import TableSlicer, TableSlicerChunk
from associate.type_info import get_type_info_map, TypeInfo
class Job(object):
__es = None
def __init__(self, sub_index_name, type_name, chunk):
assert isinstance(sub_index_name, six.string_types)
assert isinstance(type_name, six.string_types)
assert isinstance(chunk, TableSlicerChunk)
self._sub_index_name = sub_index_name
self._type_name = type_name
self._chunk = chunk
@classmethod
def get_es(cls):
if cls.__es is None:
cls.__es = ESPerform().get_cli()
return cls.__es
def __call__(self):
type_info = get_type_info_map()[self._type_name]
assert isinstance(type_info, TypeInfo)
result = type_info.insert_table_chunk(
sub_index_name=self._sub_index_name,
table_chunk=self._chunk,
es=self.get_es(),
)
class Command(BaseCommand):
args = ''
help = 'dump data to elasticsearch, parallel'
from optparse import make_option
option_list = BaseCommand.option_list + (
make_option('-t', '--type', dest='type', help='type name to dump data to elasticsearch', metavar='TYPE',
default=''),
make_option('-i', '--index-prefix', dest='index_prefix', help='index name to dump data to elasticsearch',
metavar='INDEX_PREFIX'),
make_option('-p', '--parallel', dest='parallel', help='parallel process count', metavar='PARALLEL'),
make_option('-s', '--pks', dest='pks', help='specify sync pks, comma separated', metavar='PKS', default=''),
make_option('--streaming-slicing', dest='streaming_slicing', action='store_true', default=True),
make_option('--no-streaming-slicing', dest='streaming_slicing', action='store_false', default=True),
make_option('-S', '--sync_type', dest='sync_type', help='sync data to es', metavar='TYPE', default='')
)
def __sync_data_by_type(self, type_name):
try:
type_info = get_type_info_map()[type_name]
query_set = type_info.queryset
slicer = TableSlicer(queryset=query_set, chunk_size=type_info.bulk_insert_chunk_size)
for chunk in slicer.chunks():
job = Job(
sub_index_name=type_info.name,
type_name=type_name,
chunk=chunk,
)
job()
except:
logging.error("catch exception,err_msg:%s" % traceback.format_exc())
def handle(self, *args, **options):
try:
type_name_list = get_type_info_map().keys()
for type_name in type_name_list:
if len(options["type"]):
if options["type"] == "all" or type_name == options["type"]:
logging.info("begin sync [%s] data to es!" % type_name)
self.__sync_data_by_type(type_name)
except:
logging.error("catch exception,err_msg:%s" % traceback.format_exc())
#!/usr/bin/env python
# -*- coding: utf-8 -*-
from django.conf import settings
from django.core.management.base import BaseCommand
import traceback
import logging
from libs.es import ESPerform
from associate.type_info import get_type_info_map
class Command(BaseCommand):
args = ''
help = 'dump mapping to elasticsearch'
from optparse import make_option
option_list = BaseCommand.option_list + (
make_option('-t', '--type', dest='type', help='type name to dump data to elasticsearch', metavar='TYPE',
default=''),
make_option('-T', '--indices_template', dest='indices_template',
help='index template name to dump data to elasticsearch', metavar='TYPE',
default=''),
make_option('-i', '--index-prefix', dest='index_prefix', help='index name to dump data to elasticsearch',
metavar='INDEX_PREFIX'),
make_option('-p', '--parallel', dest='parallel', help='parallel process count', metavar='PARALLEL'),
make_option('-s', '--pks', dest='pks', help='specify sync pks, comma separated', metavar='PKS', default=''),
make_option('--streaming-slicing', dest='streaming_slicing', action='store_true', default=True),
make_option('--no-streaming-slicing', dest='streaming_slicing', action='store_false', default=True),
)
def handle(self, *args, **options):
try:
es_cli = ESPerform.get_cli()
type_name = "associate_tag"
if len(options["type"]):
if options["type"] == "all" or type_name == options["type"]:
type_name = options["type"]
official_index_name = ESPerform.get_official_index_name(type_name)
index_exists = es_cli.indices.exists(official_index_name)
if not index_exists:
logging.info("begin create [%s] index!" % type_name)
ESPerform.create_index(es_cli, type_name)
logging.info("begin create [%s] mapping!" % type_name)
ESPerform.put_index_mapping(es_cli, type_name)
if len(options["indices_template"]):
template_file_name = options["indices_template"]
if ESPerform.put_indices_template(es_cli=es_cli, template_file_name=template_file_name,
template_name=template_file_name):
logging.info("put indices template suc!")
else:
logging.error("put indices template err!")
except:
logging.error("catch exception,err_msg:%s" % traceback.format_exc())
#!/usr/bin/env python
# -*- coding: utf-8 -*-
from django.core.management.base import BaseCommand
import traceback
import logging
from libs.table_scan import TableSlicer
from associate.type_info import tips_attr_sync_to_redis_type_info_map
class Command(BaseCommand):
args = ''
help = 'dump data to redis, parallel'
from optparse import make_option
option_list = BaseCommand.option_list + (
make_option('-t', '--type', dest='type', help='type name to dump data to elasticsearch', metavar='TYPE',
default=''),
make_option('-i', '--index-prefix', dest='index_prefix', help='index name to dump data to elasticsearch',
metavar='INDEX_PREFIX'),
make_option('-p', '--parallel', dest='parallel', help='parallel process count', metavar='PARALLEL'),
make_option('-s', '--pks', dest='pks', help='specify sync pks, comma separated', metavar='PKS', default=''),
make_option('--streaming-slicing', dest='streaming_slicing', action='store_true', default=True),
make_option('--no-streaming-slicing', dest='streaming_slicing', action='store_false', default=True),
make_option('-S', '--sync_type', dest='sync_type', help='sync data to es', metavar='TYPE', default='')
)
def handle(self, *args, **options):
try:
type_name_list = tips_attr_sync_to_redis_type_info_map().keys()
for type_name in type_name_list:
if len(options["type"]):
if options["type"] == "all" or type_name == options["type"]:
logging.info("begin sync [%s] data to redis!" % type_name)
type_info = tips_attr_sync_to_redis_type_info_map()[type_name]
query_set = type_info.queryset
slicer = TableSlicer(queryset=query_set, chunk_size=type_info.bulk_insert_chunk_size)
for chunk in slicer.chunks():
for instance in list(chunk):
logging.info("get instance:%s" % instance)
type_info.get_data_func(instance)
except:
logging.error("catch exception,err_msg:%s" % traceback.format_exc())
{
"dynamic":"strict",
"properties": {
"id":{"type":"text"},
"suggest":{
"analyzer":"keyword",
"search_analyzer":"keyword",
"type":"completion",
"contexts":[
{
"name":"is_online",
"type": "category"
}
]
},
"suggest_type":{"type":"long"},//0-汉字,1-汉字全拼,2-拼音,3-拼音全拼,4-拼音简写,5-拼音简写全拼
"agile_tag_type":{"type":"long"},//tips数据所属类型,0-tag,1-hospital,2-doctor,3-wiki
"ori_name":{"type":"keyword"},//原名称
"results_num":{"type":"integer"},//结果数量
"is_online":{"type":"boolean"},//上线
"offline_score":{"type":"double"},//离线分
"agile_tag_id":{"type":"long"},//标签ID
"create_tag_type":{"type":"long"},//标签创建类型
"style":{"type":"long"},//新标签样式
"topic_recommend_sort":{"type":"long"}
}
}
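For reference, a hedged sketch (field values are made up, not taken from the repo) of a document shaped to fit the "associate_tag" mapping above, written through the existing bulk helper:

# illustrative only: the real documents are built by the type_info / transfer code in this merge
doc = {
    "id": "0d61f8370cad1d412f80b84d143e1257",     # md5 of the tag name, as elsewhere in this merge
    "ori_name": "双眼皮",
    "suggest": {
        "input": ["双眼皮", "shuangyanpi", "syp"],
        "weight": 3000,
        "contexts": {"is_online": [True]},
    },
    "suggest_type": 1,
    "agile_tag_type": 0,
    "agile_tag_id": 123,
    "results_num": 42,
    "is_online": True,
    "offline_score": 0.0,
    "create_tag_type": 1,
    "style": 1,
    "topic_recommend_sort": 9999,
}
# ESPerform.es_helpers_bulk(ESPerform.get_cli(), [doc], "associate_tag", auto_create_index=True)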
from django.db import models
# Create your models here.
from __future__ import unicode_literals, absolute_import, print_function
# !/usr/bin/env python
# -*- coding: utf-8 -*-
import logging
import traceback
import base64
from django.db import models
from gm_types.gaia import (
AGILE_TAG_TYPE,
AGILE_TAG_CREATE_TYPE,
AGILE_TAG_STYLE,
AGILE_TAG_RECOMMEND_TYPE,
)
class BaseModel(models.Model):
class Meta:
abstract = True
is_online = models.BooleanField(verbose_name=u"是否有效", default=True)
create_time = models.DateTimeField(verbose_name=u'创建时间', auto_now_add=True)
update_time = models.DateTimeField(verbose_name=u'更新时间', auto_now=True)
class AgileTag(BaseModel):
class Meta:
verbose_name = u'新标签'
db_table = 'api_agile_tag'
app_label = 'api'
name = models.CharField(verbose_name=u'新标签名字', max_length=128, null=False, unique=True, default='')
description = models.TextField(verbose_name=u'描述', default='')
create_tag_type = models.CharField(verbose_name=u"标签创建类型", max_length=3, choices=AGILE_TAG_CREATE_TYPE)
style = models.CharField(verbose_name=u"标签样式", max_length=3, choices=AGILE_TAG_STYLE)
topic_recommend_sort = models.IntegerField(verbose_name=u'帖子推荐排序', default=9999)
class AgileTagType(BaseModel):
class Meta:
verbose_name = u'新标签类型(可多选)'
db_table = 'api_agile_tag_type'
app_label = 'api'
agile_tag_id = models.IntegerField(verbose_name=u'新标签', db_index=True)
agile_tag_type = models.CharField(verbose_name=u"标签类型", max_length=3, choices=AGILE_TAG_TYPE)
@property
def get_by_id_name(self):
try:
results = AgileTag.objects.filter(id=self.agile_tag_id).values()
return results[0]
except:
logging.error("catch exception,err_msg:%s" % traceback.format_exc())
return {}
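Illustrative sketch only (not part of the commit): how the property above is consumed, roughly the way TagTransfer.get_tag_suggest_data_list does further below in this merge.

row = AgileTagType.objects.filter(is_online=True).first()   # hypothetical lookup
if row:
    tag = row.get_by_id_name        # dict of the referenced AgileTag row, {} on failure
    print(tag.get("name"), row.agile_tag_type)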
# !/usr/bin/env python
# encoding=utf-8
from __future__ import absolute_import
from django.contrib import admin
# Register your models here.
from __future__ import unicode_literals
from django.apps import AppConfig
class SearchConfig(AppConfig):
name = 'search'
from __future__ import unicode_literals
from django.db import models
# Create your models here.
from django.test import TestCase
# Create your tests here.
#!/usr/bin/env python
# -*- coding: utf-8 -*-
from __future__ import unicode_literals, absolute_import, print_function
import logging
import traceback
from libs.es import ESPerform
def get_suggest_tips(query, agile_tag_type):
try:
# the iOS keyboard sometimes appends \u2006 (six-per-em space) to the query
query = query.replace("\u2006", '')
q = {
"suggest": {
"tips-suggest": {
"prefix": query,
"completion": {
"field": "suggest",
"size": 50,
"contexts": {
"is_online": [True]
},
"fuzzy": {
"fuzziness": 0
}
}
}
}
}
logging.info("get qqqqqqqq:%s" % q)
have_read_tips_set = set()
ret_list = list()
result_dict = ESPerform.get_search_results(ESPerform.get_cli(), sub_index_name="associate_tag", query_body=q,
offset=0, size=50, is_suggest_request=True)
agile_type_id = set()
logging.info("get agile_tag_type:%s" % agile_tag_type)
if agile_tag_type > 0:
q = {
"query": {
"term": {
"agile_tag_type": agile_tag_type
}
}
}
result_dicts = ESPerform.get_search_results(ESPerform.get_cli(), sub_index_name="associate_tag",
query_body=q,
offset=0, size=100, is_suggest_request=True)
logging.info("get result_dict_type:%s" % result_dicts)
for tips_item in result_dicts["hits"]["hits"]:
agile_type_id.add(tips_item["_source"]["agile_tag_id"])
logging.info("get agile_type_id:%s" % agile_type_id)
# keep only the suggestions whose tag belongs to this type
logging.info("get result_dict:%s" % result_dict)
for tips_item in result_dict["suggest"]["tips-suggest"]:
for hit_item in tips_item["options"]:
if hit_item["_source"]["ori_name"] not in have_read_tips_set:
agile_tag_id = hit_item["_source"]["agile_tag_id"]
if agile_tag_id in agile_type_id:
have_read_tips_set.add(hit_item["_source"]["ori_name"])
highlight_marks = u'<ems>%s</ems>' % query
hit_item["_source"]["highlight_name"] = hit_item["_source"]["ori_name"].replace(query,
highlight_marks)
ori_name = hit_item["_source"]["ori_name"]
results_num = hit_item["_source"]["results_num"]
highlight_name = hit_item["_source"]["highlight_name"]
ret_list.append(
[{"ori_name": ori_name, "results_num": results_num, "highlight": highlight_name}])
return ret_list
except:
logging.error("catch exception,err_msg:%s" % traceback.format_exc())
return list()
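For orientation (illustrative only, not part of the commit), the RPC view below passes the query and an optional agile_tag_type straight through to this helper; the return value is a list of one-item lists:

suggestions = get_suggest_tips("双眼皮", agile_tag_type=1)
# e.g. [[{"ori_name": "双眼皮", "results_num": 42, "highlight": "<ems>双眼皮</ems>"}], ...]  (values made up)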
#!/usr/bin/env python
# -*- coding: utf-8 -*-
from __future__ import unicode_literals, absolute_import, print_function
from gm_rpcd.all import bind
import logging
import traceback
import json
from libs.es import ESPerform
from libs.cache import redis_client
from libs.tools import json_http_response
from django.shortcuts import render
from search.utils.auto_tips import get_suggest_tips
def auto_complete(request):
try:
"""auto complate words/tags/doctors etc.
URL:
~/search/auto_complete?scope=[kw]&q=双
Return:
{'error': 0|1, 'data': [word, word, word],}
"""
q = request.GET.get('q', '').strip()
lat = request.GET.get('lat', '0').strip()
lng = request.GET.get('lng', '0').strip()
if not q:
return json_http_response({'error': 0, 'data': []})
data = get_suggest_tips(q, float(lat), float(lng))
result = {
'error': 0,
'data': data,
}
logging.info("duan add,q is:%s,result:%s" % (str(q).encode("utf-8"),str(result).encode('utf-8')))
return json_http_response(result)
except:
logging.error("catch exception,err_msg:%s" % traceback.format_exc())
return json_http_response({'error': 1, 'data': []})
#!/usr/bin/env python
# -*- coding: utf-8 -*-
from __future__ import unicode_literals, absolute_import, print_function
from gm_rpcd.all import bind
import logging
import traceback
from associate.search.utils.auto_tips import get_suggest_tips
@bind("search_tips/search/auto_associate_query")
def auto_associate_query(query, agile_tag_type=-1):
try:
"""auto complate words/tags/doctors etc.
URL:
~/api/auto_complete?scope=[kw]&q=双
Return:
{'error': 0|1, 'data': [word, word, word],}
"""
# q = request.GET.get('q', '').strip()
# if not q:
# return json_http_response({'error': 0, 'data': []})
data = get_suggest_tips(query, agile_tag_type)
result = {
'error': 0,
'data': data,
}
return result
except:
logging.error("catch exception,err_msg:%s" % traceback.format_exc())
return {"error": 1, "data": []}
#!/usr/bin/env python
# -*- coding: utf-8 -*-
from __future__ import unicode_literals, absolute_import, print_function
from gm_rpcd.all import bind
import logging
import traceback
from associate.commons.words_utils import QueryWordAttr
@bind("search_tips/search/tag_hotword_num")
def get_hotword_num(tag_id_list):
try:
tag_id_num = list()
for id in tag_id_list:
result = QueryWordAttr.get_query_results_num(id)
tag_id_num.append({"tag_id": id, "hot_num": result})
return {"tag_hotword_num": tag_id_num}
except:
logging.error("catch exception,err_msg:%s" % traceback.format_exc())
return {"tag_hotword_num": []}
from django.test import TestCase
# Create your tests here.
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import os
import sys
import logging
import traceback
from libs.tools import tzlc, getMd5Digest
from libs.es import ESPerform
from libs.cache import redis_client
import json
import base64
from associate.models import agile_tag
from associate.commons.commons import get_tips_suggest_list
from django.conf import settings
from associate.commons.words_utils import QueryWordAttr
class TagTransfer(object):
@classmethod
def get_tag_suggest_data_list(cls, instance):
try:
ret_list = list()
item_dict = dict()
results = instance.get_by_id_name
name = results["name"].strip()
item_dict["id"] = getMd5Digest(str(name))
item_dict["ori_name"] = name
item_dict["is_online"] = results["is_online"]
item_dict["results_num"] = QueryWordAttr.get_query_results_num(instance.agile_tag_id)
item_dict["agile_tag_type"] = instance.agile_tag_type
item_dict["agile_tag_id"] = instance.agile_tag_id
item_dict["create_tag_type"] = results["create_tag_type"]
item_dict["style"] = results["style"]
item_dict["topic_recommend_sort"] = results["topic_recommend_sort"]
item_dict["offline_score"] = 0.0
ret_list.append(item_dict)
suggest_list = get_tips_suggest_list(name)
logging.info("get ret_list:%s" % ret_list)
logging.info("get suggest_list:%s" % ret_list)
return (item_dict, suggest_list)
except:
logging.error("catch exception,err_msg:%s" % traceback.format_exc())
return ([], [])
@classmethod
def get_tractate_data_name_mapping_results_to_redis(cls, name):
try:
tag_name = name
q = dict()
if tag_name:
multi_fields = {
"content": 6,
"tractate_tag_name": 3,
"tractate_tag_name_content": 4,
"author": 2, }
query_fields = ['^'.join((k, str(v))) for (k, v) in multi_fields.items()]
multi_match = {
'query': tag_name,
'type': 'cross_fields',
'operator': 'and',
'fields': query_fields,
}
q['query'] = {
'bool': {
"should": [
{'multi_match': multi_match}
],
"must": [
{"term": {"is_online": True}}
],
"minimum_should_match": 1
}
}
result_dict = ESPerform.get_search_results(ESPerform.get_cli(settings.GM_ORI_ES_INFO_LIST),
sub_index_name="tractate", doc_type="tractate", query_body=q,
offset=0, size=0)
doctor_results = result_dict["total_count"]
return doctor_results
except:
logging.error("catch exception,err_msg:%s" % traceback.format_exc())
@classmethod
def set_data_to_redis(self, instance):
try:
query_key = "query:associate_tip:tag_id:"
name = agile_tag.AgileTag.objects.filter(id=instance.agile_tag_id).values_list("name", flat=True)[0]
tractate_results = TagTransfer.get_tractate_data_name_mapping_results_to_redis(name)
key = query_key + str(instance.agile_tag_id)
redis_client.set(key, tractate_results)
except:
logging.error("catch exception,err_msg:%s" % traceback.format_exc())
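Illustrative sketch (not part of the commit): set_data_to_redis above writes the tractate hit count under "query:associate_tip:tag_id:<agile_tag_id>", which is exactly the key QueryWordAttr.get_query_results_num reads back:

# assuming `instance` is an AgileTagType row (hypothetical)
TagTransfer.set_data_to_redis(instance)
hot_num = QueryWordAttr.get_query_results_num(instance.agile_tag_id)   # cached count, 0 on any miss/error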
from django.shortcuts import render
# Create your views here.
import sys
from gm_rpcd.commands.utils import add_cwd_to_path
from gm_rpcd.internals.utils import serve
def main(args):
add_cwd_to_path()
from gm_rpcd.internals.configuration import config
config.is_develop_mode = True
config.freeze()
host = '127.0.0.1'
port = 9000
try:
first_arg = args[0]
except IndexError:
pass
else:
if ':' in first_arg:
host, port = first_arg.split(':')
port = int(port)
else:
port = int(first_arg)
print('Serving on {}:{}'.format(host, port))
serve(host=host, port=port)
if __name__ == '__main__':
main(sys.argv[1:])
from django.contrib import admin
# Register your models here.
default_app_config = 'injection.data_sync.apps.DataSyncApp'
# coding=utf-8
from __future__ import unicode_literals, print_function, absolute_import
from django.apps import AppConfig
class DataSyncApp(AppConfig):
name = 'injection.data_sync'
label = 'injected_data_sync'
# -*- coding: UTF-8 -*-
import logging
import traceback
import json
import pymysql
import threading
import random
import datetime
from celery import shared_task
from django.conf import settings
from django.core import serializers
from trans2es.type_info import get_type_info_map,TypeInfo
# from rpc.all import get_rpc_remote_invoker
from libs.es import ESPerform
from libs.cache import redis_client
@shared_task
def write_to_es(es_type, pk_list, use_batch_query_set=False):
try:
pk_list = list(frozenset(pk_list))
type_info_map = get_type_info_map()
type_info = type_info_map[es_type]
logging.info("consume es_type:%s" % str(es_type))
type_info.insert_table_by_pk_list(
sub_index_name=type_info.name,
pk_list=pk_list,
use_batch_query_set=use_batch_query_set,
es=ESPerform.get_cli()
)
except:
logging.error("catch exception,err_msg:%s" % traceback.format_exc())
\ No newline at end of file
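Illustrative sketch (not part of the commit): write_to_es is an ordinary Celery shared_task, so a data-sync hook can enqueue it by type name ("tag" is a hypothetical key of get_type_info_map()):

from injection.data_sync.tasks import write_to_es

write_to_es.delay("tag", [1, 2, 3])   # routed onto the "tapir-search_tips" queue by the CeleryTaskRouter below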
from django.db import models
# Create your models here.
from django.test import TestCase
# Create your tests here.
from django.shortcuts import render
# Create your views here.
......@@ -17,9 +17,8 @@ class ESPerform(object):
cli_info_list = settings.ES_INFO_LIST
index_prefix = settings.ES_INDEX_PREFIX
@classmethod
def get_cli(cls,es_ip_list=None):
def get_cli(cls, es_ip_list=None):
try:
es_info_list = es_ip_list if es_ip_list else cls.cli_info_list
# if es_ip_list:  # tricky workaround to stay compatible with the test ES
......@@ -32,7 +31,7 @@ class ESPerform(object):
return None
@classmethod
def get_official_index_name(cls,sub_index_name,index_flag=None):
def get_official_index_name(cls, sub_index_name, index_flag=None):
"""
:remark:get official es index name
:param sub_index_name:
......@@ -40,7 +39,7 @@ class ESPerform(object):
:return:
"""
try:
assert (index_flag in [None,"read","write"])
assert (index_flag in [None, "read", "write"])
official_index_name = cls.index_prefix + "-" + sub_index_name
if index_flag:
......@@ -52,11 +51,17 @@ class ESPerform(object):
return None
@classmethod
def __load_mapping(cls,doc_type):
def __load_mapping(cls, doc_type):
try:
mapping_file_path = os.path.join(
os.path.dirname(__file__),
'..', 'trans2es','mapping', '%s.json' % (doc_type,))
'..', 'trans2es', 'mapping', '%s.json' % (doc_type,))
if doc_type == "associate_tag":
mapping_file_path = os.path.join(
os.path.dirname(__file__),
'..', 'associate', 'mapping', '%s.json' % (doc_type,))
mapping = ''
with open(mapping_file_path, 'r') as f:
for line in f:
......@@ -69,7 +74,7 @@ class ESPerform(object):
return None
@classmethod
def create_index(cls,es_cli,sub_index_name):
def create_index(cls, es_cli, sub_index_name):
"""
:remark: create es index,alias index
:param sub_index_name:
......@@ -82,11 +87,11 @@ class ESPerform(object):
index_exist = es_cli.indices.exists(official_index_name)
if not index_exist:
es_cli.indices.create(official_index_name)
read_alias_name = cls.get_official_index_name(sub_index_name,"read")
es_cli.indices.put_alias(official_index_name,read_alias_name)
read_alias_name = cls.get_official_index_name(sub_index_name, "read")
es_cli.indices.put_alias(official_index_name, read_alias_name)
write_alias_name = cls.get_official_index_name(sub_index_name,"write")
es_cli.indices.put_alias(official_index_name,write_alias_name)
write_alias_name = cls.get_official_index_name(sub_index_name, "write")
es_cli.indices.put_alias(official_index_name, write_alias_name)
return True
except:
......@@ -94,7 +99,7 @@ class ESPerform(object):
return False
@classmethod
def put_index_mapping(cls,es_cli,sub_index_name,mapping_type="_doc"):
def put_index_mapping(cls, es_cli, sub_index_name, mapping_type="_doc"):
"""
:remark: put index mapping
:param es_cli:
......@@ -105,13 +110,12 @@ class ESPerform(object):
try:
assert (es_cli is not None)
write_alias_name = cls.get_official_index_name(sub_index_name,"write")
write_alias_name = cls.get_official_index_name(sub_index_name, "write")
index_exist = es_cli.indices.exists(write_alias_name)
if not index_exist:
return False
mapping_dict = cls.__load_mapping(sub_index_name)
es_cli.indices.put_mapping(index=write_alias_name,body=mapping_dict,doc_type=mapping_type)
es_cli.indices.put_mapping(index=write_alias_name, body=mapping_dict, doc_type=mapping_type)
return True
except:
......@@ -119,7 +123,7 @@ class ESPerform(object):
return False
@classmethod
def put_indices_template(cls,es_cli,template_file_name, template_name):
def put_indices_template(cls, es_cli, template_file_name, template_name):
"""
:remark put index template
:param es_cli:
......@@ -131,7 +135,7 @@ class ESPerform(object):
assert (es_cli is not None)
mapping_dict = cls.__load_mapping(template_file_name)
es_cli.indices.put_template(name=template_name,body=mapping_dict)
es_cli.indices.put_template(name=template_name, body=mapping_dict)
return True
except:
......@@ -139,19 +143,19 @@ class ESPerform(object):
return False
@classmethod
def es_helpers_bulk(cls,es_cli,data_list,sub_index_name,auto_create_index=False,doc_type="_doc"):
def es_helpers_bulk(cls, es_cli, data_list, sub_index_name, auto_create_index=False, doc_type="_doc"):
try:
assert (es_cli is not None)
official_index_name = cls.get_official_index_name(sub_index_name)
official_index_name = cls.get_official_index_name(sub_index_name, "write")
index_exists = es_cli.indices.exists(official_index_name)
if not index_exists:
if not auto_create_index:
logging.error("index:%s is not existing,bulk data error!" % official_index_name)
return False
else:
cls.create_index(es_cli,sub_index_name)
cls.put_index_mapping(es_cli,sub_index_name)
cls.create_index(es_cli, sub_index_name)
cls.put_index_mapping(es_cli, sub_index_name)
bulk_actions = []
for data in data_list:
......@@ -162,7 +166,7 @@ class ESPerform(object):
'_id': data['id'],
'_source': data,
})
elasticsearch.helpers.bulk(es_cli,bulk_actions)
elasticsearch.helpers.bulk(es_cli, bulk_actions)
return True
except:
......@@ -170,41 +174,43 @@ class ESPerform(object):
return False
@classmethod
def get_search_results(cls, es_cli,sub_index_name,query_body,offset=0,size=10,
auto_create_index=False,doc_type="_doc",aggregations_query=False,is_suggest_request=False,batch_search=False):
def get_search_results(cls, es_cli, sub_index_name, query_body, offset=0, size=10,
auto_create_index=False, doc_type="_doc", aggregations_query=False, is_suggest_request=False,
batch_search=False):
try:
assert (es_cli is not None)
official_index_name = cls.get_official_index_name(sub_index_name,"read")
official_index_name = cls.get_official_index_name(sub_index_name, "read")
index_exists = es_cli.indices.exists(official_index_name)
if not index_exists:
if not auto_create_index:
logging.error("index:%s is not existing,get_search_results error!" % official_index_name)
return None
else:
cls.create_index(es_cli,sub_index_name)
cls.put_index_mapping(es_cli,sub_index_name)
cls.create_index(es_cli, sub_index_name)
cls.put_index_mapping(es_cli, sub_index_name)
logging.info("duan add,query_body:%s" % str(query_body).encode("utf-8"))
if not batch_search:
res = es_cli.search(index=official_index_name,doc_type=doc_type,body=query_body,from_=offset,size=size)
res = es_cli.search(index=official_index_name, doc_type=doc_type, body=query_body, from_=offset,
size=size)
if is_suggest_request:
return res
else:
result_dict = {
"total_count":res["hits"]["total"],
"hits":res["hits"]["hits"]
"total_count": res["hits"]["total"],
"hits": res["hits"]["hits"]
}
if aggregations_query:
result_dict["aggregations"] = res["aggregations"]
return result_dict
else:
res = es_cli.msearch(body=query_body,index=official_index_name, doc_type=doc_type)
res = es_cli.msearch(body=query_body, index=official_index_name, doc_type=doc_type)
logging.info("duan add,msearch res:%s" % str(res))
return res
except:
logging.error("catch exception,err_msg:%s" % traceback.format_exc())
return {"total_count":0,"hits":[]}
\ No newline at end of file
return {"total_count": 0, "hits": []}
......@@ -71,8 +71,8 @@ class HostpitalPosInfo(object):
def get_hostpital_info(cls):
try:
if len(cls.hospital_pos_dict) == 0:
query = doctor.Hospital.objects.all().query
query_results = django.db.models.QuerySet(model=doctor.Hospital, query=query)
query_results = doctor.Hospital.objects.filter(is_online=True)
#query_results = django.db.models.QuerySet(model=doctor.Hospital, query=query)
for item in query_results:
name = item.name
lng = float(item.baidu_loc_lng)
......
......@@ -2,75 +2,111 @@
# -*- coding: utf-8 -*-
from __future__ import unicode_literals, absolute_import, print_function
from gm_rpcd.all import bind
import logging
import traceback
import json
import base64
from libs.es import ESPerform
from libs.cache import redis_client
from libs.tools import g_hospital_pos_dict
from libs.tools import point_distance
from gm_rpcd.all import bind
from trans2es.commons.words_utils import QueryWordAttr, get_tips_word_type
def get_suggest_tips(query,lat,lng,offset=0,size=50):
def get_suggest_tips(query, lat, lng, offset=0, size=50):
try:
# the iOS keyboard sometimes appends \u2006 (six-per-em space) to the query
query = query.replace("\u2006", '')
q = {
"suggest":{
"tips-suggest": {
"prefix": query,
"completion": {
"field": "suggest",
"size": size,
"contexts":{
"is_online":[True]
},
"fuzzy":{
"fuzziness": 0
}
}
"suggest": {
"tips-suggest": {
"prefix": query,
"completion": {
"field": "suggest",
"size": size,
"contexts": {
"is_online": [True]
},
"fuzzy": {
"fuzziness": 0
}
}
}
},
"_source": {
"includes": ["id", "ori_name", "offline_score", "is_online","type_flag","results_num"]
"includes": ["id", "ori_name", "offline_score", "is_online", "type_flag", "results_num"]
}
}
logging.info("get qqqqqqqq:%s" % q)
have_read_tips_set = set()
ret_list = list()
result_dict = ESPerform.get_search_results(ESPerform.get_cli(),sub_index_name="suggest",query_body=q,offset=offset,size=size,is_suggest_request=True)
result_dict = ESPerform.get_search_results(ESPerform.get_cli(), sub_index_name="suggest", query_body=q,
offset=offset, size=size, is_suggest_request=True)
logging.info("get result_dict:%s" % result_dict)
for tips_item in result_dict["suggest"]["tips-suggest"]:
for hit_item in tips_item["options"]:
if hit_item["_source"]["ori_name"] not in have_read_tips_set:
have_read_tips_set.add(hit_item["_source"]["ori_name"])
highlight_marks = u'<ems>%s</ems>' % query
hit_item["_source"]["highlight_name"] = hit_item["_source"]["ori_name"].replace(query,highlight_marks)
hit_item["_source"]["highlight_name"] = hit_item["_source"]["ori_name"].replace(query,
highlight_marks)
if hit_item["_source"]["type_flag"] == "hospital":
if hit_item["_source"]["ori_name"] in g_hospital_pos_dict:
distance = point_distance(lng,lat,g_hospital_pos_dict[hit_item["_source"]["ori_name"]][0],g_hospital_pos_dict[hit_item["_source"]["ori_name"]][1])
if distance < 1000*50:
if distance < 1000:
if distance < 100:
hit_item["_source"]["describe"] = "<100" + "米"
if lat is not None and lng is not None and lat != 0.0 and lng != 0.0:
if hit_item["_source"]["ori_name"] in g_hospital_pos_dict:
distance = point_distance(lng, lat,
g_hospital_pos_dict[hit_item["_source"]["ori_name"]][0],
g_hospital_pos_dict[hit_item["_source"]["ori_name"]][1])
if distance < 1000 * 50:
if distance < 1000:
if distance < 100:
hit_item["_source"]["describe"] = "<100" + "米"
else:
hit_item["_source"]["describe"] = "约" + str(int(distance)) + "米"
else:
hit_item["_source"]["describe"] = "约" + str(int(distance)) + "米"
hit_item["_source"]["describe"] = "约" + str(
round(1.0 * distance / 1000, 1)) + "km"
else:
hit_item["_source"]["describe"] = "约" + str(round(1.0*distance/1000,1)) + "km"
hit_item["_source"]["describe"] = ">50km"
else:
hit_item["_source"]["describe"] = ">50km"
hit_item["_source"]["describe"] = ""
else:
hit_item["_source"]["describe"] = ""
else:
if hit_item["_source"]["type_flag"] == "doctor":
hit_item["_source"]["describe"] = ""
else:
hit_item["_source"]["describe"] = "约"+str(hit_item["_source"]["results_num"])+"个结果" if hit_item["_source"]["results_num"] else ""
hit_item["_source"]["describe"] = "约" + str(hit_item["_source"]["results_num"]) + "个结果" if \
hit_item["_source"]["results_num"] else ""
ret_list.append(hit_item["_source"])
return ret_list
if len(result_dict["suggest"]["tips-suggest"]) >= 50:
return ret_list
else:
QUERY_KEY = "query:{}:search_tip"
query_base64 = base64.b64encode(query.encode('utf8')).decode('utf8')
key = QUERY_KEY.format(query_base64)
labels = list(map(lambda x: x.decode("utf8"), list(redis_client.smembers(key))))
for i in labels:
ori = i.split(":")[0]
ori_name = base64.b64decode(ori.encode('utf8')).decode('utf8')
if ori_name not in have_read_tips_set:
result_num = i.split(":")[1]
ret_list.append(
{"results_num": result_num, "ori_name": ori_name, "id": None, "is_online": True,
"offline_score": 0,
"type_flag": get_tips_word_type(ori_name), "highlight_name": ori_name, "describe": ""})
if len(ret_list) >= 50:
return ret_list[0:50]
else:
return ret_list
except:
logging.error("catch exception,err_msg:%s" % traceback.format_exc())
return list()
......@@ -12,9 +12,8 @@ from libs.tools import json_http_response
from search.utils.auto_tips import get_suggest_tips
@bind("search_tips/search/auto_complete_query")
def auto_complete_query(q,lat,lng):
def auto_complete_query(q, lat, lng):
try:
"""auto complate words/tags/doctors etc.
......@@ -29,7 +28,7 @@ def auto_complete_query(q,lat,lng):
# if not q:
# return json_http_response({'error': 0, 'data': []})
data = get_suggest_tips(q,float(lat),float(lng))
data = get_suggest_tips(q, float(lat), float(lng))
result = {
'error': 0,
......@@ -38,6 +37,4 @@ def auto_complete_query(q,lat,lng):
return result
except:
logging.error("catch exception,err_msg:%s" % traceback.format_exc())
return {"error":1,"data":[]}
return {"error": 1, "data": []}
from __future__ import unicode_literals, absolute_import, print_function
import pymysql
from _celery import app as celery_app
pymysql.install_as_MySQLdb()
\ No newline at end of file
from __future__ import absolute_import, unicode_literals
import os
from celery import Celery
from django.conf import settings
# set the default Django settings module for the 'celery' program.
os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'search_tips.settings')
app = Celery('search_tips')
# Using a string here means the worker doesn't have to serialize
# the configuration object to child processes.
# - namespace='CELERY' means all celery-related configuration keys
# should have a `CELERY_` prefix.
app.config_from_object('django.conf:settings', namespace='CELERY')
# Load task modules from all registered Django app configs.
app.autodiscover_tasks()
app.conf.broker_url = settings.CELERY_BROKER_URL
@app.task(bind=True)
def debug_task(self):
print('Request: {0!r}'.format(self.request))
# coding=utf-8
from __future__ import unicode_literals, print_function, absolute_import
from django.conf import settings
import itertools
import logging
class CeleryTaskRouter(object):
queue_task_map = {
"tapir-search_tips": [
'injection.data_sync.tasks.write_to_es',
]
}
# Map[TaskName, QueueName]
task_queue_map = dict(itertools.chain.from_iterable(
[(task, queue) for task in task_list]
for (queue, task_list) in queue_task_map.items()
))
def route_for_task(self, task, args=None, kwargs=None):
"""
if settings.DEBUG:
return None
if task.startswith("statistic") or task.startswith("api.tasks.export_excel_task"):
return "slow"
"""
queue_name_or_none = self.task_queue_map.get(task)
return queue_name_or_none
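Illustrative sketch (not part of the commit): the router resolves a fully qualified task name to its queue, or returns None so Celery falls back to the default queue.

router = CeleryTaskRouter()
router.route_for_task('injection.data_sync.tasks.write_to_es')   # -> 'tapir-search_tips'
router.route_for_task('some.unknown.task')                       # -> None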
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import os
import logging
LOG_DIR = '/data/log/search_tips/app/'
logging.basicConfig(
level=logging.INFO,
format='%(asctime)s %(levelname)s %(module)s.%(funcName)s Line:%(lineno)d %(message)s',
filename=os.path.join(LOG_DIR, 'filelog.log'),
)
LOGGING = {
'version': 1,
'disable_existing_loggers': False,
'filters': {
'require_debug_true': {
'()': 'django.utils.log.RequireDebugTrue',
},
},
'formatters': {
'verbose': {
'format': '%(asctime)s %(levelname)s %(module)s.%(funcName)s Line:%(lineno)d %(message)s'
},
'simple': {
'format': '%(levelname)s %(message)s'
},
'profile': {
'format': '%(asctime)s %(message)s'
},
'raw': {
'format': '%(message)s'
}
},
'handlers': {
'console': {
'level': 'DEBUG',
'class': 'logging.StreamHandler',
'formatter': 'simple'
},
# default server log (written to log/filelog.log; rotation is handled by the linux logrotate tool)
'default': {
'level': 'INFO',
'class': 'logging.handlers.RotatingFileHandler',
'filename': os.path.join(LOG_DIR, 'filelog.log'),
'formatter': 'verbose',
},
# default server ERROR log
'default_err': {
'level': 'ERROR',
'class': 'logging.handlers.RotatingFileHandler',
'filename': os.path.join(LOG_DIR, 'error_logger.log'),
'formatter': 'verbose',
},
'exception_logger': {
'level': 'INFO',
'class': 'logging.handlers.RotatingFileHandler',
'filename': os.path.join(LOG_DIR, 'exception_logger.log'),
'formatter': 'verbose',
},
'tracer_handler': {
'level': 'INFO',
'class': 'logging.handlers.RotatingFileHandler',
'filename': os.path.join(LOG_DIR, 'tracer.log'),
'formatter': 'raw'
},
},
'loggers': {
'django': {
'handlers': ['default'],
'propagate': True,
'level': 'INFO',
},
'django.request': {
'handlers': ['default_err'],
'level': 'ERROR',
'propagate': False,
},
'exception_logger': {
'handlers': ['exception_logger'],
'level': 'INFO',
'propagate': False,
},
'gm_tracer.subscribe': {
'handlers': ['tracer_handler'],
'propagate': False,
'level': 'INFO'
},
},
}
......@@ -48,7 +48,7 @@ DEBUG = True
# }
CELERY_TIMEZONE = 'Asia/Shanghai'
CELERY_ROUTES = ['physical.celery_task_router.CeleryTaskRouter']
CELERY_ROUTES = ['search_tips.celery_task_router.CeleryTaskRouter']
# Application definition
......@@ -60,7 +60,9 @@ INSTALLED_APPS = [
'django.contrib.messages',
'django.contrib.staticfiles',
'trans2es',
'search'
'search',
'injection.data_sync',
"associate"
]
MIDDLEWARE = [
......
......@@ -10,8 +10,6 @@ from gm_types.gaia import SERVICE_ITEM_PRICE_TYPE, DOCTOR_TYPE
import uuid
def uuid4():
"""
......@@ -19,10 +17,11 @@ def uuid4():
"""
return uuid.uuid4().hex
def get_tips_suggest_list(instance_cn_name):
try:
#ch_full_weight = 6.0 * 1000
#py_full_weight = 3.0 * 1000
# ch_full_weight = 6.0 * 1000
# py_full_weight = 3.0 * 1000
full_weight = 3.0 * 1000
py_acronym_full_weight = 3.0 * 1000
......@@ -42,13 +41,14 @@ def get_tips_suggest_list(instance_cn_name):
# Chinese characters (suffix terms of the original name)
for i in range(len(ch_full_word)):
ch_name_term = ch_full_word[i:].strip()
if ch_name_term and ch_full_word[i]!="(" and ch_full_word[i]!=")":
if ch_name_term and ch_full_word[i] != "(" and ch_full_word[i] != ")":
prefix_weight = ch_prefix_weight if len(ch_name_term) != len(ch_full_word) else full_weight
suggest_type = 0 if len(ch_name_term) != len(ch_full_word) else 1
term_begin_prefix_weight = begin_prefix_weight if i == 0 else 1.0
suggest_item = {
"input": [ch_name_term],
"word_weight": (1.0 * len(ch_name_term) / len((ch_full_word))) * prefix_weight * term_begin_prefix_weight,
"word_weight": (1.0 * len(ch_name_term) / len(
(ch_full_word))) * prefix_weight * term_begin_prefix_weight,
"suggest_type": suggest_type
}
if ch_name_term[0] not in suggest_dict:
......@@ -59,18 +59,19 @@ def get_tips_suggest_list(instance_cn_name):
suggest_dict[ch_name_term[0]]["input"].append(ch_name_term)
if suggest_item["word_weight"] > suggest_dict[ch_name_term[0]]["word_weight"]:
suggest_dict[ch_name_term[0]]["word_weight"] = suggest_item["word_weight"]
suggest_dict[ch_name_term[0]]["suggest_type"] = suggest_item["suggest_type"]
# full pinyin (suffix terms)
suggest_dict[ch_name_term[0]]["suggest_type"] = suggest_item["suggest_type"]
# full pinyin (suffix terms)
if py_full_word != ch_full_word:
for i in range(len(py_full_word)):
py_name_term = py_full_word[i:].strip()
if py_name_term and py_full_word[i]!="(" and py_full_word[i]!=")":
if py_name_term and py_full_word[i] != "(" and py_full_word[i] != ")":
prefix_weight = py_prefix_weight if len(py_name_term) != len(py_full_word) else full_weight
suggest_type = 2 if len(py_name_term) != len(py_full_word) else 3
term_begin_prefix_weight = begin_prefix_weight if i == 0 else 1.0
suggest_item = {
"input": [py_name_term],
"word_weight": (1.0 * len(py_name_term) / len(py_full_word)) * prefix_weight * term_begin_prefix_weight,
"word_weight": (1.0 * len(py_name_term) / len(
py_full_word)) * prefix_weight * term_begin_prefix_weight,
"suggest_type": suggest_type
}
......@@ -82,20 +83,21 @@ def get_tips_suggest_list(instance_cn_name):
suggest_dict[py_name_term[0]]["input"].append(py_name_term)
if suggest_item["word_weight"] > suggest_dict[py_name_term[0]]["word_weight"]:
suggest_dict[py_name_term[0]]["word_weight"] = suggest_item["word_weight"]
suggest_dict[py_name_term[0]]["suggest_type"] = suggest_item["suggest_type"]
suggest_dict[py_name_term[0]]["suggest_type"] = suggest_item["suggest_type"]
# pinyin acronym (first-letter abbreviations)
# pinyin acronym (first-letter abbreviations)
if py_acronym_full_word != py_full_word:
for i in range(len(py_acronym_full_word)):
py_acronym_term = py_acronym_full_word[i:].strip()
if py_acronym_term and py_acronym_full_word[i]!="(" and py_acronym_full_word[i]!=")":
if py_acronym_term and py_acronym_full_word[i] != "(" and py_acronym_full_word[i] != ")":
prefix_weight = py_acronym_prefix_weight if len(py_acronym_term) != len(
py_acronym_full_word) else py_acronym_full_weight
suggest_type = 4 if len(py_acronym_term) != len(py_acronym_full_word) else 5
term_begin_prefix_weight = begin_prefix_weight if i == 0 else 1.0
suggest_item = {
"input": [py_acronym_term],
"word_weight": (1.0 * len(py_acronym_term) / len(py_acronym_full_word)) * prefix_weight * term_begin_prefix_weight,
"word_weight": (1.0 * len(py_acronym_term) / len(
py_acronym_full_word)) * prefix_weight * term_begin_prefix_weight,
"suggest_type": suggest_type
}
if py_acronym_term[0] not in suggest_dict:
......@@ -108,9 +110,7 @@ def get_tips_suggest_list(instance_cn_name):
suggest_dict[py_acronym_term[0]]["word_weight"] = suggest_item["word_weight"]
suggest_dict[py_acronym_term[0]]["suggest_type"] = suggest_item["suggest_type"]
return suggest_dict.values()
except:
logging.error("catch exception,err_msg:%s" % traceback.format_exc())
return list()
......@@ -5,7 +5,8 @@ import logging
import traceback
import json
from libs.cache import redis_client
from trans2es.models.query_word_conversion import TagConversion,DoctorConversion,HospitalConversion
from trans2es.models.query_word_conversion import TagConversion, DoctorConversion, HospitalConversion
class TagTab(object):
Weight = {
......@@ -84,7 +85,6 @@ class HosTab(TagTab):
class QueryWordAttr(object):
# latest date seen in the TagConversion table (and the doctor / hospital equivalents)
tag_latest_date = None
doctor_latest_date = None
......@@ -97,12 +97,12 @@ class QueryWordAttr(object):
hostab = HosTab()
@classmethod
def get_project_query_word_weight(cls,name):
def get_project_query_word_weight(cls, name):
try:
if not cls.tag_latest_date:
cls.tag_latest_date = TagConversion.objects.latest('update_date').update_date
tag_query_results = TagConversion.objects.filter(query=name,update_date=cls.tag_latest_date)
tag_query_results = TagConversion.objects.filter(query=name, update_date=cls.tag_latest_date)
total_score = 0.0
for query_item in tag_query_results:
......@@ -120,7 +120,7 @@ class QueryWordAttr(object):
if not cls.doctor_latest_date:
cls.doctor_latest_date = DoctorConversion.objects.latest('update_date').update_date
doc_query_results = DoctorConversion.objects.filter(query=name,update_date=cls.doctor_latest_date)
doc_query_results = DoctorConversion.objects.filter(query=name, update_date=cls.doctor_latest_date)
total_score = 0.0
for query_item in doc_query_results:
......@@ -138,7 +138,7 @@ class QueryWordAttr(object):
if not cls.hospital_latest_date:
cls.hospital_latest_date = HospitalConversion.objects.latest('update_date').update_date
hospital_query_results = HospitalConversion.objects.filter(query=name,update_date=cls.hospital_latest_date)
hospital_query_results = HospitalConversion.objects.filter(query=name, update_date=cls.hospital_latest_date)
total_score = 0.0
for query_item in hospital_query_results:
......@@ -151,10 +151,10 @@ class QueryWordAttr(object):
return 0.0
@classmethod
def get_query_results_num(cls,name):
def get_query_results_num(cls, name):
try:
redis_data = redis_client.hget(cls.tips_num_redis_key_prefix, name)
redis_val_dict = json.loads(str(redis_data,encoding="utf-8")) if redis_data else {}
redis_val_dict = json.loads(str(redis_data, encoding="utf-8")) if redis_data else {}
results_num = redis_val_dict['t'] if 't' in redis_val_dict else 0
return results_num
......@@ -168,17 +168,17 @@ from libs.cache import redis_client
import base64
from gm_types.doris import MIND_TYPE
QUERY_KEY = "query:{}:set"
LABEL_VALUE = {
MIND_TYPE.PROJECT : 6,
MIND_TYPE.BRAND: 5,
MIND_TYPE.HOSPITAL:4,
MIND_TYPE.DOCTOR:3,
MIND_TYPE.AREA:2,
MIND_TYPE.USER:1,
MIND_TYPE.UNKNOWN:0
MIND_TYPE.PROJECT: 8,
MIND_TYPE.HOSPITAL: 7,
MIND_TYPE.DOCTOR: 6,
MIND_TYPE.FREE_FACE: 4,
MIND_TYPE.FACE: 3,
MIND_TYPE.AREA: 2,
MIND_TYPE.USER: 1,
MIND_TYPE.UNKNOWN: 0
}
QUERY_WORD_LABEL_NEED_MODIFIED = {
......@@ -189,6 +189,7 @@ QUERY_WORD_LABEL_NEED_MODIFIED = {
u"脱毛": MIND_TYPE.PROJECT
}
def label_key(label):
return LABEL_VALUE.get(label)
......@@ -198,11 +199,7 @@ def get_tips_word_type(query=''):
key = QUERY_KEY.format(query_base64)
labels = list(map(lambda x: x.decode("utf8"), list(redis_client.smembers(key))))
labels.append(MIND_TYPE.UNKNOWN)
if query in QUERY_WORD_LABEL_NEED_MODIFIED:
labels.append(MIND_TYPE.PROJECT)
labels.sort(key=label_key, reverse=True)
return labels[0]
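Illustrative sketch (not part of the commit): with the updated LABEL_VALUE table above, get_tips_word_type keeps the highest-ranked label attached to the query in redis, e.g.:

labels = [MIND_TYPE.DOCTOR, MIND_TYPE.AREA, MIND_TYPE.UNKNOWN]
labels.sort(key=label_key, reverse=True)
assert labels[0] == MIND_TYPE.DOCTOR   # DOCTOR (6) outranks AREA (2) and UNKNOWN (0)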
#!/usr/bin/env python
# -*- coding: utf-8 -*-
from django.conf import settings
from django.core.management.base import BaseCommand
import traceback
import logging
from libs.es import ESPerform
from trans2es.type_info import get_type_info_map
class Command(BaseCommand):
args = ''
help = 'dump mapping to elasticsearch'
from optparse import make_option
option_list = BaseCommand.option_list + (
make_option('-t', '--type', dest='type', help='type name to dump data to elasticsearch', metavar='TYPE',
default=''),
make_option('-T', '--indices_template', dest='indices_template',
help='index template name to dump data to elasticsearch', metavar='TYPE',
default=''),
make_option('-i', '--index-prefix', dest='index_prefix', help='index name to dump data to elasticsearch',
metavar='INDEX_PREFIX'),
make_option('-p', '--parallel', dest='parallel', help='parallel process count', metavar='PARALLEL'),
make_option('-s', '--pks', dest='pks', help='specify sync pks, comma separated', metavar='PKS', default=''),
make_option('--streaming-slicing', dest='streaming_slicing', action='store_true', default=True),
make_option('--no-streaming-slicing', dest='streaming_slicing', action='store_false', default=True),
)
def handle(self, *args, **options):
try:
es_cli = ESPerform.get_cli()
type_name1 = "suggest-v1"
type_name = "suggest"
if len(options["type"]):
if options["type"] == "all" or type_name == options["type"] or type_name1 == options["type"]:
type_name = options["type"]
official_index_name = ESPerform.get_official_index_name(type_name)
index_exists = es_cli.indices.exists(official_index_name)
if not index_exists:
logging.info("begin create [%s] index!" % type_name)
ESPerform.create_index(es_cli, type_name)
logging.info("begin create [%s] mapping!" % type_name)
ESPerform.put_index_mapping(es_cli, type_name)
if len(options["indices_template"]):
template_file_name = options["indices_template"]
if ESPerform.put_indices_template(es_cli=es_cli, template_file_name=template_file_name,
template_name=template_file_name):
logging.info("put indices template suc!")
else:
logging.error("put indices template err!")
except:
logging.error("catch exception,err_msg:%s" % traceback.format_exc())
{
"dynamic":"strict",
"properties": {
"id":{"type":"text"},
"suggest":{
"analyzer":"keyword",
"search_analyzer":"keyword",
"type":"completion",
"contexts":[
{
"name":"is_online",
"type": "category"
}
]
},
"suggest_type":{"type":"long"},//0-汉字,1-汉字全拼,2-拼音,3-拼音全拼,4-拼音简写,5-拼音简写全拼
"tips_name_type":{"type":"long"},//tips数据所属类型,0-tag,1-hospital,2-doctor,3-wiki
"ori_name":{"type":"keyword"},//原名称
"order_weight":{"type":"double"},//订单权重
"offline_score":{"type":"double"},//离线分
"results_num":{"type":"integer"},//结果数量
"type_flag":{"type":"keyword"},
"is_online":{"type":"boolean"}//上线
}
}
......@@ -3,6 +3,8 @@
"properties": {
"id":{"type":"text"},
"suggest":{
"analyzer":"keyword",
"search_analyzer":"keyword",
"type":"completion",
"contexts":[
{
......@@ -20,4 +22,4 @@
"type_flag":{"type":"keyword"},
"is_online":{"type":"boolean"}//上线
}
}
\ No newline at end of file
}
......@@ -34,4 +34,4 @@ class WordRelResemble(models.Model):
db_table = 'api_wordrelresemble'
wordrel = models.ForeignKey(WordRel, related_name='all_resembles')
word = models.CharField(u'近义词', max_length=50, db_index=True)
word = models.CharField(u'近义词', max_length=50, db_index=True)
\ No newline at end of file
......@@ -4,15 +4,15 @@ import os
import sys
import logging
import traceback
from libs.tools import tzlc,getMd5Digest
from libs.tools import tzlc, getMd5Digest
from libs.es import ESPerform
from libs.cache import redis_client
import json
from django.conf import settings
from trans2es.commons.commons import get_tips_suggest_list
from trans2es.commons.words_utils import QueryWordAttr,get_tips_word_type
from trans2es.commons.words_utils import QueryWordAttr, get_tips_word_type
class DoctorTransfer(object):
......@@ -20,14 +20,15 @@ class DoctorTransfer(object):
def get_doctor_data_name_mapping_results_to_redis(cls, instance):
try:
doctor_name = instance.name.strip()
if doctor_name:
multi_fields = {
'hospital.city_province_name': 1,
'hospital.name': 1,
'name': 1,
'hospital.city_name':1,
'hospital.officer_name':1,
'service_closure_tags':1
'hospital.city_name': 1,
'hospital.officer_name': 1,
'service_closure_tags': 1
}
query_fields = ['^'.join((k, str(v))) for (k, v) in multi_fields.items()]
multi_match = {
......@@ -39,12 +40,12 @@ class DoctorTransfer(object):
q = {
"size": 0,
"query":{
"bool":{
"should":[
"query": {
"bool": {
"should": [
{"multi_match": multi_match}
],
"must":[
"must": [
{"term": {"is_online": True}}
],
"minimum_should_match": 1
......@@ -52,14 +53,16 @@ class DoctorTransfer(object):
}
}
result_dict = ESPerform.get_search_results(ESPerform.get_cli(settings.GM_ORI_ES_INFO_LIST), sub_index_name="doctor", doc_type="doctor", query_body=q,offset=0,size=0)
result_dict = ESPerform.get_search_results(ESPerform.get_cli(settings.GM_ORI_ES_INFO_LIST),
sub_index_name="doctor", doc_type="doctor", query_body=q,
offset=0, size=0)
doctor_results = result_dict["total_count"]
redis_key_prefix = "search_tips:tips_mapping_num"
redis_data = redis_client.hget(redis_key_prefix,doctor_name)
redis_data = redis_client.hget(redis_key_prefix, doctor_name)
redis_val_dict = json.loads(str(redis_data,encoding="utf-8")) if redis_data else {}
redis_val_dict = json.loads(str(redis_data, encoding="utf-8")) if redis_data else {}
total_count = doctor_results
if 't' in redis_val_dict:
......@@ -68,33 +71,39 @@ class DoctorTransfer(object):
redis_val_dict['t'] = total_count
redis_val_dict['d'] = doctor_results
logging.info("duan add,redis_key_prefix:%s,redis_val_dict:%s" % (str(redis_key_prefix),str(redis_val_dict)))
redis_client.hset(redis_key_prefix,doctor_name, json.dumps(redis_val_dict))
logging.info(
"duan add,redis_key_prefix:%s,redis_val_dict:%s" % (str(redis_key_prefix), str(redis_val_dict)))
redis_client.hset(redis_key_prefix, doctor_name, json.dumps(redis_val_dict))
except:
logging.error("catch exception,err_msg:%s" % traceback.format_exc())
@classmethod
def get_doctor_suggest_data_list(cls, instance):
try:
ret_list = list()
item_dict = dict()
item_dict["id"] = getMd5Digest(str(instance.name))
item_dict["ori_name"] = instance.name
item_dict["is_online"] = instance.is_online
item_dict["order_weight"] = QueryWordAttr.get_doctor_query_word_weight(instance.name)
item_dict["results_num"] = QueryWordAttr.get_query_results_num(instance.name)
item_dict["type_flag"] = get_tips_word_type(instance.name)
item_dict["offline_score"] = 0.0
item_dict["tips_name_type"] = 2
ret_list.append(item_dict)
suggest_list = get_tips_suggest_list(instance.name)
return (item_dict,suggest_list)
keyword = instance.name
cut_bool = False
cut_word = ["下线", "停用", "已经下线", "账号停用"]
for i in cut_word:
if keyword.find(i) >= 0:
cut_bool = True
if cut_bool == False:
item_dict = dict()
item_dict["id"] = getMd5Digest(str(instance.name))
item_dict["ori_name"] = instance.name
item_dict["is_online"] = instance.is_online
item_dict["order_weight"] = QueryWordAttr.get_doctor_query_word_weight(instance.name)
item_dict["results_num"] = QueryWordAttr.get_query_results_num(instance.name)
item_dict["type_flag"] = get_tips_word_type(instance.name)
item_dict["offline_score"] = 0.0
item_dict["tips_name_type"] = 2
ret_list.append(item_dict)
suggest_list = get_tips_suggest_list(instance.name)
return (item_dict, suggest_list)
except:
logging.error("catch exception,err_msg:%s" % traceback.format_exc())
return ([],[])
return ([], [])
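The suggest builders added in this diff all skip names carrying an offline/disabled marker (the cut_word list: roughly "offline", "disabled", "already offline", "account disabled"), so retired doctors, hospitals and tags never become suggest entries. A minimal sketch of that check, with should_skip_suggest as an illustrative name:

CUT_WORDS = ["下线", "停用", "已经下线", "账号停用"]

def should_skip_suggest(name):
    """True when the name carries an offline/disabled marker and must not be suggested."""
    return any(marker in name for marker in CUT_WORDS)

# usage
assert should_skip_suggest("某某医生(已经下线)")
assert not should_skip_suggest("某某医生")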
......@@ -4,13 +4,14 @@ import os
import sys
import logging
import traceback
from libs.tools import tzlc,getMd5Digest
from libs.tools import tzlc, getMd5Digest
from libs.es import ESPerform
from libs.cache import redis_client
import json
from trans2es.commons.commons import get_tips_suggest_list
from trans2es.commons.words_utils import QueryWordAttr,get_tips_word_type
from trans2es.commons.words_utils import QueryWordAttr, get_tips_word_type
class HospitalTransfer(object):
......@@ -19,35 +20,42 @@ class HospitalTransfer(object):
try:
ret_list = list()
item_dict = dict()
item_dict["id"] = getMd5Digest(str(instance.name))
item_dict["ori_name"] = instance.name
item_dict["is_online"] = instance.is_online
item_dict["order_weight"] = QueryWordAttr.get_hospital_query_word_weight(instance.name)
item_dict["results_num"] = QueryWordAttr.get_query_results_num(instance.name)
item_dict["type_flag"] = get_tips_word_type(instance.name)
item_dict["offline_score"] = 0.0
item_dict["tips_name_type"] = 1
ret_list.append(item_dict)
suggest_list = get_tips_suggest_list(instance.name)
return (item_dict,suggest_list)
name = instance.name.strip()
cut_bool = False
cut_word = ["下线", "停用", "已经下线", "账号停用"]
for i in cut_word:
if name.find(i) >= 0:
cut_bool = True
if cut_bool == False:
item_dict = dict()
item_dict["id"] = getMd5Digest(str(name))
item_dict["ori_name"] = name
item_dict["is_online"] = instance.is_online
item_dict["order_weight"] = QueryWordAttr.get_hospital_query_word_weight(name)
item_dict["results_num"] = QueryWordAttr.get_query_results_num(name)
item_dict["type_flag"] = get_tips_word_type(name)
item_dict["offline_score"] = 0.0
item_dict["tips_name_type"] = 1
ret_list.append(item_dict)
suggest_list = get_tips_suggest_list(name)
return (item_dict, suggest_list)
except:
logging.error("catch exception,err_msg:%s" % traceback.format_exc())
return ([],[])
return ([], [])
@classmethod
def get_hospital_lat_lng_info_to_redis(cls, instance):
try:
redis_key_prefix = "search_tips:hospital_latlng_info"
name = instance.name
name = instance.name.strip()
lng = instance.baidu_loc_lng
lat = instance.baidu_loc_lat
pos_list = [lng,lat]
redis_client.hset(redis_key_prefix,name,pos_list)
pos_list = [lng, lat]
redis_client.hset(redis_key_prefix, name, pos_list)
except:
logging.error("catch exception,err_msg:%s" % traceback.format_exc())
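get_hospital_lat_lng_info_to_redis caches each hospital's Baidu coordinates under a single hash key. Passing a Python list straight to hset relies on implicit str() conversion (and newer redis-py versions reject non-string values outright), so the hedged sketch below serializes with JSON instead; the helper names are illustrative, the key matches the one used above.

import json

HOSPITAL_LATLNG_KEY = "search_tips:hospital_latlng_info"   # same key as above

def cache_hospital_position(redis_client, name, lng, lat):
    # store the pair as JSON text instead of handing a Python list to hset
    redis_client.hset(HOSPITAL_LATLNG_KEY, name, json.dumps([lng, lat]))

def read_hospital_position(redis_client, name):
    raw = redis_client.hget(HOSPITAL_LATLNG_KEY, name)
    return json.loads(raw) if raw else None   # [lng, lat] or None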
......@@ -4,15 +4,14 @@ import os
import sys
import logging
import traceback
from libs.tools import tzlc,getMd5Digest
from libs.tools import tzlc, getMd5Digest
from libs.es import ESPerform
from libs.cache import redis_client
import json
from django.conf import settings
from trans2es.commons.commons import get_tips_suggest_list
from trans2es.commons.words_utils import QueryWordAttr,get_tips_word_type
from trans2es.commons.words_utils import QueryWordAttr, get_tips_word_type
class ItemWikiTransfer(object):
......@@ -44,12 +43,12 @@ class ItemWikiTransfer(object):
q = {
"size": 0,
"query":{
"bool":{
"should":[
"query": {
"bool": {
"should": [
{"multi_match": multi_match}
],
"must":[
"must": [
{"term": {"is_online": True}}
],
"minimum_should_match": 1
......@@ -57,28 +56,29 @@ class ItemWikiTransfer(object):
}
}
result_dict = ESPerform.get_search_results(ESPerform.get_cli(settings.GM_ORI_ES_INFO_LIST), sub_index_name="newwiki", doc_type="newwiki", query_body=q,offset=0,size=0)
result_dict = ESPerform.get_search_results(ESPerform.get_cli(settings.GM_ORI_ES_INFO_LIST),
sub_index_name="newwiki", doc_type="newwiki", query_body=q,
offset=0, size=0)
doctor_results = result_dict["total_count"]
redis_key_prefix = "search_tips:tips_mapping_num"
redis_data = redis_client.hget(redis_key_prefix,wiki_name)
redis_data = redis_client.hget(redis_key_prefix, wiki_name)
redis_val_dict = json.loads(str(redis_data,encoding="utf-8")) if redis_data else {}
redis_val_dict = json.loads(str(redis_data, encoding="utf-8")) if redis_data else {}
total_count = doctor_results
if 'w' in redis_val_dict:
total_count += int(redis_val_dict['w'])
if 't' in redis_val_dict:
total_count += int(redis_val_dict['t'])
redis_val_dict['t'] = total_count
redis_val_dict['w'] = doctor_results
redis_client.hset(redis_key_prefix,wiki_name, json.dumps(redis_val_dict))
redis_client.hset(redis_key_prefix, wiki_name, json.dumps(redis_val_dict))
except:
logging.error("catch exception,err_msg:%s" % traceback.format_exc())
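All of the *_data_name_mapping_results_to_redis methods in this diff maintain one Redis hash, search_tips:tips_mapping_num, whose per-name value is a small JSON dict: 't' is a running total plus one-letter keys per source ('d' doctor, 'w' wiki, 's' service, 'r' diary). A hedged sketch of that update follows; it mirrors the diary variant later in this diff, which only adds the previous 't', whereas the wiki and tag variants also re-add their own previous per-source value, so this is not a drop-in for those.

import json

TIPS_NUM_KEY = "search_tips:tips_mapping_num"

def update_tip_count(redis_client, name, source_key, source_count):
    """source_key is the one-letter source tag: 'd' doctor, 'w' wiki, 's' service, 'r' diary."""
    raw = redis_client.hget(TIPS_NUM_KEY, name)
    counts = json.loads(str(raw, encoding="utf-8")) if raw else {}
    counts[source_key] = source_count
    counts['t'] = source_count + int(counts.get('t', 0))   # running total across sources
    redis_client.hset(TIPS_NUM_KEY, name, json.dumps(counts))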
@classmethod
def get_itemwiki_suggest_data_list(cls, instance):
try:
......@@ -99,7 +99,7 @@ class ItemWikiTransfer(object):
suggest_list = get_tips_suggest_list(instance.name)
return (item_dict,suggest_list)
return (item_dict, suggest_list)
except:
logging.error("catch exception,err_msg:%s" % traceback.format_exc())
return ([],[])
\ No newline at end of file
return ([], [])
......@@ -4,18 +4,17 @@ import os
import sys
import logging
import traceback
from libs.tools import tzlc,getMd5Digest
from libs.tools import tzlc, getMd5Digest
from libs.es import ESPerform
from libs.cache import redis_client
import json
from django.conf import settings
from trans2es.commons.commons import get_tips_suggest_list
from trans2es.commons.words_utils import QueryWordAttr,get_tips_word_type
from trans2es.commons.words_utils import QueryWordAttr, get_tips_word_type
class TagTransfer(object):
tips_num_redis_key_prefix = "search_tips:tips_mapping_num"
@classmethod
......@@ -60,29 +59,31 @@ class TagTransfer(object):
{'multi_match': multi_match},
sku_query
],
"must":[
{"term":{"is_online":True}}
"must": [
{"term": {"is_online": True}}
],
"minimum_should_match": 1
}
}
result_dict = ESPerform.get_search_results(ESPerform.get_cli(settings.GM_ORI_ES_INFO_LIST), sub_index_name="service", doc_type="service", query_body=q,offset=0,size=0)
result_dict = ESPerform.get_search_results(ESPerform.get_cli(settings.GM_ORI_ES_INFO_LIST),
sub_index_name="service", doc_type="service", query_body=q,
offset=0, size=0)
doctor_results = result_dict["total_count"]
redis_data = redis_client.hget(cls.tips_num_redis_key_prefix, tag_name)
redis_val_dict = json.loads(str(redis_data,encoding="utf-8")) if redis_data else {}
redis_val_dict = json.loads(str(redis_data, encoding="utf-8")) if redis_data else {}
total_count = doctor_results
if 's' in redis_val_dict:
total_count += int(redis_val_dict['s'])
if 't' in redis_val_dict:
total_count += int(redis_val_dict['t'])
redis_val_dict['t'] = total_count
redis_val_dict['s'] = doctor_results
redis_client.hset(cls.tips_num_redis_key_prefix,tag_name, json.dumps(redis_val_dict))
redis_client.hset(cls.tips_num_redis_key_prefix, tag_name, json.dumps(redis_val_dict))
except:
logging.error("catch exception,err_msg:%s" % traceback.format_exc())

......@@ -90,23 +91,29 @@ class TagTransfer(object):
def get_tag_suggest_data_list(cls, instance):
try:
ret_list = list()
item_dict = dict()
item_dict["id"] = getMd5Digest(str(instance.name))
item_dict["ori_name"] = instance.name
item_dict["is_online"] = instance.is_online
item_dict["order_weight"] = QueryWordAttr.get_project_query_word_weight(instance.name)
item_dict["results_num"] = QueryWordAttr.get_query_results_num(instance.name)
item_dict["type_flag"] = get_tips_word_type(instance.name)
item_dict["offline_score"] = 0.0
item_dict["tips_name_type"] = 0
ret_list.append(item_dict)
suggest_list = get_tips_suggest_list(instance.name)
return (item_dict,suggest_list)
keyword = instance.name
cut_bool = False
cut_word = ["下线", "停用", "已经下线", "账号停用"]
for i in cut_word:
if keyword.find(i) >= 0:
cut_bool = True
if cut_bool == False:
item_dict = dict()
item_dict["id"] = getMd5Digest(str(instance.name))
item_dict["ori_name"] = instance.name
item_dict["is_online"] = instance.is_online
item_dict["order_weight"] = QueryWordAttr.get_project_query_word_weight(instance.name)
item_dict["results_num"] = QueryWordAttr.get_query_results_num(instance.name)
item_dict["type_flag"] = get_tips_word_type(instance.name)
item_dict["offline_score"] = 0.0
item_dict["tips_name_type"] = 0
ret_list.append(item_dict)
suggest_list = get_tips_suggest_list(instance.name)
return (item_dict, suggest_list)
except:
logging.error("catch exception,err_msg:%s" % traceback.format_exc())
return ([],[])
\ No newline at end of file
return ([], [])
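Every *_suggest_data_list method in this diff builds the same item_dict shape before calling get_tips_suggest_list. A hedged sketch of that shared payload follows; the tips_name_type codes visible here are 0 for tags, 1 for hospitals, 2 for doctors and 4 for synonym keywords (the wiki value sits in a collapsed hunk and is not assumed), the imports are the project's own helpers, and build_suggest_item plus its weight_fn parameter are illustrative only.

from libs.tools import getMd5Digest
from trans2es.commons.commons import get_tips_suggest_list
from trans2es.commons.words_utils import QueryWordAttr, get_tips_word_type

def build_suggest_item(name, is_online, tips_name_type, weight_fn):
    """Shared suggest payload; weight_fn is e.g. QueryWordAttr.get_doctor_query_word_weight."""
    item_dict = {
        "id": getMd5Digest(str(name)),
        "ori_name": name,
        "is_online": is_online,
        "order_weight": weight_fn(name),
        "results_num": QueryWordAttr.get_query_results_num(name),
        "type_flag": get_tips_word_type(name),
        "offline_score": 0.0,
        "tips_name_type": tips_name_type,   # 0 tag, 1 hospital, 2 doctor, 4 synonym keyword
    }
    return item_dict, get_tips_suggest_list(name)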
......@@ -4,24 +4,30 @@ import os
import sys
import logging
import traceback
from libs.tools import tzlc,getMd5Digest
import base64
from libs.tools import tzlc, getMd5Digest
from libs.es import ESPerform
from libs.cache import redis_client
import json
from django.conf import settings
from trans2es.models import wordresemble
from trans2es.commons.words_utils import QueryWordAttr, get_tips_word_type
from trans2es.commons.commons import get_tips_suggest_list
from trans2es.utils.doctor_transfer import DoctorTransfer
from trans2es.utils.itemwiki_transfer import ItemWikiTransfer
from trans2es.utils.tag_transfer import TagTransfer
class WordResemble(object):
@classmethod
def get_word_resemble_list(cls,keyword):
def get_word_resemble_list(cls, keyword):
try:
query_sql_item = wordresemble.WordRel.objects.filter(keyword=keyword)
temp_list = list()
for sql_obj in query_sql_item:
temp_list.extend(list(sql_obj.all_resembles.all().values_list('word',flat=True)))
temp_list.extend(list(sql_obj.all_resembles.all().values_list('word', flat=True)))
resemble_list = list()
for item in temp_list:
......@@ -30,4 +36,172 @@ class WordResemble(object):
return resemble_list
except:
logging.error("catch exception,err_msg:%s" % traceback.format_exc())
return list()
\ No newline at end of file
return list()
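get_word_resemble_list relies on the related_name='all_resembles' reverse relation declared on WordRelResemble in the api_wordrelresemble hunk near the top of this diff. A minimal sketch of that lookup follows; the post-processing of temp_list into resemble_list sits in a collapsed hunk, so the de-duplication here is an assumption rather than the hidden logic, and the sketch only runs inside the Django project.

from trans2es.models import wordresemble

def resembles_for(keyword):
    words = []
    for rel in wordresemble.WordRel.objects.filter(keyword=keyword):
        # reverse relation declared via related_name='all_resembles'
        words.extend(rel.all_resembles.values_list('word', flat=True))
    # de-duplicate while keeping first-seen order (assumption; the real filter is collapsed)
    return list(dict.fromkeys(words))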
@classmethod
def get_resemble_list(cls, instance):
try:
try:
ret_list = list()
item_dict = dict()
keyword = instance.keyword
cut_bool = False
cut_word = ["下线", "停用", "已经下线", "账号停用"]
for i in cut_word:
if keyword.find(i) >= 0:
cut_bool = True
if cut_bool == False:
item_dict["id"] = getMd5Digest(str(instance.keyword))
item_dict["ori_name"] = instance.keyword
item_dict["is_online"] = True
item_dict["order_weight"] = QueryWordAttr.get_project_query_word_weight(instance.keyword)
item_dict["results_num"] = QueryWordAttr.get_query_results_num(instance.keyword)
item_dict["type_flag"] = get_tips_word_type(instance.keyword)
item_dict["offline_score"] = 0.0
item_dict["tips_name_type"] = 4
ret_list.append(item_dict)
suggest_list = get_tips_suggest_list(instance.keyword)
return (item_dict, suggest_list)
except:
logging.error("catch exception,err_msg:%s" % traceback.format_exc())
return ([], [])
except:
logging.error("catch exception,err_msg:%s" % traceback.format_exc())
return list()
@classmethod
def get_all_data_name_mapping_results_to_redis(cls, instance):
try:
total_count = 0
instance.name = instance.keyword
# fetch the wiki (encyclopedia) counts
ItemWikiTransfer.get_wiki_data_name_mapping_results_to_redis(instance)
# fetch the service (美购) counts
TagTransfer.get_tag_data_name_mapping_results_to_redis(instance)
# fetch the doctor/hospital counts
DoctorTransfer.get_doctor_data_name_mapping_results_to_redis(instance)
# fetch the diary counts
WordResemble.get_diary_data_name_mapping_results_to_redis(instance)
except:
logging.error("catch exception,err_msg:%s" % traceback.format_exc())
@classmethod
def get_diary_data_name_mapping_results_to_redis(cls, instance):
try:
tips_num_redis_key_prefix = "search_tips:tips_mapping_num"
tag_name = instance.name.strip()
q = dict()
if tag_name:
multi_fields = {
'tags': 8,
'doctor.name': 4,
'doctor.hospital.name': 3,
'doctor.hospital.officer_name': 3,
'user.last_name': 2,
'service.name': 1,
"title": 2}
query_fields = ['^'.join((k, str(v))) for (k, v) in multi_fields.items()]
multi_match = {
'query': tag_name,
'type': 'cross_fields',
'operator': 'and',
'fields': query_fields,
}
q['query'] = {
'bool': {
"should": [
{'multi_match': multi_match}
],
"must": [
{"term": {"is_online": True}}
],
"minimum_should_match": 1
}
}
result_dict = ESPerform.get_search_results(ESPerform.get_cli(settings.GM_ORI_ES_INFO_LIST),
sub_index_name="diary", doc_type="diary", query_body=q,
offset=0, size=0)
doctor_results = result_dict["total_count"]
redis_data = redis_client.hget(tips_num_redis_key_prefix, tag_name)
redis_val_dict = json.loads(str(redis_data, encoding="utf-8")) if redis_data else {}
total_count = doctor_results
if 't' in redis_val_dict:
total_count += int(redis_val_dict['t'])
redis_val_dict['t'] = total_count
redis_val_dict['r'] = doctor_results
redis_client.hset(tips_num_redis_key_prefix, tag_name, json.dumps(redis_val_dict))
except:
logging.error("catch exception,err_msg:%s" % traceback.format_exc())
@classmethod
def set_data_to_redis(cls, instance):
try:
QUERY_KEY = "query:{}:search_tip"
tips_num_redis_key_prefix = "search_tips:tips_mapping_num"
# result = wordresemble.WordRel.objects.all().values("keyword", "id")
# for wordrm in result:
query_sql_item = wordresemble.WordRel.objects.filter(keyword=instance.keyword)
for sql_obj in query_sql_item:
words = list(sql_obj.all_resembles.all().values_list('word', flat=True))
for items in words:
# store the base keyword first
query_base64 = base64.b64encode(instance.keyword.encode('utf8')).decode('utf8')
key = QUERY_KEY.format(query_base64)
value_name = base64.b64encode(items.encode('utf8')).decode('utf8')
# fetch the result count for the base keyword
redis_data = redis_client.hget(tips_num_redis_key_prefix, items)
redis_val_dict = json.loads(str(redis_data, encoding="utf-8")) if redis_data else {}
total_count = 0
if 't' in redis_val_dict:
total_count = int(redis_val_dict['t'])
# write to Redis
value = value_name + str(":" + str(total_count))
redis_client.sadd(key, value)
# store the synonyms
# query_base64 = base64.b64encode(items.encode('utf8')).decode('utf8')
key = QUERY_KEY.format(value_name)
# value_name = base64.b64encode(wordrm["keyword"].encode('utf8')).decode('utf8')
value_names = query_base64
# fetch the result count
redis_data = redis_client.hget(tips_num_redis_key_prefix, instance.keyword)
redis_val_dict = json.loads(str(redis_data, encoding="utf-8")) if redis_data else {}
total_count = 0
if 't' in redis_val_dict:
total_count = int(redis_val_dict['t'])
# store the result
value = value_names + str(":" + str(total_count))
redis_client.sadd(key, value)
if words:
for w in words:
value_name_w = base64.b64encode(w.encode('utf8')).decode('utf8')
redis_data = redis_client.hget(tips_num_redis_key_prefix, w)
redis_val_dict = json.loads(str(redis_data, encoding="utf-8")) if redis_data else {}
total_count = 0
if 't' in redis_val_dict:
total_count = int(redis_val_dict['t'])
value = value_name_w + str(":" + str(total_count))
redis_client.sadd(key, value)
except:
logging.error("catch exception,err_msg:%s" % traceback.format_exc())
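set_data_to_redis writes each keyword/synonym pair into Redis sets keyed by query:{base64(query)}:search_tip, whose members are shaped base64(tip):total_count with the count taken from the search_tips:tips_mapping_num hash. The read-side sketch below shows how such a member can be decoded when serving tips; decode_tip_member and related_tips are illustrative names, and splitting on the last ':' is safe because base64 output never contains ':'.

import base64

QUERY_KEY = "query:{}:search_tip"

def decode_tip_member(member):
    """Split 'base64(tip):count' into (tip, count)."""
    if isinstance(member, bytes):
        member = member.decode('utf8')
    encoded_name, _, count = member.rpartition(':')
    return base64.b64decode(encoded_name).decode('utf8'), int(count)

def related_tips(redis_client, query):
    key = QUERY_KEY.format(base64.b64encode(query.encode('utf8')).decode('utf8'))
    return [decode_tip_member(m) for m in redis_client.smembers(key)]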