Commit 3bbe7357 authored by lixiaofang

Merge branch 'associate' into dev

parents dff7a143 c068fc92
# !/usr/bin/env python
# encoding=utf-8
from __future__ import absolute_import
import os
# set the default Django settings module for the 'celery' program.
# os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'settings')
os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'search_tips.settings')
import raven
from raven.contrib.celery import register_signal, register_logger_signal
from celery import Celery
from django.conf import settings
class Celery(Celery):
    """Wrapper around ``celery.Celery`` that hooks Sentry (raven) into Celery.

    The class reuses, and therefore shadows, the imported ``Celery`` name.
    """

    def on_configure(self):
        """Register the raven signal handlers when a Sentry endpoint is set.

        Returns without doing anything when ``SENTRY_CELERY_ENDPOINT`` is
        missing from the Django settings or is empty.
        """
        # Sentry is optional: a settings module that does not define the key
        # must not fail with AttributeError while the app is being configured.
        endpoint = getattr(settings, 'SENTRY_CELERY_ENDPOINT', None)
        if not endpoint:
            return

        client = raven.Client(endpoint)

        # register a custom filter to filter out duplicate logs
        register_logger_signal(client)

        # hook into the Celery error handler
        register_signal(client)
# Module-level Celery application object, built from the wrapper class
# defined in this module.
app = Celery('search_tips_tasks')
# Using a string here means the worker will not have to
# pickle the object when using Windows.
app.config_from_object('django.conf:settings')
# Task discovery receives a callable, so INSTALLED_APPS is read only when
# the callable is invoked rather than at import time.
app.autodiscover_tasks(lambda: settings.INSTALLED_APPS)
...@@ -9,5 +9,7 @@ ...@@ -9,5 +9,7 @@
<config name="initializer_list"> <config name="initializer_list">
<element value="search_tips.django_init"/> <element value="search_tips.django_init"/>
<element value="search.view.auto_tips"/> <element value="search.view.auto_tips"/>
<element value="associate.search.views.auto_tips"/>
<element value="associate.search.views.hotword_result"/>
</config> </config>
</gm_rpcd_config> </gm_rpcd_config>
from django.contrib import admin
# Register your models here.
from django.apps import AppConfig
class Trans2EsConfig(AppConfig):
    """Django application configuration whose ``name`` is ``associate``."""

    # Dotted Python path of the application this configuration applies to.
    name = 'associate'
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import os
import sys
import logging
import traceback
import pypinyin
from pypinyin import lazy_pinyin
from gm_types.gaia import SERVICE_ITEM_PRICE_TYPE, DOCTOR_TYPE
import uuid
def uuid4():
    """Generate a random (version 4) UUID.

    :return: the UUID as 32 lowercase hexadecimal characters, no dashes.
    """
    random_uuid = uuid.uuid4()
    return random_uuid.hex
def _merge_suffix_suggests(suggest_dict, cur_index, full_word, partial_weight, whole_weight,
                           partial_type, whole_type, begin_prefix_weight):
    """Merge one suggest entry per suffix of ``full_word`` into ``suggest_dict``.

    Entries are keyed by the first character of the suffix. A suffix whose
    first character is already present is appended to that entry's ``input``
    list, and the entry keeps the highest ``word_weight`` seen so far (together
    with the matching ``suggest_type``).

    :param suggest_dict: dict updated in place, first character -> suggest item
    :param cur_index: number of entries created so far
    :param full_word: the word whose suffixes are enumerated
    :param partial_weight: weight factor for a suffix shorter than the word
    :param whole_weight: weight factor for the suffix equal to the whole word
    :param partial_type: ``suggest_type`` for a partial suffix
    :param whole_type: ``suggest_type`` for the whole word
    :param begin_prefix_weight: extra factor for the suffix starting at index 0
    :return: the updated ``cur_index``
    """
    full_len = len(full_word)
    for i in range(full_len):
        term = full_word[i:].strip()
        # Skip empty suffixes and suffixes that start on a parenthesis.
        if not term or full_word[i] in ("(", ")"):
            continue

        is_whole_word = len(term) == full_len
        prefix_weight = whole_weight if is_whole_word else partial_weight
        suggest_type = whole_type if is_whole_word else partial_type
        # Only the suffix that starts at the beginning of the word is boosted.
        term_begin_prefix_weight = begin_prefix_weight if i == 0 else 1.0
        word_weight = (1.0 * len(term) / full_len) * prefix_weight * term_begin_prefix_weight

        first_char = term[0]
        existing = suggest_dict.get(first_char)
        if existing is None:
            cur_index += 1
            suggest_dict[first_char] = {
                "input": [term],
                "word_weight": word_weight,
                "suggest_type": suggest_type,
                "cur_index": cur_index,
            }
        else:
            existing["input"].append(term)
            if word_weight > existing["word_weight"]:
                existing["word_weight"] = word_weight
                existing["suggest_type"] = suggest_type
    return cur_index


def get_tips_suggest_list(instance_cn_name):
    """Build the completion-suggest items for a name.

    Suffixes are generated for the stripped name itself (``suggest_type`` 0
    for a partial suffix, 1 for the whole word), for its pinyin spelling
    (2 / 3) and for its pinyin first-letter acronym (4 / 5). The pinyin pass
    is skipped when the pinyin equals the name, and the acronym pass is
    skipped when the acronym equals the pinyin.

    :param instance_cn_name: the name to index
    :return: list of dicts with the keys ``input``, ``word_weight``,
        ``suggest_type`` and ``cur_index``; an empty list when anything fails
        (the traceback is logged).
    """
    try:
        full_weight = 3.0 * 1000
        py_acronym_full_weight = 3.0 * 1000

        py_acronym_prefix_weight = 2
        ch_prefix_weight = 1.5
        py_prefix_weight = 1.0

        # Boost applied when the match starts at the beginning of the word.
        begin_prefix_weight = 1.2 * 1000

        ch_full_word = instance_cn_name.strip()
        py_full_word = ''.join(lazy_pinyin(ch_full_word))
        py_acronym_full_word = ''.join(lazy_pinyin(ch_full_word, style=pypinyin.FIRST_LETTER))

        suggest_dict = dict()

        # Original (Chinese) text.
        cur_index = _merge_suffix_suggests(
            suggest_dict, 0, ch_full_word,
            ch_prefix_weight, full_weight, 0, 1, begin_prefix_weight)

        # Full pinyin spelling.
        if py_full_word != ch_full_word:
            cur_index = _merge_suffix_suggests(
                suggest_dict, cur_index, py_full_word,
                py_prefix_weight, full_weight, 2, 3, begin_prefix_weight)

        # Pinyin first-letter acronym.
        if py_acronym_full_word != py_full_word:
            _merge_suffix_suggests(
                suggest_dict, cur_index, py_acronym_full_word,
                py_acronym_prefix_weight, py_acronym_full_weight, 4, 5, begin_prefix_weight)

        # Always hand back a real list, matching the error path below.
        return list(suggest_dict.values())
    except Exception:
        logging.error("catch exception,err_msg:%s", traceback.format_exc())
        return list()
# coding=utf8
from __future__ import unicode_literals, absolute_import, print_function
import logging
import traceback
import json
from libs.cache import redis_client
class TagTab(object):
    """Scores an item from its search rate and its conversion rate.

    Each rate is mapped to a bucket score through a threshold table and the
    two bucket scores are combined with the weights in ``Weight``.
    """

    # Relative weight of the search score and the conversion score.
    Weight = {
        'search': 0.4,
        'trans': 0.6
    }

    # threshold -> score; a rate gets the score of the highest threshold it
    # reaches (see _cal_score).
    SearchScore = {
        0.7: 100,
        0.1: 80,
        0.05: 60,
        0.01: 40,
        0: 20,
    }

    TransScore = {
        0.7: 100,
        0.5: 80,
        0.3: 60,
        0.1: 40,
        0: 20
    }

    def cal_score(self, search_rate, conversion_rate):
        """Return the weighted sum of the search and conversion bucket scores.

        :param search_rate: rate looked up in ``SearchScore``
        :param conversion_rate: rate looked up in ``TransScore``
        :return: ``search_score * Weight['search'] + trans_score * Weight['trans']``
        """
        s1 = self._cal_score(search_rate, 'SearchScore') * self.Weight['search']
        s2 = self._cal_score(conversion_rate, 'TransScore') * self.Weight['trans']
        return s1 + s2

    def _cal_score(self, item, type_):
        """Map a rate to its bucket score.

        :param item: the rate; it is multiplied by 100 before being compared
            with the thresholds
        :param type_: name of the threshold table attribute
            (``'SearchScore'`` or ``'TransScore'``)
        :return: score of the highest threshold that ``item * 100`` reaches;
            the score of the lowest threshold when it reaches none
        """
        item *= 100.0
        scoreweight = getattr(self, type_)
        for k in sorted(scoreweight, reverse=True):
            if item >= k:
                return scoreweight[k]
        # A value below every threshold (e.g. a negative rate) used to fall
        # through and return None, which made cal_score raise TypeError.
        return scoreweight[min(scoreweight)]
class DocTab(TagTab):
    """``TagTab`` variant that only replaces the two threshold tables."""

    # threshold -> score for the search rate
    SearchScore = {
        0.04: 100,
        0.01: 80,
        0.001: 60,
        0.0002: 40,
        0: 20,
    }

    # threshold -> score for the conversion rate
    TransScore = {
        0.47: 100,
        0.2: 80,
        0.1: 60,
        0.01: 40,
        0: 20
    }
class HosTab(TagTab):
    """``TagTab`` variant that only replaces the two threshold tables."""

    # threshold -> score for the search rate
    SearchScore = {
        0.47: 100,
        0.2: 80,
        0.1: 60,
        0.01: 40,
        0: 20
    }

    # threshold -> score for the conversion rate (ten buckets)
    TransScore = {
        1: 100,
        0.45: 90,
        0.27: 80,
        0.21: 70,
        0.15: 60,
        0.12: 50,
        0.09: 40,
        0.06: 30,
        0.04: 20,
        0: 10,
    }
class QueryWordAttr(object):
    """Attribute lookups for query words, backed by redis."""

    # Latest date of the TagConversion table (not assigned anywhere in the
    # visible code).
    tag_latest_date = None
    doctor_latest_date = None
    hospital_latest_date = None

    # Redis key prefix; the tag id is appended to build the full key.
    tips_num_redis_key_prefix = "query:associate_tip:tag_id:"

    # Scorer instances created once at class-definition time.
    tagtab = TagTab()
    doctab = DocTab()
    hostab = HosTab()

    @classmethod
    def get_query_results_num(cls, id):
        """Return the result count stored in redis for a tag id.

        :param id: tag id; converted with ``str()`` and appended to
            ``tips_num_redis_key_prefix``
        :return: the stored value as ``int``; ``0`` when the key does not
            exist or when the lookup/conversion fails (failures are logged)
        """
        try:
            key = cls.tips_num_redis_key_prefix + str(id)
            results_num = redis_client.get(key)
            # A key that was never written is an expected situation, not an
            # error: avoid int(None) and the traceback it used to log.
            if results_num is None:
                return 0
            return int(results_num)
        except Exception:
            logging.error("catch exception,err_msg:%s", traceback.format_exc())
            return 0
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import django.db.models
from django.conf import settings
from django.core.management.base import BaseCommand
import traceback
import logging
import six
import sys
from libs.es import ESPerform
from libs.table_scan import TableSlicer, TableSlicerChunk
from associate.type_info import get_type_info_map, TypeInfo
class Job(object):
    """Callable that hands one table chunk to its type's ``insert_table_chunk``."""

    # Client shared by all jobs; created on first use by get_es().
    __es = None

    def __init__(self, sub_index_name, type_name, chunk):
        """Validate and store the job parameters.

        :param sub_index_name: index name passed on as ``sub_index_name``
        :param type_name: key into ``get_type_info_map()``
        :param chunk: the ``TableSlicerChunk`` to insert
        """
        for text_arg in (sub_index_name, type_name):
            assert isinstance(text_arg, six.string_types)
        assert isinstance(chunk, TableSlicerChunk)

        self._chunk = chunk
        self._type_name = type_name
        self._sub_index_name = sub_index_name

    @classmethod
    def get_es(cls):
        """Return the shared client, creating it on the first call."""
        if cls.__es is not None:
            return cls.__es
        cls.__es = ESPerform().get_cli()
        return cls.__es

    def __call__(self):
        """Look up the type info and insert this job's chunk."""
        type_info = get_type_info_map()[self._type_name]
        assert isinstance(type_info, TypeInfo)

        type_info.insert_table_chunk(
            sub_index_name=self._sub_index_name,
            table_chunk=self._chunk,
            es=self.get_es(),
        )
class Command(BaseCommand):
    """Management command that dumps the data of one or all types to Elasticsearch.

    Nothing is synced unless ``--type`` is given; ``--type all`` syncs every
    type returned by ``get_type_info_map()``.
    """
    args = ''
    help = 'dump data to elasticsearch, parallel'

    from optparse import make_option

    option_list = BaseCommand.option_list + (
        make_option('-t', '--type', dest='type', help='type name to dump data to elasticsearch', metavar='TYPE',
                    default=''),
        make_option('-i', '--index-prefix', dest='index_prefix', help='index name to dump data to elasticsearch',
                    metavar='INDEX_PREFIX'),
        make_option('-p', '--parallel', dest='parallel', help='parallel process count', metavar='PARALLEL'),
        make_option('-s', '--pks', dest='pks', help='specify sync pks, comma separated', metavar='PKS', default=''),
        make_option('--streaming-slicing', dest='streaming_slicing', action='store_true', default=True),
        make_option('--no-streaming-slicing', dest='streaming_slicing', action='store_false', default=True),
        make_option('-S', '--sync_type', dest='sync_type', help='sync data to es', metavar='TYPE', default='')
    )

    def __sync_data_by_type(self, type_name):
        """Slice the type's queryset into chunks and run one ``Job`` per chunk.

        Errors are logged and not propagated, so a failing type does not stop
        the remaining types.
        """
        try:
            type_info = get_type_info_map()[type_name]
            query_set = type_info.queryset

            slicer = TableSlicer(queryset=query_set, chunk_size=type_info.bulk_insert_chunk_size)
            for chunk in slicer.chunks():
                job = Job(
                    sub_index_name=type_info.name,
                    type_name=type_name,
                    chunk=chunk,
                )
                job()
        except Exception:
            # Not a bare ``except:`` so that Ctrl-C (KeyboardInterrupt) and
            # SystemExit can still stop a long-running sync.
            logging.error("catch exception,err_msg:%s", traceback.format_exc())

    def handle(self, *args, **options):
        """Sync every type selected by ``--type``."""
        try:
            type_name_list = get_type_info_map().keys()
            wanted_type = options["type"]
            for type_name in type_name_list:
                if not wanted_type:
                    continue
                if wanted_type == "all" or type_name == wanted_type:
                    logging.info("begin sync [%s] data to es!" % type_name)
                    self.__sync_data_by_type(type_name)
        except Exception:
            logging.error("catch exception,err_msg:%s", traceback.format_exc())
#!/usr/bin/env python
# -*- coding: utf-8 -*-
from django.conf import settings
from django.core.management.base import BaseCommand
import traceback
import logging
from libs.es import ESPerform
from associate.type_info import get_type_info_map
class Command(BaseCommand):
    """Management command that creates the ``associate_tag`` index and mapping.

    With ``--indices_template`` it additionally uploads an index template.
    """
    args = ''
    help = 'dump mapping to elasticsearch'

    from optparse import make_option

    option_list = BaseCommand.option_list + (
        make_option('-t', '--type', dest='type', help='type name to dump data to elasticsearch', metavar='TYPE',
                    default=''),
        make_option('-T', '--indices_template', dest='indices_template',
                    help='index template name to dump data to elasticsearch', metavar='TYPE',
                    default=''),
        make_option('-i', '--index-prefix', dest='index_prefix', help='index name to dump data to elasticsearch',
                    metavar='INDEX_PREFIX'),
        make_option('-p', '--parallel', dest='parallel', help='parallel process count', metavar='PARALLEL'),
        make_option('-s', '--pks', dest='pks', help='specify sync pks, comma separated', metavar='PKS', default=''),
        make_option('--streaming-slicing', dest='streaming_slicing', action='store_true', default=True),
        make_option('--no-streaming-slicing', dest='streaming_slicing', action='store_false', default=True),
    )

    def handle(self, *args, **options):
        """Create the index (when missing) and put its mapping and/or template.

        The index/mapping step runs when ``--type`` is ``all`` or
        ``associate_tag``; the template step runs when ``--indices_template``
        is given. Errors are logged, not raised.
        """
        try:
            es_cli = ESPerform.get_cli()

            # This command manages exactly one type. ``--type all`` must still
            # operate on it: the requested value is only used as a selector and
            # never replaces the type name (it used to, which targeted an index
            # literally named "all").
            type_name = "associate_tag"
            requested_type = options["type"]
            if requested_type and (requested_type == "all" or requested_type == type_name):
                official_index_name = ESPerform.get_official_index_name(type_name)
                index_exists = es_cli.indices.exists(official_index_name)
                if not index_exists:
                    logging.info("begin create [%s] index!" % type_name)
                    ESPerform.create_index(es_cli, type_name)

                logging.info("begin create [%s] mapping!" % type_name)
                ESPerform.put_index_mapping(es_cli, type_name)

            if options["indices_template"]:
                template_file_name = options["indices_template"]
                # The template file name doubles as the template name.
                if ESPerform.put_indices_template(es_cli=es_cli, template_file_name=template_file_name,
                                                  template_name=template_file_name):
                    logging.info("put indices template suc!")
                else:
                    logging.error("put indices template err!")
        except Exception:
            logging.error("catch exception,err_msg:%s", traceback.format_exc())
#!/usr/bin/env python
# -*- coding: utf-8 -*-
from django.core.management.base import BaseCommand
import traceback
import logging
from libs.table_scan import TableSlicer
from associate.type_info import tips_attr_sync_to_redis_type_info_map
class Command(BaseCommand):
    """Management command that runs each selected type's ``get_data_func`` per row.

    Nothing is processed unless ``--type`` is given; ``--type all`` selects
    every type returned by ``tips_attr_sync_to_redis_type_info_map()``.
    """
    args = ''
    help = 'dump data to redis, parallel'

    from optparse import make_option

    option_list = BaseCommand.option_list + (
        make_option('-t', '--type', dest='type', help='type name to dump data to elasticsearch', metavar='TYPE',
                    default=''),
        make_option('-i', '--index-prefix', dest='index_prefix', help='index name to dump data to elasticsearch',
                    metavar='INDEX_PREFIX'),
        make_option('-p', '--parallel', dest='parallel', help='parallel process count', metavar='PARALLEL'),
        make_option('-s', '--pks', dest='pks', help='specify sync pks, comma separated', metavar='PKS', default=''),
        make_option('--streaming-slicing', dest='streaming_slicing', action='store_true', default=True),
        make_option('--no-streaming-slicing', dest='streaming_slicing', action='store_false', default=True),
        make_option('-S', '--sync_type', dest='sync_type', help='sync data to es', metavar='TYPE', default='')
    )

    def handle(self, *args, **options):
        """Walk the queryset of every selected type and process each instance.

        Errors are logged, not raised.
        """
        try:
            # Fetch the map once instead of once per type.
            type_info_map = tips_attr_sync_to_redis_type_info_map()
            wanted_type = options["type"]
            for type_name in type_info_map.keys():
                if not wanted_type:
                    continue
                if wanted_type != "all" and type_name != wanted_type:
                    continue

                logging.info("begin sync [%s] data to redis!" % type_name)
                type_info = type_info_map[type_name]
                query_set = type_info.queryset

                slicer = TableSlicer(queryset=query_set, chunk_size=type_info.bulk_insert_chunk_size)
                for chunk in slicer.chunks():
                    for instance in list(chunk):
                        logging.info("get instance:%s" % instance)
                        type_info.get_data_func(instance)
        except Exception:
            # Not a bare ``except:`` so that Ctrl-C (KeyboardInterrupt) and
            # SystemExit can still stop the command.
            logging.error("catch exception,err_msg:%s", traceback.format_exc())
{
"dynamic":"strict",
"properties": {
"id":{"type":"text"},
"suggest":{
"analyzer":"keyword",
"search_analyzer":"keyword",
"type":"completion",
"contexts":[
{
"name":"is_online",
"type": "category"
}
]
},
"suggest_type":{"type":"long"},//0-汉字,1-汉字全拼,2-拼音,3-拼音全拼,4-拼音简写,5-拼音简写全拼
"agile_tag_type":{"type":"long"},//tips数据所属类型,0-tag,1-hospital,2-doctor,3-wiki
"ori_name":{"type":"keyword"},//原名称
"results_num":{"type":"integer"},//结果数量
"is_online":{"type":"boolean"},//上线
"offline_score":{"type":"double"},//离线分
"agile_tag_id":{"type":"long"},//标签ID
"create_tag_type":{"type":"long"},//标签创建类型
"style":{"type":"long"},//新标签样式
"topic_recommend_sort":{"type":"long"}
}
}
from django.db import models
# Create your models here.
from __future__ import unicode_literals, absolute_import, print_function
# !/usr/bin/env python
# -*- coding: utf-8 -*-
import logging
import traceback
import base64
from django.db import models
from gm_types.gaia import (
AGILE_TAG_TYPE,
AGILE_TAG_CREATE_TYPE,
AGILE_TAG_STYLE,
AGILE_TAG_RECOMMEND_TYPE,
)
class BaseModel(models.Model):
    """Abstract base model providing an online flag and two timestamps."""

    # Field order is kept as in the original declaration.
    is_online = models.BooleanField(verbose_name=u"是否有效", default=True)
    create_time = models.DateTimeField(verbose_name=u'创建时间', auto_now_add=True)
    update_time = models.DateTimeField(verbose_name=u'更新时间', auto_now=True)

    class Meta:
        abstract = True
class AgileTag(BaseModel):
    """Model mapped to the ``api_agile_tag`` table (app label ``api``)."""

    name = models.CharField(verbose_name=u'新标签名字', max_length=128, null=False, unique=True, default='')
    description = models.TextField(verbose_name=u'描述', default='')
    create_tag_type = models.CharField(verbose_name=u"标签创建类型", max_length=3, choices=AGILE_TAG_CREATE_TYPE)
    style = models.CharField(verbose_name=u"标签样式", max_length=3, choices=AGILE_TAG_STYLE)
    topic_recommend_sort = models.IntegerField(verbose_name=u'帖子推荐排序', default=9999)

    class Meta:
        verbose_name = u'新标签'
        db_table = 'api_agile_tag'
        app_label = 'api'
class AgileTagType(BaseModel):
    """Model mapped to the ``api_agile_tag_type`` table (app label ``api``).

    Links a tag (by plain integer id, not a foreign key) to one tag type.
    """

    class Meta:
        verbose_name = u'新标签类型(可多选)'
        db_table = 'api_agile_tag_type'
        app_label = 'api'

    agile_tag_id = models.IntegerField(verbose_name=u'新标签', db_index=True)
    agile_tag_type = models.CharField(verbose_name=u"标签类型", max_length=3, choices=AGILE_TAG_TYPE)

    @property
    def get_by_id_name(self):
        """Return the ``AgileTag`` row with id ``agile_tag_id`` as a dict.

        :return: the first row of ``.values()``; an empty dict when no row
            exists or the query fails (the traceback is logged).
        """
        try:
            results = AgileTag.objects.filter(id=self.agile_tag_id).values()
            return results[0]
        except Exception:
            # Narrowed from a bare ``except:`` so KeyboardInterrupt and
            # SystemExit are no longer swallowed by a property access.
            logging.error("catch exception,err_msg:%s", traceback.format_exc())
            return {}
# !/usr/bin/env python
# encoding=utf-8
from __future__ import absolute_import
from django.contrib import admin
# Register your models here.
from __future__ import unicode_literals
from django.apps import AppConfig
class SearchConfig(AppConfig):
    """Django application configuration for the ``search`` app."""

    # Dotted Python path of the application this configuration applies to.
    name = 'search'
from __future__ import unicode_literals
from django.db import models
# Create your models here.
from django.test import TestCase
# Create your tests here.
#!/usr/bin/env python
# -*- coding: utf-8 -*-
from __future__ import unicode_literals, absolute_import, print_function
import logging
import traceback
from libs.es import ESPerform
def get_suggest_tips(query, agile_tag_type):
    """Return completion suggestions for ``query`` from the ``associate_tag`` index.

    :param query: the text typed so far
    :param agile_tag_type: when greater than 0, only suggestions whose
        ``agile_tag_id`` belongs to a document of that type are returned;
        otherwise no type filter is applied
    :return: list of single-element lists, each holding a dict with the keys
        ``ori_name``, ``results_num`` and ``highlight``; suggestions are
        de-duplicated by ``ori_name``. An empty list when anything fails
        (the traceback is logged).
    """
    try:
        # The iOS input method sometimes carries a U+2006 character.
        query = query.replace("\u2006", '')

        q = {
            "suggest": {
                "tips-suggest": {
                    "prefix": query,
                    "completion": {
                        "field": "suggest",
                        "size": 50,
                        "contexts": {
                            "is_online": [True]
                        },
                        "fuzzy": {
                            "fuzziness": 0
                        }
                    }
                }
            }
        }
        logging.info("get qqqqqqqq:%s" % q)
        have_read_tips_set = set()
        ret_list = list()
        result_dict = ESPerform.get_search_results(ESPerform.get_cli(), sub_index_name="associate_tag", query_body=q,
                                                   offset=0, size=50, is_suggest_request=True)

        logging.info("get agile_tag_type:%s" % agile_tag_type)
        # Without a positive type there is nothing to filter on. The id set
        # used to stay empty in that case, which dropped every suggestion.
        filter_by_type = agile_tag_type > 0
        agile_type_id = set()
        if filter_by_type:
            # Collect the tag ids that belong to the requested type.
            # NOTE(review): only the first 100 documents of the type are
            # fetched here - confirm that this bound is intended.
            q = {
                "query": {
                    "term": {
                        "agile_tag_type": agile_tag_type
                    }
                }
            }
            result_dicts = ESPerform.get_search_results(ESPerform.get_cli(), sub_index_name="associate_tag",
                                                        query_body=q,
                                                        offset=0, size=100, is_suggest_request=True)
            logging.info("get result_dict_type:%s" % result_dicts)
            for tips_item in result_dicts["hits"]["hits"]:
                agile_type_id.add(tips_item["_source"]["agile_tag_id"])
            logging.info("get agile_type_id:%s" % agile_type_id)

        logging.info("get result_dict:%s" % result_dict)
        for tips_item in result_dict["suggest"]["tips-suggest"]:
            for hit_item in tips_item["options"]:
                source = hit_item["_source"]
                ori_name = source["ori_name"]
                if ori_name in have_read_tips_set:
                    continue
                if filter_by_type and source["agile_tag_id"] not in agile_type_id:
                    continue

                have_read_tips_set.add(ori_name)
                highlight_marks = u'<ems>%s</ems>' % query
                highlight_name = ori_name.replace(query, highlight_marks)
                source["highlight_name"] = highlight_name
                ret_list.append(
                    [{"ori_name": ori_name, "results_num": source["results_num"], "highlight": highlight_name}])
        return ret_list
    except Exception:
        logging.error("catch exception,err_msg:%s", traceback.format_exc())
        return list()
#!/usr/bin/env python
# -*- coding: utf-8 -*-
from __future__ import unicode_literals, absolute_import, print_function
from gm_rpcd.all import bind
import logging
import traceback
import json
from libs.es import ESPerform
from libs.cache import redis_client
from libs.tools import json_http_response
from django.shortcuts import render
from search.utils.auto_tips import get_suggest_tips
def auto_complete(request):
    """Auto complete words/tags/doctors etc.

    URL:
        ~/search/auto_complete?scope=[kw]&q=双

    Return:
        {'error': 0|1, 'data': [word, word, word],}

    ``lat`` and ``lng`` are optional query parameters; a missing or empty
    value is treated as 0.
    """
    try:
        q = request.GET.get('q', '').strip()
        # The defaults must be strings: the previous integer default 0 has no
        # .strip(), so every request without lat/lng ended in the error branch.
        lat = request.GET.get('lat', '').strip()
        lng = request.GET.get('lng', '').strip()
        if not q:
            return json_http_response({'error': 0, 'data': []})

        # NOTE(review): this is the three-argument get_suggest_tips imported
        # from search.utils.auto_tips - confirm its signature there.
        data = get_suggest_tips(q, float(lat or 0), float(lng or 0))

        result = {
            'error': 0,
            'data': data,
        }
        # Let logging format the values: str(q).encode("utf-8") can raise on
        # non-ASCII input under Python 2 and logs bytes reprs under Python 3.
        logging.info("duan add,q is:%s,result:%s", q, result)
        return json_http_response(result)
    except Exception:
        logging.error("catch exception,err_msg:%s", traceback.format_exc())
        return json_http_response({'error': 1, 'data': []})
#!/usr/bin/env python
# -*- coding: utf-8 -*-
from __future__ import unicode_literals, absolute_import, print_function
from gm_rpcd.all import bind
import logging
import traceback
from associate.search.utils.auto_tips import get_suggest_tips
@bind("search_tips/search/auto_associate_query")
def auto_associate_query(query, agile_tag_type=-1):
    """Return auto-complete suggestions for ``query``.

    Bound under the name ``search_tips/search/auto_associate_query``.

    :param query: the text typed so far
    :param agile_tag_type: passed through to ``get_suggest_tips``
        (default ``-1``)
    :return: ``{'error': 0, 'data': <suggestions>}`` on success,
        ``{'error': 1, 'data': []}`` when an exception is raised
    """
    try:
        data = get_suggest_tips(query, agile_tag_type)

        result = {
            'error': 0,
            'data': data,
        }
        return result
    except Exception:
        logging.error("catch exception,err_msg:%s", traceback.format_exc())
        return {"error": 1, "data": []}
#!/usr/bin/env python
# -*- coding: utf-8 -*-
from __future__ import unicode_literals, absolute_import, print_function
from gm_rpcd.all import bind
import logging
import traceback
from associate.commons.words_utils import QueryWordAttr
@bind("search_tips/search/tag_hotword_num")
def get_hotword_num(tag_id_list):
    """Return the stored result count for every tag id.

    Bound under the name ``search_tips/search/tag_hotword_num``.

    :param tag_id_list: iterable of tag ids
    :return: ``{"tag_hotword_num": [{"tag_id": <id>, "hot_num": <count>}, ...]}``
        in input order; ``{"tag_hotword_num": []}`` when an exception is raised
    """
    try:
        # ``tag_id`` instead of ``id`` so the builtin is not shadowed.
        tag_id_num = [
            {"tag_id": tag_id, "hot_num": QueryWordAttr.get_query_results_num(tag_id)}
            for tag_id in tag_id_list
        ]
        return {"tag_hotword_num": tag_id_num}
    except Exception:
        logging.error("catch exception,err_msg:%s", traceback.format_exc())
        return {"tag_hotword_num": []}
from django.test import TestCase
# Create your tests here.
This diff is collapsed.
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import os
import sys
import logging
import traceback
from libs.tools import tzlc, getMd5Digest
from libs.es import ESPerform
from libs.cache import redis_client
import json
import base64
from associate.models import agile_tag
from associate.commons.commons import get_tips_suggest_list
from django.conf import settings
from associate.commons.words_utils import QueryWordAttr
class TagTransfer(object):
    """Builds the suggest documents for tags and stores their result counts in redis."""

    @classmethod
    def get_tag_suggest_data_list(cls, instance):
        """Build the document and the suggest items for one tag-type row.

        :param instance: object exposing ``get_by_id_name``, ``agile_tag_id``
            and ``agile_tag_type``
        :return: ``(item_dict, suggest_list)``; ``([], [])`` when anything
            fails (the traceback is logged)
        """
        try:
            results = instance.get_by_id_name
            name = results["name"].strip()

            item_dict = dict()
            item_dict["id"] = getMd5Digest(str(name))
            item_dict["ori_name"] = name
            item_dict["is_online"] = results["is_online"]
            item_dict["results_num"] = QueryWordAttr.get_query_results_num(instance.agile_tag_id)
            item_dict["agile_tag_type"] = instance.agile_tag_type
            item_dict["agile_tag_id"] = instance.agile_tag_id
            item_dict["create_tag_type"] = results["create_tag_type"]
            item_dict["style"] = results["style"]
            item_dict["topic_recommend_sort"] = results["topic_recommend_sort"]
            item_dict["offline_score"] = 0.0

            suggest_list = get_tips_suggest_list(name)

            logging.info("get ret_list:%s" % [item_dict])
            # This line used to print ret_list a second time.
            logging.info("get suggest_list:%s" % suggest_list)
            return (item_dict, suggest_list)
        except Exception:
            logging.error("catch exception,err_msg:%s", traceback.format_exc())
            return ([], [])

    @classmethod
    def get_tractate_data_name_mapping_results_to_redis(cls, name):
        """Count the online ``tractate`` documents matching a tag name.

        :param name: tag name used as the ``multi_match`` query text; when it
            is empty the search is sent with an empty query body
        :return: ``total_count`` of the search result; ``None`` when the
            search fails (the traceback is logged)
        """
        try:
            tag_name = name
            q = dict()
            if tag_name:
                # field name -> boost
                multi_fields = {
                    "content": 6,
                    "tractate_tag_name": 3,
                    "tractate_tag_name_content": 4,
                    "author": 2,
                }
                query_fields = ['^'.join((k, str(v))) for (k, v) in multi_fields.items()]
                multi_match = {
                    'query': tag_name,
                    'type': 'cross_fields',
                    'operator': 'and',
                    'fields': query_fields,
                }
                q['query'] = {
                    'bool': {
                        "should": [
                            {'multi_match': multi_match}
                        ],
                        "must": [
                            {"term": {"is_online": True}}
                        ],
                        "minimum_should_match": 1
                    }
                }

            # size=0: only the total count is read from the result.
            result_dict = ESPerform.get_search_results(ESPerform.get_cli(settings.GM_ORI_ES_INFO_LIST),
                                                       sub_index_name="tractate", doc_type="tractate", query_body=q,
                                                       offset=0, size=0)
            return result_dict["total_count"]
        except Exception:
            logging.error("catch exception,err_msg:%s", traceback.format_exc())
            return None

    @classmethod
    def set_data_to_redis(cls, instance):
        """Store the tractate count of the instance's tag in redis.

        The key is ``QueryWordAttr.tips_num_redis_key_prefix`` followed by the
        tag id, i.e. the key ``QueryWordAttr.get_query_results_num`` reads.
        Nothing is written when the count could not be computed.

        :param instance: object exposing ``agile_tag_id``
        """
        try:
            name = agile_tag.AgileTag.objects.filter(id=instance.agile_tag_id).values_list("name", flat=True)[0]
            tractate_results = cls.get_tractate_data_name_mapping_results_to_redis(name)
            if tractate_results is None:
                # The search failed (already logged): keep whatever value is
                # stored instead of writing None.
                return

            key = QueryWordAttr.tips_num_redis_key_prefix + str(instance.agile_tag_id)
            redis_client.set(key, tractate_results)
        except Exception:
            logging.error("catch exception,err_msg:%s", traceback.format_exc())
from django.shortcuts import render
# Create your views here.
import sys
from gm_rpcd.commands.utils import add_cwd_to_path
from gm_rpcd.internals.utils import serve
def main(args):
    """Start the gm_rpcd server in develop mode.

    :param args: command-line arguments without the program name. The first
        one, when present, is either ``PORT`` or ``HOST:PORT``; the default
        is ``127.0.0.1:9000``.
    :raises ValueError: when the port part is not an integer
    """
    add_cwd_to_path()

    # Imported here, after add_cwd_to_path() has run.
    from gm_rpcd.internals.configuration import config
    config.is_develop_mode = True
    config.freeze()

    host = '127.0.0.1'
    port = 9000
    try:
        first_arg = args[0]
    except IndexError:
        pass
    else:
        if ':' in first_arg:
            # Split on the last colon only, so a host that itself contains
            # colons no longer fails tuple unpacking.
            host, port_text = first_arg.rsplit(':', 1)
            port = int(port_text)
        else:
            port = int(first_arg)

    print('Serving on {}:{}'.format(host, port))
    serve(host=host, port=port)
if __name__ == '__main__':
    # Script entry point: pass the arguments without the program name.
    main(sys.argv[1:])
from django.contrib import admin
# Register your models here.
# Dotted path of the AppConfig class to use for this package.
default_app_config = 'injection.data_sync.apps.DataSyncApp'
# coding=utf-8
from __future__ import unicode_literals, print_function, absolute_import
from django.apps import AppConfig
class DataSyncApp(AppConfig):
    """Django application configuration for ``injection.data_sync``."""

    # Dotted Python path of the application.
    name = 'injection.data_sync'
    # Application label; it differs from the last component of ``name``.
    label = 'injected_data_sync'
# -*- coding: UTF-8 -*-
import logging
import traceback
import json
import pymysql
import threading
import random
import datetime
from celery import shared_task
from django.conf import settings
from django.core import serializers
from trans2es.type_info import get_type_info_map,TypeInfo
# from rpc.all import get_rpc_remote_invoker
from libs.es import ESPerform
from libs.cache import redis_client
@shared_task
def write_to_es(es_type, pk_list, use_batch_query_set=False):
    """Celery task that inserts the given primary keys of one type.

    :param es_type: key into ``get_type_info_map()``
    :param pk_list: primary keys to write; duplicates are removed first
    :param use_batch_query_set: passed through to ``insert_table_by_pk_list``

    Errors are logged and not re-raised.
    """
    try:
        # De-duplicate the keys (the resulting order is not defined).
        pk_list = list(frozenset(pk_list))

        type_info_map = get_type_info_map()
        type_info = type_info_map[es_type]

        logging.info("consume es_type:%s" % str(es_type))
        type_info.insert_table_by_pk_list(
            sub_index_name=type_info.name,
            pk_list=pk_list,
            use_batch_query_set=use_batch_query_set,
            es=ESPerform.get_cli()
        )
    except Exception:
        # Narrowed from a bare ``except:`` so SystemExit and
        # KeyboardInterrupt are not swallowed inside the task.
        logging.error("catch exception,err_msg:%s", traceback.format_exc())
\ No newline at end of file
from django.db import models
# Create your models here.
from django.test import TestCase
# Create your tests here.
from django.shortcuts import render
# Create your views here.
...@@ -17,9 +17,8 @@ class ESPerform(object): ...@@ -17,9 +17,8 @@ class ESPerform(object):
cli_info_list = settings.ES_INFO_LIST cli_info_list = settings.ES_INFO_LIST
index_prefix = settings.ES_INDEX_PREFIX index_prefix = settings.ES_INDEX_PREFIX
@classmethod @classmethod
def get_cli(cls,es_ip_list=None): def get_cli(cls, es_ip_list=None):
try: try:
es_info_list = es_ip_list if es_ip_list else cls.cli_info_list es_info_list = es_ip_list if es_ip_list else cls.cli_info_list
# if es_ip_list:#tricky的做法,兼容测试es # if es_ip_list:#tricky的做法,兼容测试es
...@@ -32,7 +31,7 @@ class ESPerform(object): ...@@ -32,7 +31,7 @@ class ESPerform(object):
return None return None
@classmethod @classmethod
def get_official_index_name(cls,sub_index_name,index_flag=None): def get_official_index_name(cls, sub_index_name, index_flag=None):
""" """
:remark:get official es index name :remark:get official es index name
:param sub_index_name: :param sub_index_name:
...@@ -40,7 +39,7 @@ class ESPerform(object): ...@@ -40,7 +39,7 @@ class ESPerform(object):
:return: :return:
""" """
try: try:
assert (index_flag in [None,"read","write"]) assert (index_flag in [None, "read", "write"])
official_index_name = cls.index_prefix + "-" + sub_index_name official_index_name = cls.index_prefix + "-" + sub_index_name
if index_flag: if index_flag:
...@@ -52,11 +51,17 @@ class ESPerform(object): ...@@ -52,11 +51,17 @@ class ESPerform(object):
return None return None
@classmethod @classmethod
def __load_mapping(cls,doc_type): def __load_mapping(cls, doc_type):
try: try:
mapping_file_path = os.path.join( mapping_file_path = os.path.join(
os.path.dirname(__file__), os.path.dirname(__file__),
'..', 'trans2es','mapping', '%s.json' % (doc_type,)) '..', 'trans2es', 'mapping', '%s.json' % (doc_type,))
if doc_type == "associate_tag":
mapping_file_path = os.path.join(
os.path.dirname(__file__),
'..', 'associate', 'mapping', '%s.json' % (doc_type,))
mapping = '' mapping = ''
with open(mapping_file_path, 'r') as f: with open(mapping_file_path, 'r') as f:
for line in f: for line in f:
...@@ -69,7 +74,7 @@ class ESPerform(object): ...@@ -69,7 +74,7 @@ class ESPerform(object):
return None return None
@classmethod @classmethod
def create_index(cls,es_cli,sub_index_name): def create_index(cls, es_cli, sub_index_name):
""" """
:remark: create es index,alias index :remark: create es index,alias index
:param sub_index_name: :param sub_index_name:
...@@ -82,11 +87,11 @@ class ESPerform(object): ...@@ -82,11 +87,11 @@ class ESPerform(object):
index_exist = es_cli.indices.exists(official_index_name) index_exist = es_cli.indices.exists(official_index_name)
if not index_exist: if not index_exist:
es_cli.indices.create(official_index_name) es_cli.indices.create(official_index_name)
read_alias_name = cls.get_official_index_name(sub_index_name,"read") read_alias_name = cls.get_official_index_name(sub_index_name, "read")
es_cli.indices.put_alias(official_index_name,read_alias_name) es_cli.indices.put_alias(official_index_name, read_alias_name)
write_alias_name = cls.get_official_index_name(sub_index_name,"write") write_alias_name = cls.get_official_index_name(sub_index_name, "write")
es_cli.indices.put_alias(official_index_name,write_alias_name) es_cli.indices.put_alias(official_index_name, write_alias_name)
return True return True
except: except:
...@@ -94,7 +99,7 @@ class ESPerform(object): ...@@ -94,7 +99,7 @@ class ESPerform(object):
return False return False
@classmethod @classmethod
def put_index_mapping(cls,es_cli,sub_index_name,mapping_type="_doc"): def put_index_mapping(cls, es_cli, sub_index_name, mapping_type="_doc"):
""" """
:remark: put index mapping :remark: put index mapping
:param es_cli: :param es_cli:
...@@ -105,13 +110,12 @@ class ESPerform(object): ...@@ -105,13 +110,12 @@ class ESPerform(object):
try: try:
assert (es_cli is not None) assert (es_cli is not None)
write_alias_name = cls.get_official_index_name(sub_index_name,"write") write_alias_name = cls.get_official_index_name(sub_index_name, "write")
index_exist = es_cli.indices.exists(write_alias_name) index_exist = es_cli.indices.exists(write_alias_name)
if not index_exist: if not index_exist:
return False return False
mapping_dict = cls.__load_mapping(sub_index_name) mapping_dict = cls.__load_mapping(sub_index_name)
es_cli.indices.put_mapping(index=write_alias_name,body=mapping_dict,doc_type=mapping_type) es_cli.indices.put_mapping(index=write_alias_name, body=mapping_dict, doc_type=mapping_type)
return True return True
except: except:
...@@ -119,7 +123,7 @@ class ESPerform(object): ...@@ -119,7 +123,7 @@ class ESPerform(object):
return False return False
@classmethod @classmethod
def put_indices_template(cls,es_cli,template_file_name, template_name): def put_indices_template(cls, es_cli, template_file_name, template_name):
""" """
:remark put index template :remark put index template
:param es_cli: :param es_cli:
...@@ -131,7 +135,7 @@ class ESPerform(object): ...@@ -131,7 +135,7 @@ class ESPerform(object):
assert (es_cli is not None) assert (es_cli is not None)
mapping_dict = cls.__load_mapping(template_file_name) mapping_dict = cls.__load_mapping(template_file_name)
es_cli.indices.put_template(name=template_name,body=mapping_dict) es_cli.indices.put_template(name=template_name, body=mapping_dict)
return True return True
except: except:
...@@ -139,19 +143,19 @@ class ESPerform(object): ...@@ -139,19 +143,19 @@ class ESPerform(object):
return False return False
@classmethod @classmethod
def es_helpers_bulk(cls,es_cli,data_list,sub_index_name,auto_create_index=False,doc_type="_doc"): def es_helpers_bulk(cls, es_cli, data_list, sub_index_name, auto_create_index=False, doc_type="_doc"):
try: try:
assert (es_cli is not None) assert (es_cli is not None)
official_index_name = cls.get_official_index_name(sub_index_name) official_index_name = cls.get_official_index_name(sub_index_name, "write")
index_exists = es_cli.indices.exists(official_index_name) index_exists = es_cli.indices.exists(official_index_name)
if not index_exists: if not index_exists:
if not auto_create_index: if not auto_create_index:
logging.error("index:%s is not existing,bulk data error!" % official_index_name) logging.error("index:%s is not existing,bulk data error!" % official_index_name)
return False return False
else: else:
cls.create_index(es_cli,sub_index_name) cls.create_index(es_cli, sub_index_name)
cls.put_index_mapping(es_cli,sub_index_name) cls.put_index_mapping(es_cli, sub_index_name)
bulk_actions = [] bulk_actions = []
for data in data_list: for data in data_list:
...@@ -162,7 +166,7 @@ class ESPerform(object): ...@@ -162,7 +166,7 @@ class ESPerform(object):
'_id': data['id'], '_id': data['id'],
'_source': data, '_source': data,
}) })
elasticsearch.helpers.bulk(es_cli,bulk_actions) elasticsearch.helpers.bulk(es_cli, bulk_actions)
return True return True
except: except:
...@@ -170,41 +174,43 @@ class ESPerform(object): ...@@ -170,41 +174,43 @@ class ESPerform(object):
return False return False
@classmethod
def get_search_results(cls, es_cli, sub_index_name, query_body, offset=0, size=10,
                       auto_create_index=False, doc_type="_doc", aggregations_query=False,
                       is_suggest_request=False, batch_search=False):
    """Run a search (or multi-search) against the "read" alias of ``sub_index_name``.

    :param es_cli: Elasticsearch client; must not be None.
    :param sub_index_name: logical index name, resolved through
        ``get_official_index_name(sub_index_name, "read")``.
    :param query_body: request body handed to ``search`` / ``msearch``.
    :param offset: passed as ``from_`` to ``search`` (ignored for batch search).
    :param size: passed as ``size`` to ``search`` (ignored for batch search).
    :param auto_create_index: when the index is missing, create it and put its
        mapping instead of giving up.
    :param doc_type: document type passed to ``search`` / ``msearch``.
    :param aggregations_query: also copy ``res["aggregations"]`` into the result.
    :param is_suggest_request: return the raw ``search`` response unchanged.
    :param batch_search: use ``msearch`` and return its raw response.
    :return: ``None`` when the index is missing and ``auto_create_index`` is
        false; the raw response for suggest / batch requests; otherwise a dict
        with ``total_count`` and ``hits`` (plus ``aggregations`` on request).
        On any error the fallback ``{"total_count": 0, "hits": []}`` is returned.
    """
    try:
        assert (es_cli is not None)

        official_index_name = cls.get_official_index_name(sub_index_name, "read")
        if not es_cli.indices.exists(official_index_name):
            if not auto_create_index:
                logging.error("index:%s is not existing,get_search_results error!" % official_index_name)
                return None
            cls.create_index(es_cli, sub_index_name)
            cls.put_index_mapping(es_cli, sub_index_name)

        # Log the body as text: encoding it first logs a bytes repr (b'...') on
        # Python 3 and can raise on non-ASCII content under Python 2.
        logging.info("duan add,query_body:%s" % str(query_body))

        if batch_search:
            res = es_cli.msearch(body=query_body, index=official_index_name, doc_type=doc_type)
            logging.info("duan add,msearch res:%s" % str(res))
            return res

        res = es_cli.search(index=official_index_name, doc_type=doc_type, body=query_body,
                            from_=offset, size=size)
        if is_suggest_request:
            return res

        result_dict = {
            "total_count": res["hits"]["total"],
            "hits": res["hits"]["hits"]
        }
        if aggregations_query:
            result_dict["aggregations"] = res["aggregations"]
        return result_dict
    except Exception:
        # Best-effort API: log the failure and hand back an empty result.
        # `except Exception` (not a bare except) lets SystemExit/KeyboardInterrupt through.
        logging.error("catch exception,err_msg:%s" % traceback.format_exc())
        return {"total_count": 0, "hits": []}
\ No newline at end of file
...@@ -71,8 +71,8 @@ class HostpitalPosInfo(object): ...@@ -71,8 +71,8 @@ class HostpitalPosInfo(object):
def get_hostpital_info(cls): def get_hostpital_info(cls):
try: try:
if len(cls.hospital_pos_dict) == 0: if len(cls.hospital_pos_dict) == 0:
query = doctor.Hospital.objects.all().query query_results = doctor.Hospital.objects.filter(is_online=True)
query_results = django.db.models.QuerySet(model=doctor.Hospital, query=query) #query_results = django.db.models.QuerySet(model=doctor.Hospital, query=query)
for item in query_results: for item in query_results:
name = item.name name = item.name
lng = float(item.baidu_loc_lng) lng = float(item.baidu_loc_lng)
......
...@@ -2,75 +2,111 @@ ...@@ -2,75 +2,111 @@
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
from __future__ import unicode_literals, absolute_import, print_function from __future__ import unicode_literals, absolute_import, print_function
from gm_rpcd.all import bind
import logging import logging
import traceback import traceback
import json import json
import base64
from libs.es import ESPerform from libs.es import ESPerform
from libs.cache import redis_client from libs.cache import redis_client
from libs.tools import g_hospital_pos_dict from libs.tools import g_hospital_pos_dict
from libs.tools import point_distance from libs.tools import point_distance
from gm_rpcd.all import bind
from trans2es.commons.words_utils import QueryWordAttr, get_tips_word_type
def _describe_distance(distance):
    """Format a distance in metres as the short label shown beside a hospital tip."""
    if distance < 100:
        return "<100" + "米"
    if distance < 1000:
        return "约" + str(int(distance)) + "米"
    if distance < 1000 * 50:
        return "约" + str(round(1.0 * distance / 1000, 1)) + "km"
    return ">50km"


def get_suggest_tips(query, lat, lng, offset=0, size=50):
    """Return auto-complete tips for ``query``.

    Tips come from the "suggest" completion index first. When fewer than 50
    were collected, the list is topped up from the redis set
    ``query:<base64(query)>:search_tip``, whose members are read as
    ``<base64(name)>:<results_num>``.

    :param query: text typed by the user.
    :param lat: user latitude; ``None`` or ``0.0`` disables distance labels.
    :param lng: user longitude; ``None`` or ``0.0`` disables distance labels.
    :param offset: paging offset forwarded to the search call.
    :param size: number of completion options requested from Elasticsearch.
    :return: list of tip dicts (at most 50); an empty list on any error.
    """
    max_tips = 50
    try:
        # The iOS keyboard sometimes injects U+2006 into the query.
        query = query.replace("\u2006", '')
        q = {
            "suggest": {
                "tips-suggest": {
                    "prefix": query,
                    "completion": {
                        "field": "suggest",
                        "size": size,
                        "contexts": {
                            "is_online": [True]
                        },
                        "fuzzy": {
                            "fuzziness": 0
                        }
                    }
                }
            },
            "_source": {
                "includes": ["id", "ori_name", "offline_score", "is_online", "type_flag", "results_num"]
            }
        }
        logging.info("get qqqqqqqq:%s" % q)

        have_read_tips_set = set()
        ret_list = list()
        result_dict = ESPerform.get_search_results(ESPerform.get_cli(), sub_index_name="suggest", query_body=q,
                                                   offset=offset, size=size, is_suggest_request=True)
        logging.info("get result_dict:%s" % result_dict)

        has_position = lat is not None and lng is not None and lat != 0.0 and lng != 0.0
        highlight_marks = u'<ems>%s</ems>' % query

        for tips_item in result_dict["suggest"]["tips-suggest"]:
            for hit_item in tips_item["options"]:
                source = hit_item["_source"]
                ori_name = source["ori_name"]
                if ori_name in have_read_tips_set:
                    continue
                have_read_tips_set.add(ori_name)

                source["highlight_name"] = ori_name.replace(query, highlight_marks)

                type_flag = source["type_flag"]
                if type_flag == "hospital":
                    # A distance label needs both a usable user position and a known hospital position.
                    if has_position and ori_name in g_hospital_pos_dict:
                        hospital_pos = g_hospital_pos_dict[ori_name]
                        distance = point_distance(lng, lat, hospital_pos[0], hospital_pos[1])
                        source["describe"] = _describe_distance(distance)
                    else:
                        source["describe"] = ""
                elif type_flag == "doctor":
                    source["describe"] = ""
                else:
                    source["describe"] = "约" + str(source["results_num"]) + "个结果" if \
                        source["results_num"] else ""

                ret_list.append(source)

        # Top up from redis only when Elasticsearch did not already fill the list.
        # The check is on the number of collected tips; the number of suggest
        # entries in the response says nothing about how many tips were found.
        if len(ret_list) < max_tips:
            QUERY_KEY = "query:{}:search_tip"
            query_base64 = base64.b64encode(query.encode('utf8')).decode('utf8')
            key = QUERY_KEY.format(query_base64)
            for member in redis_client.smembers(key):
                if len(ret_list) >= max_tips:
                    break
                parts = member.decode("utf8").split(":")
                if len(parts) < 2:
                    # Malformed member: skip it rather than lose every tip.
                    continue
                try:
                    ori_name = base64.b64decode(parts[0].encode('utf8')).decode('utf8')
                except ValueError:
                    # Invalid base64 or invalid UTF-8 payload: skip this member.
                    continue
                if ori_name in have_read_tips_set:
                    continue
                # Remember the name so two members for the same word do not yield duplicate tips.
                have_read_tips_set.add(ori_name)
                ret_list.append(
                    {"results_num": parts[1], "ori_name": ori_name, "id": None, "is_online": True,
                     "offline_score": 0,
                     "type_flag": get_tips_word_type(ori_name), "highlight_name": ori_name, "describe": ""})

        return ret_list[0:max_tips]
    except Exception:
        logging.error("catch exception,err_msg:%s" % traceback.format_exc())
        return list()
...@@ -12,9 +12,8 @@ from libs.tools import json_http_response ...@@ -12,9 +12,8 @@ from libs.tools import json_http_response
from search.utils.auto_tips import get_suggest_tips from search.utils.auto_tips import get_suggest_tips
@bind("search_tips/search/auto_complete_query") @bind("search_tips/search/auto_complete_query")
def auto_complete_query(q,lat,lng): def auto_complete_query(q, lat, lng):
try: try:
"""auto complate words/tags/doctors etc. """auto complate words/tags/doctors etc.
...@@ -29,7 +28,7 @@ def auto_complete_query(q,lat,lng): ...@@ -29,7 +28,7 @@ def auto_complete_query(q,lat,lng):
# if not q: # if not q:
# return json_http_response({'error': 0, 'data': []}) # return json_http_response({'error': 0, 'data': []})
data = get_suggest_tips(q,float(lat),float(lng)) data = get_suggest_tips(q, float(lat), float(lng))
result = { result = {
'error': 0, 'error': 0,
...@@ -38,6 +37,4 @@ def auto_complete_query(q,lat,lng): ...@@ -38,6 +37,4 @@ def auto_complete_query(q,lat,lng):
return result return result
except: except:
logging.error("catch exception,err_msg:%s" % traceback.format_exc()) logging.error("catch exception,err_msg:%s" % traceback.format_exc())
return {"error":1,"data":[]} return {"error": 1, "data": []}
from __future__ import unicode_literals, absolute_import, print_function from __future__ import unicode_literals, absolute_import, print_function
import pymysql import pymysql
from _celery import app as celery_app
pymysql.install_as_MySQLdb() pymysql.install_as_MySQLdb()
\ No newline at end of file
No preview for this file type
from __future__ import absolute_import, unicode_literals
import os
from celery import Celery
from django.conf import settings
# Celery application bootstrap: point Celery at the Django settings module,
# create the app, load its configuration and discover tasks.

# set the default Django settings module for the 'celery' program.
os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'search_tips.settings')
app = Celery('search_tips')
# Using a string here means the worker doesn't have to serialize
# the configuration object to child processes.
# - namespace='CELERY' means all celery-related configuration keys
# should have a `CELERY_` prefix.
app.config_from_object('django.conf:settings', namespace='CELERY')
# Load task modules from all registered Django app configs.
app.autodiscover_tasks()
# Set the broker URL explicitly from settings.CELERY_BROKER_URL.
# NOTE(review): presumably redundant with the CELERY_-namespaced config load above - confirm.
app.conf.broker_url = settings.CELERY_BROKER_URL
@app.task(bind=True)
def debug_task(self):
    """Diagnostic task: print the repr of the bound task's own request."""
    request_line = 'Request: {0!r}'.format(self.request)
    print(request_line)
# coding=utf-8
from __future__ import unicode_literals, print_function, absolute_import
from django.conf import settings
import itertools
import logging
class CeleryTaskRouter(object):
    """Celery router that maps a task name to the queue configured for it."""

    # Map[QueueName, List[TaskName]]
    queue_task_map = {
        "tapir-search_tips": [
            'injection.data_sync.tasks.write_to_es',
        ]
    }

    # Map[TaskName, QueueName] - the inverse of queue_task_map.
    task_queue_map = {
        task_name: queue_name
        for queue_name, task_names in queue_task_map.items()
        for task_name in task_names
    }

    def route_for_task(self, task, args=None, kwargs=None):
        """Return the queue name registered for ``task``, or None when it has none.

        ``args`` and ``kwargs`` are accepted but not used.
        """
        # A DEBUG bypass and a "slow" queue for statistic/export tasks were
        # sketched here earlier but left disabled; only the static map is used.
        return self.task_queue_map.get(task)
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import os
import logging
# Directory that every log file configured below is written to.
LOG_DIR = '/data/log/search_tips/app/'

# Configure the root logger at import time: INFO and above go to filelog.log.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s %(levelname)s %(module)s.%(funcName)s Line:%(lineno)d %(message)s',
    filename=os.path.join(LOG_DIR, 'filelog.log'),
)

# Logging dict-config: formatters, file/console handlers and the loggers bound to them.
LOGGING = {
    'version': 1,
    'disable_existing_loggers': False,
    'filters': {
        'require_debug_true': {
            '()': 'django.utils.log.RequireDebugTrue',
        },
    },
    'formatters': {
        'verbose': {
            'format': '%(asctime)s %(levelname)s %(module)s.%(funcName)s Line:%(lineno)d %(message)s'
        },
        'simple': {
            'format': '%(levelname)s %(message)s'
        },
        'profile': {
            'format': '%(asctime)s %(message)s'
        },
        'raw': {
            'format': '%(message)s'
        }
    },
    'handlers': {
        'console': {
            'level': 'DEBUG',
            'class': 'logging.StreamHandler',
            'formatter': 'simple'
        },
        # Default server log (written to filelog.log under LOG_DIR); log splitting
        # is handled by the Linux logrotate tool.
        # NOTE(review): no maxBytes/backupCount is set on these RotatingFileHandlers;
        # if rotation really is external, confirm whether WatchedFileHandler was intended.
        'default': {
            'level': 'INFO',
            'class': 'logging.handlers.RotatingFileHandler',
            'filename': os.path.join(LOG_DIR, 'filelog.log'),
            'formatter': 'verbose',
        },
        # Default server ERROR log
        'default_err': {
            'level': 'ERROR',
            'class': 'logging.handlers.RotatingFileHandler',
            'filename': os.path.join(LOG_DIR, 'error_logger.log'),
            'formatter': 'verbose',
        },
        'exception_logger': {
            'level': 'INFO',
            'class': 'logging.handlers.RotatingFileHandler',
            'filename': os.path.join(LOG_DIR, 'exception_logger.log'),
            'formatter': 'verbose',
        },
        'tracer_handler': {
            'level': 'INFO',
            'class': 'logging.handlers.RotatingFileHandler',
            'filename': os.path.join(LOG_DIR, 'tracer.log'),
            'formatter': 'raw'
        },
    },
    'loggers': {
        'django': {
            'handlers': ['default'],
            'propagate': True,
            'level': 'INFO',
        },
        'django.request': {
            'handlers': ['default_err'],
            'level': 'ERROR',
            'propagate': False,
        },
        'exception_logger': {
            'handlers': ['exception_logger'],
            'level': 'INFO',
            'propagate': False,
        },
        'gm_tracer.subscribe': {
            'handlers': ['tracer_handler'],
            'propagate': False,
            'level': 'INFO'
        },
    },
}
...@@ -48,7 +48,7 @@ DEBUG = True ...@@ -48,7 +48,7 @@ DEBUG = True
# } # }
CELERY_TIMEZONE = 'Asia/Shanghai' CELERY_TIMEZONE = 'Asia/Shanghai'
CELERY_ROUTES = ['physical.celery_task_router.CeleryTaskRouter'] CELERY_ROUTES = ['search_tips.celery_task_router.CeleryTaskRouter']
# Application definition # Application definition
...@@ -60,7 +60,9 @@ INSTALLED_APPS = [ ...@@ -60,7 +60,9 @@ INSTALLED_APPS = [
'django.contrib.messages', 'django.contrib.messages',
'django.contrib.staticfiles', 'django.contrib.staticfiles',
'trans2es', 'trans2es',
'search' 'search',
'injection.data_sync',
"associate"
] ]
MIDDLEWARE = [ MIDDLEWARE = [
......
No preview for this file type
...@@ -10,8 +10,6 @@ from gm_types.gaia import SERVICE_ITEM_PRICE_TYPE, DOCTOR_TYPE ...@@ -10,8 +10,6 @@ from gm_types.gaia import SERVICE_ITEM_PRICE_TYPE, DOCTOR_TYPE
import uuid import uuid
def uuid4(): def uuid4():
""" """
...@@ -19,10 +17,11 @@ def uuid4(): ...@@ -19,10 +17,11 @@ def uuid4():
""" """
return uuid.uuid4().hex return uuid.uuid4().hex
def get_tips_suggest_list(instance_cn_name): def get_tips_suggest_list(instance_cn_name):
try: try:
#ch_full_weight = 6.0 * 1000 # ch_full_weight = 6.0 * 1000
#py_full_weight = 3.0 * 1000 # py_full_weight = 3.0 * 1000
full_weight = 3.0 * 1000 full_weight = 3.0 * 1000
py_acronym_full_weight = 3.0 * 1000 py_acronym_full_weight = 3.0 * 1000
...@@ -42,13 +41,14 @@ def get_tips_suggest_list(instance_cn_name): ...@@ -42,13 +41,14 @@ def get_tips_suggest_list(instance_cn_name):
# 中文 # 中文
for i in range(len(ch_full_word)): for i in range(len(ch_full_word)):
ch_name_term = ch_full_word[i:].strip() ch_name_term = ch_full_word[i:].strip()
if ch_name_term and ch_full_word[i]!="(" and ch_full_word[i]!=")": if ch_name_term and ch_full_word[i] != "(" and ch_full_word[i] != ")":
prefix_weight = ch_prefix_weight if len(ch_name_term) != len(ch_full_word) else full_weight prefix_weight = ch_prefix_weight if len(ch_name_term) != len(ch_full_word) else full_weight
suggest_type = 0 if len(ch_name_term) != len(ch_full_word) else 1 suggest_type = 0 if len(ch_name_term) != len(ch_full_word) else 1
term_begin_prefix_weight = begin_prefix_weight if i == 0 else 1.0 term_begin_prefix_weight = begin_prefix_weight if i == 0 else 1.0
suggest_item = { suggest_item = {
"input": [ch_name_term], "input": [ch_name_term],
"word_weight": (1.0 * len(ch_name_term) / len((ch_full_word))) * prefix_weight * term_begin_prefix_weight, "word_weight": (1.0 * len(ch_name_term) / len(
(ch_full_word))) * prefix_weight * term_begin_prefix_weight,
"suggest_type": suggest_type "suggest_type": suggest_type
} }
if ch_name_term[0] not in suggest_dict: if ch_name_term[0] not in suggest_dict:
...@@ -59,18 +59,19 @@ def get_tips_suggest_list(instance_cn_name): ...@@ -59,18 +59,19 @@ def get_tips_suggest_list(instance_cn_name):
suggest_dict[ch_name_term[0]]["input"].append(ch_name_term) suggest_dict[ch_name_term[0]]["input"].append(ch_name_term)
if suggest_item["word_weight"] > suggest_dict[ch_name_term[0]]["word_weight"]: if suggest_item["word_weight"] > suggest_dict[ch_name_term[0]]["word_weight"]:
suggest_dict[ch_name_term[0]]["word_weight"] = suggest_item["word_weight"] suggest_dict[ch_name_term[0]]["word_weight"] = suggest_item["word_weight"]
suggest_dict[ch_name_term[0]]["suggest_type"] = suggest_item["suggest_type"] suggest_dict[ch_name_term[0]]["suggest_type"] = suggest_item["suggest_type"]
# 拼音 # 拼音
if py_full_word != ch_full_word: if py_full_word != ch_full_word:
for i in range(len(py_full_word)): for i in range(len(py_full_word)):
py_name_term = py_full_word[i:].strip() py_name_term = py_full_word[i:].strip()
if py_name_term and py_full_word[i]!="(" and py_full_word[i]!=")": if py_name_term and py_full_word[i] != "(" and py_full_word[i] != ")":
prefix_weight = py_prefix_weight if len(py_name_term) != len(py_full_word) else full_weight prefix_weight = py_prefix_weight if len(py_name_term) != len(py_full_word) else full_weight
suggest_type = 2 if len(py_name_term) != len(py_full_word) else 3 suggest_type = 2 if len(py_name_term) != len(py_full_word) else 3
term_begin_prefix_weight = begin_prefix_weight if i == 0 else 1.0 term_begin_prefix_weight = begin_prefix_weight if i == 0 else 1.0
suggest_item = { suggest_item = {
"input": [py_name_term], "input": [py_name_term],
"word_weight": (1.0 * len(py_name_term) / len(py_full_word)) * prefix_weight * term_begin_prefix_weight, "word_weight": (1.0 * len(py_name_term) / len(
py_full_word)) * prefix_weight * term_begin_prefix_weight,
"suggest_type": suggest_type "suggest_type": suggest_type
} }
...@@ -82,20 +83,21 @@ def get_tips_suggest_list(instance_cn_name): ...@@ -82,20 +83,21 @@ def get_tips_suggest_list(instance_cn_name):
suggest_dict[py_name_term[0]]["input"].append(py_name_term) suggest_dict[py_name_term[0]]["input"].append(py_name_term)
if suggest_item["word_weight"] > suggest_dict[py_name_term[0]]["word_weight"]: if suggest_item["word_weight"] > suggest_dict[py_name_term[0]]["word_weight"]:
suggest_dict[py_name_term[0]]["word_weight"] = suggest_item["word_weight"] suggest_dict[py_name_term[0]]["word_weight"] = suggest_item["word_weight"]
suggest_dict[py_name_term[0]]["suggest_type"] = suggest_item["suggest_type"] suggest_dict[py_name_term[0]]["suggest_type"] = suggest_item["suggest_type"]
# 简写 # 简写
if py_acronym_full_word != py_full_word: if py_acronym_full_word != py_full_word:
for i in range(len(py_acronym_full_word)): for i in range(len(py_acronym_full_word)):
py_acronym_term = py_acronym_full_word[i:].strip() py_acronym_term = py_acronym_full_word[i:].strip()
if py_acronym_term and py_acronym_full_word[i]!="(" and py_acronym_full_word[i]!=")": if py_acronym_term and py_acronym_full_word[i] != "(" and py_acronym_full_word[i] != ")":
prefix_weight = py_acronym_prefix_weight if len(py_acronym_term) != len( prefix_weight = py_acronym_prefix_weight if len(py_acronym_term) != len(
py_acronym_full_word) else py_acronym_full_weight py_acronym_full_word) else py_acronym_full_weight
suggest_type = 4 if len(py_acronym_term) != len(py_acronym_full_word) else 5 suggest_type = 4 if len(py_acronym_term) != len(py_acronym_full_word) else 5
term_begin_prefix_weight = begin_prefix_weight if i == 0 else 1.0 term_begin_prefix_weight = begin_prefix_weight if i == 0 else 1.0
suggest_item = { suggest_item = {
"input": [py_acronym_term], "input": [py_acronym_term],
"word_weight": (1.0 * len(py_acronym_term) / len(py_acronym_full_word)) * prefix_weight * term_begin_prefix_weight, "word_weight": (1.0 * len(py_acronym_term) / len(
py_acronym_full_word)) * prefix_weight * term_begin_prefix_weight,
"suggest_type": suggest_type "suggest_type": suggest_type
} }
if py_acronym_term[0] not in suggest_dict: if py_acronym_term[0] not in suggest_dict:
...@@ -108,9 +110,7 @@ def get_tips_suggest_list(instance_cn_name): ...@@ -108,9 +110,7 @@ def get_tips_suggest_list(instance_cn_name):
suggest_dict[py_acronym_term[0]]["word_weight"] = suggest_item["word_weight"] suggest_dict[py_acronym_term[0]]["word_weight"] = suggest_item["word_weight"]
suggest_dict[py_acronym_term[0]]["suggest_type"] = suggest_item["suggest_type"] suggest_dict[py_acronym_term[0]]["suggest_type"] = suggest_item["suggest_type"]
return suggest_dict.values() return suggest_dict.values()
except: except:
logging.error("catch exception,err_msg:%s" % traceback.format_exc()) logging.error("catch exception,err_msg:%s" % traceback.format_exc())
return list() return list()
...@@ -5,7 +5,8 @@ import logging ...@@ -5,7 +5,8 @@ import logging
import traceback import traceback
import json import json
from libs.cache import redis_client from libs.cache import redis_client
from trans2es.models.query_word_conversion import TagConversion,DoctorConversion,HospitalConversion from trans2es.models.query_word_conversion import TagConversion, DoctorConversion, HospitalConversion
class TagTab(object): class TagTab(object):
Weight = { Weight = {
...@@ -84,7 +85,6 @@ class HosTab(TagTab): ...@@ -84,7 +85,6 @@ class HosTab(TagTab):
class QueryWordAttr(object): class QueryWordAttr(object):
# 获取TagConversion表最新的日期 # 获取TagConversion表最新的日期
tag_latest_date = None tag_latest_date = None
doctor_latest_date = None doctor_latest_date = None
...@@ -97,12 +97,12 @@ class QueryWordAttr(object): ...@@ -97,12 +97,12 @@ class QueryWordAttr(object):
hostab = HosTab() hostab = HosTab()
@classmethod @classmethod
def get_project_query_word_weight(cls,name): def get_project_query_word_weight(cls, name):
try: try:
if not cls.tag_latest_date: if not cls.tag_latest_date:
cls.tag_latest_date = TagConversion.objects.latest('update_date').update_date cls.tag_latest_date = TagConversion.objects.latest('update_date').update_date
tag_query_results = TagConversion.objects.filter(query=name,update_date=cls.tag_latest_date) tag_query_results = TagConversion.objects.filter(query=name, update_date=cls.tag_latest_date)
total_score = 0.0 total_score = 0.0
for query_item in tag_query_results: for query_item in tag_query_results:
...@@ -120,7 +120,7 @@ class QueryWordAttr(object): ...@@ -120,7 +120,7 @@ class QueryWordAttr(object):
if not cls.doctor_latest_date: if not cls.doctor_latest_date:
cls.doctor_latest_date = DoctorConversion.objects.latest('update_date').update_date cls.doctor_latest_date = DoctorConversion.objects.latest('update_date').update_date
doc_query_results = DoctorConversion.objects.filter(query=name,update_date=cls.doctor_latest_date) doc_query_results = DoctorConversion.objects.filter(query=name, update_date=cls.doctor_latest_date)
total_score = 0.0 total_score = 0.0
for query_item in doc_query_results: for query_item in doc_query_results:
...@@ -138,7 +138,7 @@ class QueryWordAttr(object): ...@@ -138,7 +138,7 @@ class QueryWordAttr(object):
if not cls.hospital_latest_date: if not cls.hospital_latest_date:
cls.hospital_latest_date = HospitalConversion.objects.latest('update_date').update_date cls.hospital_latest_date = HospitalConversion.objects.latest('update_date').update_date
hospital_query_results = HospitalConversion.objects.filter(query=name,update_date=cls.hospital_latest_date) hospital_query_results = HospitalConversion.objects.filter(query=name, update_date=cls.hospital_latest_date)
total_score = 0.0 total_score = 0.0
for query_item in hospital_query_results: for query_item in hospital_query_results:
...@@ -151,10 +151,10 @@ class QueryWordAttr(object): ...@@ -151,10 +151,10 @@ class QueryWordAttr(object):
return 0.0 return 0.0
@classmethod @classmethod
def get_query_results_num(cls,name): def get_query_results_num(cls, name):
try: try:
redis_data = redis_client.hget(cls.tips_num_redis_key_prefix, name) redis_data = redis_client.hget(cls.tips_num_redis_key_prefix, name)
redis_val_dict = json.loads(str(redis_data,encoding="utf-8")) if redis_data else {} redis_val_dict = json.loads(str(redis_data, encoding="utf-8")) if redis_data else {}
results_num = redis_val_dict['t'] if 't' in redis_val_dict else 0 results_num = redis_val_dict['t'] if 't' in redis_val_dict else 0
return results_num return results_num
...@@ -168,17 +168,17 @@ from libs.cache import redis_client ...@@ -168,17 +168,17 @@ from libs.cache import redis_client
import base64 import base64
from gm_types.doris import MIND_TYPE from gm_types.doris import MIND_TYPE
QUERY_KEY = "query:{}:set" QUERY_KEY = "query:{}:set"
LABEL_VALUE = { LABEL_VALUE = {
MIND_TYPE.PROJECT : 6, MIND_TYPE.PROJECT: 8,
MIND_TYPE.BRAND: 5, MIND_TYPE.HOSPITAL: 7,
MIND_TYPE.HOSPITAL:4, MIND_TYPE.DOCTOR: 6,
MIND_TYPE.DOCTOR:3, MIND_TYPE.FREE_FACE: 4,
MIND_TYPE.AREA:2, MIND_TYPE.FACE: 3,
MIND_TYPE.USER:1, MIND_TYPE.AREA: 2,
MIND_TYPE.UNKNOWN:0 MIND_TYPE.USER: 1,
MIND_TYPE.UNKNOWN: 0
} }
QUERY_WORD_LABEL_NEED_MODIFIED = { QUERY_WORD_LABEL_NEED_MODIFIED = {
...@@ -189,6 +189,7 @@ QUERY_WORD_LABEL_NEED_MODIFIED = { ...@@ -189,6 +189,7 @@ QUERY_WORD_LABEL_NEED_MODIFIED = {
u"脱毛": MIND_TYPE.PROJECT u"脱毛": MIND_TYPE.PROJECT
} }
def label_key(label):
    """Return the sort priority of ``label`` taken from ``LABEL_VALUE``.

    Labels missing from ``LABEL_VALUE`` get -1, so they sort below every
    mapped label (the lowest mapped value is 0). Returning ``None`` for them
    would make ``list.sort(key=label_key)`` raise ``TypeError`` on Python 3,
    which cannot order ``None`` against ``int``.
    """
    return LABEL_VALUE.get(label, -1)
...@@ -198,11 +199,7 @@ def get_tips_word_type(query=''): ...@@ -198,11 +199,7 @@ def get_tips_word_type(query=''):
key = QUERY_KEY.format(query_base64) key = QUERY_KEY.format(query_base64)
labels = list(map(lambda x: x.decode("utf8"), list(redis_client.smembers(key)))) labels = list(map(lambda x: x.decode("utf8"), list(redis_client.smembers(key))))
labels.append(MIND_TYPE.UNKNOWN) labels.append(MIND_TYPE.UNKNOWN)
if query in QUERY_WORD_LABEL_NEED_MODIFIED: if query in QUERY_WORD_LABEL_NEED_MODIFIED:
labels.append(MIND_TYPE.PROJECT) labels.append(MIND_TYPE.PROJECT)
labels.sort(key=label_key, reverse=True) labels.sort(key=label_key, reverse=True)
return labels[0] return labels[0]
#!/usr/bin/env python
# -*- coding: utf-8 -*-
from django.conf import settings
from django.core.management.base import BaseCommand
import traceback
import logging
from libs.es import ESPerform
from trans2es.type_info import get_type_info_map
class Command(BaseCommand):
    """Create the suggest index with its mapping and/or upload an index template."""
    args = ''
    help = 'dump mapping to elasticsearch'

    def add_arguments(self, parser):
        """Declare the command-line options.

        Uses the argparse-based ``add_arguments`` hook: ``optparse`` style
        ``option_list`` declarations were removed from ``BaseCommand`` in
        Django 1.10. Flags, destinations, help texts and defaults are unchanged.
        """
        parser.add_argument('-t', '--type', dest='type', help='type name to dump data to elasticsearch',
                            metavar='TYPE', default='')
        parser.add_argument('-T', '--indices_template', dest='indices_template',
                            help='index template name to dump data to elasticsearch', metavar='TYPE',
                            default='')
        parser.add_argument('-i', '--index-prefix', dest='index_prefix',
                            help='index name to dump data to elasticsearch', metavar='INDEX_PREFIX')
        parser.add_argument('-p', '--parallel', dest='parallel', help='parallel process count',
                            metavar='PARALLEL')
        parser.add_argument('-s', '--pks', dest='pks', help='specify sync pks, comma separated',
                            metavar='PKS', default='')
        parser.add_argument('--streaming-slicing', dest='streaming_slicing', action='store_true',
                            default=True)
        parser.add_argument('--no-streaming-slicing', dest='streaming_slicing', action='store_false',
                            default=True)

    def handle(self, *args, **options):
        """Create the requested index/mapping and, if asked, put the index template.

        Errors are logged and swallowed; nothing is returned.
        """
        try:
            es_cli = ESPerform.get_cli()

            type_name1 = "suggest-v1"
            type_name = "suggest"
            if len(options["type"]):
                if options["type"] == "all" or type_name == options["type"] or type_name1 == options["type"]:
                    type_name = options["type"]
                    official_index_name = ESPerform.get_official_index_name(type_name)
                    index_exists = es_cli.indices.exists(official_index_name)
                    if not index_exists:
                        logging.info("begin create [%s] index!" % type_name)
                        ESPerform.create_index(es_cli, type_name)

                    # NOTE(review): the mapping is put whether or not the index was just
                    # created; the nesting was reconstructed from flattened source, so
                    # confirm it against the repository.
                    logging.info("begin create [%s] mapping!" % type_name)
                    ESPerform.put_index_mapping(es_cli, type_name)

            if len(options["indices_template"]):
                template_file_name = options["indices_template"]
                if ESPerform.put_indices_template(es_cli=es_cli, template_file_name=template_file_name,
                                                  template_name=template_file_name):
                    logging.info("put indices template suc!")
                else:
                    logging.error("put indices template err!")
        except Exception:
            # `except Exception` (not a bare except) lets SystemExit/KeyboardInterrupt through.
            logging.error("catch exception,err_msg:%s" % traceback.format_exc())
{
"dynamic":"strict",
"properties": {
"id":{"type":"text"},
"suggest":{
"analyzer":"keyword",
"search_analyzer":"keyword",
"type":"completion",
"contexts":[
{
"name":"is_online",
"type": "category"
}
]
},
"suggest_type":{"type":"long"},//0-汉字,1-汉字全拼,2-拼音,3-拼音全拼,4-拼音简写,5-拼音简写全拼
"tips_name_type":{"type":"long"},//tips数据所属类型,0-tag,1-hospital,2-doctor,3-wiki
"ori_name":{"type":"keyword"},//原名称
"order_weight":{"type":"double"},//订单权重
"offline_score":{"type":"double"},//离线分
"results_num":{"type":"integer"},//结果数量
"type_flag":{"type":"keyword"},
"is_online":{"type":"boolean"}//上线
}
}
...@@ -3,6 +3,8 @@ ...@@ -3,6 +3,8 @@
"properties": { "properties": {
"id":{"type":"text"}, "id":{"type":"text"},
"suggest":{ "suggest":{
"analyzer":"keyword",
"search_analyzer":"keyword",
"type":"completion", "type":"completion",
"contexts":[ "contexts":[
{ {
...@@ -20,4 +22,4 @@ ...@@ -20,4 +22,4 @@
"type_flag":{"type":"keyword"}, "type_flag":{"type":"keyword"},
"is_online":{"type":"boolean"}//上线 "is_online":{"type":"boolean"}//上线
} }
} }
\ No newline at end of file
...@@ -34,4 +34,4 @@ class WordRelResemble(models.Model): ...@@ -34,4 +34,4 @@ class WordRelResemble(models.Model):
db_table = 'api_wordrelresemble' db_table = 'api_wordrelresemble'
wordrel = models.ForeignKey(WordRel, related_name='all_resembles') wordrel = models.ForeignKey(WordRel, related_name='all_resembles')
word = models.CharField(u'近义词', max_length=50, db_index=True) word = models.CharField(u'近义词', max_length=50, db_index=True)
\ No newline at end of file
This diff is collapsed.
...@@ -4,15 +4,15 @@ import os ...@@ -4,15 +4,15 @@ import os
import sys import sys
import logging import logging
import traceback import traceback
from libs.tools import tzlc,getMd5Digest from libs.tools import tzlc, getMd5Digest
from libs.es import ESPerform from libs.es import ESPerform
from libs.cache import redis_client from libs.cache import redis_client
import json import json
from django.conf import settings from django.conf import settings
from trans2es.commons.commons import get_tips_suggest_list from trans2es.commons.commons import get_tips_suggest_list
from trans2es.commons.words_utils import QueryWordAttr,get_tips_word_type from trans2es.commons.words_utils import QueryWordAttr, get_tips_word_type
class DoctorTransfer(object): class DoctorTransfer(object):
...@@ -20,14 +20,15 @@ class DoctorTransfer(object): ...@@ -20,14 +20,15 @@ class DoctorTransfer(object):
def get_doctor_data_name_mapping_results_to_redis(cls, instance): def get_doctor_data_name_mapping_results_to_redis(cls, instance):
try: try:
doctor_name = instance.name.strip() doctor_name = instance.name.strip()
if doctor_name: if doctor_name:
multi_fields = { multi_fields = {
'hospital.city_province_name': 1, 'hospital.city_province_name': 1,
'hospital.name': 1, 'hospital.name': 1,
'name': 1, 'name': 1,
'hospital.city_name':1, 'hospital.city_name': 1,
'hospital.officer_name':1, 'hospital.officer_name': 1,
'service_closure_tags':1 'service_closure_tags': 1
} }
query_fields = ['^'.join((k, str(v))) for (k, v) in multi_fields.items()] query_fields = ['^'.join((k, str(v))) for (k, v) in multi_fields.items()]
multi_match = { multi_match = {
...@@ -39,12 +40,12 @@ class DoctorTransfer(object): ...@@ -39,12 +40,12 @@ class DoctorTransfer(object):
q = { q = {
"size": 0, "size": 0,
"query":{ "query": {
"bool":{ "bool": {
"should":[ "should": [
{"multi_match": multi_match} {"multi_match": multi_match}
], ],
"must":[ "must": [
{"term": {"is_online": True}} {"term": {"is_online": True}}
], ],
"minimum_should_match": 1 "minimum_should_match": 1
...@@ -52,14 +53,16 @@ class DoctorTransfer(object): ...@@ -52,14 +53,16 @@ class DoctorTransfer(object):
} }
} }
result_dict = ESPerform.get_search_results(ESPerform.get_cli(settings.GM_ORI_ES_INFO_LIST), sub_index_name="doctor", doc_type="doctor", query_body=q,offset=0,size=0) result_dict = ESPerform.get_search_results(ESPerform.get_cli(settings.GM_ORI_ES_INFO_LIST),
sub_index_name="doctor", doc_type="doctor", query_body=q,
offset=0, size=0)
doctor_results = result_dict["total_count"] doctor_results = result_dict["total_count"]
redis_key_prefix = "search_tips:tips_mapping_num" redis_key_prefix = "search_tips:tips_mapping_num"
redis_data = redis_client.hget(redis_key_prefix,doctor_name) redis_data = redis_client.hget(redis_key_prefix, doctor_name)
redis_val_dict = json.loads(str(redis_data,encoding="utf-8")) if redis_data else {} redis_val_dict = json.loads(str(redis_data, encoding="utf-8")) if redis_data else {}
total_count = doctor_results total_count = doctor_results
if 't' in redis_val_dict: if 't' in redis_val_dict:
...@@ -68,33 +71,39 @@ class DoctorTransfer(object): ...@@ -68,33 +71,39 @@ class DoctorTransfer(object):
redis_val_dict['t'] = total_count redis_val_dict['t'] = total_count
redis_val_dict['d'] = doctor_results redis_val_dict['d'] = doctor_results
logging.info("duan add,redis_key_prefix:%s,redis_val_dict:%s" % (str(redis_key_prefix),str(redis_val_dict))) logging.info(
redis_client.hset(redis_key_prefix,doctor_name, json.dumps(redis_val_dict)) "duan add,redis_key_prefix:%s,redis_val_dict:%s" % (str(redis_key_prefix), str(redis_val_dict)))
redis_client.hset(redis_key_prefix, doctor_name, json.dumps(redis_val_dict))
except: except:
logging.error("catch exception,err_msg:%s" % traceback.format_exc()) logging.error("catch exception,err_msg:%s" % traceback.format_exc())
@classmethod @classmethod
def get_doctor_suggest_data_list(cls, instance): def get_doctor_suggest_data_list(cls, instance):
try: try:
ret_list = list() ret_list = list()
keyword = instance.name
item_dict = dict() cut_bool = False
item_dict["id"] = getMd5Digest(str(instance.name)) cut_word = ["下线", "停用", "已经下线", "账号停用"]
item_dict["ori_name"] = instance.name for i in cut_word:
item_dict["is_online"] = instance.is_online if keyword.find(i) >= 0:
cut_bool = True
item_dict["order_weight"] = QueryWordAttr.get_doctor_query_word_weight(instance.name) if cut_bool == False:
item_dict["results_num"] = QueryWordAttr.get_query_results_num(instance.name) item_dict = dict()
item_dict["type_flag"] = get_tips_word_type(instance.name) item_dict["id"] = getMd5Digest(str(instance.name))
item_dict["offline_score"] = 0.0 item_dict["ori_name"] = instance.name
item_dict["tips_name_type"] = 2 item_dict["is_online"] = instance.is_online
ret_list.append(item_dict) item_dict["order_weight"] = QueryWordAttr.get_doctor_query_word_weight(instance.name)
item_dict["results_num"] = QueryWordAttr.get_query_results_num(instance.name)
suggest_list = get_tips_suggest_list(instance.name) item_dict["type_flag"] = get_tips_word_type(instance.name)
item_dict["offline_score"] = 0.0
return (item_dict,suggest_list) item_dict["tips_name_type"] = 2
ret_list.append(item_dict)
suggest_list = get_tips_suggest_list(instance.name)
return (item_dict, suggest_list)
except: except:
logging.error("catch exception,err_msg:%s" % traceback.format_exc()) logging.error("catch exception,err_msg:%s" % traceback.format_exc())
return ([],[]) return ([], [])
...@@ -4,13 +4,14 @@ import os ...@@ -4,13 +4,14 @@ import os
import sys import sys
import logging import logging
import traceback import traceback
from libs.tools import tzlc,getMd5Digest from libs.tools import tzlc, getMd5Digest
from libs.es import ESPerform from libs.es import ESPerform
from libs.cache import redis_client from libs.cache import redis_client
import json import json
from trans2es.commons.commons import get_tips_suggest_list from trans2es.commons.commons import get_tips_suggest_list
from trans2es.commons.words_utils import QueryWordAttr,get_tips_word_type from trans2es.commons.words_utils import QueryWordAttr, get_tips_word_type
class HospitalTransfer(object): class HospitalTransfer(object):
...@@ -19,35 +20,42 @@ class HospitalTransfer(object): ...@@ -19,35 +20,42 @@ class HospitalTransfer(object):
try: try:
ret_list = list() ret_list = list()
item_dict = dict() name = instance.name.strip()
item_dict["id"] = getMd5Digest(str(instance.name)) cut_bool = False
item_dict["ori_name"] = instance.name cut_word = ["下线", "停用", "已经下线", "账号停用"]
item_dict["is_online"] = instance.is_online for i in cut_word:
if name.find(i) >= 0:
item_dict["order_weight"] = QueryWordAttr.get_hospital_query_word_weight(instance.name) cut_bool = True
item_dict["results_num"] = QueryWordAttr.get_query_results_num(instance.name) if cut_bool == False:
item_dict["type_flag"] = get_tips_word_type(instance.name) item_dict = dict()
item_dict["offline_score"] = 0.0 item_dict["id"] = getMd5Digest(str(name))
item_dict["tips_name_type"] = 1 item_dict["ori_name"] = name
item_dict["is_online"] = instance.is_online
ret_list.append(item_dict)
item_dict["order_weight"] = QueryWordAttr.get_hospital_query_word_weight(name)
suggest_list = get_tips_suggest_list(instance.name) item_dict["results_num"] = QueryWordAttr.get_query_results_num(name)
item_dict["type_flag"] = get_tips_word_type(name)
return (item_dict,suggest_list) item_dict["offline_score"] = 0.0
item_dict["tips_name_type"] = 1
ret_list.append(item_dict)
suggest_list = get_tips_suggest_list(name)
return (item_dict, suggest_list)
except: except:
logging.error("catch exception,err_msg:%s" % traceback.format_exc()) logging.error("catch exception,err_msg:%s" % traceback.format_exc())
return ([],[]) return ([], [])
@classmethod @classmethod
def get_hospital_lat_lng_info_to_redis(cls, instance): def get_hospital_lat_lng_info_to_redis(cls, instance):
try: try:
redis_key_prefix = "search_tips:hospital_latlng_info" redis_key_prefix = "search_tips:hospital_latlng_info"
name = instance.name name = instance.name.strip()
lng = instance.baidu_loc_lng lng = instance.baidu_loc_lng
lat = instance.baidu_loc_lat lat = instance.baidu_loc_lat
pos_list = [lng,lat] pos_list = [lng, lat]
redis_client.hset(redis_key_prefix,name,pos_list) redis_client.hset(redis_key_prefix, name, pos_list)
except: except:
logging.error("catch exception,err_msg:%s" % traceback.format_exc()) logging.error("catch exception,err_msg:%s" % traceback.format_exc())
...@@ -4,15 +4,14 @@ import os ...@@ -4,15 +4,14 @@ import os
import sys import sys
import logging import logging
import traceback import traceback
from libs.tools import tzlc,getMd5Digest from libs.tools import tzlc, getMd5Digest
from libs.es import ESPerform from libs.es import ESPerform
from libs.cache import redis_client from libs.cache import redis_client
import json import json
from django.conf import settings from django.conf import settings
from trans2es.commons.commons import get_tips_suggest_list from trans2es.commons.commons import get_tips_suggest_list
from trans2es.commons.words_utils import QueryWordAttr,get_tips_word_type from trans2es.commons.words_utils import QueryWordAttr, get_tips_word_type
class ItemWikiTransfer(object): class ItemWikiTransfer(object):
...@@ -44,12 +43,12 @@ class ItemWikiTransfer(object): ...@@ -44,12 +43,12 @@ class ItemWikiTransfer(object):
q = { q = {
"size": 0, "size": 0,
"query":{ "query": {
"bool":{ "bool": {
"should":[ "should": [
{"multi_match": multi_match} {"multi_match": multi_match}
], ],
"must":[ "must": [
{"term": {"is_online": True}} {"term": {"is_online": True}}
], ],
"minimum_should_match": 1 "minimum_should_match": 1
...@@ -57,28 +56,29 @@ class ItemWikiTransfer(object): ...@@ -57,28 +56,29 @@ class ItemWikiTransfer(object):
} }
} }
result_dict = ESPerform.get_search_results(ESPerform.get_cli(settings.GM_ORI_ES_INFO_LIST), sub_index_name="newwiki", doc_type="newwiki", query_body=q,offset=0,size=0) result_dict = ESPerform.get_search_results(ESPerform.get_cli(settings.GM_ORI_ES_INFO_LIST),
sub_index_name="newwiki", doc_type="newwiki", query_body=q,
offset=0, size=0)
doctor_results = result_dict["total_count"] doctor_results = result_dict["total_count"]
redis_key_prefix = "search_tips:tips_mapping_num" redis_key_prefix = "search_tips:tips_mapping_num"
redis_data = redis_client.hget(redis_key_prefix,wiki_name) redis_data = redis_client.hget(redis_key_prefix, wiki_name)
redis_val_dict = json.loads(str(redis_data,encoding="utf-8")) if redis_data else {} redis_val_dict = json.loads(str(redis_data, encoding="utf-8")) if redis_data else {}
total_count = doctor_results total_count = doctor_results
if 'w' in redis_val_dict: if 't' in redis_val_dict:
total_count += int(redis_val_dict['w']) total_count += int(redis_val_dict['t'])
redis_val_dict['t'] = total_count redis_val_dict['t'] = total_count
redis_val_dict['w'] = doctor_results redis_val_dict['w'] = doctor_results
redis_client.hset(redis_key_prefix,wiki_name, json.dumps(redis_val_dict)) redis_client.hset(redis_key_prefix, wiki_name, json.dumps(redis_val_dict))
except: except:
logging.error("catch exception,err_msg:%s" % traceback.format_exc()) logging.error("catch exception,err_msg:%s" % traceback.format_exc())
@classmethod @classmethod
def get_itemwiki_suggest_data_list(cls, instance): def get_itemwiki_suggest_data_list(cls, instance):
try: try:
...@@ -99,7 +99,7 @@ class ItemWikiTransfer(object): ...@@ -99,7 +99,7 @@ class ItemWikiTransfer(object):
suggest_list = get_tips_suggest_list(instance.name) suggest_list = get_tips_suggest_list(instance.name)
return (item_dict,suggest_list) return (item_dict, suggest_list)
except: except:
logging.error("catch exception,err_msg:%s" % traceback.format_exc()) logging.error("catch exception,err_msg:%s" % traceback.format_exc())
return ([],[]) return ([], [])
\ No newline at end of file
...@@ -4,18 +4,17 @@ import os ...@@ -4,18 +4,17 @@ import os
import sys import sys
import logging import logging
import traceback import traceback
from libs.tools import tzlc,getMd5Digest from libs.tools import tzlc, getMd5Digest
from libs.es import ESPerform from libs.es import ESPerform
from libs.cache import redis_client from libs.cache import redis_client
import json import json
from django.conf import settings from django.conf import settings
from trans2es.commons.commons import get_tips_suggest_list from trans2es.commons.commons import get_tips_suggest_list
from trans2es.commons.words_utils import QueryWordAttr,get_tips_word_type from trans2es.commons.words_utils import QueryWordAttr, get_tips_word_type
class TagTransfer(object): class TagTransfer(object):
tips_num_redis_key_prefix = "search_tips:tips_mapping_num" tips_num_redis_key_prefix = "search_tips:tips_mapping_num"
@classmethod @classmethod
...@@ -60,29 +59,31 @@ class TagTransfer(object): ...@@ -60,29 +59,31 @@ class TagTransfer(object):
{'multi_match': multi_match}, {'multi_match': multi_match},
sku_query sku_query
], ],
"must":[ "must": [
{"term":{"is_online":True}} {"term": {"is_online": True}}
], ],
"minimum_should_match": 1 "minimum_should_match": 1
} }
} }
result_dict = ESPerform.get_search_results(ESPerform.get_cli(settings.GM_ORI_ES_INFO_LIST), sub_index_name="service", doc_type="service", query_body=q,offset=0,size=0) result_dict = ESPerform.get_search_results(ESPerform.get_cli(settings.GM_ORI_ES_INFO_LIST),
sub_index_name="service", doc_type="service", query_body=q,
offset=0, size=0)
doctor_results = result_dict["total_count"] doctor_results = result_dict["total_count"]
redis_data = redis_client.hget(cls.tips_num_redis_key_prefix, tag_name) redis_data = redis_client.hget(cls.tips_num_redis_key_prefix, tag_name)
redis_val_dict = json.loads(str(redis_data,encoding="utf-8")) if redis_data else {} redis_val_dict = json.loads(str(redis_data, encoding="utf-8")) if redis_data else {}
total_count = doctor_results total_count = doctor_results
if 's' in redis_val_dict: if 't' in redis_val_dict:
total_count += int(redis_val_dict['s']) total_count += int(redis_val_dict['t'])
redis_val_dict['t'] = total_count redis_val_dict['t'] = total_count
redis_val_dict['s'] = doctor_results redis_val_dict['s'] = doctor_results
redis_client.hset(cls.tips_num_redis_key_prefix,tag_name, json.dumps(redis_val_dict)) redis_client.hset(cls.tips_num_redis_key_prefix, tag_name, json.dumps(redis_val_dict))
except: except:
logging.error("catch exception,err_msg:%s" % traceback.format_exc()) logging.error("catch exception,err_msg:%s" % traceback.format_exc())
...@@ -90,23 +91,29 @@ class TagTransfer(object): ...@@ -90,23 +91,29 @@ class TagTransfer(object):
def get_tag_suggest_data_list(cls, instance): def get_tag_suggest_data_list(cls, instance):
try: try:
ret_list = list() ret_list = list()
keyword = instance.name
item_dict = dict() cut_bool = False
item_dict["id"] = getMd5Digest(str(instance.name)) cut_word = ["下线", "停用", "已经下线", "账号停用"]
item_dict["ori_name"] = instance.name for i in cut_word:
item_dict["is_online"] = instance.is_online if keyword.find(i) >= 0:
cut_bool = True
item_dict["order_weight"] = QueryWordAttr.get_project_query_word_weight(instance.name) if cut_bool == False:
item_dict["results_num"] = QueryWordAttr.get_query_results_num(instance.name) item_dict = dict()
item_dict["type_flag"] = get_tips_word_type(instance.name) item_dict["id"] = getMd5Digest(str(instance.name))
item_dict["offline_score"] = 0.0 item_dict["ori_name"] = instance.name
item_dict["tips_name_type"] = 0 item_dict["is_online"] = instance.is_online
ret_list.append(item_dict) item_dict["order_weight"] = QueryWordAttr.get_project_query_word_weight(instance.name)
item_dict["results_num"] = QueryWordAttr.get_query_results_num(instance.name)
suggest_list = get_tips_suggest_list(instance.name) item_dict["type_flag"] = get_tips_word_type(instance.name)
item_dict["offline_score"] = 0.0
return (item_dict,suggest_list) item_dict["tips_name_type"] = 0
ret_list.append(item_dict)
suggest_list = get_tips_suggest_list(instance.name)
return (item_dict, suggest_list)
except: except:
logging.error("catch exception,err_msg:%s" % traceback.format_exc()) logging.error("catch exception,err_msg:%s" % traceback.format_exc())
return ([],[]) return ([], [])
\ No newline at end of file
...@@ -4,24 +4,30 @@ import os ...@@ -4,24 +4,30 @@ import os
import sys import sys
import logging import logging
import traceback import traceback
from libs.tools import tzlc,getMd5Digest import base64
from libs.tools import tzlc, getMd5Digest
from libs.es import ESPerform from libs.es import ESPerform
from libs.cache import redis_client from libs.cache import redis_client
import json import json
from django.conf import settings from django.conf import settings
from trans2es.models import wordresemble from trans2es.models import wordresemble
from trans2es.commons.words_utils import QueryWordAttr, get_tips_word_type
from trans2es.commons.commons import get_tips_suggest_list
from trans2es.utils.doctor_transfer import DoctorTransfer
from trans2es.utils.itemwiki_transfer import ItemWikiTransfer
from trans2es.utils.tag_transfer import TagTransfer
class WordResemble(object): class WordResemble(object):
@classmethod @classmethod
def get_word_resemble_list(cls,keyword): def get_word_resemble_list(cls, keyword):
try: try:
query_sql_item = wordresemble.WordRel.objects.filter(keyword=keyword) query_sql_item = wordresemble.WordRel.objects.filter(keyword=keyword)
temp_list = list() temp_list = list()
for sql_obj in query_sql_item: for sql_obj in query_sql_item:
temp_list.extend(list(sql_obj.all_resembles.all().values_list('word',flat=True))) temp_list.extend(list(sql_obj.all_resembles.all().values_list('word', flat=True)))
resemble_list = list() resemble_list = list()
for item in temp_list: for item in temp_list:
...@@ -30,4 +36,172 @@ class WordResemble(object): ...@@ -30,4 +36,172 @@ class WordResemble(object):
return resemble_list return resemble_list
except: except:
logging.error("catch exception,err_msg:%s" % traceback.format_exc()) logging.error("catch exception,err_msg:%s" % traceback.format_exc())
return list() return list()
\ No newline at end of file
@classmethod
def get_resemble_list(cls, instance):
    """Build the suggest document for a synonym-group keyword.

    Args:
        instance: object exposing a ``keyword`` string attribute.

    Returns:
        tuple: ``(item_dict, suggest_list)`` where ``item_dict`` holds the
        document fields (``is_online`` is always ``True`` and
        ``tips_name_type`` is ``4``) and ``suggest_list`` is whatever
        ``get_tips_suggest_list`` returns for the keyword.
        ``([], [])`` is returned when the keyword contains one of the
        offline/disabled markers, or when any exception is raised (the
        traceback is logged), so the result can always be unpacked as a pair.
    """
    # Substrings ("offline", "disabled", "already offline", "account
    # disabled") that mark a keyword which must not be indexed.
    offline_markers = ("下线", "停用", "已经下线", "账号停用")
    try:
        keyword = instance.keyword
        if any(marker in keyword for marker in offline_markers):
            return ([], [])

        item_dict = {
            "id": getMd5Digest(str(keyword)),
            "ori_name": keyword,
            "is_online": True,
            "order_weight": QueryWordAttr.get_project_query_word_weight(keyword),
            "results_num": QueryWordAttr.get_query_results_num(keyword),
            "type_flag": get_tips_word_type(keyword),
            "offline_score": 0.0,
            "tips_name_type": 4,
        }
        suggest_list = get_tips_suggest_list(keyword)
        return (item_dict, suggest_list)
    except Exception:
        logging.error("catch exception,err_msg:%s" % traceback.format_exc())
        return ([], [])
@classmethod
def get_all_data_name_mapping_results_to_redis(cls, instance):
    """Refresh every per-source result count stored in Redis for a keyword.

    ``instance.keyword`` is first copied onto ``instance.name``, then the
    wiki, tag, doctor and diary counters are called in that order with the
    same instance.  Any exception is logged with its traceback and
    swallowed, so a failing counter also skips the ones after it.

    Args:
        instance: object with a ``keyword`` attribute; it is mutated
            (its ``name`` attribute is set).

    Returns:
        None
    """
    try:
        # The counters are handed the same instance; copy the keyword onto
        # ``name`` for them.
        instance.name = instance.keyword
        # Wiki entries.
        ItemWikiTransfer.get_wiki_data_name_mapping_results_to_redis(instance)
        # Services ("美购").
        TagTransfer.get_tag_data_name_mapping_results_to_redis(instance)
        # Doctors and hospitals.
        DoctorTransfer.get_doctor_data_name_mapping_results_to_redis(instance)
        # Diaries.
        cls.get_diary_data_name_mapping_results_to_redis(instance)
    except Exception:
        logging.error("catch exception,err_msg:%s" % traceback.format_exc())
@classmethod
def get_diary_data_name_mapping_results_to_redis(cls, instance):
    """Count online diaries matching ``instance.name`` and store the count in Redis.

    The stripped name is searched in the ``diary`` index with a boosted
    ``cross_fields`` multi-match (operator ``and``) restricted to
    ``is_online`` documents; only the hit total is requested (``size=0``).
    The JSON value stored under the name in the
    ``search_tips:tips_mapping_num`` hash is then updated: ``'r'`` becomes
    the diary count and ``'t'`` becomes the previous ``'t'`` (if any) plus
    the diary count.

    An empty or whitespace-only name is skipped.  Any exception is logged
    with its traceback and swallowed.

    Args:
        instance: object with a string ``name`` attribute.

    Returns:
        None
    """
    tips_num_redis_key_prefix = "search_tips:tips_mapping_num"
    try:
        tag_name = instance.name.strip()
        if not tag_name:
            # Nothing to search for and no sensible hash field to write.
            return

        # field name -> boost
        multi_fields = {
            'tags': 8,
            'doctor.name': 4,
            'doctor.hospital.name': 3,
            'doctor.hospital.officer_name': 3,
            'user.last_name': 2,
            'service.name': 1,
            "title": 2}
        query_fields = ['^'.join((field, str(boost))) for (field, boost) in multi_fields.items()]
        multi_match = {
            'query': tag_name,
            'type': 'cross_fields',
            'operator': 'and',
            'fields': query_fields,
        }
        query_body = {
            'query': {
                'bool': {
                    "should": [
                        {'multi_match': multi_match}
                    ],
                    "must": [
                        {"term": {"is_online": True}}
                    ],
                    "minimum_should_match": 1
                }
            }
        }

        result_dict = ESPerform.get_search_results(ESPerform.get_cli(settings.GM_ORI_ES_INFO_LIST),
                                                   sub_index_name="diary", doc_type="diary",
                                                   query_body=query_body, offset=0, size=0)
        diary_count = result_dict["total_count"]

        redis_data = redis_client.hget(tips_num_redis_key_prefix, tag_name)
        redis_val_dict = json.loads(str(redis_data, encoding="utf-8")) if redis_data else {}

        # NOTE(review): the fresh count is added on top of the stored 't',
        # so running this twice for the same name without clearing the hash
        # entry counts the diaries twice -- confirm the entry is reset
        # between sync runs.
        total_count = diary_count
        if 't' in redis_val_dict:
            total_count += int(redis_val_dict['t'])

        redis_val_dict['t'] = total_count
        redis_val_dict['r'] = diary_count

        redis_client.hset(tips_num_redis_key_prefix, tag_name, json.dumps(redis_val_dict))
    except Exception:
        logging.error("catch exception,err_msg:%s" % traceback.format_exc())
@classmethod
def set_data_to_redis(cls, instance):
    """Publish the synonym sets of ``instance.keyword`` to Redis.

    For every ``WordRel`` row whose keyword equals ``instance.keyword`` and
    every synonym word attached to that row, members of the form
    ``"<base64(word)>:<total>"`` are added to Redis sets named
    ``"query:<base64(word)>:search_tip"``:

    * the keyword's set receives each synonym;
    * each synonym's set receives the keyword and every synonym of the
      same row (the synonym itself included).

    ``<total>`` is the ``'t'`` field of the JSON stored for the word in the
    ``search_tips:tips_mapping_num`` hash, or ``0`` when the entry or the
    field is missing.  Any exception is logged with its traceback and
    swallowed.

    Args:
        instance: object with a string ``keyword`` attribute.

    Returns:
        None
    """
    query_key_template = "query:{}:search_tip"
    tips_num_redis_key_prefix = "search_tips:tips_mapping_num"

    def describe(word):
        """Return ``(base64(word), "<base64(word)>:<total>")`` for *word*."""
        encoded = base64.b64encode(word.encode('utf8')).decode('utf8')
        redis_data = redis_client.hget(tips_num_redis_key_prefix, word)
        redis_val_dict = json.loads(str(redis_data, encoding="utf-8")) if redis_data else {}
        total_count = int(redis_val_dict['t']) if 't' in redis_val_dict else 0
        return encoded, encoded + ":" + str(total_count)

    try:
        keyword = instance.keyword
        for sql_obj in wordresemble.WordRel.objects.filter(keyword=keyword):
            words = list(sql_obj.all_resembles.all().values_list('word', flat=True))
            if not words:
                continue

            # Look every total up once per row instead of once per
            # (synonym, sibling) pair.
            keyword_encoded, keyword_member = describe(keyword)
            keyword_key = query_key_template.format(keyword_encoded)
            described = [describe(word) for word in words]

            for word_encoded, word_member in described:
                # Keyword -> synonym.
                redis_client.sadd(keyword_key, word_member)

                # Synonym -> keyword.
                word_key = query_key_template.format(word_encoded)
                redis_client.sadd(word_key, keyword_member)

                # Synonym -> every synonym of the row.
                # NOTE(review): applied to each synonym's set; confirm this
                # matches the intended nesting of the sibling fan-out.
                for _, sibling_member in described:
                    redis_client.sadd(word_key, sibling_member)
    except Exception:
        logging.error("catch exception,err_msg:%s" % traceback.format_exc())
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment