Commit 8e7b74b8 authored by 段英荣

Merge branch 'test' into 'master'

Test

See merge request !1
parents a9c8179e 70343f0a
<?xml version="1.0" encoding="utf-8"?>
<gm_rpcd_config>
<info config_name="app" version="1.0"/>
<config name="log_dir" value="/data/log/search_tips/app"/>
<config name="application_name" value="search_tips"/>
<config name="service_list">
<element value="search_tips"/>
</config>
<config name="initializer_list">
<element value="search_tips.django_init"/>
<element value="search.view.auto_tips"/>
</config>
</gm_rpcd_config>
...@@ -19,9 +19,13 @@ class ESPerform(object): ...@@ -19,9 +19,13 @@ class ESPerform(object):
@classmethod @classmethod
def get_cli(cls): def get_cli(cls,es_ip_list=None):
try: try:
cls.cli_obj = Elasticsearch(cls.cli_info_list) es_info_list = es_ip_list if es_ip_list else cls.cli_info_list
if es_ip_list:#tricky的做法,兼容测试es
cls.index_prefix = "gm_test"
cls.cli_obj = Elasticsearch(es_info_list)
return cls.cli_obj return cls.cli_obj
except: except:
logging.error("catch exception,err_msg:%s" % traceback.format_exc()) logging.error("catch exception,err_msg:%s" % traceback.format_exc())
......
#!/usr/bin/env python #!/usr/bin/env python
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
import django.db.models
from django.conf import settings from django.conf import settings
from pytz import timezone from pytz import timezone
from datetime import datetime from datetime import datetime
from django.http import HttpResponse
import json
import hashlib
from trans2es.models import doctor
import logging
import traceback
from math import radians, cos, sin, asin, sqrt
def tzlc(dt, truncate_to_sec=True): def tzlc(dt, truncate_to_sec=True):
...@@ -16,3 +24,65 @@ def tzlc(dt, truncate_to_sec=True): ...@@ -16,3 +24,65 @@ def tzlc(dt, truncate_to_sec=True):
return timezone(settings.TIME_ZONE).localize(dt) return timezone(settings.TIME_ZONE).localize(dt)
else: else:
return timezone(settings.TIME_ZONE).normalize(dt) return timezone(settings.TIME_ZONE).normalize(dt)
def json_http_response(result, status=200, extra_headers=None):
    """Serialize ``result`` as JSON and wrap it in an ``HttpResponse``.

    ``result`` is modified in place: an empty ``'extra'`` entry is added when
    the key is absent. Every item of ``extra_headers`` is copied onto the
    response as a header.
    """
    headers = dict() if extra_headers is None else extra_headers
    # Guarantee the 'extra' key exists before serializing.
    result.setdefault('extra', {})

    payload = json.dumps(result)
    response = HttpResponse(
        payload,
        content_type="application/json; charset=UTF-8",
        status=status,
    )
    for name, value in headers.items():
        response[name] = value
    return response
def getMd5Digest(ori_content):
    """Return the hexadecimal MD5 digest of ``ori_content`` encoded as UTF-8."""
    encoded = ori_content.encode("utf-8")
    return hashlib.md5(encoded).hexdigest()
def point_distance(lon1, lat1, lon2, lat2):
    """
    Calculate the great circle distance between two points
    on the earth (specified in decimal degrees).

    Arguments are longitude/latitude pairs in decimal degrees. The result is
    the haversine distance in metres, using a mean earth radius of 6371 km.
    """
    # Convert decimal degrees to radians.
    lon1, lat1, lon2, lat2 = map(radians, [lon1, lat1, lon2, lat2])
    # Haversine formula.
    dlon = lon2 - lon1
    dlat = lat2 - lat1
    a = sin(dlat / 2)**2 + cos(lat1) * cos(lat2) * sin(dlon / 2)**2
    # Floating-point rounding can push ``a`` marginally outside [0, 1] for
    # (near-)antipodal points, which would make asin() raise a domain error.
    a = min(1.0, max(0.0, a))
    c = 2 * asin(sqrt(a))
    r = 6371  # mean earth radius in km
    return c * r * 1000
class HostpitalPosInfo(object):
    """Class-level cache mapping hospital name to ``[lng, lat]`` (Baidu coordinates)."""

    hospital_pos_dict = dict()

    @classmethod
    def get_hostpital_info(cls):
        """Return the name -> [lng, lat] mapping, filling it from the DB while it is empty.

        Any exception is logged and whatever has been cached so far is returned.
        """
        try:
            if not cls.hospital_pos_dict:
                hospital_query = doctor.Hospital.objects.all().query
                hospitals = django.db.models.QuerySet(model=doctor.Hospital, query=hospital_query)
                for hospital in hospitals:
                    hospital_name = hospital.name
                    position = [float(hospital.baidu_loc_lng), float(hospital.baidu_loc_lat)]
                    cls.hospital_pos_dict[hospital_name] = position
            return cls.hospital_pos_dict
        except:
            logging.error("catch exception,err_msg:%s" % traceback.format_exc())
            return cls.hospital_pos_dict


# Populated once, when this module is imported.
g_hospital_pos_dict = HostpitalPosInfo.get_hostpital_info()
\ No newline at end of file
django==1.8
kafka-python>=1.2.1,<1.4
elasticsearch==6.3.1
redis==2.10.6
celery==4.2.1
redlock==1.2.0
kombu==4.2.2.post1
PyMySQL==0.9.2
gunicorn==19.9.0
gevent==1.3.7
pypinyin==0.34.1
git+ssh://git@git.wanmeizhensuo.com/backend/gm-rpcd.git@master
git+ssh://git@git.wanmeizhensuo.com/backend/helios.git@master
git+ssh://git@git.wanmeizhensuo.com/backend/gm-logging.git@master
git+ssh://git@git.wanmeizhensuo.com/backend/gm-config.git@v0.1.3#egg=gm-config==0.1.3
git+ssh://git@git.wanmeizhensuo.com/backend/gm-protocol.git@master
git+ssh://git@git.wanmeizhensuo.com/backend/gm-upload.git@master
git+ssh://git@git.wanmeizhensuo.com/system/gm-tracer.git@v0.1.2
git+ssh://git@git.wanmeizhensuo.com/alpha/alpha-types.git@master
git+ssh://git@git.wanmeizhensuo.com/backend/gm-types.git@master
# !/usr/bin/env python
# encoding=utf-8
from __future__ import absolute_import
from django.contrib import admin
# Register your models here.
from __future__ import unicode_literals
from django.apps import AppConfig
class SearchConfig(AppConfig):
    """Django application configuration for the ``search`` app."""
    name = 'search'
from __future__ import unicode_literals
from django.db import models
# Create your models here.
from django.test import TestCase
# Create your tests here.
#!/usr/bin/env python
# -*- coding: utf-8 -*-
from __future__ import unicode_literals, absolute_import, print_function
from gm_rpcd.all import bind
import logging
import traceback
import json
from libs.es import ESPerform
from libs.cache import redis_client
from libs.tools import g_hospital_pos_dict
from libs.tools import point_distance
def _describe_distance(distance):
    """Turn a distance in metres into the short label shown beside a hospital tip."""
    if distance < 100:
        return "<100" + "米"
    if distance < 1000:
        # Whole metres; the raw float would render as e.g. "约523.27193米".
        return "约" + str(int(round(distance))) + "米"
    # The cut-off used to be 1000*10000 m (10,000 km), which contradicted the
    # ">1000km" label used beyond it; 1000 km matches the label.
    if distance < 1000 * 1000:
        return "约" + str(round(distance / 1000.0, 1)) + "km"
    return ">1000km"


def get_suggest_tips(query, lat=None, lng=None, offset=0, size=100):
    """Run the completion suggester for ``query`` and return the decorated hits.

    Each returned item is the hit's ``_source`` dict, extended with:

    * ``highlight_name`` - ``ori_name`` with every occurrence of ``query``
      wrapped in ``<ems>`` tags;
    * ``describe`` - for ``type_flag == "hospital"`` a distance label computed
      from (``lng``, ``lat``) to the cached hospital position, or ``""`` when
      the hospital is not cached or no position was supplied; for every other
      type a "约N个结果" label, or ``""`` when ``results_num`` is falsy.

    ``lat`` / ``lng`` are optional so callers without a user position can
    still get suggestions. Any exception is logged and an empty list returned.
    """
    try:
        q = {
            "suggest": {
                "tips-suggest": {
                    "prefix": query,
                    "completion": {
                        "field": "suggest",
                        "size": size
                    }
                }
            },
            "_source": {
                "include": ["id", "ori_name", "offline_score", "is_online", "type_flag", "results_num"]
            }
        }

        result_dict = ESPerform.get_search_results(
            ESPerform.get_cli(), sub_index_name="suggest", query_body=q,
            offset=offset, size=size, is_suggest_request=True)

        # Loop-invariant: the markup only depends on the query.
        highlight_marks = u'<ems>%s</ems>' % query
        has_position = lat is not None and lng is not None

        ret_list = list()
        for tips_item in result_dict["suggest"]["tips-suggest"]:
            for hit_item in tips_item["options"]:
                source = hit_item["_source"]
                ori_name = source["ori_name"]
                source["highlight_name"] = ori_name.replace(query, highlight_marks)

                if source["type_flag"] == "hospital":
                    position = g_hospital_pos_dict.get(ori_name) if has_position else None
                    if position is None:
                        source["describe"] = ""
                    else:
                        distance = point_distance(lng, lat, position[0], position[1])
                        source["describe"] = _describe_distance(distance)
                else:
                    results_num = source["results_num"]
                    source["describe"] = "约" + str(results_num) + "个结果" if results_num else ""

                ret_list.append(source)

        return ret_list
    except Exception:
        logging.error("catch exception,err_msg:%s" % traceback.format_exc())
        return list()
#!/usr/bin/env python
# -*- coding: utf-8 -*-
from __future__ import unicode_literals, absolute_import, print_function
from gm_rpcd.all import bind
import logging
import traceback
import json
from libs.es import ESPerform
from libs.cache import redis_client
from libs.tools import json_http_response
from search.utils.auto_tips import get_suggest_tips
@bind("search_tips/search/auto_complete_query")
def auto_complete_query(q, lat=0, lng=0):
    """auto complate words/tags/doctors etc.

    URL:
    ~/api/auto_complete?scope=[kw]&q=双

    ``lat`` / ``lng`` are the caller's coordinates and default to 0 when not
    supplied.

    Return:
    {'data': [tip, tip, tip]} - an empty list when anything fails.
    """
    try:
        # get_suggest_tips needs the coordinates; calling it with the query
        # alone raised TypeError, which was swallowed below and always
        # produced an empty result.
        data = get_suggest_tips(q, float(lat), float(lng))
        return {"data": data}
    except Exception:
        logging.error("catch exception,err_msg:%s" % traceback.format_exc())
        return {"data": []}
#!/usr/bin/env python
# -*- coding: utf-8 -*-
from __future__ import unicode_literals, absolute_import, print_function
from gm_rpcd.all import bind
import logging
import traceback
import json
from libs.es import ESPerform
from libs.cache import redis_client
from libs.tools import json_http_response
from django.shortcuts import render
from search.utils.auto_tips import get_suggest_tips
def auto_complete(request):
    """auto complate words/tags/doctors etc.

    URL:
    ~/search/auto_complete?scope=[kw]&q=双

    Optional ``lat`` / ``lng`` query parameters carry the user's position;
    a missing or empty value is treated as 0.

    Return:
    {'error': 0|1, 'data': [word, word, word],}
    """
    try:
        q = request.GET.get('q', '').strip()
        # Check the query first so an empty query never depends on the
        # coordinates being parseable.
        if not q:
            return json_http_response({'error': 0, 'data': []})

        # The default must be a string: the previous int default (0) has no
        # .strip() and raised AttributeError whenever the parameter was absent.
        lat = float(request.GET.get('lat', '').strip() or 0)
        lng = float(request.GET.get('lng', '').strip() or 0)

        data = get_suggest_tips(q, lat, lng)
        result = {
            'error': 0,
            'data': data,
        }
        # Let logging format the values; str(...).encode(...) logged a bytes repr.
        logging.info("duan add,q is:%s,result:%s", q, result)
        return json_http_response(result)
    except Exception:
        logging.error("catch exception,err_msg:%s" % traceback.format_exc())
        return json_http_response({'error': 1, 'data': []})
from __future__ import unicode_literals, absolute_import, print_function
import pymysql
# Make PyMySQL importable under the MySQLdb name.
pymysql.install_as_MySQLdb()
\ No newline at end of file
No preview for this file type
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import os
import django
# Point Django at the project settings (unless already configured in the
# environment), then initialise Django for this process.
os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'search_tips.settings')
django.setup()
...@@ -28,7 +28,7 @@ SECRET_KEY = 'e%$v6snev0807=t0@gk_n2#r5m6r1h(eil6cp^y3ub@ja@gk_t' ...@@ -28,7 +28,7 @@ SECRET_KEY = 'e%$v6snev0807=t0@gk_n2#r5m6r1h(eil6cp^y3ub@ja@gk_t'
# SECURITY WARNING: don't run with debug turned on in production! # SECURITY WARNING: don't run with debug turned on in production!
DEBUG = True DEBUG = True
ALLOWED_HOSTS = [] ALLOWED_HOSTS = ["192.168.78.2"]
SENTRY_CELERY_ENDPOINT="http://60b0004c8884420f8067fb32fc3ed244:20f97fc73ffa4aad9735d0e6542a6d78@sentry.igengmei.com/140" SENTRY_CELERY_ENDPOINT="http://60b0004c8884420f8067fb32fc3ed244:20f97fc73ffa4aad9735d0e6542a6d78@sentry.igengmei.com/140"
BROKER_URL = "redis://127.0.0.1:6379/8" BROKER_URL = "redis://127.0.0.1:6379/8"
...@@ -51,6 +51,8 @@ BROKER_URL = "redis://127.0.0.1:6379/8" ...@@ -51,6 +51,8 @@ BROKER_URL = "redis://127.0.0.1:6379/8"
# }, # },
# } # }
REDIS_URL = "redis://127.0.0.1:6379"
CELERY_BROKER_URL = "redis://127.0.0.1:6379/8" CELERY_BROKER_URL = "redis://127.0.0.1:6379/8"
CELERY_TIMEZONE = 'Asia/Shanghai' CELERY_TIMEZONE = 'Asia/Shanghai'
CELERY_ROUTES = ['physical.celery_task_router.CeleryTaskRouter'] CELERY_ROUTES = ['physical.celery_task_router.CeleryTaskRouter']
...@@ -65,8 +67,7 @@ INSTALLED_APPS = [ ...@@ -65,8 +67,7 @@ INSTALLED_APPS = [
'django.contrib.messages', 'django.contrib.messages',
'django.contrib.staticfiles', 'django.contrib.staticfiles',
'trans2es', 'trans2es',
'search', 'search'
'injection.data_sync',
] ]
MIDDLEWARE = [ MIDDLEWARE = [
...@@ -115,6 +116,18 @@ DATABASES = { ...@@ -115,6 +116,18 @@ DATABASES = {
"init_command": "SET foreign_key_checks = 0;", "init_command": "SET foreign_key_checks = 0;",
"charset": "utf8mb4", "charset": "utf8mb4",
} }
},
'bran': {
'ENGINE': 'django.db.backends.mysql', # 设置为mysql数据库
'NAME': 'bran_test',
'USER': 'work',
'PASSWORD': 'workwork',
'HOST': 'rdsmaqevmuzj6jy.mysql.rds.aliyuncs.com',
'PORT': '3306',
'OPTIONS': {
"init_command": "SET foreign_key_checks = 0;",
"charset": "utf8mb4", # 为了支持emoji表情
},
} }
} }
......
...@@ -16,6 +16,11 @@ Including another URLconf ...@@ -16,6 +16,11 @@ Including another URLconf
from django.conf.urls import url from django.conf.urls import url
from django.contrib import admin from django.contrib import admin
from search.views import auto_complete
urlpatterns = [ urlpatterns = [
url(r'^admin/', admin.site.urls), url(r'^admin/', admin.site.urls),
url(r'^search/auto_complete$', auto_complete),
] ]
from django.contrib import admin
# Register your models here.
from django.apps import AppConfig
class Trans2EsConfig(AppConfig):
    """Django application configuration for the ``trans2es`` app."""
    name = 'trans2es'
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import os
import sys
import logging
import traceback
import pypinyin
from pypinyin import lazy_pinyin
from gm_types.gaia import SERVICE_ITEM_PRICE_TYPE, DOCTOR_TYPE
import uuid
def uuid4():
    """
    :return: a freshly generated random UUID as a 32-character hex string
    """
    random_id = uuid.uuid4()
    return random_id.hex
def _append_substring_suggests(full_word, part_weight, full_weight, part_type, full_type,
                               begin_weight, next_index, sink):
    """Append one suggest item per non-blank substring of ``full_word`` to ``sink``.

    Returns the next unused ``cur_index`` value.
    """
    total_len = len(full_word)
    for start in range(total_len):
        # Substrings starting at the first character get the begin boost.
        begin_factor = begin_weight if start == 0 else 1.0
        for stop in range(start, total_len + 1):
            term = full_word[start:stop].strip()
            if not term:
                continue
            is_full = len(term) == total_len
            weight = full_weight if is_full else part_weight
            sink.append({
                "input": [term],
                "word_weight": (1.0 * len(term) / total_len) * weight * begin_factor,
                "suggest_type": full_type if is_full else part_type,
                "cur_index": next_index
            })
            next_index += 1
    return next_index


def get_tips_suggest_list(instance_cn_name):
    """Build the completion-suggest entries for a name.

    Entries are generated for every non-blank substring of, in this order,
    the stripped name itself (suggest_type 0, or 1 for the whole string), its
    pinyin (2 / 3) and its pinyin first letters (4 / 5). ``cur_index`` numbers
    the entries consecutively across all three groups. Any exception is
    logged and an empty list is returned.
    """
    try:
        begin_prefix_weight = 1.2 * 10000  # boost for terms starting at index 0

        ch_full_word = instance_cn_name.strip()
        py_full_word = ''.join(lazy_pinyin(ch_full_word))
        py_acronym_full_word = ''.join(lazy_pinyin(ch_full_word, style=pypinyin.FIRST_LETTER))

        suggest_list = list()
        # (word, partial weight, full weight, partial type, full type)
        groups = (
            (ch_full_word, 1.5, 6.0 * 10000, 0, 1),          # original text
            (py_full_word, 1.0, 3.0 * 10000, 2, 3),          # pinyin
            (py_acronym_full_word, 2, 3.0 * 10000, 4, 5),    # pinyin first letters
        )
        cur_index = 0
        for word, part_weight, full_weight, part_type, full_type in groups:
            cur_index = _append_substring_suggests(
                word, part_weight, full_weight, part_type, full_type,
                begin_prefix_weight, cur_index, suggest_list)

        return suggest_list
    except:
        logging.error("catch exception,err_msg:%s" % traceback.format_exc())
        return list()
# coding=utf8
from __future__ import unicode_literals, absolute_import, print_function
import logging
import traceback
import json
from libs.cache import redis_client
from trans2es.models.query_word_conversion import TagConversion,DoctorConversion,HospitalConversion
class TagTab(object):
    """Scores a query word from its search rate and conversion rate.

    ``SearchScore`` / ``TransScore`` map a lower threshold to a score; the
    highest threshold that the rate (multiplied by 100) reaches wins. The two
    scores are blended with ``Weight``.
    """

    Weight = {
        'search': 0.4,
        'trans': 0.6
    }

    SearchScore = {
        0.7: 100,
        0.1: 80,
        0.05: 60,
        0.01: 40,
        0: 20,
    }

    TransScore = {
        0.7: 100,
        0.5: 80,
        0.3: 60,
        0.1: 40,
        0: 20
    }

    def cal_score(self, search_rate, conversion_rate):
        """Return the weighted sum of the search score and the conversion score."""
        search_part = self._cal_score(search_rate, 'SearchScore') * self.Weight['search']
        trans_part = self._cal_score(conversion_rate, 'TransScore') * self.Weight['trans']
        return search_part + trans_part

    def _cal_score(self, item, type_):
        """Look up the score for ``item`` in the table named ``type_``.

        Returns ``None`` when the scaled value is below every threshold.
        """
        scaled = item * 100.0
        table = getattr(self, type_)
        for threshold in sorted(table, reverse=True):
            if scaled >= threshold:
                return table[threshold]
        return None
class DocTab(TagTab):
    """TagTab variant that overrides both threshold -> score tables."""

    # Lower threshold -> score, looked up by the inherited _cal_score.
    SearchScore = {
        0.04: 100,
        0.01: 80,
        0.001: 60,
        0.0002: 40,
        0: 20,
    }

    # Lower threshold -> score for the conversion rate.
    TransScore = {
        0.47: 100,
        0.2: 80,
        0.1: 60,
        0.01: 40,
        0: 20
    }
class HosTab(TagTab):
    """TagTab variant that overrides both threshold -> score tables."""

    # Lower threshold -> score, looked up by the inherited _cal_score.
    SearchScore = {
        0.47: 100,
        0.2: 80,
        0.1: 60,
        0.01: 40,
        0: 20
    }

    # Lower threshold -> score for the conversion rate; unlike the other
    # tables this one has ten steps and bottoms out at 10.
    TransScore = {
        1: 100,
        0.45: 90,
        0.27: 80,
        0.21: 70,
        0.15: 60,
        0.12: 50,
        0.09: 40,
        0.06: 30,
        0.04: 20,
        0: 10,
    }
class QueryWordAttr(object):
    """Looks up weighting attributes for a query word (conversion scores, result counts)."""

    # Latest update_date of each conversion table, fetched on first use and
    # then kept for the lifetime of the class.
    tag_latest_date = None
    doctor_latest_date = None
    hospital_latest_date = None

    tips_num_redis_key_prefix = "search_tips:tips_mapping_num"

    tagtab = TagTab()
    doctab = DocTab()
    hostab = HosTab()

    @classmethod
    def get_project_query_word_weight(cls, name):
        """Sum the TagTab scores of the latest TagConversion rows whose query is ``name``.

        Returns 0.0 when nothing matches or when any exception occurs.
        """
        try:
            if not cls.tag_latest_date:
                newest = TagConversion.objects.latest('update_date')
                cls.tag_latest_date = newest.update_date

            rows = TagConversion.objects.filter(query=name, update_date=cls.tag_latest_date)
            return sum(
                (cls.tagtab.cal_score(row.search_rate, row.conversion_rate) for row in rows),
                0.0)
        except:
            logging.error("catch exception,err_msg:%s" % traceback.format_exc())
            return 0.0

    @classmethod
    def get_doctor_query_word_weight(cls, name):
        """Sum the DocTab scores of the latest DoctorConversion rows whose query is ``name``.

        Returns 0.0 when nothing matches or when any exception occurs.
        """
        try:
            if not cls.doctor_latest_date:
                newest = DoctorConversion.objects.latest('update_date')
                cls.doctor_latest_date = newest.update_date

            rows = DoctorConversion.objects.filter(query=name, update_date=cls.doctor_latest_date)
            return sum(
                (cls.doctab.cal_score(row.search_rate, row.conversion_rate) for row in rows),
                0.0)
        except:
            logging.error("catch exception,err_msg:%s" % traceback.format_exc())
            return 0.0

    @classmethod
    def get_hospital_query_word_weight(cls, name):
        """Sum the HosTab scores of the latest HospitalConversion rows whose query is ``name``.

        Returns 0.0 when nothing matches or when any exception occurs.
        """
        try:
            if not cls.hospital_latest_date:
                newest = HospitalConversion.objects.latest('update_date')
                cls.hospital_latest_date = newest.update_date

            rows = HospitalConversion.objects.filter(query=name, update_date=cls.hospital_latest_date)
            return sum(
                (cls.hostab.cal_score(row.search_rate, row.conversion_rate) for row in rows),
                0.0)
        except:
            logging.error("catch exception,err_msg:%s" % traceback.format_exc())
            return 0.0

    @classmethod
    def get_query_results_num(cls, name):
        """Return the ``'t'`` value stored for ``name`` in the tips redis hash, or 0.

        The hash field holds a JSON document; a missing field, a missing
        ``'t'`` key or any exception yields 0.
        """
        try:
            raw = redis_client.hget(cls.tips_num_redis_key_prefix, name)
            if raw:
                parsed = json.loads(str(raw, encoding="utf-8"))
            else:
                parsed = {}

            if 't' in parsed:
                return parsed['t']
            return 0
        except:
            logging.error("catch exception,err_msg:%s" % traceback.format_exc())
            return 0
# !/usr/bin/env python
# encoding=utf-8
from __future__ import unicode_literals, absolute_import, print_function
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import django.db.models
from django.conf import settings
from django.core.management.base import BaseCommand, CommandError
import traceback
import logging
import six
import sys
from libs.es import ESPerform
import trans2es.models as md
from libs.table_scan import TableSlicer,TableSlicerChunk
from trans2es.type_info import get_type_info_map,TypeInfo
from libs.cache import redis_client
import json
class Job(object):
    """Callable unit of work: push one table chunk into Elasticsearch."""

    # Client shared by all jobs, created on first use.
    __es = None

    def __init__(self, sub_index_name, type_name, chunk):
        assert isinstance(sub_index_name, six.string_types)
        assert isinstance(type_name, six.string_types)
        assert isinstance(chunk, TableSlicerChunk)
        self._sub_index_name = sub_index_name
        self._type_name = type_name
        self._chunk = chunk

    @classmethod
    def get_es(cls):
        """Return the shared Elasticsearch client, creating it if needed."""
        if cls.__es is not None:
            return cls.__es
        cls.__es = ESPerform().get_cli()
        return cls.__es

    def __call__(self):
        """Insert this job's chunk through the TypeInfo registered for its type name."""
        type_info = get_type_info_map()[self._type_name]
        assert isinstance(type_info, TypeInfo)

        type_info.insert_table_chunk(
            sub_index_name=self._sub_index_name,
            table_chunk=self._chunk,
            es=self.get_es(),
        )
class Command(BaseCommand):
    """Management command that dumps table data into Elasticsearch, type by type."""

    args = ''
    help = 'dump data to elasticsearch, parallel'

    from optparse import make_option

    option_list = BaseCommand.option_list + (
        make_option('-t', '--type', dest='type', help='type name to dump data to elasticsearch', metavar='TYPE',default=''),
        make_option('-i', '--index-prefix', dest='index_prefix', help='index name to dump data to elasticsearch', metavar='INDEX_PREFIX'),
        make_option('-p', '--parallel', dest='parallel', help='parallel process count', metavar='PARALLEL'),
        make_option('-s', '--pks', dest='pks', help='specify sync pks, comma separated', metavar='PKS', default=''),
        make_option('--streaming-slicing', dest='streaming_slicing', action='store_true', default=True),
        make_option('--no-streaming-slicing', dest='streaming_slicing', action='store_false', default=True),
        make_option('-S', '--sync_type',dest='sync_type', help='sync data to es',metavar='TYPE',default='')
    )

    def __sync_data_by_type(self, type_name):
        """Slice the queryset of ``type_name`` into chunks and run one Job per chunk."""
        try:
            type_info = get_type_info_map()[type_name]
            slicer = TableSlicer(
                queryset=type_info.queryset,
                chunk_size=type_info.bulk_insert_chunk_size)
            for chunk in slicer.chunks():
                Job(
                    sub_index_name=type_info.name,
                    type_name=type_name,
                    chunk=chunk,
                )()
        except:
            logging.error("catch exception,err_msg:%s" % traceback.format_exc())

    def handle(self, *args, **options):
        """Sync every registered type selected by ``--type`` (a type name or ``all``)."""
        try:
            for type_name in get_type_info_map().keys():
                wanted = options["type"]
                # Nothing is synced when --type was left empty.
                if len(wanted) == 0:
                    continue
                if wanted != "all" and type_name != wanted:
                    continue
                logging.info("begin sync [%s] data to es!" % type_name)
                self.__sync_data_by_type(type_name)
        except:
            logging.error("catch exception,err_msg:%s" % traceback.format_exc())
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import django.db.models
from django.conf import settings
from django.core.management.base import BaseCommand, CommandError
import traceback
import logging
import six
import sys
from libs.es import ESPerform
import trans2es.models as md
from libs.table_scan import TableSlicer,TableSlicerChunk
from trans2es.type_info import get_type_info_map,TypeInfo,tips_attr_sync_to_redis_type_info_map
from libs.cache import redis_client
import json
class Command(BaseCommand):
    """Management command that feeds table rows to the per-type redis sync function."""

    args = ''
    help = 'dump data to redis, parallel'

    from optparse import make_option

    option_list = BaseCommand.option_list + (
        make_option('-t', '--type', dest='type', help='type name to dump data to elasticsearch', metavar='TYPE',default=''),
        make_option('-i', '--index-prefix', dest='index_prefix', help='index name to dump data to elasticsearch', metavar='INDEX_PREFIX'),
        make_option('-p', '--parallel', dest='parallel', help='parallel process count', metavar='PARALLEL'),
        make_option('-s', '--pks', dest='pks', help='specify sync pks, comma separated', metavar='PKS', default=''),
        make_option('--streaming-slicing', dest='streaming_slicing', action='store_true', default=True),
        make_option('--no-streaming-slicing', dest='streaming_slicing', action='store_false', default=True),
        make_option('-S', '--sync_type',dest='sync_type', help='sync data to es',metavar='TYPE',default='')
    )

    def handle(self, *args, **options):
        """Call ``get_data_func`` on every row of each type selected by ``--type``.

        ``--type`` is either a registered type name or ``all``; an empty value
        selects nothing. Any exception is logged and ends the run.
        """
        try:
            for type_name in tips_attr_sync_to_redis_type_info_map().keys():
                wanted = options["type"]
                if len(wanted) == 0:
                    continue
                if wanted != "all" and type_name != wanted:
                    continue

                logging.info("begin sync [%s] data to redis!" % type_name)
                type_info = tips_attr_sync_to_redis_type_info_map()[type_name]
                slicer = TableSlicer(
                    queryset=type_info.queryset,
                    chunk_size=type_info.bulk_insert_chunk_size)
                for chunk in slicer.chunks():
                    for row in list(chunk):
                        type_info.get_data_func(row)
        except:
            logging.error("catch exception,err_msg:%s" % traceback.format_exc())
{
"dynamic":"strict",
"properties": {
"id":{"type":"text"},
"suggest":{
"type":"completion"
},
"suggest_type":{"type":"long"},//0-汉字,1-汉字全拼,2-拼音,3-拼音全拼,4-拼音简写,5-拼音简写全拼
"tips_name_type":{"type":"long"},//tips数据所属类型,0-tag,1-hospital,2-doctor,3-wiki
"ori_name":{"type":"keyword"},//原名称
"order_weight":{"type":"double"},//订单权重
"offline_score":{"type":"double"},//离线分
"results_num":{"type":"integer"},//结果数量
"type_flag":{"type":"keyword"},
"is_online":{"type":"boolean"}//上线
}
}
\ No newline at end of file
from django.db import models
# Create your models here.
# -*- coding: utf-8 -*-
from __future__ import unicode_literals, absolute_import, print_function
from django.db import models
import logging
import traceback
import datetime
class BrandWiki(models.Model):
    """Django model mapped to the ``wiki_brand`` table."""

    class Meta:
        verbose_name = u'品牌'
        db_table = 'wiki_brand'

    id = models.IntegerField(verbose_name=u'品牌ID', primary_key=True)
    name = models.CharField(u'项目介绍', max_length=48)
    description = models.TextField(u'品牌简介')
    is_online = models.BooleanField(u'是否上线', default=True)
    # The default is computed once, when the class body is executed.
    create_time = models.DateTimeField(verbose_name=u'创建时间', default=datetime.datetime.fromtimestamp(0))
# -*- coding: utf-8 -*-
from __future__ import unicode_literals, absolute_import, print_function
from django.db import models
import datetime
class CollectWiki(models.Model):
    """Django model mapped to the ``wiki_collect`` table."""

    class Meta:
        verbose_name = u'聚合新百科'
        db_table = 'wiki_collect'

    id = models.IntegerField(verbose_name=u'聚合ID', primary_key=True)
    name = models.CharField(u'聚合名称', max_length=48)
    description = models.TextField(u'简介')
    is_online = models.BooleanField(u'是否上线', default=True)
    # The default is computed once, when the class body is executed.
    create_time = models.DateTimeField(verbose_name=u'创建时间', default=datetime.datetime.fromtimestamp(0))
#!/usr/bin/env python
# -*- coding: utf-8 -*-
from django.conf import settings
from django.core.management.base import BaseCommand, CommandError
import traceback
import logging
from libs.es import ESPerform
from django.db import models
import datetime
from gm_types.gaia import (
DOCTOR_PROCESS_STATUS, DOCTOR_TYPE, PHONE_HINTS, TOPIC_TYPE, TAG_TYPE, DisplayTag,
SERVICE_FLAG)
from trans2es.commons.commons import uuid4
class Hospital(models.Model):
    """Django model mapped to the ``api_hospital`` table (app label ``api``)."""

    class Meta:
        verbose_name = u'30. 医院'
        verbose_name_plural = u'30. 医院'
        db_table = 'api_hospital'
        app_label = 'api'

    # String primary key; new rows default to a value from uuid4().
    id = models.CharField(max_length=100, primary_key=True, default=uuid4)
    name = models.CharField(max_length=100, unique=True, help_text=u"医院名称")

    # Longitude / latitude stored separately for Google and Baidu.
    google_loc_lng = models.FloatField(default=0, help_text=u"经度(-180~180)")
    google_loc_lat = models.FloatField(default=0, help_text=u"纬度")
    baidu_loc_lng = models.FloatField(default=0, help_text=u"经度(-180~180)")
    baidu_loc_lat = models.FloatField(default=0, help_text=u"纬度")
class Doctor(models.Model):
    """Django model mapped to the ``api_doctor`` table (app label ``api``)."""

    class Meta:
        verbose_name = u'31. 医生'
        verbose_name_plural = u'31. 医生'
        db_table = 'api_doctor'
        app_label = 'api'

    id = models.CharField(max_length=100, primary_key=True)
    name = models.CharField(max_length=200, verbose_name=u"姓名", db_index=True)
    is_online = models.BooleanField(default=False, help_text=u"是否上线", verbose_name=u"上线", db_index=True)
    doctor_type = models.CharField(max_length=1, default=DOCTOR_TYPE.DOCTOR,
    choices=DOCTOR_TYPE, null=False,
    verbose_name=u'医生类型')
    # Nullable link to the hospital; reverse accessor is ``doctor_hospital``.
    hospital = models.ForeignKey(Hospital, null=True, blank=False, related_name="doctor_hospital", verbose_name=u"医院")
class Service(models.Model):
    """Django model mapped to the ``api_service`` table (app label ``api``)."""

    class Meta:
        verbose_name = u'福利'
        verbose_name_plural = u'福利'
        db_table = 'api_service'
        app_label = 'api'

    name = models.CharField(max_length=100, null=False, verbose_name=u'福利名称')
    short_description = models.CharField(max_length=100, null=False, verbose_name=u'一句话描述')
    detail_description = models.TextField(max_length=2000, verbose_name=u'详细描述')
    # Owning doctor; reverse accessor on Doctor is ``services``.
    doctor = models.ForeignKey(Doctor, related_name='services')
    phone = models.CharField(max_length=20, null=True, blank=True, verbose_name=u'服务电话')
    is_online = models.BooleanField(default=False, help_text=u"是否上线", verbose_name=u"上线")
# -*- coding: utf-8 -*-
from __future__ import unicode_literals, absolute_import, print_function
import datetime
from django.db import models
from gm_types.wiki import TREATMENT_TYPE, ANESTHESIA_TYPE, ACHE_LEVEL_NEW
import traceback
import logging
class ItemWiki(models.Model):
    """Django model mapped to the ``wiki_item`` table."""

    class Meta:
        verbose_name = u'项目新百科'
        db_table = 'wiki_item'

    id = models.IntegerField(verbose_name=u'百科ID', primary_key=True)
    name = models.CharField(u'项目名称', max_length=48)
    description = models.TextField(u'介绍')
    use_result = models.CharField(u'使用功效', max_length=128)
    is_online = models.BooleanField(u'是否上线', default=True)
    # The default is computed once, when the class body is executed.
    create_time = models.DateTimeField(verbose_name=u'创建时间', default=datetime.datetime.fromtimestamp(0))
# -*- coding: utf-8 -*-
from __future__ import unicode_literals, absolute_import, print_function
from django.db import models
from gm_types.wiki import PRODUCT_PARAMS, ANESTHESIA_TYPE, TREATMENT_TYPE
import logging, traceback
import datetime
class ProductWiki(models.Model):
    """Django model mapped to the ``wiki_product`` table."""

    class Meta:
        verbose_name = u'物品百科'
        db_table = 'wiki_product'

    id = models.IntegerField(u"物品百科id", primary_key=True)
    name = models.CharField(u'物品名称', max_length=50)
    product_type = models.CharField(u'物品类型', max_length=4)
    # Plain integer column, not a ForeignKey.
    brand_id = models.IntegerField(u"商品id", null=True)
    description = models.TextField(u'项目描述', blank=True)
    special = models.TextField(u'特色')
    effect = models.CharField(u'功效', max_length=128)
    treatment_method = models.CharField(u'治疗方式', max_length=8, choices=TREATMENT_TYPE)
    is_online = models.BooleanField(u'是否上线', default=True)
    # The default is computed once, when the class body is executed.
    create_time = models.DateTimeField(verbose_name=u'创建时间', default=datetime.datetime.fromtimestamp(0))
# coding=utf8
from __future__ import unicode_literals, absolute_import, print_function
import operator
from django.db import models
from django.db.models import Q
from django.db import transaction
from gm_types.gaia import TAG_ALERT_TYPE
def manager():
    """Return a new model manager bound to the ``bran`` database alias."""
    return models.Manager().db_manager(using='bran')
class TagConversion(models.Model):
    """Django model mapped to ``trade_tag_conversion``; queried through ``manager()``."""

    class Meta:
        verbose_name = '综合、美购、日记tab'
        db_table = 'trade_tag_conversion'
        get_latest_by = 'update_date'
        unique_together = ('update_date', 'province_id', 'business_id')

    # Default manager comes from the module-level manager() factory.
    objects = manager()

    update_date = models.DateField()
    province_id = models.CharField(max_length=40)
    query = models.CharField(max_length=120, verbose_name='搜索时的关键词')
    business_id = models.IntegerField(verbose_name='关键词精确匹配的tagID')
    business_type = models.CharField(max_length=5, verbose_name='tag分级')
    search_rate = models.FloatField(verbose_name='搜索次数占比')
    conversion_rate = models.FloatField(verbose_name='tag转化率')
class DoctorConversion(models.Model):
    """Django model mapped to ``trade_doctor_conversion``; queried through ``manager()``."""

    class Meta:
        verbose_name = '医生tab'
        db_table = 'trade_doctor_conversion'
        get_latest_by = 'update_date'
        unique_together = ('update_date', 'province_id', 'business_id')

    # Default manager comes from the module-level manager() factory.
    objects = manager()

    update_date = models.DateField()
    province_id = models.CharField(max_length=40)
    query = models.CharField(max_length=128)
    business_id = models.CharField(max_length=100)
    business_type = models.CharField(max_length=1)
    search_rate = models.FloatField()
    conversion_rate = models.FloatField()
class HospitalConversion(models.Model):
    """Django model mapped to ``trade_hospital_conversion``; queried through ``manager()``."""

    class Meta:
        verbose_name = '医院tab'
        db_table = 'trade_hospital_conversion'
        get_latest_by = 'update_date'
        unique_together = ('update_date', 'province_id', 'business_id')

    # Default manager comes from the module-level manager() factory.
    objects = manager()

    update_date = models.DateField()
    province_id = models.CharField(max_length=40)
    query = models.CharField(max_length=128)
    business_id = models.CharField(max_length=100)
    business_type = models.CharField(max_length=1)
    search_rate = models.FloatField()
    conversion_rate = models.FloatField()
# coding=utf8
from __future__ import unicode_literals, absolute_import, print_function
import operator
from django.db import models
from django.db.models import Q
from django.db import transaction
import logging
import traceback
from gm_types.gaia import TAG_ALERT_TYPE
from django.utils import timezone
from gm_upload import IMG_TYPE, ImgUrlField
from gm_types.gaia import (
DOCTOR_PROCESS_STATUS, DOCTOR_TYPE, PHONE_HINTS, TOPIC_TYPE, TAG_TYPE, DisplayTag,
SERVICE_FLAG)
from trans2es.models.query_word_conversion import TagConversion
class Tag(models.Model):
    """Django model for a tag (app label ``api``); no explicit ``db_table`` is set."""

    class Meta:
        verbose_name = "标签"
        verbose_name_plural = "标签"
        app_label = 'api'

    # Unique, indexed tag name.
    name = models.CharField(
    verbose_name='名称',
    max_length=64,
    null=False,
    blank=False,
    db_index=True,
    unique=True,
    )
    # One of the TAG_TYPE choices.
    tag_type = models.CharField(
    verbose_name='标签类型',
    max_length=4,
    null=False,
    blank=False,
    choices=TAG_TYPE,
    )
    is_online = models.BooleanField(null=False, default=True, verbose_name='是否上线')
\ No newline at end of file
from django.test import TestCase
# Create your tests here.
# coding=utf-8
from __future__ import unicode_literals, print_function, absolute_import
import time
import datetime
import logging
import traceback
import django.db.models
from django.conf import settings
from libs.es import ESPerform
import elasticsearch
import elasticsearch.helpers
import sys
import copy
from trans2es.models import doctor,itemwiki,collectwiki,brandwiki,productwiki,tag
from trans2es.utils.doctor_transfer import DoctorTransfer
from trans2es.utils.hospital_transfer import HospitalTransfer
from trans2es.utils.itemwiki_transfer import ItemWikiTransfer
from trans2es.utils.collectwiki_transfer import CollectWikiTransfer
from trans2es.utils.brandwiki_transfer import BrandWikiTransfer
from trans2es.utils.productwiki_transfer import ProduceWikiTransfer
from trans2es.utils.tag_transfer import TagTransfer
from libs.es import ESPerform
from gm_types.gaia import SERVICE_ITEM_PRICE_TYPE, DOCTOR_TYPE
from gm_types.gaia import (
DOCTOR_PROCESS_STATUS, DOCTOR_TYPE, PHONE_HINTS, TOPIC_TYPE, TAG_TYPE, DisplayTag,
SERVICE_FLAG)
# Module-wide Elasticsearch client, created on first use.
__es = None


def get_elasticsearch_instance():
    """Return the shared Elasticsearch client, creating it via ESPerform.get_cli() when unset.

    While the cached value is None (including when a previous get_cli() call
    returned None) every call tries to create the client again.
    """
    global __es
    if __es is not None:
        return __es
    __es = ESPerform.get_cli()
    return __es
def get_es_list_by_type(es_type):
    """Return a one-element list holding the shared Elasticsearch client.

    ``es_type`` is accepted but not used: every type gets the same client.
    """
    shared_client = get_elasticsearch_instance()
    return [shared_client]
class TypeInfo(object):
    """Describes one document type: which model rows to read and how to turn them into ES docs.

    Attributes set by the constructor:
        name: sub-index name reported by ``insert_table_chunk``.
        type: document-type label, used in log messages.
        model: Django model class the rows come from.
        query_deferred: zero-argument callable returning the Django query for ``queryset``.
        get_data_func: callable taking one model instance and returning
            ``(item_dict, suggest_list)``.
        batch_get_data_func: optional callable taking a list of pks (not instances)
            and returning the finished document list.
        bulk_insert_chunk_size / round_insert_chunk_size / round_insert_period /
        logic_database_id: stored as given; not read inside this class.
    """

    def __init__(
            self,
            name,
            type,
            model,
            query_deferred,
            get_data_func,
            bulk_insert_chunk_size,
            round_insert_chunk_size,
            round_insert_period,
            batch_get_data_func=None,  # receive a list of pks, not instance
            logic_database_id=None,
    ):
        self.name = name
        self.type = type
        self.model = model
        self.query_deferred = query_deferred
        self.get_data_func = get_data_func
        self.batch_get_data_func = batch_get_data_func
        self.pk_blacklist = ()
        self.bulk_insert_chunk_size = bulk_insert_chunk_size
        self.round_insert_chunk_size = round_insert_chunk_size
        self.round_insert_period = round_insert_period
        self.logic_database_id = logic_database_id

    @property
    def query(self):
        """Evaluate ``query_deferred`` and return its result (a fresh call on every access)."""
        return self.query_deferred()

    @property
    def queryset(self):
        """Return a new QuerySet over ``model`` driven by ``query``."""
        return django.db.models.QuerySet(model=self.model, query=self.query)

    @property
    def pk_blacklist(self):
        """Frozen set of primary keys that ``bulk_get_data`` must skip."""
        return self.__pk_blacklist

    @pk_blacklist.setter
    def pk_blacklist(self, value):
        # Stored as a frozenset so membership tests are O(1) and the set cannot be mutated in place.
        self.__pk_blacklist = frozenset(value)

    @staticmethod
    def _build_suggest_docs(item_dict, suggest_list):
        """Expand one base document into one ES document per suggest entry."""
        docs = []
        for suggest_item in suggest_list:
            # Deep copy so the per-suggest fields never leak into the shared base document.
            suggest_dict = copy.deepcopy(item_dict)
            suggest_dict["suggest_type"] = suggest_item["suggest_type"]
            suggest_dict["offline_score"] = suggest_item["word_weight"] + suggest_dict["order_weight"]
            suggest_dict["id"] = str(suggest_dict["id"]) + "_" + str(suggest_item["cur_index"])
            suggest_dict["suggest"] = {
                "input": suggest_item["input"],
                "weight": int(suggest_dict["offline_score"])
            }
            docs.append(suggest_dict)
        return docs

    def bulk_get_data(self, instance_iterable):
        """Convert model instances into the list of documents to index.

        With ``batch_get_data_func`` set, the usable pks (present and not
        blacklisted) are handed to it in a single call. Otherwise
        ``get_data_func`` is called per instance and its
        ``(item_dict, suggest_list)`` result is expanded into one document per
        suggest entry. Failures are logged and skipped, never raised.

        Returns:
            list of document dicts (possibly empty).
        """
        data_list = []
        if self.batch_get_data_func:
            _pk_list = [getattr(instance, 'pk', None) for instance in instance_iterable]
            not_found_pk_list = []
            blacklisted_pk_list = []
            pk_list = []
            for pk in _pk_list:
                if pk is None:
                    not_found_pk_list.append(pk)
                elif pk in self.__pk_blacklist:
                    blacklisted_pk_list.append(pk)
                else:
                    pk_list.append(pk)
            if not_found_pk_list:
                # No exception is active here, so logging.exception would append a
                # meaningless "NoneType: None" traceback; log a plain error instead.
                logging.error('those pks not found for name={}, doc_type={}, pk_list={}'.format(
                    self.name,
                    self.type,
                    str(not_found_pk_list),
                ))
            if blacklisted_pk_list:
                logging.info('those pks are in blacklist for name={}, doc_type={}, pk_list={}'.format(
                    self.name,
                    self.type,
                    str(blacklisted_pk_list),
                ))
            try:
                data_list = self.batch_get_data_func(pk_list)
            except Exception:
                traceback.print_exc()
                logging.exception('bulk_get_data for name={}, doc_type={}, pk_list={}'.format(
                    self.name,
                    self.type,
                    str(pk_list),
                ))
        else:
            for instance in instance_iterable:
                pk = getattr(instance, 'pk', None)
                try:
                    if pk is None:
                        raise Exception('pk not found')
                    if pk in self.__pk_blacklist:
                        logging.info('bulk_get_data for name={}, doc_type={}, pk={}: ignore blacklisted pk'.format(
                            self.name,
                            self.type,
                            pk,
                        ))
                        continue
                    (item_dict, suggest_list) = self.get_data_func(instance)
                    data_list.extend(self._build_suggest_docs(item_dict, suggest_list))
                except Exception:
                    # One bad row must not abort the whole chunk: log it and move on.
                    traceback.print_exc()
                    logging.exception('bulk_get_data for name={}, doc_type={}, pk={}'.format(
                        self.name,
                        self.type,
                        pk,
                    ))
        return data_list

    def elasticsearch_bulk_insert_data(self, sub_index_name, data_list, es=None):
        """Bulk-write ``data_list`` through ESPerform.es_helpers_bulk and return its result."""
        return ESPerform.es_helpers_bulk(es, data_list, sub_index_name, True)

    def elasticsearch_bulk_insert(self, sub_index_name, instance_iterable, es=None):
        """Convert the instances with ``bulk_get_data`` and bulk-write the resulting documents."""
        data_list = self.bulk_get_data(instance_iterable)
        return self.elasticsearch_bulk_insert_data(
            sub_index_name=sub_index_name,
            data_list=data_list,
            es=es,
        )

    def insert_table_by_pk_list(self, sub_index_name, pk_list, es=None, use_batch_query_set=False):
        """Load the rows whose pk is in ``pk_list`` and bulk-write their documents.

        ``use_batch_query_set`` selects ``self.queryset`` (built from
        ``query_deferred``) instead of ``model.objects.all()`` as the base
        queryset. Returns None.
        """
        if use_batch_query_set:
            qs = self.queryset
        else:
            qs = self.model.objects.all()
        instance_list = qs.filter(pk__in=pk_list)
        data_list = self.bulk_get_data(instance_list)
        self.elasticsearch_bulk_insert_data(
            sub_index_name=sub_index_name,
            data_list=data_list,
            es=es,
        )

    def insert_table_chunk(self, sub_index_name, table_chunk, es=None):
        """Index one table chunk and return a one-line timing report.

        ``table_chunk`` must be iterable and provide ``get_pk_start()`` and
        ``get_pk_stop()``. The report contains the timestamp, sub-index name,
        type name, pk range, row count, the wall-clock duration of the three
        stages (load rows, build documents, bulk write), the CPU time of the
        whole call and the bulk-write result.
        """
        # time.clock() was removed in Python 3.8; process_time() is its CPU-time
        # replacement. Fall back to clock() only on interpreters that lack it.
        cpu_clock = getattr(time, 'process_time', None) or time.clock

        start_clock = cpu_clock()
        start_time = time.time()
        instance_list = list(table_chunk)
        stage_1_time = time.time()
        data_list = self.bulk_get_data(instance_list)
        stage_2_time = time.time()
        es_result = ESPerform.es_helpers_bulk(
            es_cli=es,
            data_list=data_list,
            sub_index_name=sub_index_name,
            auto_create_index=True
        )
        stage_3_time = time.time()
        end_clock = cpu_clock()
        return ('{datetime} {index_prefix} {type_name:10s} {pk_start:>15s} {pk_stop:>15s} {count:5d} '
                '{stage_1_duration:6.3f} {stage_2_duration:6.3f} {stage_3_duration:6.3f} {clock_duration:6.3f} '
                '{response}').format(
            datetime=datetime.datetime.now().strftime('%Y-%m-%dT%H:%M:%S.%f'),
            index_prefix=sub_index_name,
            type_name=self.name,
            pk_start=repr(table_chunk.get_pk_start()),
            pk_stop=repr(table_chunk.get_pk_stop()),
            count=len(instance_list),
            stage_1_duration=stage_1_time - start_time,
            stage_2_duration=stage_2_time - stage_1_time,
            stage_3_duration=stage_3_time - stage_2_time,
            clock_duration=end_clock - start_clock,
            response=es_result,
        )
_get_type_info_map_result = None


def get_type_info_map():
    """Return the ``{doc_type: TypeInfo}`` map for the suggest ("tips") documents.

    The map is built on the first call and cached in
    ``_get_type_info_map_result``; later calls return the cached dict.
    """
    global _get_type_info_map_result
    if _get_type_info_map_result:
        return _get_type_info_map_result

    # One row per document type: (type, model, query_deferred, get_data_func).
    specs = (
        (
            'doctor_tips',  # doctor
            doctor.Doctor,
            lambda: doctor.Doctor.objects.all().filter(doctor_type=DOCTOR_TYPE.DOCTOR).query,
            DoctorTransfer.get_doctor_suggest_data_list,
        ),
        (
            'hospital_tips',  # hospital
            doctor.Doctor,
            lambda: doctor.Doctor.objects.all().filter(doctor_type=DOCTOR_TYPE.OFFICER).query,
            HospitalTransfer.get_hospital_suggest_data_list,
        ),
        (
            'itemwiki_tips',  # itemwiki
            itemwiki.ItemWiki,
            lambda: itemwiki.ItemWiki.objects.all().query,
            ItemWikiTransfer.get_itemwiki_suggest_data_list,
        ),
        (
            'collectwiki_tips',  # collectwiki
            collectwiki.CollectWiki,
            lambda: collectwiki.CollectWiki.objects.all().query,
            CollectWikiTransfer.get_collectwiki_suggest_data_list,
        ),
        (
            'brandwiki_tips',  # brandwiki
            brandwiki.BrandWiki,
            lambda: brandwiki.BrandWiki.objects.all().query,
            BrandWikiTransfer.get_brandwiki_suggest_data_list,
        ),
        (
            'productwiki_tips',  # productwiki
            productwiki.ProductWiki,
            lambda: productwiki.ProductWiki.objects.all().query,
            ProduceWikiTransfer.get_productwiki_suggest_data_list,
        ),
        (
            'tag_tips',  # tag
            tag.Tag,
            lambda: tag.Tag.objects.all().filter(tag_type__in=[TAG_TYPE.BODY_PART, TAG_TYPE.BODY_PART_SUB_ITEM, TAG_TYPE.ITEM_WIKI]).query,
            TagTransfer.get_tag_suggest_data_list,
        ),
    )

    type_info_map = {}
    for doc_type, model_cls, deferred_query, data_func in specs:
        # Every entry shares the same sub-index name and chunking parameters.
        type_info_map[doc_type] = TypeInfo(
            name='suggest',
            type=doc_type,
            model=model_cls,
            query_deferred=deferred_query,
            get_data_func=data_func,
            bulk_insert_chunk_size=100,
            round_insert_chunk_size=5,
            round_insert_period=2,
        )

    _get_type_info_map_result = type_info_map
    return type_info_map
# Dedicated cache for tips_attr_sync_to_redis_type_info_map(). It used to share
# _get_type_info_map_result with get_type_info_map(), so whichever function ran
# first decided what BOTH returned afterwards.
_tips_attr_sync_to_redis_type_info_map_result = None


def tips_attr_sync_to_redis_type_info_map():
    """Return the ``{type: TypeInfo}`` map for the jobs that sync tips attributes to Redis.

    Each entry pairs a model/query with the transfer function that writes the
    per-name result counts (or the hospital coordinates) to Redis. The map is
    built on the first call and cached in its own module-level variable.
    """
    global _tips_attr_sync_to_redis_type_info_map_result
    if _tips_attr_sync_to_redis_type_info_map_result:
        return _tips_attr_sync_to_redis_type_info_map_result

    type_info_list = [
        TypeInfo(
            name='suggest',
            type='doctor_results_num',  # doctor result count
            model=doctor.Doctor,
            query_deferred=lambda: doctor.Doctor.objects.all().query,
            get_data_func=DoctorTransfer.get_doctor_data_name_mapping_results_to_redis,
            bulk_insert_chunk_size=100,
            round_insert_chunk_size=5,
            round_insert_period=2,
        ),
        TypeInfo(
            name='suggest',
            type='tag_results_num',  # tag result count
            model=tag.Tag,
            query_deferred=lambda: tag.Tag.objects.all().filter(tag_type__in=[TAG_TYPE.BODY_PART, TAG_TYPE.BODY_PART_SUB_ITEM, TAG_TYPE.ITEM_WIKI]).query,
            get_data_func=TagTransfer.get_tag_data_name_mapping_results_to_redis,
            bulk_insert_chunk_size=100,
            round_insert_chunk_size=5,
            round_insert_period=2,
        ),
        # All four wiki models share ItemWikiTransfer's count-sync function.
        TypeInfo(
            name='suggest',
            type='itemwiki_results_num',  # itemwiki
            model=itemwiki.ItemWiki,
            query_deferred=lambda: itemwiki.ItemWiki.objects.all().query,
            get_data_func=ItemWikiTransfer.get_wiki_data_name_mapping_results_to_redis,
            bulk_insert_chunk_size=100,
            round_insert_chunk_size=5,
            round_insert_period=2,
        ),
        TypeInfo(
            name='suggest',
            type='collectwiki_results_num',  # collectwiki
            model=collectwiki.CollectWiki,
            query_deferred=lambda: collectwiki.CollectWiki.objects.all().query,
            get_data_func=ItemWikiTransfer.get_wiki_data_name_mapping_results_to_redis,
            bulk_insert_chunk_size=100,
            round_insert_chunk_size=5,
            round_insert_period=2,
        ),
        TypeInfo(
            name='suggest',
            type='brandwiki_results_num',  # brandwiki
            model=brandwiki.BrandWiki,
            query_deferred=lambda: brandwiki.BrandWiki.objects.all().query,
            get_data_func=ItemWikiTransfer.get_wiki_data_name_mapping_results_to_redis,
            bulk_insert_chunk_size=100,
            round_insert_chunk_size=5,
            round_insert_period=2,
        ),
        TypeInfo(
            name='suggest',
            type='productwiki_results_num',  # productwiki
            model=productwiki.ProductWiki,
            query_deferred=lambda: productwiki.ProductWiki.objects.all().query,
            get_data_func=ItemWikiTransfer.get_wiki_data_name_mapping_results_to_redis,
            bulk_insert_chunk_size=100,
            round_insert_chunk_size=5,
            round_insert_period=2,
        ),
        TypeInfo(
            name='suggest',
            type='hospital_latlng',  # hospital_pos_info
            model=doctor.Hospital,
            query_deferred=lambda: doctor.Hospital.objects.all().query,
            get_data_func=HospitalTransfer.get_hospital_lat_lng_info_to_redis,
            bulk_insert_chunk_size=100,
            round_insert_chunk_size=5,
            round_insert_period=2,
        )
    ]
    type_info_map = {
        type_info.type: type_info
        for type_info in type_info_list
    }
    _tips_attr_sync_to_redis_type_info_map_result = type_info_map
    return type_info_map
\ No newline at end of file
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import os
import sys
import logging
import traceback
from libs.tools import tzlc,getMd5Digest
from libs.es import ESPerform
from libs.cache import redis_client
import json
from trans2es.commons.commons import get_tips_suggest_list
from trans2es.commons.words_utils import QueryWordAttr
class BrandWikiTransfer(object):
    """Builds search-tips suggest data from brand wiki rows."""

    # Elasticsearch node list kept on the class; not read by the method below.
    test_es_info_list = [
        {'host': '10.30.57.94', 'port': 9200}
    ]

    @classmethod
    def get_brandwiki_suggest_data_list(cls, instance):
        """Return ``(item_dict, suggest_list)`` for one brand wiki instance.

        ``item_dict`` is the base document (id is the MD5 digest of the name);
        ``suggest_list`` comes from ``get_tips_suggest_list(instance.name)``.
        On any error the failure is logged and ``([], [])`` is returned, which
        callers that iterate ``suggest_list`` treat as "nothing to index".
        """
        try:
            name = instance.name
            item_dict = {
                "id": getMd5Digest(str(name)),
                "ori_name": name,
                "is_online": instance.is_online,
                "order_weight": QueryWordAttr.get_project_query_word_weight(name),
                "results_num": QueryWordAttr.get_query_results_num(name),
                "type_flag": "project",
                "offline_score": 0.0,
                "tips_name_type": 3,
            }
            suggest_list = get_tips_suggest_list(name)
            return (item_dict, suggest_list)
        except Exception:
            # Exception (not a bare except) so Ctrl-C / SystemExit can still stop a long sync run.
            logging.error("catch exception,err_msg:%s" % traceback.format_exc())
            return ([], [])
\ No newline at end of file
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import os
import sys
import logging
import traceback
from libs.tools import tzlc,getMd5Digest
from libs.es import ESPerform
from libs.cache import redis_client
import json
from trans2es.commons.commons import get_tips_suggest_list
from trans2es.commons.words_utils import QueryWordAttr
class CollectWikiTransfer(object):
    """Builds search-tips suggest data from collect wiki rows."""

    # Elasticsearch node list kept on the class; not read by the method below.
    test_es_info_list = [
        {'host': '10.30.57.94', 'port': 9200}
    ]

    @classmethod
    def get_collectwiki_suggest_data_list(cls, instance):
        """Return ``(item_dict, suggest_list)`` for one collect wiki instance.

        ``item_dict`` is the base document (id is the MD5 digest of the name);
        ``suggest_list`` comes from ``get_tips_suggest_list(instance.name)``.
        On any error the failure is logged and ``([], [])`` is returned, which
        callers that iterate ``suggest_list`` treat as "nothing to index".
        """
        try:
            name = instance.name
            item_dict = {
                "id": getMd5Digest(str(name)),
                "ori_name": name,
                "is_online": instance.is_online,
                "order_weight": QueryWordAttr.get_project_query_word_weight(name),
                "results_num": QueryWordAttr.get_query_results_num(name),
                "type_flag": "project",
                "offline_score": 0.0,
                "tips_name_type": 3,
            }
            suggest_list = get_tips_suggest_list(name)
            return (item_dict, suggest_list)
        except Exception:
            # Exception (not a bare except) so Ctrl-C / SystemExit can still stop a long sync run.
            logging.error("catch exception,err_msg:%s" % traceback.format_exc())
            return ([], [])
\ No newline at end of file
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import os
import sys
import logging
import traceback
from libs.tools import tzlc,getMd5Digest
from libs.es import ESPerform
from libs.cache import redis_client
import json
from trans2es.commons.commons import get_tips_suggest_list
from trans2es.commons.words_utils import QueryWordAttr
class DoctorTransfer(object):
    """Builds suggest data for doctors and syncs per-name doctor result counts to Redis."""

    # Node list handed to ESPerform.get_cli() by the count sync below.
    # NOTE(review): ESPerform.get_cli() switches its index prefix to "gm_test"
    # when given an explicit node list -- confirm this hard-coded host is
    # intended outside of testing.
    test_es_info_list = [
        {'host': '10.30.57.94', 'port': 9200}
    ]

    @classmethod
    def get_doctor_data_name_mapping_results_to_redis(cls, instance):
        """Count online doctor hits for ``instance.name`` and store the count in Redis.

        The count is written under key ``'d'`` of the JSON value stored at field
        ``<name>`` of hash ``search_tips:tips_mapping_num``. The total ``'t'`` is
        then recomputed from the per-source counts (``'d'`` doctor, ``'s'``
        service, ``'w'`` wiki). Previously the new count was added to the old
        total, so every re-run inflated ``'t'``.

        Blank names are skipped. Errors are logged, never raised. Returns None.
        """
        try:
            doctor_name = instance.name.strip()
            if not doctor_name:
                return

            multi_fields = {
                'hospital.city_province_name': 1,
                'hospital.name': 1,
                'name': 1,
                'hospital.city_name': 1,
                'hospital.officer_name': 1,
                'service_closure_tags': 1
            }
            query_fields = ['^'.join((k, str(v))) for (k, v) in multi_fields.items()]
            multi_match = {
                'query': doctor_name,
                'type': 'cross_fields',
                'operator': 'and',
                'fields': query_fields,
            }
            q = {
                "size": 0,
                "query": {
                    "bool": {
                        "should": [
                            {"multi_match": multi_match}
                        ],
                        "must": [
                            {"term": {"is_online": True}}
                        ],
                        "minimum_should_match": 1
                    }
                }
            }

            result_dict = ESPerform.get_search_results(ESPerform.get_cli(cls.test_es_info_list), sub_index_name="doctor", doc_type="doctor", query_body=q, offset=0, size=0)
            doctor_results = result_dict["total_count"]

            redis_key_prefix = "search_tips:tips_mapping_num"
            redis_data = redis_client.hget(redis_key_prefix, doctor_name)
            # The client may return bytes or (with decode_responses) str; accept both.
            if isinstance(redis_data, bytes):
                redis_data = redis_data.decode("utf-8")
            redis_val_dict = json.loads(redis_data) if redis_data else {}

            redis_val_dict['d'] = doctor_results
            # Derive the total from its components so repeated runs are idempotent.
            redis_val_dict['t'] = sum(int(redis_val_dict.get(key, 0)) for key in ('d', 's', 'w'))

            logging.info("duan add,redis_key_prefix:%s,redis_val_dict:%s" % (str(redis_key_prefix),str(redis_val_dict)))
            redis_client.hset(redis_key_prefix, doctor_name, json.dumps(redis_val_dict))
        except Exception:
            # Exception (not a bare except) so Ctrl-C / SystemExit can still stop a long sync run.
            logging.error("catch exception,err_msg:%s" % traceback.format_exc())

    @classmethod
    def get_doctor_suggest_data_list(cls, instance):
        """Return ``(item_dict, suggest_list)`` for one doctor instance.

        ``item_dict`` is the base document (id is the MD5 digest of the name);
        ``suggest_list`` comes from ``get_tips_suggest_list(instance.name)``.
        On any error the failure is logged and ``([], [])`` is returned.
        """
        try:
            name = instance.name
            item_dict = {
                "id": getMd5Digest(str(name)),
                "ori_name": name,
                "is_online": instance.is_online,
                "order_weight": QueryWordAttr.get_doctor_query_word_weight(name),
                "results_num": QueryWordAttr.get_query_results_num(name),
                "type_flag": "doctor",
                "offline_score": 0.0,
                "tips_name_type": 2,
            }
            suggest_list = get_tips_suggest_list(name)
            return (item_dict, suggest_list)
        except Exception:
            logging.error("catch exception,err_msg:%s" % traceback.format_exc())
            return ([], [])
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import os
import sys
import logging
import traceback
from libs.tools import tzlc,getMd5Digest
from libs.es import ESPerform
from libs.cache import redis_client
import json
from trans2es.commons.commons import get_tips_suggest_list
from trans2es.commons.words_utils import QueryWordAttr
class HospitalTransfer(object):
    """Builds suggest data for hospitals and syncs hospital coordinates to Redis."""

    # Elasticsearch node list kept on the class; not read by the methods below.
    test_es_info_list = [
        {'host': '10.30.57.94', 'port': 9200}
    ]

    @classmethod
    def get_hospital_suggest_data_list(cls, instance):
        """Return ``(item_dict, suggest_list)`` for one hospital instance.

        ``item_dict`` is the base document (id is the MD5 digest of the name);
        ``suggest_list`` comes from ``get_tips_suggest_list(instance.name)``.
        On any error the failure is logged and ``([], [])`` is returned.
        """
        try:
            name = instance.name
            item_dict = {
                "id": getMd5Digest(str(name)),
                "ori_name": name,
                "is_online": instance.is_online,
                "order_weight": QueryWordAttr.get_hospital_query_word_weight(name),
                "results_num": QueryWordAttr.get_query_results_num(name),
                "type_flag": "hospital",
                "offline_score": 0.0,
                "tips_name_type": 1,
            }
            suggest_list = get_tips_suggest_list(name)
            return (item_dict, suggest_list)
        except Exception:
            # Exception (not a bare except) so Ctrl-C / SystemExit can still stop a long sync run.
            logging.error("catch exception,err_msg:%s" % traceback.format_exc())
            return ([], [])

    @classmethod
    def get_hospital_lat_lng_info_to_redis(cls, instance):
        """Store ``[lng, lat]`` of one hospital in hash ``search_tips:hospital_latlng_info``.

        The hash field is the hospital name and the value is the JSON text of
        ``[baidu_loc_lng, baidu_loc_lat]``. Errors are logged, never raised.
        Returns None.
        """
        try:
            redis_key_prefix = "search_tips:hospital_latlng_info"
            name = instance.name
            pos_list = [instance.baidu_loc_lng, instance.baidu_loc_lat]
            # redis-py >= 3.0 rejects list values with DataError, so serialize
            # explicitly. For float coordinates the JSON text equals the str()
            # form that older clients stored implicitly.
            redis_client.hset(redis_key_prefix, name, json.dumps(pos_list))
        except Exception:
            logging.error("catch exception,err_msg:%s" % traceback.format_exc())
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import os
import sys
import logging
import traceback
from libs.tools import tzlc,getMd5Digest
from libs.es import ESPerform
from libs.cache import redis_client
import json
from trans2es.commons.commons import get_tips_suggest_list
from trans2es.commons.words_utils import QueryWordAttr
class ItemWikiTransfer(object):
    """Builds suggest data for item wikis and syncs per-name wiki result counts to Redis."""

    # Node list handed to ESPerform.get_cli() by the count sync below.
    # NOTE(review): ESPerform.get_cli() switches its index prefix to "gm_test"
    # when given an explicit node list -- confirm this hard-coded host is
    # intended outside of testing.
    test_es_info_list = [
        {'host': '10.30.57.94', 'port': 9200}
    ]

    @classmethod
    def get_wiki_data_name_mapping_results_to_redis(cls, instance):
        """Count online wiki hits for ``instance.name`` and store the count in Redis.

        The count is written under key ``'w'`` of the JSON value stored at field
        ``<name>`` of hash ``search_tips:tips_mapping_num``. The total ``'t'`` is
        then recomputed from the per-source counts (``'d'`` doctor, ``'s'``
        service, ``'w'`` wiki). Previously ``'t'`` was set to the new wiki count
        plus the old wiki count, which discarded the other sources and counted
        wiki hits twice on re-runs.

        Blank names are skipped. Errors are logged, never raised. Returns None.
        """
        try:
            wiki_name = instance.name.strip()
            if not wiki_name:
                return

            multi_fields = {
                'name': 1,
                'association_tags': 1,
                "description": 1,
                "use_result": 1,
                "treatment_method": 1,
                "effect": 1,
                "ingredient": 1,
                "nature_type": 1,
                "shape": 1,
            }
            query_fields = ['^'.join((k, str(v))) for (k, v) in multi_fields.items()]
            multi_match = {
                'query': wiki_name,
                'type': 'cross_fields',
                'operator': 'or',
                'fields': query_fields,
            }
            q = {
                "size": 0,
                "query": {
                    "bool": {
                        "should": [
                            {"multi_match": multi_match}
                        ],
                        "must": [
                            {"term": {"is_online": True}}
                        ],
                        "minimum_should_match": 1
                    }
                }
            }

            result_dict = ESPerform.get_search_results(ESPerform.get_cli(cls.test_es_info_list), sub_index_name="newwiki", doc_type="newwiki", query_body=q, offset=0, size=0)
            wiki_results = result_dict["total_count"]

            redis_key_prefix = "search_tips:tips_mapping_num"
            redis_data = redis_client.hget(redis_key_prefix, wiki_name)
            # The client may return bytes or (with decode_responses) str; accept both.
            if isinstance(redis_data, bytes):
                redis_data = redis_data.decode("utf-8")
            redis_val_dict = json.loads(redis_data) if redis_data else {}

            redis_val_dict['w'] = wiki_results
            # Derive the total from its components so repeated runs are idempotent.
            redis_val_dict['t'] = sum(int(redis_val_dict.get(key, 0)) for key in ('d', 's', 'w'))

            redis_client.hset(redis_key_prefix, wiki_name, json.dumps(redis_val_dict))
        except Exception:
            # Exception (not a bare except) so Ctrl-C / SystemExit can still stop a long sync run.
            logging.error("catch exception,err_msg:%s" % traceback.format_exc())

    @classmethod
    def get_itemwiki_suggest_data_list(cls, instance):
        """Return ``(item_dict, suggest_list)`` for one item wiki instance.

        ``item_dict`` is the base document (id is the MD5 digest of the name);
        ``suggest_list`` comes from ``get_tips_suggest_list(instance.name)``.
        On any error the failure is logged and ``([], [])`` is returned.
        """
        try:
            name = instance.name
            item_dict = {
                "id": getMd5Digest(str(name)),
                "ori_name": name,
                "is_online": instance.is_online,
                "order_weight": QueryWordAttr.get_project_query_word_weight(name),
                "results_num": QueryWordAttr.get_query_results_num(name),
                "type_flag": "project",
                "offline_score": 0.0,
                "tips_name_type": 3,
            }
            suggest_list = get_tips_suggest_list(name)
            return (item_dict, suggest_list)
        except Exception:
            logging.error("catch exception,err_msg:%s" % traceback.format_exc())
            return ([], [])
\ No newline at end of file
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import os
import sys
import logging
import traceback
from libs.tools import tzlc,getMd5Digest
from libs.es import ESPerform
from libs.cache import redis_client
import json
from trans2es.commons.commons import get_tips_suggest_list
from trans2es.commons.words_utils import QueryWordAttr
class ProduceWikiTransfer(object):
    """Builds search-tips suggest data from product wiki rows."""

    # Elasticsearch node list kept on the class; not read by the method below.
    test_es_info_list = [
        {'host': '10.30.57.94', 'port': 9200}
    ]

    @classmethod
    def get_productwiki_suggest_data_list(cls, instance):
        """Return ``(item_dict, suggest_list)`` for one product wiki instance.

        ``item_dict`` is the base document (id is the MD5 digest of the name);
        ``suggest_list`` comes from ``get_tips_suggest_list(instance.name)``.
        On any error the failure is logged and ``([], [])`` is returned, which
        callers that iterate ``suggest_list`` treat as "nothing to index".
        """
        try:
            name = instance.name
            item_dict = {
                "id": getMd5Digest(str(name)),
                "ori_name": name,
                "is_online": instance.is_online,
                "order_weight": QueryWordAttr.get_project_query_word_weight(name),
                "results_num": QueryWordAttr.get_query_results_num(name),
                "type_flag": "project",
                "offline_score": 0.0,
                "tips_name_type": 3,
            }
            suggest_list = get_tips_suggest_list(name)
            return (item_dict, suggest_list)
        except Exception:
            # Exception (not a bare except) so Ctrl-C / SystemExit can still stop a long sync run.
            logging.error("catch exception,err_msg:%s" % traceback.format_exc())
            return ([], [])
\ No newline at end of file
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import os
import sys
import logging
import traceback
from libs.tools import tzlc,getMd5Digest
from libs.es import ESPerform
from libs.cache import redis_client
import json
from trans2es.commons.commons import get_tips_suggest_list
from trans2es.commons.words_utils import QueryWordAttr
class TagTransfer(object):
    """Builds suggest data for tags and syncs per-name service result counts to Redis."""

    # Node list handed to ESPerform.get_cli() by the count sync below.
    # NOTE(review): ESPerform.get_cli() switches its index prefix to "gm_test"
    # when given an explicit node list -- confirm this hard-coded host is
    # intended outside of testing.
    test_es_info_list = [
        {'host': '10.30.57.94', 'port': 9200}
    ]
    # Redis hash whose fields are names and whose values are JSON count dicts.
    tips_num_redis_key_prefix = "search_tips:tips_mapping_num"

    @classmethod
    def get_tag_data_name_mapping_results_to_redis(cls, instance):
        """Count online service hits for ``instance.name`` and store the count in Redis.

        The count is written under key ``'s'`` of the JSON value stored at field
        ``<name>`` of hash ``tips_num_redis_key_prefix``. The total ``'t'`` is
        then recomputed from the per-source counts (``'d'`` doctor, ``'s'``
        service, ``'w'`` wiki). Previously ``'t'`` was set to the new service
        count plus the old service count, which discarded the other sources and
        counted service hits twice on re-runs.

        Blank names are skipped. Errors are logged, never raised. Returns None.
        """
        try:
            tag_name = instance.name.strip()
            if not tag_name:
                return

            multi_fields = {
                'short_description': 1,
                'doctor.name': 1,
                'doctor.hospital.name': 1,
                'doctor.hospital.city_name': 1,
                'doctor.hospital.city_province_name': 1,
                'closure_tags': 1,  # since v5.9: search across all tags
                'doctor.hospital.officer_name': 1  # search the institution's officer
            }
            query_fields = ['^'.join((k, str(v))) for (k, v) in multi_fields.items()]
            multi_match = {
                'query': tag_name,
                'type': 'cross_fields',
                'operator': 'and',
                'fields': query_fields,
            }
            # Also match the tag name against the nested SKU names of each service.
            sku_query = {
                "nested": {
                    "path": "sku_list",
                    "query": {
                        "multi_match": {
                            "query": tag_name,
                            "fields": ["sku_list.name^2"],
                            'operator': 'and',
                            'type': 'cross_fields'
                        }
                    }
                }
            }
            q = {
                'query': {
                    'bool': {
                        "should": [
                            {'multi_match': multi_match},
                            sku_query
                        ],
                        "must": [
                            {"term": {"is_online": True}}
                        ],
                        "minimum_should_match": 1
                    }
                }
            }

            result_dict = ESPerform.get_search_results(ESPerform.get_cli(cls.test_es_info_list), sub_index_name="service", doc_type="service", query_body=q, offset=0, size=0)
            service_results = result_dict["total_count"]

            redis_data = redis_client.hget(cls.tips_num_redis_key_prefix, tag_name)
            # The client may return bytes or (with decode_responses) str; accept both.
            if isinstance(redis_data, bytes):
                redis_data = redis_data.decode("utf-8")
            redis_val_dict = json.loads(redis_data) if redis_data else {}

            redis_val_dict['s'] = service_results
            # Derive the total from its components so repeated runs are idempotent.
            redis_val_dict['t'] = sum(int(redis_val_dict.get(key, 0)) for key in ('d', 's', 'w'))

            redis_client.hset(cls.tips_num_redis_key_prefix, tag_name, json.dumps(redis_val_dict))
        except Exception:
            # Exception (not a bare except) so Ctrl-C / SystemExit can still stop a long sync run.
            logging.error("catch exception,err_msg:%s" % traceback.format_exc())

    @classmethod
    def get_tag_suggest_data_list(cls, instance):
        """Return ``(item_dict, suggest_list)`` for one tag instance.

        ``item_dict`` is the base document (id is the MD5 digest of the name);
        ``suggest_list`` comes from ``get_tips_suggest_list(instance.name)``.
        On any error the failure is logged and ``([], [])`` is returned.
        """
        try:
            name = instance.name
            item_dict = {
                "id": getMd5Digest(str(name)),
                "ori_name": name,
                "is_online": instance.is_online,
                "order_weight": QueryWordAttr.get_project_query_word_weight(name),
                "results_num": QueryWordAttr.get_query_results_num(name),
                "type_flag": "project",
                "offline_score": 0.0,
                "tips_name_type": 0,
            }
            suggest_list = get_tips_suggest_list(name)
            return (item_dict, suggest_list)
        except Exception:
            logging.error("catch exception,err_msg:%s" % traceback.format_exc())
            return ([], [])
\ No newline at end of file
from django.shortcuts import render
# Create your views here.
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment