Commit 563739fe authored by 段英荣's avatar 段英荣

Merge branch 'dev' into 'master'

Dev

See merge request !7
parents 957f5c0c 9d072b6f
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*~
# C extensions
*.so
# Distribution / packaging
.Python
.vscode
env/
bin/
build/
develop-eggs/
dist/
eggs/
lib/
lib64/
parts/
sdist/
var/
.idea/
*.egg-info/
.installed.cfg
*.egg
# Installer logs
pip-log.txt
pip-delete-this-directory.txt
# Unit test / coverage reports
htmlcov/
.tox/
.coverage
.cache
nosetests.xml
coverage.xml
# Translations
*.mo
# Mr Developer
.mr.developer.cfg
.project
.pydevproject
# Rope
.ropeproject
# Django stuff:
*.log
*.pot
# Sphinx documentation
docs/_build/
# config
fabfile.py
settings.online.py
settings_local.py
media/
log/
crawldata/
conf/
/static
.vagrant/
Vagrantfile
*.DS_Store
dump.rdb
......@@ -43,12 +43,12 @@ RUN apk add --no-cache --virtual .build-deps \
&& apk del .build-deps \
&& mkdir -p /data/log/search_tips/app
ENV GM_RPCD_DEPLOY_CONF_PATH "/srv/apps/search_tips/app_conf.xml" \
ENV GM_RPCD_DEPLOY_CONF_PATH="/srv/apps/search_tips/app_conf.xml" \
DJANGO_SETTINGS_MODULE=search_tips.settings
COPY . /srv/apps/search_tips/
WORKDIR /srv/apps/search_tips/
CMD gunicorn search_tips.wsgi:application -w 1 -k gevent -b 0.0.0.0:8000
CMD gunicorn gm_rpcd.wsgi:application -w 1 -k gevent -b 0.0.0.0:8000
......@@ -22,8 +22,8 @@ class ESPerform(object):
def get_cli(cls,es_ip_list=None):
try:
es_info_list = es_ip_list if es_ip_list else cls.cli_info_list
if es_ip_list:#tricky的做法,兼容测试es
cls.index_prefix = "gm_test"
# if es_ip_list:#tricky的做法,兼容测试es
# cls.index_prefix = "gm_test"
cls.cli_obj = Elasticsearch(es_info_list)
return cls.cli_obj
......
......@@ -22,7 +22,13 @@ def get_suggest_tips(query,lat,lng,offset=0,size=50):
"prefix": query,
"completion": {
"field": "suggest",
"size": size
"size": size,
"contexts":{
"is_online":[True]
},
"fuzzy":{
"fuzziness": 0
}
}
}
},
......@@ -42,20 +48,23 @@ def get_suggest_tips(query,lat,lng,offset=0,size=50):
if hit_item["_source"]["type_flag"] == "hospital":
if hit_item["_source"]["ori_name"] in g_hospital_pos_dict:
distance = point_distance(lng,lat,g_hospital_pos_dict[hit_item["_source"]["ori_name"]][0],g_hospital_pos_dict[hit_item["_source"]["ori_name"]][1])
if distance < 1000*10000:
if distance < 1000*50:
if distance < 1000:
if distance < 100:
hit_item["_source"]["describe"] = "<100" + "米"
else:
hit_item["_source"]["describe"] = "约" + str(distance) + "米"
hit_item["_source"]["describe"] = "约" + str(int(distance)) + "米"
else:
hit_item["_source"]["describe"] = "约" + str(round(1.0*distance/1000,1)) + "km"
else:
hit_item["_source"]["describe"] = ">1000km"
hit_item["_source"]["describe"] = ">50km"
else:
hit_item["_source"]["describe"] = ""
else:
hit_item["_source"]["describe"] = "约"+str(hit_item["_source"]["results_num"])+"个结果" if hit_item["_source"]["results_num"] else ""
if hit_item["_source"]["type_flag"] == "doctor":
hit_item["_source"]["describe"] = ""
else:
hit_item["_source"]["describe"] = "约"+str(hit_item["_source"]["results_num"])+"个结果" if hit_item["_source"]["results_num"] else ""
ret_list.append(hit_item["_source"])
return ret_list
......
......@@ -14,7 +14,7 @@ from search.utils.auto_tips import get_suggest_tips
@bind("search_tips/search/auto_complete_query")
def auto_complete_query(q):
def auto_complete_query(q,lat,lng):
try:
"""auto complate words/tags/doctors etc.
......@@ -29,15 +29,15 @@ def auto_complete_query(q):
# if not q:
# return json_http_response({'error': 0, 'data': []})
data = get_suggest_tips(q)
data = get_suggest_tips(q,float(lat),float(lng))
result = {
'error': 0,
'data': data,
}
return {"data":data}
return result
except:
logging.error("catch exception,err_msg:%s" % traceback.format_exc())
return {"data":[]}
return {"error":1,"data":[]}
......@@ -8,7 +8,8 @@ SENTRY_CELERY_ENDPOINT="http://60b0004c8884420f8067fb32fc3ed244:20f97fc73ffa4aad
BROKER_URL = "redis://127.0.0.1:6379/8"
REDIS_URL = "redis://127.0.0.1:6379"
#REDIS_URL = "redis://127.0.0.1:6379"
REDIS_URL = "redis://127.0.0.1:6379/1"
CELERY_BROKER_URL = "redis://127.0.0.1:6379/8"
......
......@@ -40,16 +40,15 @@ def get_tips_suggest_list(instance_cn_name):
cur_index = 0
# 中文
for i in range(len(ch_full_word)):
#for j in range(i, len(ch_full_word) + 1):
ch_name_term = ch_full_word[i:].strip()
if ch_name_term:
if ch_name_term and ch_full_word[i]!="(" and ch_full_word[i]!=")":
prefix_weight = ch_prefix_weight if len(ch_name_term) != len(ch_full_word) else ch_full_weight
suggest_type = 0 if len(ch_name_term) != len(ch_full_word) else 1
#term_begin_prefix_weight = begin_prefix_weight if i == 0 else 1.0
term_begin_prefix_weight = begin_prefix_weight if i == 0 else 1.0
suggest_item = {
"input": [ch_name_term],
"word_weight": (1.0 * len(ch_name_term) / len((ch_full_word))) * prefix_weight,
"suggest_type": suggest_type,
"word_weight": (1.0 * len(ch_name_term) / len((ch_full_word))) * prefix_weight * term_begin_prefix_weight,
"suggest_type": suggest_type
}
if ch_name_term[0] not in suggest_dict:
cur_index += 1
......@@ -59,23 +58,22 @@ def get_tips_suggest_list(instance_cn_name):
suggest_dict[ch_name_term[0]]["input"].append(ch_name_term)
if suggest_item["word_weight"] > suggest_dict[ch_name_term[0]]["word_weight"]:
suggest_dict[ch_name_term[0]]["word_weight"] = suggest_item["word_weight"]
suggest_dict[ch_name_term[0]]["suggest_type"] = suggest_item["suggest_type"]
suggest_dict[ch_name_term[0]]["suggest_type"] = suggest_item["suggest_type"]
# 拼音
if py_full_word != ch_full_word:
for i in range(len(py_full_word)):
#for j in range(i, len(py_full_word) + 1):
py_name_term = py_full_word[i:].strip()
if py_name_term:
if py_name_term and py_full_word[i]!="(" and py_full_word[i]!=")":
prefix_weight = py_prefix_weight if len(py_name_term) != len(
py_full_word) else py_full_weight
suggest_type = 2 if len(py_name_term) != len(py_full_word) else 3
#term_begin_prefix_weight = begin_prefix_weight if i == 0 else 1.0
term_begin_prefix_weight = begin_prefix_weight if i == 0 else 1.0
suggest_item = {
"input": [py_name_term],
"word_weight": (1.0 * len(py_name_term) / len(py_full_word)) * prefix_weight,
"word_weight": (1.0 * len(py_name_term) / len(py_full_word)) * prefix_weight * term_begin_prefix_weight,
"suggest_type": suggest_type
}
if py_name_term[0] not in suggest_dict:
cur_index += 1
suggest_item["cur_index"] = cur_index
......@@ -84,22 +82,20 @@ def get_tips_suggest_list(instance_cn_name):
suggest_dict[py_name_term[0]]["input"].append(py_name_term)
if suggest_item["word_weight"] > suggest_dict[py_name_term[0]]["word_weight"]:
suggest_dict[py_name_term[0]]["word_weight"] = suggest_item["word_weight"]
suggest_dict[py_name_term[0]]["suggest_type"] = suggest_item["suggest_type"]
suggest_dict[py_name_term[0]]["suggest_type"] = suggest_item["suggest_type"]
# 简写
if py_acronym_full_word != py_full_word:
for i in range(len(py_acronym_full_word)):
#for j in range(i, len(py_acronym_full_word) + 1):
py_acronym_term = py_acronym_full_word[i:].strip()
if py_acronym_term:
if py_acronym_term and py_acronym_full_word[i]!="(" and py_acronym_full_word[i]!=")":
prefix_weight = py_acronym_prefix_weight if len(py_acronym_term) != len(
py_acronym_full_word) else py_acronym_full_weight
suggest_type = 4 if len(py_acronym_term) != len(py_acronym_full_word) else 5
#term_begin_prefix_weight = begin_prefix_weight if i == 0 else 1.0
term_begin_prefix_weight = begin_prefix_weight if i == 0 else 1.0
suggest_item = {
"input": [py_acronym_term],
"word_weight": (1.0 * len(py_acronym_term) / len(
py_acronym_full_word)) * prefix_weight,
"word_weight": (1.0 * len(py_acronym_term) / len(py_acronym_full_word)) * prefix_weight * term_begin_prefix_weight,
"suggest_type": suggest_type
}
if py_acronym_term[0] not in suggest_dict:
......@@ -112,6 +108,7 @@ def get_tips_suggest_list(instance_cn_name):
suggest_dict[py_acronym_term[0]]["word_weight"] = suggest_item["word_weight"]
suggest_dict[py_acronym_term[0]]["suggest_type"] = suggest_item["suggest_type"]
return suggest_dict.values()
except:
logging.error("catch exception,err_msg:%s" % traceback.format_exc())
......
......@@ -163,3 +163,46 @@ class QueryWordAttr(object):
return 0
from gm_rpcd.all import bind
from libs.cache import redis_client
import base64
from gm_types.doris import MIND_TYPE
# Redis key template for the set of labels stored for a query word; the
# placeholder is filled with the base64-encoded query (see get_tips_word_type).
QUERY_KEY = "query:{}:set"
# Rank of each label type. label_key() uses this table as the sort key and
# get_tips_word_type() returns the candidate label with the highest rank.
LABEL_VALUE = {
MIND_TYPE.PROJECT : 6,
MIND_TYPE.BRAND: 5,
MIND_TYPE.HOSPITAL:4,
MIND_TYPE.DOCTOR:3,
MIND_TYPE.AREA:2,
MIND_TYPE.USER:1,
MIND_TYPE.UNKNOWN:0
}
# Query words that get an extra candidate label in get_tips_word_type(),
# whatever Redis holds for them; every entry currently maps to PROJECT.
QUERY_WORD_LABEL_NEED_MODIFIED = {
u"玻尿酸": MIND_TYPE.PROJECT,
u"鼻": MIND_TYPE.PROJECT,
u"眼": MIND_TYPE.PROJECT,
u"嘴": MIND_TYPE.PROJECT,
u"脱毛": MIND_TYPE.PROJECT
}
def label_key(label):
    """Return the rank of ``label`` as defined in ``LABEL_VALUE``.

    Args:
        label: a label type value, normally one of the ``LABEL_VALUE`` keys.

    Returns:
        The integer rank of the label. Labels that are not listed in
        ``LABEL_VALUE`` rank as -1, i.e. below every known label, so a list
        containing an unexpected label can still be ordered without comparing
        ``None`` against an int.
    """
    return LABEL_VALUE.get(label, -1)
def get_tips_word_type(query=''):
    """Return the highest-ranked label type known for ``query``.

    Candidate labels are the members of the Redis set stored under
    ``QUERY_KEY`` (formatted with the base64-encoded query), plus
    ``MIND_TYPE.UNKNOWN`` as a fallback, plus the label configured for the
    query in ``QUERY_WORD_LABEL_NEED_MODIFIED`` when there is one.

    Args:
        query: the query word to classify.

    Returns:
        The candidate label with the greatest ``label_key`` rank; when several
        candidates share that rank the earliest one is returned.
    """
    query_base64 = base64.b64encode(query.encode('utf8')).decode('utf8')
    key = QUERY_KEY.format(query_base64)
    # The set members are bytes unless the client already decodes responses;
    # accept both so the lookup does not depend on client configuration.
    labels = [
        member.decode("utf8") if isinstance(member, bytes) else member
        for member in redis_client.smembers(key)
    ]
    labels.append(MIND_TYPE.UNKNOWN)
    # Use the label configured for this word instead of a hard-coded type so
    # the override table stays the single source of truth.
    forced_label = QUERY_WORD_LABEL_NEED_MODIFIED.get(query)
    if forced_label is not None:
        labels.append(forced_label)
    # Only the top-ranked label is needed, so a full sort is unnecessary.
    return max(labels, key=label_key)
......@@ -3,7 +3,13 @@
"properties": {
"id":{"type":"text"},
"suggest":{
"type":"completion"
"type":"completion",
"contexts":[
{
"name":"is_online",
"type": "category"
}
]
},
"suggest_type":{"type":"long"},//0-汉字,1-汉字全拼,2-拼音,3-拼音全拼,4-拼音简写,5-拼音简写全拼
"tips_name_type":{"type":"long"},//tips数据所属类型,0-tag,1-hospital,2-doctor,3-wiki
......
# coding=utf8
from __future__ import unicode_literals, absolute_import, print_function
import operator
from django.db import models
from django.db.models import Q
from django.db import transaction
import logging
import traceback
from gm_types.gaia import TAG_ALERT_TYPE
from django.utils import timezone
from gm_upload import IMG_TYPE, ImgUrlField
from gm_types.gaia import WORDREL_CATEGORY
class WordRel(models.Model):
    """A keyword together with its category, stored in ``api_wordrel``."""

    keyword = models.CharField(verbose_name=u'关键字', max_length=50)
    category = models.CharField(
        verbose_name=u'分类',
        max_length=5,
        choices=WORDREL_CATEGORY,
    )
    # Set once when the row is inserted.
    created_time = models.DateTimeField(auto_now_add=True, verbose_name=u'创建时间')
    # Refreshed on every save.
    updated_time = models.DateTimeField(auto_now=True, verbose_name=u'最后更新时间')

    class Meta:
        db_table = 'api_wordrel'
        app_label = 'api'
class WordRelResemble(models.Model):
    """Near-synonym table: words that resemble a ``WordRel`` keyword.

    Rows are reachable from the owning keyword through the
    ``all_resembles`` reverse relation.
    """

    class Meta:
        app_label = 'api'
        db_table = 'api_wordrelresemble'

    # on_delete is spelled out because newer Django versions require it;
    # CASCADE is what older versions applied implicitly.
    wordrel = models.ForeignKey(
        WordRel,
        related_name='all_resembles',
        on_delete=models.CASCADE,
    )
    word = models.CharField(u'近义词', max_length=50, db_index=True)
......@@ -21,7 +21,10 @@ from trans2es.utils.collectwiki_transfer import CollectWikiTransfer
from trans2es.utils.brandwiki_transfer import BrandWikiTransfer
from trans2es.utils.productwiki_transfer import ProduceWikiTransfer
from trans2es.utils.tag_transfer import TagTransfer
from trans2es.utils.wordresemble import WordResemble
from libs.es import ESPerform
from libs.tools import tzlc,getMd5Digest
from gm_types.gaia import SERVICE_ITEM_PRICE_TYPE, DOCTOR_TYPE
from gm_types.gaia import (
DOCTOR_PROCESS_STATUS, DOCTOR_TYPE, PHONE_HINTS, TOPIC_TYPE, TAG_TYPE, DisplayTag,
......@@ -132,8 +135,9 @@ class TypeInfo(object):
))
continue
data = self.get_data_func(instance)
(item_dict, suggest_list) = data
resemble_list = WordResemble.get_word_resemble_list(str(instance.name))
(item_dict, suggest_list) = data
for suggest_item in suggest_list:
suggest_dict = copy.deepcopy(item_dict)
suggest_dict["suggest_type"] = suggest_item["suggest_type"]
......@@ -141,9 +145,20 @@ class TypeInfo(object):
suggest_dict["id"] = str(suggest_dict["id"]) + "_" + str(suggest_item["cur_index"])
suggest_dict["suggest"] = {
"input": suggest_item["input"],
"weight": int(suggest_dict["offline_score"])
"weight": int(suggest_dict["offline_score"]),
"contexts":{
"is_online": suggest_dict["is_online"]
}
}
data_list.append(suggest_dict)
for resemble_item in resemble_list:
resemble_dict = copy.deepcopy(suggest_dict)
resemble_dict["id"] = suggest_dict["id"] + "_" + getMd5Digest(resemble_item)
resemble_dict["ori_name"] = resemble_item
resemble_dict["offline_score"] = 0
resemble_dict["suggest"]["weight"] = 0
data_list.append(resemble_dict)
except Exception:
traceback.print_exc()
logging.exception('bulk_get_data for name={}, doc_type={}, pk={}'.format(
......@@ -262,8 +277,9 @@ def get_type_info_map():
TypeInfo(
name='suggest',
type='hospital_tips', # hospital
model=doctor.Doctor,
query_deferred=lambda: doctor.Doctor.objects.all().filter(doctor_type=DOCTOR_TYPE.OFFICER).query,
model=doctor.Hospital,
#query_deferred=lambda: doctor.Doctor.objects.all().filter(doctor_type=DOCTOR_TYPE.OFFICER).query,
query_deferred=lambda: doctor.Hospital.objects.all().query,
get_data_func=HospitalTransfer.get_hospital_suggest_data_list,
bulk_insert_chunk_size=100,
round_insert_chunk_size=5,
......
......@@ -10,7 +10,7 @@ from libs.cache import redis_client
import json
from trans2es.commons.commons import get_tips_suggest_list
from trans2es.commons.words_utils import QueryWordAttr
from trans2es.commons.words_utils import QueryWordAttr,get_tips_word_type
class BrandWikiTransfer(object):
......@@ -27,7 +27,7 @@ class BrandWikiTransfer(object):
item_dict["order_weight"] = QueryWordAttr.get_project_query_word_weight(instance.name)
item_dict["results_num"] = QueryWordAttr.get_query_results_num(instance.name)
item_dict["type_flag"] = "project"
item_dict["type_flag"] = get_tips_word_type(instance.name)
item_dict["offline_score"] = 0.0
item_dict["tips_name_type"] = 3
......
......@@ -10,7 +10,7 @@ from libs.cache import redis_client
import json
from trans2es.commons.commons import get_tips_suggest_list
from trans2es.commons.words_utils import QueryWordAttr
from trans2es.commons.words_utils import QueryWordAttr,get_tips_word_type
class CollectWikiTransfer(object):
......@@ -27,7 +27,7 @@ class CollectWikiTransfer(object):
item_dict["order_weight"] = QueryWordAttr.get_project_query_word_weight(instance.name)
item_dict["results_num"] = QueryWordAttr.get_query_results_num(instance.name)
item_dict["type_flag"] = "project"
item_dict["type_flag"] = get_tips_word_type(instance.name)
item_dict["offline_score"] = 0.0
item_dict["tips_name_type"] = 3
......
......@@ -12,7 +12,7 @@ from django.conf import settings
from trans2es.commons.commons import get_tips_suggest_list
from trans2es.commons.words_utils import QueryWordAttr
from trans2es.commons.words_utils import QueryWordAttr,get_tips_word_type
class DoctorTransfer(object):
......@@ -86,7 +86,7 @@ class DoctorTransfer(object):
item_dict["order_weight"] = QueryWordAttr.get_doctor_query_word_weight(instance.name)
item_dict["results_num"] = QueryWordAttr.get_query_results_num(instance.name)
item_dict["type_flag"] = "doctor"
item_dict["type_flag"] = get_tips_word_type(instance.name)
item_dict["offline_score"] = 0.0
item_dict["tips_name_type"] = 2
......
......@@ -10,7 +10,7 @@ from libs.cache import redis_client
import json
from trans2es.commons.commons import get_tips_suggest_list
from trans2es.commons.words_utils import QueryWordAttr
from trans2es.commons.words_utils import QueryWordAttr,get_tips_word_type
class HospitalTransfer(object):
......@@ -22,11 +22,11 @@ class HospitalTransfer(object):
item_dict = dict()
item_dict["id"] = getMd5Digest(str(instance.name))
item_dict["ori_name"] = instance.name
item_dict["is_online"] = instance.is_online
item_dict["is_online"] = True
item_dict["order_weight"] = QueryWordAttr.get_hospital_query_word_weight(instance.name)
item_dict["results_num"] = QueryWordAttr.get_query_results_num(instance.name)
item_dict["type_flag"] = "hospital"
item_dict["type_flag"] = get_tips_word_type(instance.name)
item_dict["offline_score"] = 0.0
item_dict["tips_name_type"] = 1
......
......@@ -12,7 +12,7 @@ from django.conf import settings
from trans2es.commons.commons import get_tips_suggest_list
from trans2es.commons.words_utils import QueryWordAttr
from trans2es.commons.words_utils import QueryWordAttr,get_tips_word_type
class ItemWikiTransfer(object):
......@@ -91,7 +91,7 @@ class ItemWikiTransfer(object):
item_dict["order_weight"] = QueryWordAttr.get_project_query_word_weight(instance.name)
item_dict["results_num"] = QueryWordAttr.get_query_results_num(instance.name)
item_dict["type_flag"] = "project"
item_dict["type_flag"] = get_tips_word_type(instance.name)
item_dict["offline_score"] = 0.0
item_dict["tips_name_type"] = 3
......
......@@ -10,7 +10,7 @@ from libs.cache import redis_client
import json
from trans2es.commons.commons import get_tips_suggest_list
from trans2es.commons.words_utils import QueryWordAttr
from trans2es.commons.words_utils import QueryWordAttr,get_tips_word_type
class ProduceWikiTransfer(object):
......@@ -27,7 +27,7 @@ class ProduceWikiTransfer(object):
item_dict["order_weight"] = QueryWordAttr.get_project_query_word_weight(instance.name)
item_dict["results_num"] = QueryWordAttr.get_query_results_num(instance.name)
item_dict["type_flag"] = "project"
item_dict["type_flag"] = get_tips_word_type(instance.name)
item_dict["offline_score"] = 0.0
item_dict["tips_name_type"] = 3
......
......@@ -11,7 +11,7 @@ import json
from django.conf import settings
from trans2es.commons.commons import get_tips_suggest_list
from trans2es.commons.words_utils import QueryWordAttr
from trans2es.commons.words_utils import QueryWordAttr,get_tips_word_type
class TagTransfer(object):
......@@ -98,7 +98,7 @@ class TagTransfer(object):
item_dict["order_weight"] = QueryWordAttr.get_project_query_word_weight(instance.name)
item_dict["results_num"] = QueryWordAttr.get_query_results_num(instance.name)
item_dict["type_flag"] = "project"
item_dict["type_flag"] = get_tips_word_type(instance.name)
item_dict["offline_score"] = 0.0
item_dict["tips_name_type"] = 0
......
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import os
import sys
import logging
import traceback
from libs.tools import tzlc,getMd5Digest
from libs.es import ESPerform
from libs.cache import redis_client
import json
from django.conf import settings
from trans2es.models import wordresemble
class WordResemble(object):
    """Look up the near-synonyms recorded for a keyword."""

    @classmethod
    def get_word_resemble_list(cls, keyword):
        """Return the near-synonyms stored for ``keyword``.

        Every ``WordRel`` row whose keyword equals ``keyword`` contributes the
        ``word`` values of its ``all_resembles`` rows. One stored value may
        hold several synonyms joined by "、"; each is returned as a separate
        entry, with surrounding whitespace removed and blank fragments dropped
        so callers never receive an empty synonym.

        Args:
            keyword: the keyword to look up.

        Returns:
            A list of synonym strings. The list is empty when nothing is
            stored for the keyword or when the lookup fails; failures are
            logged instead of being raised.
        """
        try:
            resemble_list = list()
            for word_rel in wordresemble.WordRel.objects.filter(keyword=keyword):
                stored_words = word_rel.all_resembles.all().values_list('word', flat=True)
                for joined_words in stored_words:
                    for fragment in joined_words.split("、"):
                        synonym = fragment.strip()
                        if synonym:
                            resemble_list.append(synonym)
            return resemble_list
        except Exception:
            # Best effort: a failed lookup must not abort the caller, but
            # interpreter-exit signals are no longer swallowed here.
            logging.error("catch exception,err_msg:%s" % traceback.format_exc())
            return list()
\ No newline at end of file
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment