Commit a955f0d8 authored by lixiaofang's avatar lixiaofang

add;

parent 9b708f86
# !/usr/bin/env python
# encoding=utf-8
from __future__ import absolute_import
import os
# set the default Django settings module for the 'celery' program.
# os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'settings')
os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'search_tips.settings')
import raven
from raven.contrib.celery import register_signal, register_logger_signal
from celery import Celery
from django.conf import settings
class Celery(Celery):
"""wrap for celery.Celery."""
def on_configure(self):
# check if sentry settings provided
if not settings.SENTRY_CELERY_ENDPOINT:
return
client = raven.Client(settings.SENTRY_CELERY_ENDPOINT)
# register a custom filter to filter out duplicate logs
register_logger_signal(client)
# hook into the Celery error handler
register_signal(client)
app = Celery('search_tips_tasks')
# Using a string here means the worker will not have to
# pickle the object when using Windows.
app.config_from_object('django.conf:settings')
app.autodiscover_tasks(lambda: settings.INSTALLED_APPS)
# -*- coding: UTF-8 -*-
import logging
from celery import shared_task
from trans2es.type_info import get_type_info_map
import logging
import traceback
import base64
from libs.cache import redis_client
import json
from trans2es.models import wordresemble
@shared_task
def set_data_to_redis():
try:
QUERY_KEY = "query:{}:search_tips"
tips_num_redis_key_prefix = "search_tips:tips_mapping_num"
result = wordresemble.WordRel.objects.all().values("keyword", "id")
for wordrm in result:
query_sql_item = wordresemble.WordRel.objects.filter(keyword=wordrm["keyword"])
for sql_obj in query_sql_item:
words = list(sql_obj.all_resembles.all().values_list('word', flat=True))
for items in words:
# 先存储本体词
logging.info("get wordrm:%s" % wordrm["keyword"])
logging.info("get words:%s" % words)
query_base64 = base64.b64encode(wordrm["keyword"].encode('utf8')).decode('utf8')
key = QUERY_KEY.format(query_base64)
value_name = base64.b64encode(items.encode('utf8')).decode('utf8')
# 获取本体词的结果
redis_data = redis_client.hget(tips_num_redis_key_prefix, items)
redis_val_dict = json.loads(str(redis_data, encoding="utf-8")) if redis_data else {}
total_count = 0
if 't' in redis_val_dict:
total_count = int(redis_val_dict['t'])
# 存储到Redis
value = value_name + str(":" + str(total_count))
redis_client.sadd(key, value)
logging.info("get value:%s" % value)
# 存储近义词
query_base64 = base64.b64encode(items.encode('utf8')).decode('utf8')
key = QUERY_KEY.format(query_base64)
value_name = base64.b64encode(wordrm["keyword"].encode('utf8')).decode('utf8')
# 获取结果
redis_data = redis_client.hget(tips_num_redis_key_prefix, wordrm["keyword"])
redis_val_dict = json.loads(str(redis_data, encoding="utf-8")) if redis_data else {}
total_count = 0
if 't' in redis_val_dict:
total_count = int(redis_val_dict['t'])
# 存储结果
value = value_name + str(":" + str(total_count))
redis_client.sadd(key, value)
logging.info("get value:%s" % value)
if words:
for w in words:
logging.info("get word_pops:%s" % words)
value_name = base64.b64encode(w.encode('utf8')).decode('utf8')
redis_data = redis_client.hget(tips_num_redis_key_prefix, w)
redis_val_dict = json.loads(str(redis_data, encoding="utf-8")) if redis_data else {}
total_count = 0
if 't' in redis_val_dict:
total_count = int(redis_val_dict['t'])
value = value_name + str(":" + str(total_count))
redis_client.sadd(key, value)
logging.info("get value:%s" % value)
except:
logging.error("catch exception,err_msg:%s" % traceback.format_exc())
...@@ -2,14 +2,16 @@ ...@@ -2,14 +2,16 @@
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
from __future__ import unicode_literals, absolute_import, print_function from __future__ import unicode_literals, absolute_import, print_function
from gm_rpcd.all import bind
import logging import logging
import traceback import traceback
import json import json
import base64
from libs.es import ESPerform from libs.es import ESPerform
from libs.cache import redis_client from libs.cache import redis_client
from libs.tools import g_hospital_pos_dict from libs.tools import g_hospital_pos_dict
from libs.tools import point_distance from libs.tools import point_distance
from gm_rpcd.all import bind
from trans2es.commons.words_utils import QueryWordAttr, get_tips_word_type
# lat经度 # lat经度
...@@ -46,6 +48,8 @@ def get_suggest_tips(query, lat, lng, offset=0, size=50): ...@@ -46,6 +48,8 @@ def get_suggest_tips(query, lat, lng, offset=0, size=50):
ret_list = list() ret_list = list()
result_dict = ESPerform.get_search_results(ESPerform.get_cli(), sub_index_name="suggest", query_body=q, result_dict = ESPerform.get_search_results(ESPerform.get_cli(), sub_index_name="suggest", query_body=q,
offset=offset, size=size, is_suggest_request=True) offset=offset, size=size, is_suggest_request=True)
logging.info("get result_dict:%s" % result_dict)
for tips_item in result_dict["suggest"]["tips-suggest"]: for tips_item in result_dict["suggest"]["tips-suggest"]:
for hit_item in tips_item["options"]: for hit_item in tips_item["options"]:
...@@ -84,7 +88,26 @@ def get_suggest_tips(query, lat, lng, offset=0, size=50): ...@@ -84,7 +88,26 @@ def get_suggest_tips(query, lat, lng, offset=0, size=50):
hit_item["_source"]["results_num"] else "" hit_item["_source"]["results_num"] else ""
ret_list.append(hit_item["_source"]) ret_list.append(hit_item["_source"])
return ret_list if len(result_dict["suggest"]["tips-suggest"]) >= 50:
return ret_list
else:
QUERY_KEY = "query:{}:search_tips"
query_base64 = base64.b64encode(query.encode('utf8')).decode('utf8')
key = QUERY_KEY.format(query_base64)
logging.info("get key:%s" % key)
labels = list(map(lambda x: x.decode("utf8"), list(redis_client.smembers(key))))
logging.info("get labels:%s" % labels)
for i in labels:
ori = i.split(":")[0]
result_num = i.split(":")[1]
ori_name = base64.b64decode(ori.encode('utf8')).decode('utf8')
ret_list.append(
{"results_num": result_num, "ori_name": ori_name, "id": None, "is_online": True, "offline_score": 0,
"type_flag": get_tips_word_type(ori_name), "highlight_name": ori_name, "describe": ""})
return ret_list
except: except:
logging.error("catch exception,err_msg:%s" % traceback.format_exc()) logging.error("catch exception,err_msg:%s" % traceback.format_exc())
return list() return list()
...@@ -2,4 +2,7 @@ from __future__ import unicode_literals, absolute_import, print_function ...@@ -2,4 +2,7 @@ from __future__ import unicode_literals, absolute_import, print_function
import pymysql import pymysql
from _celery import app as celery_app
pymysql.install_as_MySQLdb() pymysql.install_as_MySQLdb()
\ No newline at end of file
from __future__ import absolute_import, unicode_literals
import os
from celery import Celery
from django.conf import settings
# set the default Django settings module for the 'celery' program.
os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'search_tips.settings')
app = Celery('search_tips')
# Using a string here means the worker doesn't have to serialize
# the configuration object to child processes.
# - namespace='CELERY' means all celery-related configuration keys
# should have a `CELERY_` prefix.
app.config_from_object('django.conf:settings', namespace='CELERY')
# Load task modules from all registered Django app configs.
app.autodiscover_tasks()
app.conf.broker_url = settings.CELERY_BROKER_URL
@app.task(bind=True)
def debug_task(self):
print('Request: {0!r}'.format(self.request))
...@@ -48,7 +48,7 @@ DEBUG = True ...@@ -48,7 +48,7 @@ DEBUG = True
# } # }
CELERY_TIMEZONE = 'Asia/Shanghai' CELERY_TIMEZONE = 'Asia/Shanghai'
CELERY_ROUTES = ['physical.celery_task_router.CeleryTaskRouter'] # CELERY_ROUTES = ['physical.celery_task_router.CeleryTaskRouter']
# Application definition # Application definition
......
# -*- coding:utf-8 -*- # -*- coding:utf-8 -*-
from datetime import timedelta
from celery.schedules import crontab
ALLOWED_HOSTS = ["192.168.78.2"] ALLOWED_HOSTS = ["192.168.78.2"]
SECRET_KEY = 'e%$v6snev0807=t0@gk_n2#r5m6r1h(eil6cp^y3ub@ja@gk_t' SECRET_KEY = 'e%$v6snev0807=t0@gk_n2#r5m6r1h(eil6cp^y3ub@ja@gk_t'
SENTRY_CELERY_ENDPOINT = "http://60b0004c8884420f8067fb32fc3ed244:20f97fc73ffa4aad9735d0e6542a6d78@sentry.igengmei.com/140" SENTRY_CELERY_ENDPOINT = "http://60b0004c8884420f8067fb32fc3ed244:20f97fc73ffa4aad9735d0e6542a6d78@sentry.igengmei.com/140"
BROKER_URL = "redis://127.0.0.1:6379/8" BROKER_URL = "redis://redis.paas.env:6379/8"
# REDIS_URL = "redis://127.0.0.1:6379/1"
REDIS_URL = "redis://redis.paas.env:6379/13" REDIS_URL = "redis://redis.paas.env:6379/13"
CELERY_BROKER_URL = "redis://127.0.0.1:6379/8" CELERY_BROKER_URL = "redis://redis.paas.env:6379/8"
DATABASES = {
'default': {
# 'ENGINE': 'django.db.backends.mysql',
# 'NAME': 'zhengxing_test',
# 'USER': 'work',
# 'PASSWORD': 'workwork',
# 'HOST': 'rdsmaqevmuzj6jy.mysql.rds.aliyuncs.com',
# 'PORT': '',
# 'OPTIONS': {
# "init_command": "SET foreign_key_checks = 0;",
# "charset": "utf8mb4",
# }
'ENGINE': 'django.db.backends.mysql', # 设置为mysql数据库
'NAME': 'zhengxing_test',
'USER': 'work',
'PASSWORD': 'Gengmei1',
'HOST': 'rm-2ze0v6uua2hl9he8edo.mysql.rds.aliyuncs.com',
'PORT': '3306',
'OPTIONS': {
"init_command": "SET foreign_key_checks = 0;",
"charset": "utf8mb4", # 为了支持emoji表情
},
},
'bran': {
'ENGINE': 'django.db.backends.mysql', # 设置为mysql数据库
'NAME': 'bran_test',
'USER': 'work',
'PASSWORD': 'workwork',
'HOST': 'rdsmaqevmuzj6jy.mysql.rds.aliyuncs.com',
'PORT': '3306',
'OPTIONS': {
"init_command": "SET foreign_key_checks = 0;",
"charset": "utf8mb4", # 为了支持emoji表情
},
}
}
INSTALLED_APPS = (
'django.contrib.admin',
'django.contrib.auth',
'django.contrib.contenttypes',
'django.contrib.sessions',
'django.contrib.messages',
'django.contrib.staticfiles',
'trans2es',
'search',
'injection.data_sync',
)
# DATABASES = { CELERYBEAT_SCHEDULE = {
# 'default': { 'set_tag_count': {
# # 'ENGINE': 'django.db.backends.mysql', 'task': 'injection.data_sync.tasks.set_data_to_redis',
# # 'NAME': 'zhengxing_test', 'schedule': timedelta(seconds=10),
# # 'USER': 'work', 'args': ()
# # 'PASSWORD': 'workwork', },
# # 'HOST': 'rdsmaqevmuzj6jy.mysql.rds.aliyuncs.com', }
# # 'PORT': '',
# # 'OPTIONS': {
# # "init_command": "SET foreign_key_checks = 0;",
# # "charset": "utf8mb4",
# # }
# # 'ENGINE': 'django.db.backends.mysql', # 设置为mysql数据库
# # 'NAME': 'zhengxing_test',
# # 'USER': 'work',
# # 'PASSWORD': 'Gengmei1',
# # 'HOST': 'rm-2ze0v6uua2hl9he8edo.mysql.rds.aliyuncs.com',
# # 'PORT': '3306',
# # 'OPTIONS': {
# # "init_command": "SET foreign_key_checks = 0;",
# # "charset": "utf8mb4", # 为了支持emoji表情
# },
# },
# 'bran': {
# 'ENGINE': 'django.db.backends.mysql', # 设置为mysql数据库
# 'NAME': 'bran_test',
# 'USER': 'work',
# 'PASSWORD': 'workwork',
# 'HOST': 'rdsmaqevmuzj6jy.mysql.rds.aliyuncs.com',
# 'PORT': '3306',
# 'OPTIONS': {
# "init_command": "SET foreign_key_checks = 0;",
# "charset": "utf8mb4", # 为了支持emoji表情
# },
# }
# }
# ES_INFO_LIST = [ # ES_INFO_LIST = [
# { # {
......
...@@ -32,15 +32,6 @@ class Command(BaseCommand): ...@@ -32,15 +32,6 @@ class Command(BaseCommand):
def handle(self, *args, **options): def handle(self, *args, **options):
try: try:
if not options['type']:
raise logging.info('type name must be specified')
elif not (options['type'] in [
'suggest',
]):
raise logging.info(
'unknown type name, type must be one of [suggest]')
es_cli = ESPerform.get_cli() es_cli = ESPerform.get_cli()
type_name = "suggest" type_name = "suggest"
......
...@@ -8,7 +8,6 @@ ...@@ -8,7 +8,6 @@
"type": "completion", "type": "completion",
"analyzer": "keyword", "analyzer": "keyword",
"search_analyzer": "keyword", "search_analyzer": "keyword",
"skip_duplicates":"true",
"contexts": [ "contexts": [
{ {
"name": "is_online", "name": "is_online",
......
...@@ -147,39 +147,38 @@ class TypeInfo(object): ...@@ -147,39 +147,38 @@ class TypeInfo(object):
suggest_dict["suggest_type"] = suggest_item["suggest_type"] suggest_dict["suggest_type"] = suggest_item["suggest_type"]
suggest_dict["offline_score"] = suggest_item["word_weight"] + suggest_dict["order_weight"] suggest_dict["offline_score"] = suggest_item["word_weight"] + suggest_dict["order_weight"]
suggest_dict["id"] = str(suggest_dict["id"]) + "_" + str(suggest_item["cur_index"]) suggest_dict["id"] = str(suggest_dict["id"]) + "_" + str(suggest_item["cur_index"])
if suggest_item["suggest_type"] == 1 and item_dict["tips_name_type"] == 4: # if suggest_item["suggest_type"] == 1 and item_dict["tips_name_type"] == 4:
for resemble in resemble_list: # for resemble in resemble_list:
suggest_item["input"].append(str(resemble)) # suggest_item["input"].append(str(resemble))
#
suggest_dict["suggest"] = { # suggest_dict["suggest"] = {
"input": suggest_item["input"], # "input": suggest_item["input"],
"weight": int(suggest_dict["offline_score"]), # "weight": int(suggest_dict["offline_score"]),
"contexts": { # "contexts": {
"is_online": suggest_dict["is_online"] # "is_online": suggest_dict["is_online"]
} # }
} # }
else: # else:
suggest_dict["suggest"] = { suggest_dict["suggest"] = {
"input": suggest_item["input"], "input": suggest_item["input"],
"weight": int(suggest_dict["offline_score"]), "weight": int(suggest_dict["offline_score"]),
"contexts": { "contexts": {
"is_online": suggest_dict["is_online"] "is_online": suggest_dict["is_online"]
}
} }
}
data_list.append(suggest_dict) data_list.append(suggest_dict)
if item_dict["tips_name_type"] != 4: # if item_dict["tips_name_type"] != 4:
for resemble_item in resemble_list: for resemble_item in resemble_list:
resemble_dict = copy.deepcopy(suggest_dict) resemble_dict = copy.deepcopy(suggest_dict)
resemble_dict["id"] = suggest_dict["id"] + "_" + getMd5Digest(resemble_item) resemble_dict["id"] = suggest_dict["id"] + "_" + getMd5Digest(resemble_item)
resemble_dict["ori_name"] = resemble_item resemble_dict["ori_name"] = resemble_item
resemble_dict["results_num"] = QueryWordAttr.get_query_results_num(resemble_item) resemble_dict["results_num"] = QueryWordAttr.get_query_results_num(resemble_item)
resemble_dict["offline_score"] = 0 resemble_dict["offline_score"] = 0
resemble_dict["suggest"]["weight"] = 0 resemble_dict["suggest"]["weight"] = 0
data_list.append(resemble_dict) data_list.append(resemble_dict)
else: # else:
pass # pass
except Exception: except Exception:
traceback.print_exc() traceback.print_exc()
logging.exception('bulk_get_data for name={}, doc_type={}, pk={}'.format( logging.exception('bulk_get_data for name={}, doc_type={}, pk={}'.format(
......
...@@ -4,6 +4,7 @@ import os ...@@ -4,6 +4,7 @@ import os
import sys import sys
import logging import logging
import traceback import traceback
import base64
from libs.tools import tzlc, getMd5Digest from libs.tools import tzlc, getMd5Digest
from libs.es import ESPerform from libs.es import ESPerform
from libs.cache import redis_client from libs.cache import redis_client
...@@ -140,3 +141,65 @@ class WordResemble(object): ...@@ -140,3 +141,65 @@ class WordResemble(object):
redis_client.hset(tips_num_redis_key_prefix, tag_name, json.dumps(redis_val_dict)) redis_client.hset(tips_num_redis_key_prefix, tag_name, json.dumps(redis_val_dict))
except: except:
logging.error("catch exception,err_msg:%s" % traceback.format_exc()) logging.error("catch exception,err_msg:%s" % traceback.format_exc())
@classmethod
def set_data_to_redis(cls):
try:
QUERY_KEY = "query:{}:search_tips"
tips_num_redis_key_prefix = "search_tips:tips_mapping_num"
result = wordresemble.WordRel.objects.all().values("keyword", "id")
for wordrm in result:
query_sql_item = wordresemble.WordRel.objects.filter(keyword=wordrm["keyword"])
for sql_obj in query_sql_item:
words = list(sql_obj.all_resembles.all().values_list('word', flat=True))
for items in words:
# 先存储本体词
logging.info("get wordrm:%s" % wordrm["keyword"])
logging.info("get words:%s" % words)
query_base64 = base64.b64encode(wordrm["keyword"].encode('utf8')).decode('utf8')
key = QUERY_KEY.format(query_base64)
value_name = base64.b64encode(items.encode('utf8')).decode('utf8')
# 获取本体词的结果
redis_data = redis_client.hget(tips_num_redis_key_prefix, items)
redis_val_dict = json.loads(str(redis_data, encoding="utf-8")) if redis_data else {}
total_count = 0
if 't' in redis_val_dict:
total_count = int(redis_val_dict['t'])
# 存储到Redis
value = value_name + str(":" + str(total_count))
redis_client.sadd(key, value)
logging.info("get value:%s" % value)
# 存储近义词
query_base64 = base64.b64encode(items.encode('utf8')).decode('utf8')
key = QUERY_KEY.format(query_base64)
value_name = base64.b64encode(wordrm["keyword"].encode('utf8')).decode('utf8')
# 获取结果
redis_data = redis_client.hget(tips_num_redis_key_prefix, wordrm["keyword"])
redis_val_dict = json.loads(str(redis_data, encoding="utf-8")) if redis_data else {}
total_count = 0
if 't' in redis_val_dict:
total_count = int(redis_val_dict['t'])
# 存储结果
value = value_name + str(":" + str(total_count))
redis_client.sadd(key, value)
logging.info("get value:%s" % value)
if words:
for w in words:
logging.info("get word_pops:%s" % words)
value_name = base64.b64encode(w.encode('utf8')).decode('utf8')
redis_data = redis_client.hget(tips_num_redis_key_prefix, w)
redis_val_dict = json.loads(str(redis_data, encoding="utf-8")) if redis_data else {}
total_count = 0
if 't' in redis_val_dict:
total_count = int(redis_val_dict['t'])
value = value_name + str(":" + str(total_count))
redis_client.sadd(key, value)
logging.info("get value:%s" % value)
except:
logging.error("catch exception,err_msg:%s" % traceback.format_exc())
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment