Commit a955f0d8 authored by lixiaofang's avatar lixiaofang

add;

parent 9b708f86
# !/usr/bin/env python
# encoding=utf-8
from __future__ import absolute_import
import os
# set the default Django settings module for the 'celery' program.
# os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'settings')
os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'search_tips.settings')
import raven
from raven.contrib.celery import register_signal, register_logger_signal
from celery import Celery
from django.conf import settings
class Celery(Celery):
"""wrap for celery.Celery."""
def on_configure(self):
# check if sentry settings provided
if not settings.SENTRY_CELERY_ENDPOINT:
return
client = raven.Client(settings.SENTRY_CELERY_ENDPOINT)
# register a custom filter to filter out duplicate logs
register_logger_signal(client)
# hook into the Celery error handler
register_signal(client)
app = Celery('search_tips_tasks')
# Using a string here means the worker will not have to
# pickle the object when using Windows.
app.config_from_object('django.conf:settings')
app.autodiscover_tasks(lambda: settings.INSTALLED_APPS)
# -*- coding: UTF-8 -*-
import logging
from celery import shared_task
from trans2es.type_info import get_type_info_map
import logging
import traceback
import base64
from libs.cache import redis_client
import json
from trans2es.models import wordresemble
@shared_task
def set_data_to_redis():
try:
QUERY_KEY = "query:{}:search_tips"
tips_num_redis_key_prefix = "search_tips:tips_mapping_num"
result = wordresemble.WordRel.objects.all().values("keyword", "id")
for wordrm in result:
query_sql_item = wordresemble.WordRel.objects.filter(keyword=wordrm["keyword"])
for sql_obj in query_sql_item:
words = list(sql_obj.all_resembles.all().values_list('word', flat=True))
for items in words:
# 先存储本体词
logging.info("get wordrm:%s" % wordrm["keyword"])
logging.info("get words:%s" % words)
query_base64 = base64.b64encode(wordrm["keyword"].encode('utf8')).decode('utf8')
key = QUERY_KEY.format(query_base64)
value_name = base64.b64encode(items.encode('utf8')).decode('utf8')
# 获取本体词的结果
redis_data = redis_client.hget(tips_num_redis_key_prefix, items)
redis_val_dict = json.loads(str(redis_data, encoding="utf-8")) if redis_data else {}
total_count = 0
if 't' in redis_val_dict:
total_count = int(redis_val_dict['t'])
# 存储到Redis
value = value_name + str(":" + str(total_count))
redis_client.sadd(key, value)
logging.info("get value:%s" % value)
# 存储近义词
query_base64 = base64.b64encode(items.encode('utf8')).decode('utf8')
key = QUERY_KEY.format(query_base64)
value_name = base64.b64encode(wordrm["keyword"].encode('utf8')).decode('utf8')
# 获取结果
redis_data = redis_client.hget(tips_num_redis_key_prefix, wordrm["keyword"])
redis_val_dict = json.loads(str(redis_data, encoding="utf-8")) if redis_data else {}
total_count = 0
if 't' in redis_val_dict:
total_count = int(redis_val_dict['t'])
# 存储结果
value = value_name + str(":" + str(total_count))
redis_client.sadd(key, value)
logging.info("get value:%s" % value)
if words:
for w in words:
logging.info("get word_pops:%s" % words)
value_name = base64.b64encode(w.encode('utf8')).decode('utf8')
redis_data = redis_client.hget(tips_num_redis_key_prefix, w)
redis_val_dict = json.loads(str(redis_data, encoding="utf-8")) if redis_data else {}
total_count = 0
if 't' in redis_val_dict:
total_count = int(redis_val_dict['t'])
value = value_name + str(":" + str(total_count))
redis_client.sadd(key, value)
logging.info("get value:%s" % value)
except:
logging.error("catch exception,err_msg:%s" % traceback.format_exc())
......@@ -2,14 +2,16 @@
# -*- coding: utf-8 -*-
from __future__ import unicode_literals, absolute_import, print_function
from gm_rpcd.all import bind
import logging
import traceback
import json
import base64
from libs.es import ESPerform
from libs.cache import redis_client
from libs.tools import g_hospital_pos_dict
from libs.tools import point_distance
from gm_rpcd.all import bind
from trans2es.commons.words_utils import QueryWordAttr, get_tips_word_type
# lat经度
......@@ -46,6 +48,8 @@ def get_suggest_tips(query, lat, lng, offset=0, size=50):
ret_list = list()
result_dict = ESPerform.get_search_results(ESPerform.get_cli(), sub_index_name="suggest", query_body=q,
offset=offset, size=size, is_suggest_request=True)
logging.info("get result_dict:%s" % result_dict)
for tips_item in result_dict["suggest"]["tips-suggest"]:
for hit_item in tips_item["options"]:
......@@ -84,7 +88,26 @@ def get_suggest_tips(query, lat, lng, offset=0, size=50):
hit_item["_source"]["results_num"] else ""
ret_list.append(hit_item["_source"])
return ret_list
if len(result_dict["suggest"]["tips-suggest"]) >= 50:
return ret_list
else:
QUERY_KEY = "query:{}:search_tips"
query_base64 = base64.b64encode(query.encode('utf8')).decode('utf8')
key = QUERY_KEY.format(query_base64)
logging.info("get key:%s" % key)
labels = list(map(lambda x: x.decode("utf8"), list(redis_client.smembers(key))))
logging.info("get labels:%s" % labels)
for i in labels:
ori = i.split(":")[0]
result_num = i.split(":")[1]
ori_name = base64.b64decode(ori.encode('utf8')).decode('utf8')
ret_list.append(
{"results_num": result_num, "ori_name": ori_name, "id": None, "is_online": True, "offline_score": 0,
"type_flag": get_tips_word_type(ori_name), "highlight_name": ori_name, "describe": ""})
return ret_list
except:
logging.error("catch exception,err_msg:%s" % traceback.format_exc())
return list()
......@@ -2,4 +2,7 @@ from __future__ import unicode_literals, absolute_import, print_function
import pymysql
from _celery import app as celery_app
pymysql.install_as_MySQLdb()
\ No newline at end of file
from __future__ import absolute_import, unicode_literals
import os
from celery import Celery
from django.conf import settings
# set the default Django settings module for the 'celery' program.
os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'search_tips.settings')
app = Celery('search_tips')
# Using a string here means the worker doesn't have to serialize
# the configuration object to child processes.
# - namespace='CELERY' means all celery-related configuration keys
# should have a `CELERY_` prefix.
app.config_from_object('django.conf:settings', namespace='CELERY')
# Load task modules from all registered Django app configs.
app.autodiscover_tasks()
app.conf.broker_url = settings.CELERY_BROKER_URL
@app.task(bind=True)
def debug_task(self):
print('Request: {0!r}'.format(self.request))
......@@ -48,7 +48,7 @@ DEBUG = True
# }
CELERY_TIMEZONE = 'Asia/Shanghai'
CELERY_ROUTES = ['physical.celery_task_router.CeleryTaskRouter']
# CELERY_ROUTES = ['physical.celery_task_router.CeleryTaskRouter']
# Application definition
......
# -*- coding:utf-8 -*-
from datetime import timedelta
from celery.schedules import crontab
ALLOWED_HOSTS = ["192.168.78.2"]
SECRET_KEY = 'e%$v6snev0807=t0@gk_n2#r5m6r1h(eil6cp^y3ub@ja@gk_t'
SENTRY_CELERY_ENDPOINT = "http://60b0004c8884420f8067fb32fc3ed244:20f97fc73ffa4aad9735d0e6542a6d78@sentry.igengmei.com/140"
BROKER_URL = "redis://127.0.0.1:6379/8"
# REDIS_URL = "redis://127.0.0.1:6379/1"
BROKER_URL = "redis://redis.paas.env:6379/8"
REDIS_URL = "redis://redis.paas.env:6379/13"
CELERY_BROKER_URL = "redis://127.0.0.1:6379/8"
CELERY_BROKER_URL = "redis://redis.paas.env:6379/8"
DATABASES = {
'default': {
# 'ENGINE': 'django.db.backends.mysql',
# 'NAME': 'zhengxing_test',
# 'USER': 'work',
# 'PASSWORD': 'workwork',
# 'HOST': 'rdsmaqevmuzj6jy.mysql.rds.aliyuncs.com',
# 'PORT': '',
# 'OPTIONS': {
# "init_command": "SET foreign_key_checks = 0;",
# "charset": "utf8mb4",
# }
'ENGINE': 'django.db.backends.mysql', # 设置为mysql数据库
'NAME': 'zhengxing_test',
'USER': 'work',
'PASSWORD': 'Gengmei1',
'HOST': 'rm-2ze0v6uua2hl9he8edo.mysql.rds.aliyuncs.com',
'PORT': '3306',
'OPTIONS': {
"init_command": "SET foreign_key_checks = 0;",
"charset": "utf8mb4", # 为了支持emoji表情
},
},
'bran': {
'ENGINE': 'django.db.backends.mysql', # 设置为mysql数据库
'NAME': 'bran_test',
'USER': 'work',
'PASSWORD': 'workwork',
'HOST': 'rdsmaqevmuzj6jy.mysql.rds.aliyuncs.com',
'PORT': '3306',
'OPTIONS': {
"init_command": "SET foreign_key_checks = 0;",
"charset": "utf8mb4", # 为了支持emoji表情
},
}
}
INSTALLED_APPS = (
'django.contrib.admin',
'django.contrib.auth',
'django.contrib.contenttypes',
'django.contrib.sessions',
'django.contrib.messages',
'django.contrib.staticfiles',
'trans2es',
'search',
'injection.data_sync',
)
# DATABASES = {
# 'default': {
# # 'ENGINE': 'django.db.backends.mysql',
# # 'NAME': 'zhengxing_test',
# # 'USER': 'work',
# # 'PASSWORD': 'workwork',
# # 'HOST': 'rdsmaqevmuzj6jy.mysql.rds.aliyuncs.com',
# # 'PORT': '',
# # 'OPTIONS': {
# # "init_command": "SET foreign_key_checks = 0;",
# # "charset": "utf8mb4",
# # }
# # 'ENGINE': 'django.db.backends.mysql', # 设置为mysql数据库
# # 'NAME': 'zhengxing_test',
# # 'USER': 'work',
# # 'PASSWORD': 'Gengmei1',
# # 'HOST': 'rm-2ze0v6uua2hl9he8edo.mysql.rds.aliyuncs.com',
# # 'PORT': '3306',
# # 'OPTIONS': {
# # "init_command": "SET foreign_key_checks = 0;",
# # "charset": "utf8mb4", # 为了支持emoji表情
# },
# },
# 'bran': {
# 'ENGINE': 'django.db.backends.mysql', # 设置为mysql数据库
# 'NAME': 'bran_test',
# 'USER': 'work',
# 'PASSWORD': 'workwork',
# 'HOST': 'rdsmaqevmuzj6jy.mysql.rds.aliyuncs.com',
# 'PORT': '3306',
# 'OPTIONS': {
# "init_command": "SET foreign_key_checks = 0;",
# "charset": "utf8mb4", # 为了支持emoji表情
# },
# }
# }
CELERYBEAT_SCHEDULE = {
'set_tag_count': {
'task': 'injection.data_sync.tasks.set_data_to_redis',
'schedule': timedelta(seconds=10),
'args': ()
},
}
# ES_INFO_LIST = [
# {
......
......@@ -32,15 +32,6 @@ class Command(BaseCommand):
def handle(self, *args, **options):
try:
if not options['type']:
raise logging.info('type name must be specified')
elif not (options['type'] in [
'suggest',
]):
raise logging.info(
'unknown type name, type must be one of [suggest]')
es_cli = ESPerform.get_cli()
type_name = "suggest"
......
......@@ -8,7 +8,6 @@
"type": "completion",
"analyzer": "keyword",
"search_analyzer": "keyword",
"skip_duplicates":"true",
"contexts": [
{
"name": "is_online",
......
......@@ -147,39 +147,38 @@ class TypeInfo(object):
suggest_dict["suggest_type"] = suggest_item["suggest_type"]
suggest_dict["offline_score"] = suggest_item["word_weight"] + suggest_dict["order_weight"]
suggest_dict["id"] = str(suggest_dict["id"]) + "_" + str(suggest_item["cur_index"])
if suggest_item["suggest_type"] == 1 and item_dict["tips_name_type"] == 4:
for resemble in resemble_list:
suggest_item["input"].append(str(resemble))
suggest_dict["suggest"] = {
"input": suggest_item["input"],
"weight": int(suggest_dict["offline_score"]),
"contexts": {
"is_online": suggest_dict["is_online"]
}
}
else:
suggest_dict["suggest"] = {
"input": suggest_item["input"],
"weight": int(suggest_dict["offline_score"]),
"contexts": {
"is_online": suggest_dict["is_online"]
}
# if suggest_item["suggest_type"] == 1 and item_dict["tips_name_type"] == 4:
# for resemble in resemble_list:
# suggest_item["input"].append(str(resemble))
#
# suggest_dict["suggest"] = {
# "input": suggest_item["input"],
# "weight": int(suggest_dict["offline_score"]),
# "contexts": {
# "is_online": suggest_dict["is_online"]
# }
# }
# else:
suggest_dict["suggest"] = {
"input": suggest_item["input"],
"weight": int(suggest_dict["offline_score"]),
"contexts": {
"is_online": suggest_dict["is_online"]
}
}
data_list.append(suggest_dict)
if item_dict["tips_name_type"] != 4:
for resemble_item in resemble_list:
resemble_dict = copy.deepcopy(suggest_dict)
resemble_dict["id"] = suggest_dict["id"] + "_" + getMd5Digest(resemble_item)
resemble_dict["ori_name"] = resemble_item
resemble_dict["results_num"] = QueryWordAttr.get_query_results_num(resemble_item)
resemble_dict["offline_score"] = 0
resemble_dict["suggest"]["weight"] = 0
data_list.append(resemble_dict)
else:
pass
# if item_dict["tips_name_type"] != 4:
for resemble_item in resemble_list:
resemble_dict = copy.deepcopy(suggest_dict)
resemble_dict["id"] = suggest_dict["id"] + "_" + getMd5Digest(resemble_item)
resemble_dict["ori_name"] = resemble_item
resemble_dict["results_num"] = QueryWordAttr.get_query_results_num(resemble_item)
resemble_dict["offline_score"] = 0
resemble_dict["suggest"]["weight"] = 0
data_list.append(resemble_dict)
# else:
# pass
except Exception:
traceback.print_exc()
logging.exception('bulk_get_data for name={}, doc_type={}, pk={}'.format(
......
......@@ -4,6 +4,7 @@ import os
import sys
import logging
import traceback
import base64
from libs.tools import tzlc, getMd5Digest
from libs.es import ESPerform
from libs.cache import redis_client
......@@ -140,3 +141,65 @@ class WordResemble(object):
redis_client.hset(tips_num_redis_key_prefix, tag_name, json.dumps(redis_val_dict))
except:
logging.error("catch exception,err_msg:%s" % traceback.format_exc())
@classmethod
def set_data_to_redis(cls):
try:
QUERY_KEY = "query:{}:search_tips"
tips_num_redis_key_prefix = "search_tips:tips_mapping_num"
result = wordresemble.WordRel.objects.all().values("keyword", "id")
for wordrm in result:
query_sql_item = wordresemble.WordRel.objects.filter(keyword=wordrm["keyword"])
for sql_obj in query_sql_item:
words = list(sql_obj.all_resembles.all().values_list('word', flat=True))
for items in words:
# 先存储本体词
logging.info("get wordrm:%s" % wordrm["keyword"])
logging.info("get words:%s" % words)
query_base64 = base64.b64encode(wordrm["keyword"].encode('utf8')).decode('utf8')
key = QUERY_KEY.format(query_base64)
value_name = base64.b64encode(items.encode('utf8')).decode('utf8')
# 获取本体词的结果
redis_data = redis_client.hget(tips_num_redis_key_prefix, items)
redis_val_dict = json.loads(str(redis_data, encoding="utf-8")) if redis_data else {}
total_count = 0
if 't' in redis_val_dict:
total_count = int(redis_val_dict['t'])
# 存储到Redis
value = value_name + str(":" + str(total_count))
redis_client.sadd(key, value)
logging.info("get value:%s" % value)
# 存储近义词
query_base64 = base64.b64encode(items.encode('utf8')).decode('utf8')
key = QUERY_KEY.format(query_base64)
value_name = base64.b64encode(wordrm["keyword"].encode('utf8')).decode('utf8')
# 获取结果
redis_data = redis_client.hget(tips_num_redis_key_prefix, wordrm["keyword"])
redis_val_dict = json.loads(str(redis_data, encoding="utf-8")) if redis_data else {}
total_count = 0
if 't' in redis_val_dict:
total_count = int(redis_val_dict['t'])
# 存储结果
value = value_name + str(":" + str(total_count))
redis_client.sadd(key, value)
logging.info("get value:%s" % value)
if words:
for w in words:
logging.info("get word_pops:%s" % words)
value_name = base64.b64encode(w.encode('utf8')).decode('utf8')
redis_data = redis_client.hget(tips_num_redis_key_prefix, w)
redis_val_dict = json.loads(str(redis_data, encoding="utf-8")) if redis_data else {}
total_count = 0
if 't' in redis_val_dict:
total_count = int(redis_val_dict['t'])
value = value_name + str(":" + str(total_count))
redis_client.sadd(key, value)
logging.info("get value:%s" % value)
except:
logging.error("catch exception,err_msg:%s" % traceback.format_exc())
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment