Commit a9c8179e authored by 段英荣's avatar 段英荣

Initial commit

parents
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import redis
from django.conf import settings
redis_client = redis.StrictRedis.from_url(settings.REDIS_URL)
\ No newline at end of file
#!/usr/bin/env python
# -*- coding: utf-8 -*-
from gm_rpcd.all import RPCDFaultException
from gm_types.doris.error import ERROR
from raven.contrib.django.raven_compat.models import client as _sentry_client
def raise_error(error_code, message=None):
assert error_code != 0
if message is None:
message = ERROR.getDesc(error_code)
raise RPCDFaultException(code=error_code, message=message)
def logging_exception(send_to_sentry=True):
try:
# send exception info to sentry, fail silently
_sentry_client.captureException()
except:
pass
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import os
import sys
import logging
import traceback
import os.path
import re
import json
from elasticsearch import Elasticsearch
import elasticsearch.helpers
from django.conf import settings
class ESPerform(object):
cli_obj = None
cli_info_list = settings.ES_INFO_LIST
index_prefix = settings.ES_INDEX_PREFIX
@classmethod
def get_cli(cls):
try:
cls.cli_obj = Elasticsearch(cls.cli_info_list)
return cls.cli_obj
except:
logging.error("catch exception,err_msg:%s" % traceback.format_exc())
return None
@classmethod
def get_official_index_name(cls,sub_index_name,index_flag=None):
"""
:remark:get official es index name
:param sub_index_name:
:param index_flag:
:return:
"""
try:
assert (index_flag in [None,"read","write"])
official_index_name = cls.index_prefix + "-" + sub_index_name
if index_flag:
official_index_name += "-" + index_flag
return official_index_name
except:
logging.error("catch exception,err_msg:%s" % traceback.format_exc())
return None
@classmethod
def __load_mapping(cls,doc_type):
try:
mapping_file_path = os.path.join(
os.path.dirname(__file__),
'..', 'trans2es','mapping', '%s.json' % (doc_type,))
mapping = ''
with open(mapping_file_path, 'r') as f:
for line in f:
# 去掉注释
mapping += re.sub(r'//.*$', '', line)
mapping = json.loads(mapping)
return mapping
except:
logging.error("catch exception,err_msg:%s" % traceback.format_exc())
return None
@classmethod
def create_index(cls,es_cli,sub_index_name):
"""
:remark: create es index,alias index
:param sub_index_name:
:return:
"""
try:
assert (es_cli is not None)
official_index_name = cls.get_official_index_name(sub_index_name)
index_exist = es_cli.indices.exists(official_index_name)
if not index_exist:
es_cli.indices.create(official_index_name)
read_alias_name = cls.get_official_index_name(sub_index_name,"read")
es_cli.indices.put_alias(official_index_name,read_alias_name)
write_alias_name = cls.get_official_index_name(sub_index_name,"write")
es_cli.indices.put_alias(official_index_name,write_alias_name)
return True
except:
logging.error("catch exception,err_msg:%s" % traceback.format_exc())
return False
@classmethod
def put_index_mapping(cls,es_cli,sub_index_name,mapping_type="_doc"):
"""
:remark: put index mapping
:param es_cli:
:param sub_index_name:
:param mapping_type:
:return:
"""
try:
assert (es_cli is not None)
write_alias_name = cls.get_official_index_name(sub_index_name,"write")
index_exist = es_cli.indices.exists(write_alias_name)
if not index_exist:
return False
mapping_dict = cls.__load_mapping(sub_index_name)
es_cli.indices.put_mapping(index=write_alias_name,body=mapping_dict,doc_type=mapping_type)
return True
except:
logging.error("catch exception,err_msg:%s" % traceback.format_exc())
return False
@classmethod
def put_indices_template(cls,es_cli,template_file_name, template_name):
"""
:remark put index template
:param es_cli:
:param template_file_name:
:param template_name:
:return:
"""
try:
assert (es_cli is not None)
mapping_dict = cls.__load_mapping(template_file_name)
es_cli.indices.put_template(name=template_name,body=mapping_dict)
return True
except:
logging.error("catch exception,err_msg:%s" % traceback.format_exc())
return False
@classmethod
def es_helpers_bulk(cls,es_cli,data_list,sub_index_name,auto_create_index=False,doc_type="_doc"):
try:
assert (es_cli is not None)
official_index_name = cls.get_official_index_name(sub_index_name)
index_exists = es_cli.indices.exists(official_index_name)
if not index_exists:
if not auto_create_index:
logging.error("index:%s is not existing,bulk data error!" % official_index_name)
return False
else:
cls.create_index(es_cli,sub_index_name)
cls.put_index_mapping(es_cli,sub_index_name)
bulk_actions = []
for data in data_list:
bulk_actions.append({
'_op_type': 'index',
'_index': official_index_name,
'_type': doc_type,
'_id': data['id'],
'_source': data,
})
elasticsearch.helpers.bulk(es_cli,bulk_actions)
return True
except:
logging.error("catch exception,err_msg:%s" % traceback.format_exc())
return False
@classmethod
def get_search_results(cls, es_cli,sub_index_name,query_body,offset=0,size=10,
auto_create_index=False,doc_type="_doc",aggregations_query=False,is_suggest_request=False,batch_search=False):
try:
assert (es_cli is not None)
official_index_name = cls.get_official_index_name(sub_index_name,"read")
index_exists = es_cli.indices.exists(official_index_name)
if not index_exists:
if not auto_create_index:
logging.error("index:%s is not existing,get_search_results error!" % official_index_name)
return None
else:
cls.create_index(es_cli,sub_index_name)
cls.put_index_mapping(es_cli,sub_index_name)
logging.info("duan add,query_body:%s" % str(query_body).encode("utf-8"))
if not batch_search:
res = es_cli.search(index=official_index_name,doc_type=doc_type,body=query_body,from_=offset,size=size)
if is_suggest_request:
return res
else:
result_dict = {
"total_count":res["hits"]["total"],
"hits":res["hits"]["hits"]
}
if aggregations_query:
result_dict["aggregations"] = res["aggregations"]
return result_dict
else:
res = es_cli.msearch(body=query_body,index=official_index_name, doc_type=doc_type)
logging.info("duan add,msearch res:%s" % str(res))
return res
except:
logging.error("catch exception,err_msg:%s" % traceback.format_exc())
return {"total_count":0,"hits":[]}
\ No newline at end of file
#!/usr/bin/env python
# -*- coding: utf-8 -*-
from __future__ import unicode_literals, print_function, absolute_import
import six
import random
from django.db import models
class ITableChunk(object):
def __iter__(self):
raise NotImplementedError
def get_pk_start(self):
raise NotImplementedError
def get_pk_stop(self):
raise NotImplementedError
class TableScannerChunk(ITableChunk):
def __init__(self, data_list, pk_start, pk_stop):
self._data_list = data_list
self._pk_start = pk_start
self._pk_stop = pk_stop
def __iter__(self):
return iter(self._data_list)
def get_pk_start(self):
return self._pk_start
def get_pk_stop(self):
return self._pk_stop
class TableScannerChunkIterator(object):
def __init__(self, scanner, last_pk, chunk_size):
assert isinstance(scanner, TableScanner)
self._scanner = scanner
self._last_pk = last_pk
self._chunk_size = chunk_size
def __iter__(self):
while True:
last_pk = self._last_pk
data_list, next_last_pk = self._scanner.get_next_data_list(last_pk=last_pk, chunk_size=self._chunk_size)
self._last_pk = next_last_pk
yield TableScannerChunk(data_list=data_list, pk_start=last_pk, pk_stop=next_last_pk)
class TableScannerFlattenIterator(object):
def __init__(self, scanner, last_pk):
assert isinstance(scanner, TableScanner)
self._scanner = scanner
self._last_pk = last_pk
def __iter__(self):
while True:
data_list, next_last_pk = self._scanner.get_next_data_list(last_pk=self._last_pk)
self._last_pk = next_last_pk
for data in data_list:
yield data
class TableScanner(object):
def __init__(self, queryset):
assert isinstance(queryset, models.QuerySet)
self._model = queryset.model
self._query = queryset.query
self._db_table = self._model._meta.db_table
@property
def queryset(self):
return models.QuerySet(model=self._model, query=self._query)
@property
def model_queryset(self):
return self._model.objects
def get_random_pk(self):
count = self.model_queryset.count()
if count == 0:
return None
index = random.randrange(count)
try:
return self.model_queryset.values_list('pk', flat=True)[index]
except IndexError:
return None
def get_next_data_list(self, last_pk=None, chunk_size=1):
qs = self.queryset.order_by('pk')
if last_pk is not None:
qs = qs.filter(pk__gt=last_pk)
data_list = list(qs[:chunk_size])
if len(data_list) == 0:
next_last_pk = None
else:
next_last_pk = data_list[-1].pk
return data_list, next_last_pk
def __iter__(self):
pk = self.get_random_pk()
return iter(TableScannerFlattenIterator(scanner=self, last_pk=pk))
def chunks(self, chunk_size):
pk = self.get_random_pk()
return iter(TableScannerChunkIterator(scanner=self, last_pk=pk, chunk_size=chunk_size))
class TableSlicerChunk(ITableChunk):
"""
this object can be pickled and transferred to another process.
"""
def __init__(self, model, query, pk_start, pk_stop):
self._model = model
self._query = query
self._pk_start = pk_start
self._pk_stop = pk_stop
def __iter__(self):
data_list = self.__get_range(self._model, self._query, pk_start=self._pk_start, pk_stop=self._pk_stop)
return iter(data_list)
def get_pk_start(self):
return self._pk_start
def get_pk_stop(self):
return self._pk_stop
@classmethod
def __get_range(cls, model, query, pk_start, pk_stop):
qs = models.QuerySet(model=model, query=query)
if pk_start is not None:
qs = qs.filter(pk__gte=pk_start)
if pk_stop is not None:
qs = qs.filter(pk__lt=pk_stop)
return list(qs)
class TableSlicer(object):
def __init__(self, queryset, chunk_size=None, chunk_count=None, sep_list=None):
assert isinstance(queryset, models.QuerySet)
assert chunk_size is None or isinstance(chunk_size, six.integer_types)
assert chunk_count is None or isinstance(chunk_count, six.integer_types)
assert sep_list is None or isinstance(sep_list, list)
assert (chunk_size is not None) + (chunk_count is not None) + (sep_list is not None) == 1
if sep_list is not None:
sep_list = list(sep_list)
else:
count = queryset.count()
if chunk_size is None:
chunk_size = count / chunk_count
index_list = list(range(0, count, chunk_size))
sep_list = [
queryset.order_by('pk').values_list('pk', flat=True)[index]
for index in index_list
]
self._model = queryset.model
self._query = queryset.query
self._sep_list = [None] + sep_list + [None]
def chunks(self):
reversed_sep_list = list(reversed(self._sep_list))
for i in range(len(self._sep_list) - 1):
pk_start = reversed_sep_list[i+1]
pk_stop = reversed_sep_list[i]
yield TableSlicerChunk(model=self._model, query=self._query, pk_start=pk_start, pk_stop=pk_stop)
class TableStreamingSlicer(object):
def __init__(self, queryset, chunk_size=None):
assert isinstance(queryset, models.QuerySet)
assert chunk_size is None or isinstance(chunk_size, six.integer_types)
self._model = queryset.model
self._query = queryset.query
self._chunk_size = chunk_size
self._descend = False
def chunks(self):
last_pk = None
queryset = models.QuerySet(model=self._model, query=self._query).order_by('pk')
value_list = queryset.values_list('pk', flat=True)
while True:
current_value_list = value_list
if last_pk is not None:
current_value_list = current_value_list.filter(pk__gt=last_pk)
try:
next_last_pk = current_value_list[self._chunk_size-1]
except IndexError:
next_last_pk = None
yield TableSlicerChunk(model=self._model, query=self._query, pk_start=last_pk, pk_stop=next_last_pk)
last_pk = next_last_pk
if last_pk is None:
break
#!/usr/bin/env python
# -*- coding: utf-8 -*-
from django.conf import settings
from pytz import timezone
from datetime import datetime
def tzlc(dt, truncate_to_sec=True):
if dt is None:
return None
if truncate_to_sec:
dt = dt.replace(microsecond=0)
if dt.tzinfo is None:
return timezone(settings.TIME_ZONE).localize(dt)
else:
return timezone(settings.TIME_ZONE).normalize(dt)
\ No newline at end of file
#!/usr/bin/env python
import os
import sys
if __name__ == "__main__":
os.environ.setdefault("DJANGO_SETTINGS_MODULE", "search_tips.settings")
try:
from django.core.management import execute_from_command_line
except ImportError:
# The above import may fail for some other reason. Ensure that the
# issue is really that Django is missing to avoid masking other
# exceptions on Python 2.
try:
import django
except ImportError:
raise ImportError(
"Couldn't import Django. Are you sure it's installed and "
"available on your PYTHONPATH environment variable? Did you "
"forget to activate a virtual environment?"
)
raise
execute_from_command_line(sys.argv)
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import os
import logging
LOG_DIR = '/data/log/search_tips/app/'
logging.basicConfig(
level=logging.INFO,
format='%(asctime)s %(levelname)s %(module)s.%(funcName)s Line:%(lineno)d %(message)s',
filename=os.path.join(LOG_DIR, 'filelog.log'),
)
LOGGING = {
'version': 1,
'disable_existing_loggers': False,
'filters': {
'require_debug_true': {
'()': 'django.utils.log.RequireDebugTrue',
},
},
'formatters': {
'verbose': {
'format': '%(asctime)s %(levelname)s %(module)s.%(funcName)s Line:%(lineno)d %(message)s'
},
'simple': {
'format': '%(levelname)s %(message)s'
},
'profile': {
'format': '%(asctime)s %(message)s'
},
'raw': {
'format': '%(message)s'
}
},
'handlers': {
'console': {
'level': 'DEBUG',
'class': 'logging.StreamHandler',
'formatter': 'simple'
},
# 默认的服务器Log(保存到log/filelog.log中, 通过linux的logrotate来处理日志的分割
'default': {
'level': 'INFO',
'class': 'logging.handlers.RotatingFileHandler',
'filename': os.path.join(LOG_DIR, 'filelog.log'),
'formatter': 'verbose',
},
# 默认的服务器ERROR log
'default_err': {
'level': 'ERROR',
'class': 'logging.handlers.RotatingFileHandler',
'filename': os.path.join(LOG_DIR, 'error_logger.log'),
'formatter': 'verbose',
},
'exception_logger': {
'level': 'INFO',
'class': 'logging.handlers.RotatingFileHandler',
'filename': os.path.join(LOG_DIR, 'exception_logger.log'),
'formatter': 'verbose',
},
'tracer_handler': {
'level': 'INFO',
'class': 'logging.handlers.RotatingFileHandler',
'filename': os.path.join(LOG_DIR, 'tracer.log'),
'formatter': 'raw'
},
},
'loggers': {
'django': {
'handlers': ['default'],
'propagate': True,
'level': 'INFO',
},
'django.request': {
'handlers': ['default_err'],
'level': 'ERROR',
'propagate': False,
},
'exception_logger': {
'handlers': ['exception_logger'],
'level': 'INFO',
'propagate': False,
},
'gm_tracer.subscribe': {
'handlers': ['tracer_handler'],
'propagate': False,
'level': 'INFO'
},
},
}
"""
Django settings for search_tips project.
Generated by 'django-admin startproject' using Django 1.10.6.
For more information on this file, see
https://docs.djangoproject.com/en/1.10/topics/settings/
For the full list of settings and their values, see
https://docs.djangoproject.com/en/1.10/ref/settings/
"""
import os
from .log_settings import *
from datetime import timedelta
from celery.schedules import crontab
# Build paths inside the project like this: os.path.join(BASE_DIR, ...)
BASE_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
# Quick-start development settings - unsuitable for production
# See https://docs.djangoproject.com/en/1.10/howto/deployment/checklist/
# SECURITY WARNING: keep the secret key used in production secret!
SECRET_KEY = 'e%$v6snev0807=t0@gk_n2#r5m6r1h(eil6cp^y3ub@ja@gk_t'
# SECURITY WARNING: don't run with debug turned on in production!
DEBUG = True
ALLOWED_HOSTS = []
SENTRY_CELERY_ENDPOINT="http://60b0004c8884420f8067fb32fc3ed244:20f97fc73ffa4aad9735d0e6542a6d78@sentry.igengmei.com/140"
BROKER_URL = "redis://127.0.0.1:6379/8"
# CELERY_SEND_EVENTS = True
# CELERY_SEND_TASK_SENT_EVENT = True
#
# CELERY_DEFAULT_EXCHANGE = 'celery'
# CELERY_DEFAULT_EXCHANGE_TYPE = 'direct'
# CELERY_DEFAULT_ROUTING_KEY = 'celery'
#
# CELERY_QUEUES = {
# 'celery': {
# 'exchange': CELERY_DEFAULT_EXCHANGE,
# 'routing_key': CELERY_DEFAULT_ROUTING_KEY,
# },
# 'order': {
# 'exchange': 'order',
# 'routing_key': 'order',
# },
# }
CELERY_BROKER_URL = "redis://127.0.0.1:6379/8"
CELERY_TIMEZONE = 'Asia/Shanghai'
CELERY_ROUTES = ['physical.celery_task_router.CeleryTaskRouter']
# Application definition
INSTALLED_APPS = [
'django.contrib.admin',
'django.contrib.auth',
'django.contrib.contenttypes',
'django.contrib.sessions',
'django.contrib.messages',
'django.contrib.staticfiles',
'trans2es',
'search',
'injection.data_sync',
]
MIDDLEWARE = [
'django.middleware.security.SecurityMiddleware',
'django.contrib.sessions.middleware.SessionMiddleware',
'django.middleware.common.CommonMiddleware',
'django.middleware.csrf.CsrfViewMiddleware',
'django.contrib.auth.middleware.AuthenticationMiddleware',
'django.contrib.messages.middleware.MessageMiddleware',
'django.middleware.clickjacking.XFrameOptionsMiddleware',
]
ROOT_URLCONF = 'search_tips.urls'
TEMPLATES = [
{
'BACKEND': 'django.template.backends.django.DjangoTemplates',
'DIRS': [],
'APP_DIRS': True,
'OPTIONS': {
'context_processors': [
'django.template.context_processors.debug',
'django.template.context_processors.request',
'django.contrib.auth.context_processors.auth',
'django.contrib.messages.context_processors.messages',
],
},
},
]
WSGI_APPLICATION = 'search_tips.wsgi.application'
# Database
# https://docs.djangoproject.com/en/1.10/ref/settings/#databases
DATABASES = {
'default': {
'ENGINE': 'django.db.backends.mysql',
'NAME': 'zhengxing_test',
'USER': 'work',
'PASSWORD': 'workwork',
'HOST': 'rdsmaqevmuzj6jy.mysql.rds.aliyuncs.com',
'PORT': '',
'OPTIONS': {
"init_command": "SET foreign_key_checks = 0;",
"charset": "utf8mb4",
}
}
}
ES_INFO_LIST = [
{
"host": "10.29.130.141",
"port": 9200
}
]
ES_INDEX_PREFIX="gm-dbmw"
# Password validation
# https://docs.djangoproject.com/en/1.10/ref/settings/#auth-password-validators
AUTH_PASSWORD_VALIDATORS = [
{
'NAME': 'django.contrib.auth.password_validation.UserAttributeSimilarityValidator',
},
{
'NAME': 'django.contrib.auth.password_validation.MinimumLengthValidator',
},
{
'NAME': 'django.contrib.auth.password_validation.CommonPasswordValidator',
},
{
'NAME': 'django.contrib.auth.password_validation.NumericPasswordValidator',
},
]
# Internationalization
# https://docs.djangoproject.com/en/1.10/topics/i18n/
LANGUAGE_CODE = 'en-us'
TIME_ZONE = 'Asia/Shanghai'
USE_I18N = True
USE_L10N = True
#USE_TZ = True
# Static files (CSS, JavaScript, Images)
# https://docs.djangoproject.com/en/1.10/howto/static-files/
STATIC_URL = '/static/'
"""search_tips URL Configuration
The `urlpatterns` list routes URLs to views. For more information please see:
https://docs.djangoproject.com/en/1.10/topics/http/urls/
Examples:
Function views
1. Add an import: from my_app import views
2. Add a URL to urlpatterns: url(r'^$', views.home, name='home')
Class-based views
1. Add an import: from other_app.views import Home
2. Add a URL to urlpatterns: url(r'^$', Home.as_view(), name='home')
Including another URLconf
1. Import the include() function: from django.conf.urls import url, include
2. Add a URL to urlpatterns: url(r'^blog/', include('blog.urls'))
"""
from django.conf.urls import url
from django.contrib import admin
urlpatterns = [
url(r'^admin/', admin.site.urls),
]
"""
WSGI config for search_tips project.
It exposes the WSGI callable as a module-level variable named ``application``.
For more information on this file, see
https://docs.djangoproject.com/en/1.10/howto/deployment/wsgi/
"""
import os
from django.core.wsgi import get_wsgi_application
os.environ.setdefault("DJANGO_SETTINGS_MODULE", "search_tips.settings")
application = get_wsgi_application()
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment