Commit 0fbd2572 authored by 段英荣's avatar 段英荣

modify

parent 278f6a18
......@@ -7,8 +7,18 @@ import traceback
import pypinyin
from pypinyin import lazy_pinyin
from gm_types.gaia import SERVICE_ITEM_PRICE_TYPE, DOCTOR_TYPE
import uuid
def uuid4():
    """Generate a random (version 4) UUID and return it in hex form.

    :return: the ``hex`` attribute of a fresh ``uuid.uuid4()`` value, i.e.
        a 32-character hexadecimal string without dashes.
    """
    fresh_id = uuid.uuid4()
    return fresh_id.hex
def get_tips_suggest_list(instance_cn_name):
try:
ch_full_weight = 6.0
......
# coding=utf8
from __future__ import unicode_literals, absolute_import, print_function
import logging
import traceback
import json
from libs.cache import redis_client
from trans2es.models.query_word_conversion import TagConversion,DoctorConversion,HospitalConversion
class TagTab(object):
    """Turn a word's search rate and conversion rate into a single score.

    Each rate is multiplied by 100 and looked up in a tier table
    (``SearchScore`` / ``TransScore``): the score of the highest threshold
    that the scaled rate reaches is taken. The two partial scores are then
    combined with the weights in ``Weight``.

    Subclasses only override the tier tables; the scoring methods are
    classmethods, so they can be called either on the class
    (``TagTab.cal_score(...)``) or on an instance (``TagTab().cal_score(...)``).
    """

    # Share of the final score contributed by each partial score.
    Weight = {
        'search': 0.4,
        'trans': 0.6
    }

    # threshold (compared against search_rate * 100) -> partial score
    SearchScore = {
        0.7: 100,
        0.1: 80,
        0.05: 60,
        0.01: 40,
        0: 20,
    }

    # threshold (compared against conversion_rate * 100) -> partial score
    TransScore = {
        0.7: 100,
        0.5: 80,
        0.3: 60,
        0.1: 40,
        0: 20
    }

    @classmethod
    def cal_score(cls, search_rate, conversion_rate):
        """Return the weighted sum of the search and conversion tier scores.

        :param search_rate: numeric search rate of the word.
        :param conversion_rate: numeric conversion rate of the word.
        :return: float, ``search_score * Weight['search'] +
            trans_score * Weight['trans']``.
        """
        s1 = cls._cal_score(search_rate, 'SearchScore') * cls.Weight['search']
        s2 = cls._cal_score(conversion_rate, 'TransScore') * cls.Weight['trans']
        return s1 + s2

    @classmethod
    def _cal_score(cls, item, type_):
        """Look up the tier score for one rate.

        :param item: numeric rate; it is scaled by 100 before comparison.
        :param type_: name of the tier table attribute to use
            (``'SearchScore'`` or ``'TransScore'``).
        :return: score of the highest threshold ``<=`` the scaled rate.
        """
        scaled = item * 100.0
        scoreweight = getattr(cls, type_)
        for k in sorted(scoreweight.keys(), reverse=True):
            if scaled >= k:
                return scoreweight[k]
        # A rate below every threshold (e.g. a negative value) used to fall
        # through and return None, which made cal_score raise. Treat it as
        # the lowest tier instead.
        return scoreweight[min(scoreweight)]
class DocTab(TagTab):
    # Same scoring as TagTab, but with its own tier tables.
    # Tiers are listed from the lowest threshold to the highest.

    # threshold -> score used for the search rate
    SearchScore = {
        0: 20,
        0.0002: 40,
        0.001: 60,
        0.01: 80,
        0.04: 100,
    }

    # threshold -> score used for the conversion rate
    TransScore = {
        0: 20,
        0.01: 40,
        0.1: 60,
        0.2: 80,
        0.47: 100,
    }
class HosTab(TagTab):
    # Same scoring as TagTab, but with its own tier tables.
    # Tiers are listed from the lowest threshold to the highest.

    # threshold -> score used for the search rate
    SearchScore = {
        0: 20,
        0.01: 40,
        0.1: 60,
        0.2: 80,
        0.47: 100,
    }

    # threshold -> score used for the conversion rate (ten tiers, 10..100)
    TransScore = {
        0: 10,
        0.04: 20,
        0.06: 30,
        0.09: 40,
        0.12: 50,
        0.15: 60,
        0.21: 70,
        0.27: 80,
        0.45: 90,
        1: 100,
    }
class QueryWordAttr(object):
    """Offline attributes (conversion weight, result count) of a query word.

    Every public method is a best-effort classmethod: on any failure the
    traceback is logged and a neutral value (``0.0`` or ``0``) is returned.
    """

    # Latest ``update_date`` found in the TagConversion / DoctorConversion /
    # HospitalConversion tables. Each value is looked up on first use and then
    # kept on the class, so it is not refreshed afterwards.
    tag_latest_date = None
    doctor_latest_date = None
    hospital_latest_date = None

    # Redis hash: field = word, value = JSON object whose 't' entry is read
    # by get_query_results_num().
    tips_num_redis_key_prefix = "search_tips:tips_mapping_num"

    @classmethod
    def _sum_conversion_scores(cls, name, model, scorer_cls, date_attr):
        """Sum the scores of all rows of ``model`` for ``name`` on the latest date.

        :param name: query word to look up (matched against ``query``).
        :param model: conversion model class to read from.
        :param scorer_cls: scoring class (TagTab or a subclass) for these rows.
        :param date_attr: name of the class attribute caching the latest
            ``update_date`` of ``model``.
        :return: float sum of the row scores, ``0.0`` when there are no rows
            or when anything fails.
        """
        try:
            latest_date = getattr(cls, date_attr)
            if not latest_date:
                latest_date = model.objects.latest('update_date').update_date
                setattr(cls, date_attr, latest_date)

            # cal_score is written as a method taking ``self``; calling it on
            # the bare class raises TypeError, which was swallowed below and
            # turned every weight into 0.0. Score through an instance instead.
            scorer = scorer_cls()
            total_score = 0.0
            for row in model.objects.filter(query=name, update_date=latest_date):
                total_score += scorer.cal_score(row.search_rate, row.conversion_rate)
            return total_score
        except Exception:
            logging.error("catch exception,err_msg:%s" % traceback.format_exc())
            return 0.0

    @classmethod
    def get_project_query_word_weight(cls, name):
        """Return the summed TagTab score of ``name`` from TagConversion."""
        return cls._sum_conversion_scores(name, TagConversion, TagTab, 'tag_latest_date')

    @classmethod
    def get_doctor_query_word_weight(cls, name):
        """Return the summed DocTab score of ``name`` from DoctorConversion."""
        return cls._sum_conversion_scores(name, DoctorConversion, DocTab, 'doctor_latest_date')

    @classmethod
    def get_hospital_query_word_weight(cls, name):
        """Return the summed HosTab score of ``name`` from HospitalConversion."""
        return cls._sum_conversion_scores(name, HospitalConversion, HosTab, 'hospital_latest_date')

    @classmethod
    def get_query_results_num(cls, name):
        """Return the cached total result count ('t') stored in redis for ``name``.

        :param name: word used as the field of the redis hash.
        :return: the stored 't' value, or ``0`` when the word has no entry,
            the entry has no 't', or the lookup fails.
        """
        try:
            redis_data = redis_client.hget(cls.tips_num_redis_key_prefix, name)
            if not redis_data:
                return 0
            # redis may hand back bytes; str(bytes) on Python 3 yields
            # "b'...'", which is not valid JSON, so decode explicitly.
            if isinstance(redis_data, bytes):
                redis_data = redis_data.decode('utf-8')
            redis_val_dict = json.loads(redis_data)
            return redis_val_dict['t'] if 't' in redis_val_dict else 0
        except Exception:
            logging.error("catch exception,err_msg:%s" % traceback.format_exc())
            return 0
tagtab = TagTab()
doctab = DocTab()
hostab = HosTab()
......@@ -23,34 +23,34 @@ import json
# Management command: for each type returned by
# tips_attr_sync_to_redis_type_info_map(), slice the type's queryset into
# chunks with TableSlicer and call the type's get_data_func on every instance.
# NOTE(review): this span appears to be a rendered diff in which the removed
# and the added version of several lines sit back to back (two `help`
# assignments, a commented-out and a live `option_list`, two copies of the
# type_info/slicer statements) - confirm which lines belong to the final file
# before changing any code here.
class Command(BaseCommand):
args = ''
help = 'dump data to elasticsearch, parallel'
help = 'dump data to redis, parallel'
from optparse import make_option
# option_list = BaseCommand.option_list + (
# make_option('-t', '--type', dest='type', help='type name to dump data to elasticsearch', metavar='TYPE',default=''),
# make_option('-i', '--index-prefix', dest='index_prefix', help='index name to dump data to elasticsearch', metavar='INDEX_PREFIX'),
# make_option('-p', '--parallel', dest='parallel', help='parallel process count', metavar='PARALLEL'),
# make_option('-s', '--pks', dest='pks', help='specify sync pks, comma separated', metavar='PKS', default=''),
# make_option('--streaming-slicing', dest='streaming_slicing', action='store_true', default=True),
# make_option('--no-streaming-slicing', dest='streaming_slicing', action='store_false', default=True),
# make_option('-S', '--sync_type',dest='sync_type', help='sync data to es',metavar='TYPE',default='')
# )
# Command-line options; only options["type"] is read by handle() below.
option_list = BaseCommand.option_list + (
make_option('-t', '--type', dest='type', help='type name to dump data to elasticsearch', metavar='TYPE',default=''),
make_option('-i', '--index-prefix', dest='index_prefix', help='index name to dump data to elasticsearch', metavar='INDEX_PREFIX'),
make_option('-p', '--parallel', dest='parallel', help='parallel process count', metavar='PARALLEL'),
make_option('-s', '--pks', dest='pks', help='specify sync pks, comma separated', metavar='PKS', default=''),
make_option('--streaming-slicing', dest='streaming_slicing', action='store_true', default=True),
make_option('--no-streaming-slicing', dest='streaming_slicing', action='store_false', default=True),
make_option('-S', '--sync_type',dest='sync_type', help='sync data to es',metavar='TYPE',default='')
)
# Entry point of the command. Any exception is logged with its traceback and
# swallowed, so the command itself does not raise.
def handle(self, *args, **options):
try:
type_name_list = tips_attr_sync_to_redis_type_info_map().keys()
for type_name in type_name_list:
# if len(options["type"]):
# if options["type"] == "all" or type_name==options["type"]:
logging.info("begin sync [%s] data to es!" % type_name)
type_info = tips_attr_sync_to_redis_type_info_map()[type_name]
query_set = type_info.queryset
# A type is processed only when --type is non-empty and is either "all" or
# exactly this type name.
if len(options["type"]):
if options["type"] == "all" or type_name==options["type"]:
logging.info("begin sync [%s] data to es!" % type_name)
type_info = tips_attr_sync_to_redis_type_info_map()[type_name]
query_set = type_info.queryset
slicer = TableSlicer(queryset=query_set, chunk_size=type_info.bulk_insert_chunk_size)
for chunk in slicer.chunks():
for instance in list(chunk):
type_info.get_data_func(instance)
slicer = TableSlicer(queryset=query_set, chunk_size=type_info.bulk_insert_chunk_size)
for chunk in slicer.chunks():
for instance in list(chunk):
type_info.get_data_func(instance)
except:
logging.error("catch exception,err_msg:%s" % traceback.format_exc())
......@@ -11,8 +11,24 @@ import datetime
from gm_types.gaia import (
DOCTOR_PROCESS_STATUS, DOCTOR_TYPE, PHONE_HINTS, TOPIC_TYPE, TAG_TYPE, DisplayTag,
SERVICE_FLAG)
from trans2es.commons.commons import uuid4
class Hospital(models.Model):
    # Hospital row mapped onto the `api_hospital` table of the `api` app.

    class Meta:
        app_label = 'api'
        db_table = 'api_hospital'
        verbose_name = u'30. 医院'
        verbose_name_plural = u'30. 医院'

    # String primary key; defaults to a fresh value from uuid4().
    id = models.CharField(
        primary_key=True,
        max_length=100,
        default=uuid4,
    )
    # Hospital name, unique across the table.
    name = models.CharField(
        max_length=100,
        unique=True,
        help_text=u"医院名称",
    )

    # Longitude / latitude pair labelled "google".
    google_loc_lng = models.FloatField(
        default=0,
        help_text=u"经度(-180~180)",
    )
    google_loc_lat = models.FloatField(
        default=0,
        help_text=u"纬度",
    )

    # Longitude / latitude pair labelled "baidu".
    baidu_loc_lng = models.FloatField(
        default=0,
        help_text=u"经度(-180~180)",
    )
    baidu_loc_lat = models.FloatField(
        default=0,
        help_text=u"纬度",
    )
class Doctor(models.Model):
class Meta:
......@@ -27,6 +43,7 @@ class Doctor(models.Model):
doctor_type = models.CharField(max_length=1, default=DOCTOR_TYPE.DOCTOR,
choices=DOCTOR_TYPE, null=False,
verbose_name=u'医生类型')
hospital = models.ForeignKey(Hospital, null=True, blank=False, related_name="doctor_hospital", verbose_name=u"医院")
class Service(models.Model):
......
# coding=utf8
from __future__ import unicode_literals, absolute_import, print_function
import operator
from django.db import models
from django.db.models import Q
from django.db import transaction
from gm_types.gaia import TAG_ALERT_TYPE
def manager():
    """Build a new model manager bound to the 'bran' database alias."""
    base_manager = models.Manager()
    return base_manager.db_manager(using='bran')
class TagConversion(models.Model):
    # Per-tag search / conversion statistics read from `trade_tag_conversion`
    # through the manager returned by manager().

    class Meta:
        db_table = 'trade_tag_conversion'
        verbose_name = '综合、美购、日记tab'
        unique_together = ('update_date', 'province_id', 'business_id')
        get_latest_by = 'update_date'

    objects = manager()

    # Date of the statistics row; also the field used by latest().
    update_date = models.DateField()
    province_id = models.CharField(max_length=40)
    # Keyword that was searched.
    query = models.CharField(
        max_length=120,
        verbose_name='搜索时的关键词',
    )
    # ID of the tag that the keyword matches exactly.
    business_id = models.IntegerField(
        verbose_name='关键词精确匹配的tagID',
    )
    # Tag level.
    business_type = models.CharField(
        max_length=5,
        verbose_name='tag分级',
    )
    # Share of searches.
    search_rate = models.FloatField(
        verbose_name='搜索次数占比',
    )
    # Tag conversion rate.
    conversion_rate = models.FloatField(
        verbose_name='tag转化率',
    )
class DoctorConversion(models.Model):
    # Per-doctor search / conversion statistics read from
    # `trade_doctor_conversion` through the manager returned by manager().

    class Meta:
        db_table = 'trade_doctor_conversion'
        verbose_name = '医生tab'
        unique_together = ('update_date', 'province_id', 'business_id')
        get_latest_by = 'update_date'

    objects = manager()

    # Date of the statistics row; also the field used by latest().
    update_date = models.DateField()
    province_id = models.CharField(max_length=40)
    # Searched keyword.
    query = models.CharField(max_length=128)
    business_id = models.CharField(max_length=100)
    business_type = models.CharField(max_length=1)
    # The two rates that the scoring classes consume.
    search_rate = models.FloatField()
    conversion_rate = models.FloatField()
class HospitalConversion(models.Model):
    # Per-hospital search / conversion statistics read from
    # `trade_hospital_conversion` through the manager returned by manager().

    class Meta:
        db_table = 'trade_hospital_conversion'
        verbose_name = '医院tab'
        unique_together = ('update_date', 'province_id', 'business_id')
        get_latest_by = 'update_date'

    objects = manager()

    # Date of the statistics row; also the field used by latest().
    update_date = models.DateField()
    province_id = models.CharField(max_length=40)
    # Searched keyword.
    query = models.CharField(max_length=128)
    business_id = models.CharField(max_length=100)
    business_type = models.CharField(max_length=1)
    # The two rates that the scoring classes consume.
    search_rate = models.FloatField()
    conversion_rate = models.FloatField()
......@@ -6,6 +6,8 @@ import operator
from django.db import models
from django.db.models import Q
from django.db import transaction
import logging
import traceback
from gm_types.gaia import TAG_ALERT_TYPE
from django.utils import timezone
......@@ -13,6 +15,7 @@ from gm_upload import IMG_TYPE, ImgUrlField
from gm_types.gaia import (
DOCTOR_PROCESS_STATUS, DOCTOR_TYPE, PHONE_HINTS, TOPIC_TYPE, TAG_TYPE, DisplayTag,
SERVICE_FLAG)
from trans2es.models.query_word_conversion import TagConversion
class Tag(models.Model):
......@@ -21,7 +24,6 @@ class Tag(models.Model):
verbose_name_plural = "标签"
app_label = 'api'
#objects = TagManager()
name = models.CharField(
verbose_name='名称',
......@@ -38,4 +40,4 @@ class Tag(models.Model):
blank=False,
choices=TAG_TYPE,
)
is_online = models.BooleanField(null=False, default=True, verbose_name='是否上线')
is_online = models.BooleanField(null=False, default=True, verbose_name='是否上线')
\ No newline at end of file
......@@ -139,7 +139,7 @@ class TypeInfo(object):
"input": suggest_item["input"]
}
suggest_dict["suggest_type"] = suggest_item["suggest_type"]
suggest_dict["offline_score"] = suggest_item["word_weight"]
suggest_dict["offline_score"] = suggest_item["word_weight"] + suggest_dict["order_weight"]
suggest_dict["id"] = str(suggest_dict["id"]) + "_" + str(suggest_item["cur_index"])
data_list.append(suggest_dict)
except Exception:
......@@ -299,7 +299,7 @@ def get_type_info_map():
),
TypeInfo(
name='suggest',
type='productwiki_tips', # brandwiki
type='productwiki_tips', # productwiki
model=productwiki.ProductWiki,
query_deferred=lambda: productwiki.ProductWiki.objects.all().query,
get_data_func=ProduceWikiTransfer.get_productwiki_suggest_data_list,
......@@ -336,9 +336,19 @@ def tips_attr_sync_to_redis_type_info_map():
TypeInfo(
name='suggest',
type='doctor_results_num',# doctor结果数
model=tag.Tag,
query_deferred=lambda: tag.Tag.objects.all().query,
get_data_func=DoctorTransfer.get_doctor_data_name_mapping_results_to_redis,
bulk_insert_chunk_size=100,
round_insert_chunk_size=5,
round_insert_period=2,
),
TypeInfo(
name='suggest',
type='tag_results_num', # tag结果数
model=doctor.Doctor,
query_deferred=lambda: doctor.Doctor.objects.all().query,
get_data_func=DoctorTransfer.get_doctor_data_name_mapping_results_to_redis,
get_data_func=TagTransfer.get_tag_data_name_mapping_results_to_redis,
bulk_insert_chunk_size=100,
round_insert_chunk_size=5,
round_insert_period=2,
......
......@@ -10,6 +10,8 @@ from libs.cache import redis_client
import json
from trans2es.commons.commons import get_tips_suggest_list
from trans2es.commons.words_utils import QueryWordAttr
class BrandWikiTransfer(object):
......@@ -27,9 +29,9 @@ class BrandWikiTransfer(object):
item_dict["ori_name"] = instance.name
item_dict["is_online"] = instance.is_online
item_dict["order_weight"] = 0.0
item_dict["results_num"] = 0
item_dict["type_flag"] = "unknown"
item_dict["order_weight"] = QueryWordAttr.get_project_query_word_weight(instance.name)
item_dict["results_num"] = QueryWordAttr.get_query_results_num(instance.name)
item_dict["type_flag"] = "project"
item_dict["offline_score"] = 0.0
item_dict["tips_name_type"] = 3
......
......@@ -10,6 +10,8 @@ from libs.cache import redis_client
import json
from trans2es.commons.commons import get_tips_suggest_list
from trans2es.commons.words_utils import QueryWordAttr
class CollectWikiTransfer(object):
......@@ -27,9 +29,9 @@ class CollectWikiTransfer(object):
item_dict["ori_name"] = instance.name
item_dict["is_online"] = instance.is_online
item_dict["order_weight"] = 0.0
item_dict["results_num"] = 0
item_dict["type_flag"] = "unknown"
item_dict["order_weight"] = QueryWordAttr.get_project_query_word_weight(instance.name)
item_dict["results_num"] = QueryWordAttr.get_query_results_num(instance.name)
item_dict["type_flag"] = "project"
item_dict["offline_score"] = 0.0
item_dict["tips_name_type"] = 3
......
......@@ -10,6 +10,7 @@ from libs.cache import redis_client
import json
from trans2es.commons.commons import get_tips_suggest_list
from trans2es.commons.words_utils import QueryWordAttr
class DoctorTransfer(object):
......@@ -84,8 +85,8 @@ class DoctorTransfer(object):
item_dict["ori_name"] = instance.name
item_dict["is_online"] = instance.is_online
item_dict["order_weight"] = 0.0
item_dict["results_num"] = 0
item_dict["order_weight"] = QueryWordAttr.get_doctor_query_word_weight(instance.name)
item_dict["results_num"] = QueryWordAttr.get_query_results_num(instance.name)
item_dict["type_flag"] = "doctor"
item_dict["offline_score"] = 0.0
item_dict["tips_name_type"] = 2
......
......@@ -10,6 +10,7 @@ from libs.cache import redis_client
import json
from trans2es.commons.commons import get_tips_suggest_list
from trans2es.commons.words_utils import QueryWordAttr
class HospitalTransfer(object):
......@@ -27,8 +28,8 @@ class HospitalTransfer(object):
item_dict["ori_name"] = instance.name
item_dict["is_online"] = instance.is_online
item_dict["order_weight"] = 0.0
item_dict["results_num"] = 0
item_dict["order_weight"] = QueryWordAttr.get_hospital_query_word_weight(instance.name)
item_dict["results_num"] = QueryWordAttr.get_query_results_num(instance.name)
item_dict["type_flag"] = "hospital"
item_dict["offline_score"] = 0.0
item_dict["tips_name_type"] = 1
......
......@@ -10,6 +10,8 @@ from libs.cache import redis_client
import json
from trans2es.commons.commons import get_tips_suggest_list
from trans2es.commons.words_utils import QueryWordAttr
class ItemWikiTransfer(object):
......@@ -17,6 +19,68 @@ class ItemWikiTransfer(object):
{'host': '10.30.57.94', 'port': 9200}
]
@classmethod
def get_wiki_data_name_mapping_results_to_redis(cls, instance):
    """Count online wiki documents matching ``instance.name`` and cache the count.

    The stripped name is searched in the ``newwiki`` index (``cross_fields``
    multi-match with ``or`` over the wiki text fields, restricted to
    ``is_online`` documents). The hit count is stored under ``'w'`` in the
    JSON object kept for that name in the ``search_tips:tips_mapping_num``
    redis hash, and the object's ``'t'`` total is recomputed.

    :param instance: object with a ``name`` attribute.
    :return: None. Failures are logged and swallowed.
    """
    try:
        wiki_name = instance.name.strip()
        if not wiki_name:
            # Nothing to search for.
            return

        multi_fields = {
            'name': 1,
            'association_tags': 1,
            "description": 1,
            "use_result": 1,
            "treatment_method": 1,
            "effect": 1,
            "ingredient": 1,
            "nature_type": 1,
            "shape": 1,
        }
        # "field^boost" strings as expected by multi_match.
        query_fields = ['^'.join((k, str(v))) for (k, v) in multi_fields.items()]
        multi_match = {
            'query': wiki_name,
            'type': 'cross_fields',
            'operator': 'or',
            'fields': query_fields,
        }

        q = {
            "size": 0,  # only the hit count is needed
            "query": {
                "bool": {
                    "should": [
                        {"multi_match": multi_match}
                    ],
                    "must": [
                        {"term": {"is_online": True}}
                    ],
                    "minimum_should_match": 1
                }
            }
        }

        result_dict = ESPerform.get_search_results(
            ESPerform.get_cli(cls.test_es_info_list),
            sub_index_name="newwiki", doc_type="newwiki",
            query_body=q, offset=0, size=0)
        wiki_results = result_dict["total_count"]

        redis_key_prefix = "search_tips:tips_mapping_num"
        redis_data = redis_client.hget(redis_key_prefix, wiki_name)
        # redis may return bytes; str(bytes) is not valid JSON on Python 3.
        if isinstance(redis_data, bytes):
            redis_data = redis_data.decode('utf-8')
        redis_val_dict = json.loads(redis_data) if redis_data else {}

        # Replace this source's count, then rebuild the total from all
        # per-source counts. Adding the new count to the previously stored
        # 'w' (as before) double-counted on every re-run and discarded the
        # counts written by other sources.
        # NOTE(review): assumes every key other than 't' holds a numeric
        # per-source count - confirm against the other writers of this hash.
        redis_val_dict['w'] = wiki_results
        redis_val_dict['t'] = sum(
            int(count) for key, count in redis_val_dict.items() if key != 't')

        redis_client.hset(redis_key_prefix, wiki_name, json.dumps(redis_val_dict))
    except Exception:
        logging.error("catch exception,err_msg:%s" % traceback.format_exc())
@classmethod
def get_itemwiki_suggest_data_list(cls, instance):
try:
......@@ -27,9 +91,9 @@ class ItemWikiTransfer(object):
item_dict["ori_name"] = instance.name
item_dict["is_online"] = instance.is_online
item_dict["order_weight"] = 0.0
item_dict["results_num"] = 0
item_dict["type_flag"] = "unknown"
item_dict["order_weight"] = QueryWordAttr.get_project_query_word_weight(instance.name)
item_dict["results_num"] = QueryWordAttr.get_query_results_num(instance.name)
item_dict["type_flag"] = "project"
item_dict["offline_score"] = 0.0
item_dict["tips_name_type"] = 3
......
......@@ -10,6 +10,8 @@ from libs.cache import redis_client
import json
from trans2es.commons.commons import get_tips_suggest_list
from trans2es.commons.words_utils import QueryWordAttr
class ProduceWikiTransfer(object):
......@@ -27,9 +29,9 @@ class ProduceWikiTransfer(object):
item_dict["ori_name"] = instance.name
item_dict["is_online"] = instance.is_online
item_dict["order_weight"] = 0.0
item_dict["results_num"] = 0
item_dict["type_flag"] = "unknown"
item_dict["order_weight"] = QueryWordAttr.get_project_query_word_weight(instance.name)
item_dict["results_num"] = QueryWordAttr.get_query_results_num(instance.name)
item_dict["type_flag"] = "project"
item_dict["offline_score"] = 0.0
item_dict["tips_name_type"] = 3
......
......@@ -10,12 +10,84 @@ from libs.cache import redis_client
import json
from trans2es.commons.commons import get_tips_suggest_list
from trans2es.commons.words_utils import QueryWordAttr
class TagTransfer(object):
test_es_info_list = [
{'host': '10.30.57.94', 'port': 9200}
]
tips_num_redis_key_prefix = "search_tips:tips_mapping_num"
@classmethod
def get_tag_data_name_mapping_results_to_redis(cls, instance):
    """Count online services matching ``instance.name`` and cache the count.

    The stripped name is searched in the ``service`` index: a ``cross_fields``
    multi-match with ``and`` over the service/doctor/hospital fields, or a
    nested match on ``sku_list.name``, restricted to ``is_online`` documents.
    The hit count is stored under ``'s'`` in the JSON object kept for that
    name in the ``cls.tips_num_redis_key_prefix`` redis hash, and the
    object's ``'t'`` total is recomputed.

    :param instance: object with a ``name`` attribute.
    :return: None. Failures are logged and swallowed.
    """
    try:
        tag_name = instance.name.strip()
        if not tag_name:
            # A blank name would send an empty request body and store the
            # result under an empty hash field; skip it.
            return

        multi_fields = {
            'short_description': 1,
            'doctor.name': 1,
            'doctor.hospital.name': 1,
            'doctor.hospital.city_name': 1,
            'doctor.hospital.city_province_name': 1,
            'closure_tags': 1,  # since version 5.9: search across all tags
            'doctor.hospital.officer_name': 1  # search by institution manager
        }
        # "field^boost" strings as expected by multi_match.
        query_fields = ['^'.join((k, str(v))) for (k, v) in multi_fields.items()]
        multi_match = {
            'query': tag_name,
            'type': 'cross_fields',
            'operator': 'and',
            'fields': query_fields,
        }

        sku_query = {
            "nested": {
                "path": "sku_list",
                "query": {
                    "multi_match": {
                        "query": tag_name,
                        "fields": ["sku_list.name^2"],
                        'operator': 'and',
                        'type': 'cross_fields'
                    }
                }
            }
        }

        # "size" belongs at the top level of the request body, next to
        # "query"; it is not a valid key inside the query clause itself.
        q = {
            "size": 0,  # only the hit count is needed
            "query": {
                'bool': {
                    "should": [
                        {'multi_match': multi_match},
                        sku_query
                    ],
                    "must": [
                        {"term": {"is_online": True}}
                    ],
                    "minimum_should_match": 1
                }
            }
        }

        result_dict = ESPerform.get_search_results(
            ESPerform.get_cli(cls.test_es_info_list),
            sub_index_name="service", doc_type="service",
            query_body=q, offset=0, size=0)
        service_results = result_dict["total_count"]

        redis_data = redis_client.hget(cls.tips_num_redis_key_prefix, tag_name)
        # redis may return bytes; str(bytes) is not valid JSON on Python 3.
        if isinstance(redis_data, bytes):
            redis_data = redis_data.decode('utf-8')
        redis_val_dict = json.loads(redis_data) if redis_data else {}

        # Replace this source's count, then rebuild the total from all
        # per-source counts. Adding the new count to the previously stored
        # 's' (as before) double-counted on every re-run and discarded the
        # counts written by other sources.
        # NOTE(review): assumes every key other than 't' holds a numeric
        # per-source count - confirm against the other writers of this hash.
        redis_val_dict['s'] = service_results
        redis_val_dict['t'] = sum(
            int(count) for key, count in redis_val_dict.items() if key != 't')

        redis_client.hset(cls.tips_num_redis_key_prefix, tag_name, json.dumps(redis_val_dict))
    except Exception:
        logging.error("catch exception,err_msg:%s" % traceback.format_exc())
@classmethod
def get_tag_suggest_data_list(cls, instance):
......@@ -27,9 +99,9 @@ class TagTransfer(object):
item_dict["ori_name"] = instance.name
item_dict["is_online"] = instance.is_online
item_dict["order_weight"] = 0.0
item_dict["results_num"] = 0
item_dict["type_flag"] = "unknown"
item_dict["order_weight"] = QueryWordAttr.get_project_query_word_weight(instance.name)
item_dict["results_num"] = QueryWordAttr.get_query_results_num(instance.name)
item_dict["type_flag"] = "project"
item_dict["offline_score"] = 0.0
item_dict["tips_name_type"] = 0
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment