Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in
Toggle navigation
S
search_tips
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
rank
search_tips
Commits
a8df3524
Commit
a8df3524
authored
Jan 16, 2020
by
lixiaofang
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
add
parent
ca099d92
Hide whitespace changes
Inline
Side-by-side
Showing
6 changed files
with
268 additions
and
25 deletions
+268
-25
es.py
libs/es.py
+2
-1
auto_tips.py
search/utils/auto_tips.py
+71
-14
commons.py
trans2es/commons/commons.py
+4
-4
type_info.py
trans2es/type_info.py
+2
-2
tag_transfer.py
trans2es/utils/tag_transfer.py
+3
-3
wordresemble.py
trans2es/utils/wordresemble.py
+186
-1
No files found.
libs/es.py
View file @
a8df3524
...
...
@@ -58,7 +58,7 @@ class ESPerform(object):
os
.
path
.
dirname
(
__file__
),
'..'
,
'trans2es'
,
'mapping'
,
'
%
s.json'
%
(
doc_type
,))
if
doc_type
==
"associate_tag"
or
doc_type
==
"associate_tag_brand"
or
doc_type
==
"associate_tag_doctor"
\
or
doc_type
==
"associate_tag_instrument"
or
doc_type
==
"associate_tag_hospital"
or
doc_type
==
"associate_tag_project"
\
or
doc_type
==
"associate_tag_instrument"
or
doc_type
==
"associate_tag_hospital"
or
doc_type
==
"associate_tag_project"
\
or
doc_type
==
"associate_tag_position"
:
mapping_file_path
=
os
.
path
.
join
(
os
.
path
.
dirname
(
__file__
),
...
...
@@ -184,6 +184,7 @@ class ESPerform(object):
official_index_name
=
cls
.
get_official_index_name
(
sub_index_name
,
"read"
)
index_exists
=
es_cli
.
indices
.
exists
(
official_index_name
)
print
(
index_exists
)
if
not
index_exists
:
if
not
auto_create_index
:
logging
.
error
(
"index:
%
s is not existing,get_search_results error!"
%
official_index_name
)
...
...
search/utils/auto_tips.py
View file @
a8df3524
...
...
@@ -16,9 +16,29 @@ from trans2es.commons.words_utils import QueryWordAttr, get_tips_word_type
def
get_suggest_tips
(
query
,
lat
,
lng
,
offset
=
0
,
size
=
50
,
device_id
=
None
):
"""
这个地方目前是有三个逻辑 分两个灰度验证
50
%
前段强加权后根据tab转化率和词频进行排序 索引 suggest gray_number=1 尾号["0", "1", "2", "3", "c", "d", "e", "f"]
25
%
去掉前段强加权后 直接根据tag转化率和词频进行排序 suggest-v1 gray_number=2 尾号["4", "5", "6", "a"]
25
%
去掉前段强加权后 根据tag转化率和词频进行排序 再根据前段加权展示排序 suggest-v1 gray_number=3 尾号["9", "8", "7", "b"]
:param query:
:param lat:
:param lng:
:param offset:
:param size:
:param device_id:
:return:
"""
try
:
###加两层灰度
###在原来的逻辑上加两层灰度
gray_number
=
recommed_service_category_device_id
(
device_id
)
if
gray_number
==
1
:
sub_index_name
=
"suggest"
else
:
sub_index_name
=
"suggest-v1"
###获取联想到的数据
# ios输入法在某些情况下会携带\\u2006
...
...
@@ -49,7 +69,7 @@ def get_suggest_tips(query, lat, lng, offset=0, size=50, device_id=None):
ret_list
=
list
()
doctor_hospital_equal_query
=
list
()
tag_equal_query
=
list
()
result_dict
=
ESPerform
.
get_search_results
(
ESPerform
.
get_cli
(),
sub_index_name
=
"suggest"
,
query_body
=
q
,
result_dict
=
ESPerform
.
get_search_results
(
ESPerform
.
get_cli
(),
sub_index_name
=
sub_index_name
,
query_body
=
q
,
offset
=
offset
,
size
=
size
,
is_suggest_request
=
True
)
for
tips_item
in
result_dict
[
"suggest"
][
"tips-suggest"
]:
...
...
@@ -110,9 +130,41 @@ def get_suggest_tips(query, lat, lng, offset=0, size=50, device_id=None):
ret_list
.
extend
(
get_tag_wiki_data
)
ret_list
.
extend
(
get_doctor_hospital_data
)
if
len
(
result_dict
[
"suggest"
][
"tips-suggest"
])
>=
50
:
return
ret_list
if
len
(
ret_list
)
>=
50
:
if
gray_number
in
(
1
,
2
):
logging
.
info
(
"get-----------------ret_list:
%
s"
%
ret_list
)
return
ret_list
[:
50
]
else
:
logging
.
info
(
"get+++++++++++++++++ret_list:
%
s"
%
ret_list
)
# 在去掉强加权的逻辑上根据词频和tag转化率排序后再前段强加权
front_data
=
[]
end_data
=
[]
equal_data
=
[]
need_change_sort
=
ret_list
[:
30
]
for
item
in
need_change_sort
:
ori_name
=
item
.
get
(
"ori_name"
,
None
)
if
query
==
ori_name
:
equal_data
.
append
(
item
)
elif
query
==
ori_name
[:
len
(
query
)]:
front_data
.
append
(
item
)
else
:
end_data
.
append
(
item
)
logging
.
info
(
"get+++++++++++++++++equal_data:
%
s"
%
equal_data
)
logging
.
info
(
"get+++++++++++++++++front_data:
%
s"
%
front_data
)
logging
.
info
(
"get+++++++++++++++++end_data:
%
s"
%
end_data
)
logging
.
info
(
"get+++++++++++++++++ret_list:
%
s"
%
ret_list
)
equal_data
.
extend
(
front_data
)
equal_data
.
extend
(
end_data
)
equal_data
.
extend
(
ret_list
[
30
:])
ret_list
=
equal_data
return
ret_list
else
:
query_ret_list
=
[]
wordresemble_ret_list
=
[]
...
...
@@ -197,21 +249,26 @@ def set_highlihgt(query=None, ori_name=None):
return
highlight_name
def recommed_service_category_device_id(device_id, real_cary=False):
    """Return the gray-release bucket number (1, 2 or 3) for a device.

    The bucket is chosen from the last hexadecimal digit of the MD5 digest
    of ``str(device_id)``:

    * ``4``, ``5``, ``6``, ``a``  -> 2
    * ``9``, ``8``, ``7``, ``b``  -> 3
    * any other digit             -> 1

    :param device_id: device identifier; any value accepted by ``str()``.
        A falsy value (``None``, ``""``, ``0``) maps to bucket 1.
    :param real_cary: unused; accepted only so that callers written against
        the earlier two-argument signature keep working.
    :return: 1, 2 or 3. Bucket 1 is also returned when an unexpected error
        is raised while hashing, after the traceback has been logged.
    """
    # Digits that are not listed in either tuple fall through to bucket 1.
    bucket_two_digits = ("4", "5", "6", "a")
    bucket_three_digits = ("9", "8", "7", "b")
    try:
        if not device_id:
            return 1

        last_digit = hashlib.md5(str(device_id).encode()).hexdigest()[-1]
        if last_digit in bucket_two_digits:
            return 2
        if last_digit in bucket_three_digits:
            return 3
        return 1
    except Exception:
        # Best effort: a failure here must not break the suggest request,
        # so log it and fall back to the default bucket.
        logging.error("catch exception,err_msg:%s" % traceback.format_exc())
        return 1
trans2es/commons/commons.py
View file @
a8df3524
...
...
@@ -120,11 +120,11 @@ def get_tips_suggest_list_v1(instance_cn_name):
try
:
# ch_full_weight = 6.0 * 1000
# py_full_weight = 3.0 * 1000
full_weight
=
3.0
*
1000
py_acronym_full_weight
=
3.0
*
1000
full_weight
=
1
py_acronym_full_weight
=
1
py_acronym_prefix_weight
=
2
ch_prefix_weight
=
1
.5
py_acronym_prefix_weight
=
1
ch_prefix_weight
=
1
py_prefix_weight
=
1.0
# 命中开始部分加权
...
...
trans2es/type_info.py
View file @
a8df3524
...
...
@@ -21,7 +21,7 @@ from trans2es.utils.collectwiki_transfer import CollectWikiTransfer, CollectWiki
from
trans2es.utils.brandwiki_transfer
import
BrandWikiTransfer
,
BrandWikiTransferV1
from
trans2es.utils.productwiki_transfer
import
ProduceWikiTransfer
,
ProduceWikiTransferV1
from
trans2es.utils.tag_transfer
import
TagTransfer
,
TagTransferV1
from
trans2es.utils.wordresemble
import
WordResemble
from
trans2es.utils.wordresemble
import
WordResemble
,
WordResembleV1
from
libs.es
import
ESPerform
from
libs.tools
import
tzlc
,
getMd5Digest
from
trans2es.commons.words_utils
import
QueryWordAttr
...
...
@@ -445,7 +445,7 @@ def get_type_info_map():
model
=
wordresemble
.
WordRel
,
query_deferred
=
lambda
:
wordresemble
.
WordRel
.
objects
.
filter
(
category__in
=
[
13
,
12
,
11
,
9
,
1
])
.
query
,
get_data_func
=
WordResemble
.
get_resemble_list
,
get_data_func
=
WordResemble
V1
.
get_resemble_list
,
bulk_insert_chunk_size
=
100
,
round_insert_chunk_size
=
5
,
round_insert_period
=
2
,
...
...
trans2es/utils/tag_transfer.py
View file @
a8df3524
...
...
@@ -10,7 +10,7 @@ from libs.cache import redis_client
import
json
from
django.conf
import
settings
from
trans2es.commons.commons
import
get_tips_suggest_list
,
get_tips_suggest_list_v1
from
trans2es.commons.commons
import
get_tips_suggest_list
,
get_tips_suggest_list_v1
from
trans2es.commons.words_utils
import
QueryWordAttr
,
get_tips_word_type
...
...
@@ -97,8 +97,8 @@ class TagTransfer(object):
for
i
in
cut_word
:
if
keyword
.
find
(
i
)
>=
0
:
cut_bool
=
True
item_dict
=
dict
()
if
cut_bool
==
False
:
item_dict
=
dict
()
item_dict
[
"id"
]
=
getMd5Digest
(
str
(
instance
.
name
))
item_dict
[
"ori_name"
]
=
instance
.
name
item_dict
[
"is_online"
]
=
instance
.
is_online
...
...
@@ -202,8 +202,8 @@ class TagTransferV1(object):
for
i
in
cut_word
:
if
keyword
.
find
(
i
)
>=
0
:
cut_bool
=
True
item_dict
=
dict
()
if
cut_bool
==
False
:
item_dict
=
dict
()
item_dict
[
"id"
]
=
getMd5Digest
(
str
(
instance
.
name
))
item_dict
[
"ori_name"
]
=
instance
.
name
item_dict
[
"is_online"
]
=
instance
.
is_online
...
...
trans2es/utils/wordresemble.py
View file @
a8df3524
...
...
@@ -12,7 +12,7 @@ import json
from
django.conf
import
settings
from
trans2es.models
import
wordresemble
from
trans2es.commons.words_utils
import
QueryWordAttr
,
get_tips_word_type
from
trans2es.commons.commons
import
get_tips_suggest_list
from
trans2es.commons.commons
import
get_tips_suggest_list
,
get_tips_suggest_list_v1
from
trans2es.utils.doctor_transfer
import
DoctorTransfer
from
trans2es.utils.itemwiki_transfer
import
ItemWikiTransfer
from
trans2es.utils.tag_transfer
import
TagTransfer
...
...
@@ -201,3 +201,188 @@ class WordResemble(object):
redis_client
.
hset
(
QUERY_KEY
,
query_base64
,
json
.
dumps
(
keyword_value
))
except
:
logging
.
error
(
"catch exception,err_msg:
%
s"
%
traceback
.
format_exc
())
class WordResembleV1(object):
    # Helpers that turn ``wordresemble.WordRel`` rows into search-tip documents
    # and write related counters to redis. ``get_resemble_list`` builds its
    # suggest data with ``get_tips_suggest_list_v1``.
    #
    # NOTE(review): the indentation of this class was not preserved in the
    # text this was reconstructed from; statement nesting below follows the
    # most plausible reading and should be confirmed against the repository.

    @classmethod
    def get_word_resemble_list(cls, keyword):
        """Return the resemble words recorded for ``keyword``.

        Looks up every ``WordRel`` row whose ``keyword`` matches, collects the
        ``word`` value of each related ``all_resembles`` entry and splits each
        value on the ``"、"`` separator.

        :param keyword: keyword to look up.
        :return: flat list of the split words; an empty list if any exception
            is raised (the traceback is logged).
        """
        try:
            query_sql_item = wordresemble.WordRel.objects.filter(keyword=keyword)

            temp_list = list()
            for sql_obj in query_sql_item:
                temp_list.extend(list(sql_obj.all_resembles.all().values_list('word', flat=True)))

            resemble_list = list()
            for item in temp_list:
                # One stored value may hold several words joined by "、".
                resemble_list.extend(item.split("、"))
            return resemble_list
        except:
            logging.error("catch exception,err_msg:%s" % traceback.format_exc())
            return list()

    @classmethod
    def get_resemble_list(cls, instance):
        """Build the tip document and suggest list for one keyword row.

        :param instance: object exposing a ``keyword`` attribute
            (presumably a ``WordRel`` row -- TODO confirm).
        :return: ``(item_dict, suggest_list)``; ``([], [])`` when the inner
            block raises, ``list()`` when the outer handler is reached.
        """
        try:
            try:
                ret_list = list()
                item_dict = dict()
                keyword = instance.keyword
                cut_bool = False
                # Keywords containing any of these markers get no tip fields.
                cut_word = ["下线", "停用", "已经下线", "账号停用"]
                for i in cut_word:
                    if keyword.find(i) >= 0:
                        cut_bool = True

                if cut_bool == False:
                    item_dict["id"] = getMd5Digest(str(instance.keyword))
                    item_dict["ori_name"] = instance.keyword
                    item_dict["is_online"] = True
                    item_dict["order_weight"] = QueryWordAttr.get_project_query_word_weight(instance.keyword)
                    item_dict["results_num"] = QueryWordAttr.get_query_results_num(instance.keyword)
                    item_dict["type_flag"] = get_tips_word_type(instance.keyword)
                    item_dict["offline_score"] = 0.0
                    item_dict["tips_name_type"] = 4
                    # ret_list is filled here but is not part of the return value.
                    ret_list.append(item_dict)
                # NOTE(review): assumed to sit outside the ``if`` above, so a
                # filtered keyword yields an empty item_dict -- confirm.
                suggest_list = get_tips_suggest_list_v1(instance.keyword)
                return (item_dict, suggest_list)
            except:
                logging.error("catch exception,err_msg:%s" % traceback.format_exc())
                return ([], [])
        except:
            logging.error("catch exception,err_msg:%s" % traceback.format_exc())
            return list()

    @classmethod
    def get_all_data_name_mapping_results_to_redis(cls, instance):
        """Run every per-source "name mapping results to redis" step for a row.

        Copies ``instance.keyword`` onto ``instance.name`` and then passes the
        instance to the wiki, tag, doctor and diary helpers in that order.
        Exceptions are logged and swallowed; nothing is returned.

        :param instance: object exposing a ``keyword`` attribute.
        """
        try:
            total_count = 0
            # The helpers below are handed the same instance; the diary helper
            # reads ``instance.name``, so mirror the keyword onto it first.
            instance.name = instance.keyword
            # Wiki entries
            ItemWikiTransfer.get_wiki_data_name_mapping_results_to_redis(instance)
            # Services (tags)
            TagTransfer.get_tag_data_name_mapping_results_to_redis(instance)
            # Doctors and hospitals
            DoctorTransfer.get_doctor_data_name_mapping_results_to_redis(instance)
            # Diaries
            # NOTE(review): this calls WordResemble, not WordResembleV1 -- confirm intended.
            WordResemble.get_diary_data_name_mapping_results_to_redis(instance)
        except:
            logging.error("catch exception,err_msg:%s" % traceback.format_exc())

    @classmethod
    def get_diary_data_name_mapping_results_to_redis(cls, instance):
        """Add the diary hit count for ``instance.name`` to its redis counters.

        Queries the ``diary`` index with ``size=0`` and reads ``total_count``
        from the result. In the ``search_tips:tips_mapping_num`` hash entry for
        the stripped name, ``'t'`` becomes the previous ``'t'`` plus this count
        and ``'r'`` becomes this count. Exceptions are logged and swallowed.

        :param instance: object exposing a ``name`` string attribute.
        """
        try:
            tips_num_redis_key_prefix = "search_tips:tips_mapping_num"
            tag_name = instance.name.strip()

            q = dict()
            if tag_name:
                # field name -> boost, joined below as "field^boost"
                multi_fields = {
                    'tags': 8,
                    'doctor.name': 4,
                    'doctor.hospital.name': 3,
                    'doctor.hospital.officer_name': 3,
                    'user.last_name': 2,
                    'service.name': 1,
                    "title": 2}
                query_fields = ['^'.join((k, str(v))) for (k, v) in multi_fields.items()]
                multi_match = {
                    'query': tag_name,
                    'type': 'cross_fields',
                    'operator': 'and',
                    'fields': query_fields,
                }
                q['query'] = {
                    'bool': {
                        "should": [
                            {'multi_match': multi_match}
                        ],
                        "must": [
                            {"term": {"is_online": True}}
                        ],
                        "minimum_should_match": 1
                    }
                }

            # NOTE(review): assumed to run even when tag_name is empty (q stays {}) -- confirm.
            result_dict = ESPerform.get_search_results(ESPerform.get_cli(settings.GM_ORI_ES_INFO_LIST),
                                                       sub_index_name="diary", doc_type="diary", query_body=q,
                                                       offset=0, size=0)

            doctor_results = result_dict["total_count"]

            redis_data = redis_client.hget(tips_num_redis_key_prefix, tag_name)

            redis_val_dict = json.loads(str(redis_data, encoding="utf-8")) if redis_data else {}

            total_count = doctor_results
            if 't' in redis_val_dict:
                total_count += int(redis_val_dict['t'])

            redis_val_dict['t'] = total_count
            redis_val_dict['r'] = doctor_results
            redis_client.hset(tips_num_redis_key_prefix, tag_name, json.dumps(redis_val_dict))
        except:
            logging.error("catch exception,err_msg:%s" % traceback.format_exc())

    @classmethod
    def set_data_to_redis(cls, instance):
        """Write keyword/resemble-word count lists into the ``query:search_tip`` hash.

        Names are base64-encoded before being used as hash fields or JSON
        keys; counts are the ``'t'`` value read from the
        ``search_tips:tips_mapping_num`` hash (0 when absent). Exceptions are
        logged and swallowed; nothing is returned.

        :param instance: object exposing a ``keyword`` string attribute.
        """
        # NOTE(review): loop nesting below is reconstructed -- confirm.
        try:
            keyword_value = []
            QUERY_KEY = "query:search_tip"
            tips_num_redis_key_prefix = "search_tips:tips_mapping_num"
            query_sql_item = wordresemble.WordRel.objects.filter(keyword=instance.keyword)
            for sql_obj in query_sql_item:
                count = 0
                words = list(sql_obj.all_resembles.all().values_list('word', flat=True))
                query_base64 = base64.b64encode(instance.keyword.encode('utf8')).decode('utf8')
                for items in words:
                    count += 1
                    wordresemble_value = []
                    # Store the base word first
                    item_name = base64.b64encode(items.encode('utf8')).decode('utf8')
                    # Fetch the result count for this synonym
                    redis_data = redis_client.hget(tips_num_redis_key_prefix, items)
                    logging.info("get redis_data:%s" % redis_data)
                    redis_val_dict = json.loads(str(redis_data, encoding="utf-8")) if redis_data else {}
                    total_count = 0
                    if 't' in redis_val_dict:
                        total_count = int(redis_val_dict['t'])

                    value = {item_name: total_count}
                    keyword_value.append(value)

                    # Count for the keyword itself
                    redis_data = redis_client.hget(tips_num_redis_key_prefix, instance.keyword)
                    redis_val_dict = json.loads(str(redis_data, encoding="utf-8")) if redis_data else {}
                    total_count = 0
                    if 't' in redis_val_dict:
                        total_count = int(redis_val_dict['t'])

                    value = {query_base64: total_count}
                    wordresemble_value.append(value)
                    # The keyword's own entry is added to keyword_value only once.
                    if count == 1:
                        keyword_value.append(value)

                    if words:
                        for w in words:
                            value_name_w = base64.b64encode(w.encode('utf8')).decode('utf8')
                            redis_data = redis_client.hget(tips_num_redis_key_prefix, w)
                            redis_val_dict = json.loads(str(redis_data, encoding="utf-8")) if redis_data else {}
                            total_count = 0
                            if 't' in redis_val_dict:
                                total_count = int(redis_val_dict['t'])

                            value = {value_name_w: total_count}
                            wordresemble_value.append(value)

                    if len(wordresemble_value) > 0:
                        logging.info("get type wordresemble_value:%s" % type(json.dumps(wordresemble_value)))
                        redis_client.hset(QUERY_KEY, item_name, json.dumps(wordresemble_value))

                if len(keyword_value) > 0:
                    logging.info("get type keyword_value:%s" % type(json.dumps(keyword_value)))
                    redis_client.hset(QUERY_KEY, query_base64, json.dumps(keyword_value))

        except:
            logging.error("catch exception,err_msg:%s" % traceback.format_exc())
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment