Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in
Toggle navigation
S
search_tips
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
rank
search_tips
Commits
3b6b6fbb
Commit
3b6b6fbb
authored
Jan 15, 2020
by
lixiaofang
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
add
parent
6854ba01
Show whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
112 additions
and
155 deletions
+112
-155
auto_tips.py
search/utils/auto_tips.py
+108
-154
commons.py
trans2es/commons/commons.py
+2
-0
itemwiki_transfer.py
trans2es/utils/itemwiki_transfer.py
+2
-1
No files found.
search/utils/auto_tips.py
View file @
3b6b6fbb
...
@@ -17,79 +17,114 @@ from pypinyin import pinyin, lazy_pinyin
...
@@ -17,79 +17,114 @@ from pypinyin import pinyin, lazy_pinyin
def get_suggest_tips(query, lat, lng, offset=0, size=50):
    """Return search-tip suggestions for ``query``.

    The lookup runs in up to three stages:

    1. ``get_query_by_es`` is called with the lower-cased query; if it already
       yields 50 or more tips they are returned unchanged.
    2. Otherwise the related queries stored in the redis hash
       ``query:search_tip`` (field: base64 of the query) are appended, exact
       matches first, and known variant spellings are folded into their
       canonical entry.
    3. If there is still nothing to return, the query is converted to pinyin
       and ``get_query_by_es`` is tried once more.

    Args:
        query: The text typed by the user.
        lat: Latitude, passed through to ``get_query_by_es``.
        lng: Longitude, passed through to ``get_query_by_es``.
        offset: Paging offset, passed through to ``get_query_by_es``.
        size: Page size, passed through to ``get_query_by_es``.

    Returns:
        A list of tip dicts (at most 50 once the redis stage has run), or an
        empty list if any step raised.
    """
    max_tips = 50
    try:
        query = str(query)

        # First fetch the tips that match the query directly.
        have_read_tips_set, ret_list, result_dict = get_query_by_es(
            query=query.lower(), lat=lat, lng=lng, offset=offset, size=size)
        if len(ret_list) >= max_tips:
            return ret_list

        QUERY_KEY = "query:search_tip"
        query_base64 = base64.b64encode(query.encode('utf8')).decode('utf8')

        # Read the cached value once: a second hget could return None if the
        # field disappears between the check and the read.
        value_data = []
        cached = redis_client.hget(QUERY_KEY, query_base64)
        if cached is not None:
            # The client may return bytes or str depending on its config.
            if isinstance(cached, bytes):
                cached = cached.decode('utf-8')
            value_data = json.loads(cached)

        query_ret_list = []
        wordresemble_ret_list = []
        for entry in value_data:
            if not entry:
                continue
            # Each entry is a single-item mapping: base64(name) -> result count.
            key = next(iter(entry))
            ori_name = str(base64.b64decode(key), "utf-8")
            if ori_name in have_read_tips_set:
                continue
            have_read_tips_set.add(ori_name)

            result_num = entry.get(key, 0)
            describe = "约" + str(result_num) + "个结果" if result_num else ""
            logging.info("get result_num:%s" % result_num)

            tip = {
                "results_num": result_num,
                "ori_name": ori_name,
                "id": None,
                "is_online": True,
                "offline_score": 0,
                "type_flag": get_tips_word_type(ori_name),
                "highlight_name": set_highlihgt(query, ori_name),
                "describe": describe,
            }
            # Exact matches are listed ahead of merely similar words.
            if ori_name == query:
                query_ret_list.append(tip)
            else:
                wordresemble_ret_list.append(tip)

        ret_list.extend(query_ret_list)
        ret_list.extend(wordresemble_ret_list)

        # Drop variant spellings and add their result counts to the canonical
        # spelling. A new list is built instead of removing from ret_list while
        # iterating it, which would skip the element after every removal.
        variant_to_canonical = {'痩脸针': "瘦脸针"}
        kept_tips = []
        extra_counts = {}
        for tip in ret_list:
            canonical = variant_to_canonical.get(tip['ori_name'])
            if canonical is None:
                kept_tips.append(tip)
            else:
                extra_counts[canonical] = (
                    extra_counts.get(canonical, 0) + tip['results_num'])
        for tip in kept_tips:
            if tip['ori_name'] in extra_counts:
                tip['results_num'] += extra_counts[tip['ori_name']]
        ret_list = kept_tips

        if ret_list:
            return ret_list[:max_tips]

        # Nothing found: convert the query to pinyin and search once more.
        str_query = ''.join(str(part) for part in lazy_pinyin(query))
        have_read_tips_set, ret_list, result_dict = get_query_by_es(
            query=str_query, lat=lat, lng=lng, offset=offset, size=size)
        logging.info("get ret_list:%s" % ret_list)
        return ret_list
    except Exception:
        logging.error("catch exception,err_msg:%s" % traceback.format_exc())
        return list()
def set_highlihgt(query=None, ori_name=None):
    """Wrap every character of ``ori_name`` that occurs in ``query`` in ``<ems>`` tags.

    Highlighting is done in a single pass over ``ori_name``, so the inserted
    tags are never rescanned. The result therefore does not depend on the
    order in which query characters are processed, and query characters such
    as ``<``, ``/``, ``e``, ``m`` or ``s`` cannot corrupt the markup.

    Args:
        query: Text whose individual characters should be highlighted.
        ori_name: Text to decorate.

    Returns:
        ``ori_name`` with each matching character wrapped as
        ``<ems>c</ems>``; ``ori_name`` unchanged when ``query`` is empty or
        ``None``; an empty string when ``ori_name`` is empty or ``None``.
    """
    if not ori_name:
        return ""
    if not query:
        # Nothing to highlight.
        return ori_name

    query_chars = set(query)
    return ''.join(
        u'<ems>%s</ems>' % char if char in query_chars else char
        for char in ori_name
    )
#
# for item in hits:
# have_read_tips_set.add(item["_source"]["ori_name"])
def
get_query_by_es
(
query
=
''
,
lat
=
0
,
lng
=
0
,
size
=
0
,
offset
=
0
):
# id = item["_source"]["id"]
try
:
# ori_name = item["_source"]["ori_name"]
# results_num = item["_source"]["results_num"]
# is_online = item["_source"]["is_online"]
# offline_score = item["_source"]["offline_score"]
# type_flag = item["_source"]["type_flag"]
#
# highlight_marks = u'<ems>%s</ems>' % query
# item["_source"]["highlight_name"] = item["_source"]["ori_name"].replace(query, highlight_marks)
# highlight_name = item["_source"]["highlight_name"]
# if item["_source"]["type_flag"] == "hospital":
# if lat is not None and lng is not None and lat != 0.0 and lng != 0.0:
#
# logging.info("get g_hospital_pos_dict:%s" % g_hospital_pos_dict)
# if item["_source"]["ori_name"] in g_hospital_pos_dict:
# distance = point_distance(lng, lat,
# g_hospital_pos_dict[item["_source"]["ori_name"]][0],
# g_hospital_pos_dict[item["_source"]["ori_name"]][1])
# if distance < 1000 * 50:
# if distance < 1000:
# if distance < 100:
# item["_source"]["describe"] = "<100" + "米"
# else:
# item["_source"]["describe"] = "约" + str(int(distance)) + "米"
# else:
# item["_source"]["describe"] = "约" + str(
# round(1.0 * distance / 1000, 1)) + "km"
# else:
# item["_source"]["describe"] = ">50km"
# else:
# item["_source"]["describe"] = ""
#
# else:
# item["_source"]["describe"] = ""
# else:
# if item["_source"]["type_flag"] == "doctor":
# item["_source"]["describe"] = ""
# else:
# item["_source"]["describe"] = "约" + str(item["_source"]["results_num"]) + "个结果" if \
# item["_source"]["results_num"] else ""
#
# ret_list.append({"offline_score": offline_score, "ori_name": ori_name, "results_num": results_num, "id": id,
# "highlight_name": highlight_name, "type_flag": type_flag,
# "is_online": is_online})
###获取联想到的数据
# ios输入法在某些情况下会携带\\u2006
query
=
query
.
replace
(
"
\u2006
"
,
''
)
query
=
query
.
replace
(
"
\u2006
"
,
''
)
q
=
{
q
=
{
"suggest"
:
{
"suggest"
:
{
...
@@ -178,88 +213,7 @@ def get_suggest_tips(query, lat, lng, offset=0, size=50):
...
@@ -178,88 +213,7 @@ def get_suggest_tips(query, lat, lng, offset=0, size=50):
ret_list
.
extend
(
get_tag_wiki_data
)
ret_list
.
extend
(
get_tag_wiki_data
)
ret_list
.
extend
(
get_doctor_hospital_data
)
ret_list
.
extend
(
get_doctor_hospital_data
)
if
len
(
result_dict
[
"suggest"
][
"tips-suggest"
])
>=
50
:
return
have_read_tips_set
,
ret_list
,
result_dict
return
ret_list
else
:
query_ret_list
=
[]
wordresemble_ret_list
=
[]
value_data
=
[]
QUERY_KEY
=
"query:search_tip"
query_base64
=
base64
.
b64encode
(
query
.
encode
(
'utf8'
))
.
decode
(
'utf8'
)
if
redis_client
.
hget
(
QUERY_KEY
,
query_base64
)
is
not
None
:
value_data
=
json
.
loads
(
str
(
redis_client
.
hget
(
QUERY_KEY
,
query_base64
),
encoding
=
'utf-8'
))
if
len
(
value_data
)
>
0
:
for
i
in
value_data
:
key
=
list
(
i
.
keys
())[
0
]
ori_name
=
str
(
base64
.
b64decode
(
key
),
"utf-8"
)
if
ori_name
not
in
have_read_tips_set
:
have_read_tips_set
.
add
(
ori_name
)
result_num
=
i
.
get
(
key
,
0
)
describe
=
"约"
+
str
(
result_num
)
+
"个结果"
if
result_num
else
""
logging
.
info
(
"get result_num:
%
s"
%
result_num
)
highlight_marks
=
u'<ems>
%
s</ems>'
%
query
# highlight_name = ori_name.replace(query, highlight_marks)
highlight_name
=
set_highlihgt
(
query
,
ori_name
)
if
ori_name
==
query
:
query_ret_list
.
append
(
{
"results_num"
:
result_num
,
"ori_name"
:
ori_name
,
"id"
:
None
,
"is_online"
:
True
,
"offline_score"
:
0
,
"type_flag"
:
get_tips_word_type
(
ori_name
),
"highlight_name"
:
highlight_name
,
"describe"
:
describe
})
else
:
wordresemble_ret_list
.
append
(
{
"results_num"
:
result_num
,
"ori_name"
:
ori_name
,
"id"
:
None
,
"is_online"
:
True
,
"offline_score"
:
0
,
"type_flag"
:
get_tips_word_type
(
ori_name
),
"highlight_name"
:
highlight_name
,
"describe"
:
describe
})
ret_list
.
extend
(
query_ret_list
)
ret_list
.
extend
(
wordresemble_ret_list
)
###繁体字删掉,把搜索结果加到简体字上边
fanti_query
=
[{
'痩脸针'
:
"瘦脸针"
}]
for
item
in
ret_list
:
result_num
=
[[
item
[
'results_num'
],
list
(
ret
.
values
())[
0
],
list
(
ret
.
keys
())[
0
]]
for
ret
in
fanti_query
if
list
(
ret
.
keys
())[
0
]
==
item
[
'ori_name'
]]
if
len
(
result_num
)
>
0
:
ret_list
.
remove
(
item
)
for
item
in
ret_list
:
if
item
[
'ori_name'
]
==
result_num
[
0
][
1
]:
item
[
'results_num'
]
+=
result_num
[
0
][
0
]
####
if
len
(
ret_list
)
>=
50
:
return
ret_list
[
0
:
50
]
else
:
return
ret_list
except
:
except
:
logging
.
error
(
"catch exception,err_msg:
%
s"
%
traceback
.
format_exc
())
return
list
()
return
set
(),
list
(),
list
()
def set_highlihgt(query=None, ori_name=None):
    """Wrap every character of ``ori_name`` that occurs in ``query`` in ``<ems>`` tags.

    Highlighting is done in a single pass over ``ori_name``. Replacing one
    query character at a time over the whole string would also rewrite the
    letters of ``<ems>`` tags inserted by earlier replacements (for example
    when the query contains ``e``, ``m`` or ``s``).

    Args:
        query: Text whose individual characters should be highlighted.
        ori_name: Text to decorate.

    Returns:
        ``ori_name`` with each matching character wrapped as
        ``<ems>c</ems>``; ``ori_name`` unchanged when ``query`` is empty or
        ``None``; an empty string when ``ori_name`` is empty or ``None``.
    """
    if not ori_name:
        return ""
    if not query:
        # Nothing to highlight.
        return ori_name

    query_chars = set(query)
    return ''.join(
        u'<ems>%s</ems>' % char if char in query_chars else char
        for char in ori_name
    )
trans2es/commons/commons.py
View file @
3b6b6fbb
...
@@ -17,8 +17,10 @@ def uuid4():
...
@@ -17,8 +17,10 @@ def uuid4():
"""
"""
return
uuid
.
uuid4
()
.
hex
return
uuid
.
uuid4
()
.
hex
def
get_tips_suggest_list
(
instance_cn_name
):
def
get_tips_suggest_list
(
instance_cn_name
):
try
:
try
:
print
(
instance_cn_name
)
# ch_full_weight = 6.0 * 1000
# ch_full_weight = 6.0 * 1000
# py_full_weight = 3.0 * 1000
# py_full_weight = 3.0 * 1000
full_weight
=
3.0
*
1000
full_weight
=
3.0
*
1000
...
...
trans2es/utils/itemwiki_transfer.py
View file @
3b6b6fbb
...
@@ -97,8 +97,9 @@ class ItemWikiTransfer(object):
...
@@ -97,8 +97,9 @@ class ItemWikiTransfer(object):
ret_list
.
append
(
item_dict
)
ret_list
.
append
(
item_dict
)
suggest_list
=
get_tips_suggest_list
(
instance
.
name
)
suggest_list
=
get_tips_suggest_list
(
str
(
instance
.
name
)
.
lower
()
)
logging
.
info
(
"get suggest_list:
%
s"
%
suggest_list
)
return
(
item_dict
,
suggest_list
)
return
(
item_dict
,
suggest_list
)
except
:
except
:
logging
.
error
(
"catch exception,err_msg:
%
s"
%
traceback
.
format_exc
())
logging
.
error
(
"catch exception,err_msg:
%
s"
%
traceback
.
format_exc
())
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment