Commit 6c96b6f8, authored 5 years ago by 段英荣
modify search

parent 4d9341b6
Showing 2 changed files with 97 additions and 97 deletions (+97 / -97)
search/utils/topic.py (+73 / -97)
trans2es/management/commands/trans2es_data2es_parallel.py (+24 / -0)
search/utils/topic.py (view file @ 6c96b6f8)
@@ -163,17 +163,7 @@ class TopicUtils(object):
        q["query"] = dict()
        functions_list = [
            {
                "gauss": {
                    "create_time": {
                        "scale": "1d",
                        "decay": 0.99
                    }
                },
                "weight": 60
            }
        ]
        functions_list = list()
        query_function_score = {
            "query": {
@@ -204,7 +194,7 @@ class TopicUtils(object):
                    ]
                }
            },
            "score_mode": "max",
            "score_mode": "sum",
            "boost_mode": "sum",
            "functions": functions_list
        }
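For reference, a minimal sketch of what the gauss decay entry (dropped from the default functions_list above and re-added further down for the query-less branch) and the score_mode switch actually do, assuming the standard Elasticsearch decay formula; the follow_boost value is a made-up stand-in for another scoring function's contribution:

import math
from datetime import timedelta

def gauss_decay(age, scale, decay, offset=timedelta(0)):
    # Standard Elasticsearch gauss decay:
    #   exp(-(max(0, |x| - offset))^2 / (2 * sigma^2)),  sigma^2 = -scale^2 / (2 * ln(decay))
    x = max(0.0, age.total_seconds() - offset.total_seconds())
    sigma_sq = -(scale.total_seconds() ** 2) / (2.0 * math.log(decay))
    return math.exp(-(x ** 2) / (2.0 * sigma_sq))

# "scale": "1d", "decay": 0.99 -> a topic created one day ago keeps ~99% of the recency weight
one_day = gauss_decay(timedelta(days=1), timedelta(days=1), 0.99)    # ~0.99
one_week = gauss_decay(timedelta(days=7), timedelta(days=1), 0.99)   # ~0.61

# score_mode controls how the per-function scores combine before boost_mode merges
# them into the query score: "max" keeps only the strongest function, while "sum"
# (the new value in the hunk above) adds them up.
recency = 60 * one_day          # the gauss entry carries "weight": 60
follow_boost = 3.0              # hypothetical contribution of another function
print(max(recency, follow_boost), recency + follow_boost)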
@@ -228,26 +218,11 @@ class TopicUtils(object):
                query_function_score["query"]["bool"]["must_not"] = [
                    {"terms": {"user_id": linucb_user_id_list}}
                ]
        if query is not None:  # search posts
            multi_match = {
                'query': query,
                'type': 'best_fields',
                'operator': 'or',
                'fields': ["content", "tag_name_list"],
            q["_source"] = {
                "includes": ["id", "highlight", "description"]
            }
            query_function_score["boost_mode"] = "replace"
            tag_ids = get_same_tagset_ids(tag_id)
            query_function_score["query"]["bool"]["should"] = [
                {'multi_match': multi_match},
                {"terms": {"tag_list": tag_ids}},
                {"term": {"user_nick_name_pre": query.lower()}}
            ]
            query_function_score["query"]["bool"]["minimum_should_match"] = 1
            query_function_score["query"]["bool"]["filter"].append(
                {"range": {"content_level": {"gte": 3, "lte": 6}}}
            )
        else:
        if query is None:
            if "must_not" in query_function_score["query"]["bool"]:
                query_function_score["query"]["bool"]["must_not"] += [
                    {"term": {"is_operation_home_recommend": True}}
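Read together, the query branch above ends up with a bool query of roughly this shape. A hedged sketch with placeholder values, not the exact body the method sends; tag_ids would come from get_same_tagset_ids(tag_id) and the surrounding function_score skeleton is assumed to match the hunk:

import json

query = "double eyelid"   # placeholder search term
tag_ids = [15, 27]        # placeholder result of get_same_tagset_ids(tag_id)
multi_match = {
    'query': query,
    'type': 'best_fields',
    'operator': 'or',
    'fields': ["content", "tag_name_list"],
}
bool_query = {
    "should": [
        {'multi_match': multi_match},
        {"terms": {"tag_list": tag_ids}},
        {"term": {"user_nick_name_pre": query.lower()}},
    ],
    "minimum_should_match": 1,  # at least one should clause has to match
    "filter": [
        {"range": {"content_level": {"gte": 3, "lte": 6}}},  # only levels 3 to 6 are searchable
    ],
}
print(json.dumps({"query": {"bool": bool_query}}, indent=2))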
@@ -264,19 +239,26 @@ class TopicUtils(object):
                query_function_score["query"]["bool"]["filter"].append(
                    {"term": {"content_level": 6}}
                )
        q["_source"] = {
            "includes": ["id", "highlight", "description"]
        }
        if query is None:
            if user_id and user_id > 0:
                redis_key_prefix = "physical:user_similar:participant_user_id:"
                similar_redis_key = redis_key_prefix + str(user_id)
                redis_user_similar_data = redis_client.get(similar_redis_key)
                user_similar_list = json.loads(redis_user_similar_data) if redis_user_similar_data else []
                if len(user_similar_list) > 0:
                    functions_list.extend(user_similar_list)
            # if user_id and user_id > 0:
            #     redis_key_prefix = "physical:user_similar:participant_user_id:"
            #     similar_redis_key = redis_key_prefix + str(user_id)
            #     redis_user_similar_data = redis_client.get(similar_redis_key)
            #     user_similar_list = json.loads(redis_user_similar_data) if redis_user_similar_data else []
            #     if len(user_similar_list) > 0:
            #         functions_list.extend(user_similar_list)
            functions_list.append(
                {
                    "gauss": {
                        "create_time": {
                            "scale": "1d",
                            "decay": 0.99
                        }
                    },
                    "weight": 60
                }
            )
            if len(attention_user_id_list) > 0:
                functions_list.append(
                    {
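The branch above reads a per-user similarity payload from Redis and extends functions_list with it. The exact payload format is not shown in this commit, but since it is extended via functions_list.extend() it has to be a list of ready-made function_score entries; a hedged sketch of that round trip with made-up values:

import json

redis_key_prefix = "physical:user_similar:participant_user_id:"
user_id = 12345  # placeholder
similar_functions = [
    {"filter": {"term": {"user_id": 678}}, "weight": 8},   # hypothetical similar authors
    {"filter": {"term": {"user_id": 910}}, "weight": 5},
]

# writer side (whatever job computes the similarity scores) would do roughly:
# redis_client.set(redis_key_prefix + str(user_id), json.dumps(similar_functions))

# reader side, mirroring the hunk:
raw = json.dumps(similar_functions)          # stand-in for redis_client.get(similar_redis_key)
user_similar_list = json.loads(raw) if raw else []
functions_list = []
functions_list.extend(user_similar_list)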
@@ -334,87 +316,81 @@ class TopicUtils(object):
            multi_match = {
                'query': query,
                'type': 'best_fields',
                'operator': 'and',
                'operator': 'or',
                'fields': ["content", "tag_name_list"],
                "analyzer": "gm_default_index"
            }
            functions_list += [
                {
                    "weight": 400,
                    "filter": {
                        "constant_score": {
                            "filter": {
                                "term": {"user_nick_name_pre": query.lower()}
                            }
                        }
                    }
                },
                {
                    "weight": 400,
                    "filter": {
                        "constant_score": {
                            "filter": {
                                "bool": {
                                    "must": {
                                        "term": {"content_level": 6},
                                    },
                                    "minimum_should_match": 1,
                                    "should": [
                                        {'match_phrase': {"content": query}},
                                        {'match_phrase': {"tag_name_list": query}},
                                        # {'multi_match': multi_match},
                                        {"term": {"tag_list": tag_id}},
            query_function_score["boost_mode"] = "replace"
            same_tagset_ids = get_same_tagset_ids(tag_id)
            query_function_score["query"]["bool"]["should"] = [
                {'multi_match': multi_match},
                {"terms": {"tag_list": same_tagset_ids}},
                {"term": {"user_nick_name_pre": query.lower()}}
            ]
            query_function_score["query"]["bool"]["minimum_should_match"] = 1
            query_function_score["query"]["bool"]["filter"].append(
                {"range": {"content_level": {"gte": 3, "lte": 6}}}
            )
            collection_redis_key_name = "physical:official_tag_name_set"
            collect_tag_name_set = set()
            body = {
                'text': query,
                'analyzer': "gm_default_search"
            }
            analyze_res = ESPerform.get_analyze_results(es_cli=ESPerform.get_cli(), sub_index_name="topic", query_body=body)
            for item in analyze_res["tokens"]:
                token_word = item["token"]
                is_member = redis_client.sismember(collection_redis_key_name, token_word)
                if is_member:
                    collect_tag_name_set.add(token_word)
            query_fields = ["content", "tag_name_list"]
            multi_match = {
                'query': query,
                'type': 'best_fields',
                'operator': 'and',
                'fields': query_fields,
            }
                                }
                            }
                        },
            functions_list += [
                {
                    "weight": 400,
                    "filter": {
                        "constant_score": {
                            "filter": {
                                "bool": {
                                    "must": {
                                        "term": {"content_level": 5},
                                    },
                                    "minimum_should_match": 1,
                                    "should": [
                                        {'match_phrase': {"content": query}},
                                        {'match_phrase': {"tag_name_list": query}},
                                        # {'multi_match': multi_match},
                                        {"term": {"tag_list": tag_id}},
                                        {"term": {"user_nick_name_pre": query.lower()}}
                                    ]
                                }
                            }
                    "weight": 10,
                    "filter": {"term": {"language_type": 1}}
                },
                {
                    "weight": 400,
                    "filter": {
                        "constant_score": {
                            "weight": 1000,
                            "filter": {
                                "bool": {
                                    "must": {
                                        "term": {"content_level": 4},
                                    },
                                    "minimum_should_match": 1,
                                    "should": [
                                        {'match_phrase': {"content": query}},
                                        {'match_phrase': {"tag_name_list": query}},
                                        # {'multi_match': multi_match},
                                        {"term": {"tag_list": tag_id}},
                                        {"terms": {"tag_list": same_tagset_ids}},
                                        {"term": {"user_nick_name_pre": query.lower()}}
                                    ]
                                }
                            }
                        }
            ]
            for query_item in collect_tag_name_set:
                for field_item in query_fields:
                    term_dict = {
                        "filter": {
                            "term": {field_item: query_item}
                        },
                        "weight": 5
                    }
            ]
                    functions_list.append(term_dict)
        query_function_score["functions"] = functions_list
        q["query"]["function_score"] = query_function_score
        q["sort"] = [
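The new token loop in this hunk analyzes the query, keeps only tokens that are official collection tag names, and turns each one into a small weight-5 term function per field. A minimal sketch using raw elasticsearch-py and redis-py clients; ESPerform.get_analyze_results is assumed to be a thin wrapper around the _analyze API, and the connection settings and index name here are placeholders:

import redis
from elasticsearch import Elasticsearch

es = Elasticsearch(["http://localhost:9200"])
r = redis.StrictRedis()

query = "double eyelid collarbone"  # placeholder search term
analyze_res = es.indices.analyze(index="topic", body={"text": query, "analyzer": "gm_default_search"})

collect_tag_name_set = set()
for item in analyze_res["tokens"]:
    token_word = item["token"]
    # only keep tokens that are official collection tag names
    if r.sismember("physical:official_tag_name_set", token_word):
        collect_tag_name_set.add(token_word)

# every recognised tag name gets a small weight-5 term function on each field
functions_list = []
for query_item in collect_tag_name_set:
    for field_item in ["content", "tag_name_list"]:
        functions_list.append({"filter": {"term": {field_item: query_item}}, "weight": 5})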
trans2es/management/commands/trans2es_data2es_parallel.py (view file @ 6c96b6f8)
@@ -29,6 +29,12 @@ from search.utils.common import *
from linucb.views.collect_data import CollectData
from injection.data_sync.tasks import sync_user_similar_score
from trans2es.models.tag import Tag
from libs.cache import redis_client
from trans2es.models.tag import TopicTag


class Job(object):
    __es = None
@@ -189,6 +195,20 @@ class Command(BaseCommand):
        except:
            logging.error("catch exception,err_msg:%s" % traceback.format_exc())

    def sync_tag_collecction_name_set(self):
        try:
            collection_redis_key_name = "physical:official_tag_name_set"
            tag_list = TopicTag.objects.filter(is_online=True, is_collection=True).values_list("tag_id", flat=True)
            for tag_id in tag_list:
                tag_name = Tag.objects.filter(id=tag_id, is_online=True, is_deleted=False, is_category=False).values_list("name", flat=True)
                if tag_name and len(tag_name) > 0 and tag_name[0]:
                    redis_client.sadd(collection_redis_key_name, tag_name[0])
        except:
            logging.error("catch exception,err_msg:%s" % traceback.format_exc())

    def handle(self, *args, **options):
        try:
            type_name_list = get_type_info_map().keys()
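sync_tag_collecction_name_set is the writer side of the physical:official_tag_name_set set that search/utils/topic.py now checks with sismember. Roughly equivalent logic against the same models, as a sketch only; using .first() here is just a compact stand-in for taking values_list(...)[0] and is not the method as committed:

from libs.cache import redis_client
from trans2es.models.tag import Tag, TopicTag

def collect_official_tag_names(redis_key="physical:official_tag_name_set"):
    # tag ids of online collection tags
    tag_ids = TopicTag.objects.filter(is_online=True, is_collection=True).values_list("tag_id", flat=True)
    for tag_id in tag_ids:
        name = (Tag.objects
                .filter(id=tag_id, is_online=True, is_deleted=False, is_category=False)
                .values_list("name", flat=True)
                .first())
        if name:
            redis_client.sadd(redis_key, name)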
@@ -211,5 +231,9 @@ class Command(BaseCommand):
            if len(options["sync_type"]) and options["sync_type"] == "similar":
                sync_user_similar_score()
            if len(options["sync_type"]) and options["sync_type"] == "tagname":
                self.sync_tag_collecction_name_set()
        except:
            logging.error("catch exception,err_msg:%s" % traceback.format_exc())
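Assuming the command registers a --sync_type argument in add_arguments (not shown in this hunk), the new branch would be reached with something like `python manage.py trans2es_data2es_parallel --sync_type tagname`, which populates the Redis tag-name set consumed by the search query builder.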