Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in
Toggle navigation
P
physical
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
alpha
physical
Commits
62ab70c8
Commit
62ab70c8
authored
Apr 29, 2019
by
lixiaofang
Browse files
Options
Browse Files
Download
Plain Diff
Merge branch 'test' of git.wanmeizhensuo.com:alpha/physical into test
parents
67997353
c90bab1a
Hide whitespace changes
Inline
Side-by-side
Showing
9 changed files
with
166 additions
and
106 deletions
+166
-106
Project.xml
.idea/codeStyles/Project.xml
+0
-25
misc.xml
.idea/misc.xml
+1
-1
physical.iml
.idea/physical.iml
+1
-1
es.py
libs/es.py
+93
-3
collect_data.py
linucb/views/collect_data.py
+21
-14
topic.py
search/utils/topic.py
+26
-46
tag.py
search/views/tag.py
+3
-3
topic.py
search/views/topic.py
+13
-5
topic.py
trans2es/models/topic.py
+8
-8
No files found.
.idea/codeStyles/Project.xml
View file @
62ab70c8
...
...
@@ -24,29 +24,5 @@
<option
name=
"ITERATION_ELEMENTS_WRAPPING"
value=
"chop_down_if_not_single"
/>
</formatting-settings>
</DBN-SQL>
<DBN-PSQL>
<case-options
enabled=
"false"
>
<option
name=
"KEYWORD_CASE"
value=
"lower"
/>
<option
name=
"FUNCTION_CASE"
value=
"lower"
/>
<option
name=
"PARAMETER_CASE"
value=
"lower"
/>
<option
name=
"DATATYPE_CASE"
value=
"lower"
/>
<option
name=
"OBJECT_CASE"
value=
"preserve"
/>
</case-options>
<formatting-settings
enabled=
"false"
/>
</DBN-PSQL>
<DBN-SQL>
<case-options
enabled=
"false"
>
<option
name=
"KEYWORD_CASE"
value=
"lower"
/>
<option
name=
"FUNCTION_CASE"
value=
"lower"
/>
<option
name=
"PARAMETER_CASE"
value=
"lower"
/>
<option
name=
"DATATYPE_CASE"
value=
"lower"
/>
<option
name=
"OBJECT_CASE"
value=
"preserve"
/>
</case-options>
<formatting-settings
enabled=
"false"
>
<option
name=
"STATEMENT_SPACING"
value=
"one_line"
/>
<option
name=
"CLAUSE_CHOP_DOWN"
value=
"chop_down_if_statement_long"
/>
<option
name=
"ITERATION_ELEMENTS_WRAPPING"
value=
"chop_down_if_not_single"
/>
</formatting-settings>
</DBN-SQL>
</code_scheme>
</component>
\ No newline at end of file
.idea/misc.xml
View file @
62ab70c8
<?xml version="1.0" encoding="UTF-8"?>
<project
version=
"4"
>
<component
name=
"ProjectRootManager"
version=
"2"
project-jdk-name=
"Python 3.6 (
venv
)"
project-jdk-type=
"Python SDK"
/>
<component
name=
"ProjectRootManager"
version=
"2"
project-jdk-name=
"Python 3.6 (
physical1
)"
project-jdk-type=
"Python SDK"
/>
<component
name=
"PyCharmProfessionalAdvertiser"
>
<option
name=
"shown"
value=
"true"
/>
</component>
...
...
.idea/physical.iml
View file @
62ab70c8
...
...
@@ -2,7 +2,7 @@
<module
type=
"PYTHON_MODULE"
version=
"4"
>
<component
name=
"NewModuleRootManager"
>
<content
url=
"file://$MODULE_DIR$"
/>
<orderEntry
type=
"jdk"
jdkName=
"Python 3.6 (
venv
)"
jdkType=
"Python SDK"
/>
<orderEntry
type=
"jdk"
jdkName=
"Python 3.6 (
physical1
)"
jdkType=
"Python SDK"
/>
<orderEntry
type=
"sourceFolder"
forTests=
"false"
/>
</component>
<component
name=
"TestRunnerService"
>
...
...
libs/es.py
View file @
62ab70c8
...
...
@@ -286,6 +286,89 @@ class ESPerform(object):
@classmethod
def get_tag_topic_list(cls, tag_id, have_read_topic_id_list, size=100):
    """Return the ids of topics that match the given tags, best-scored first.

    Builds an Elasticsearch ``function_score`` query against the "topic"
    sub-index that:

    * requires ``content_level`` in [4, 6], ``is_online`` true,
      ``is_deleted`` false and ``tag_list`` matching ``tag_id``;
    * adds a weight of 60 / 50 / 40 for content levels 6 / 5 / 4
      (``score_mode`` and ``boost_mode`` are both "sum");
    * excludes the ids in ``have_read_topic_id_list`` when that list is
      non-empty;
    * sorts by ``_score`` and then ``create_time_val``, both descending.

    :param tag_id: value handed to the ``terms`` filter on ``tag_list``
        (presumably a list of tag ids -- confirm against callers).
    :param have_read_topic_id_list: topic ids to exclude; an empty or
        ``None`` value disables the exclusion.
    :param size: maximum number of hits requested (default 100).
    :return: list of ``item["_source"]["id"]`` values taken from
        ``result_dict["hits"]``; an empty list if anything fails.
    """
    try:
        # One scoring function per content level: level N contributes a
        # weight of N * 10 (6 -> 60, 5 -> 50, 4 -> 40).
        functions_list = [
            {
                "filter": {
                    "constant_score": {
                        "filter": {
                            "term": {"content_level": level}
                        }
                    }
                },
                "weight": level * 10
            }
            for level in (6, 5, 4)
        ]

        q = {
            "query": {
                "function_score": {
                    "query": {
                        "bool": {
                            "must": [
                                {"range": {"content_level": {"gte": 4, "lte": 6}}},
                                {"term": {"is_online": True}},
                                {"term": {"is_deleted": False}},
                                {"terms": {"tag_list": tag_id}}
                            ]
                        }
                    },
                    "boost_mode": "sum",
                    "score_mode": "sum",
                    "functions": functions_list
                }
            },
            "_source": {
                "include": ["id"]
            },
            "sort": [
                {"_score": {"order": "desc"}},
                {"create_time_val": {"order": "desc"}}
            ]
        }

        # Only add the exclusion clause when there is something to exclude;
        # a None argument is treated the same as an empty list.
        if have_read_topic_id_list:
            q["query"]["function_score"]["query"]["bool"]["must_not"] = {
                "terms": {
                    "id": have_read_topic_id_list
                }
            }

        result_dict = ESPerform.get_search_results(
            ESPerform.get_cli(),
            sub_index_name="topic",
            query_body=q,
            offset=0,
            size=size,
            routing="4,5,6"
        )

        topic_id_list = [item["_source"]["id"] for item in result_dict["hits"]]
        logging.info("topic_id_list:%s", topic_id_list)
        return topic_id_list
    except Exception:
        # Best-effort lookup: log the traceback and fall back to an empty
        # result. SystemExit / KeyboardInterrupt are deliberately not caught.
        logging.error("catch exception,err_msg:%s", traceback.format_exc())
        return list()
@classmethod
def
get_tag_topic_list_dict
(
cls
,
tag_id
,
have_read_topic_id_list
,
size
=
100
):
try
:
functions_list
=
list
()
for
id
in
tag_id
:
...
...
@@ -328,7 +411,7 @@ class ESPerform(object):
}
},
"_source"
:
{
"include"
:
[
"id"
]
"include"
:
[
"id"
,
"user_id"
]
},
"sort"
:
[
{
"_score"
:
{
"order"
:
"desc"
}},
...
...
@@ -346,8 +429,15 @@ class ESPerform(object):
offset
=
0
,
size
=
size
,
routing
=
"4,5,6"
)
topic_id_list
=
[
item
[
"_source"
][
"id"
]
for
item
in
result_dict
[
"hits"
]]
logging
.
info
(
"topic_id_list:
%
s"
%
str
(
topic_id_list
))
return
topic_id_list
# logging.info("topic_id_list:%s" % str(topic_id_list))
# topic_id_dict = [{str(item["_source"]["id"]):item["_source"]["user_id"]} for item in result_dict["hits"]]
topic_id_dict
=
dict
()
for
item
in
result_dict
[
"hits"
]:
topic_id_dict
[
str
(
item
[
"_source"
][
"id"
])]
=
item
[
"_source"
][
"user_id"
]
logging
.
info
(
"topic_id_list:
%
s"
%
str
(
topic_id_dict
))
return
topic_id_list
,
topic_id_dict
except
:
logging
.
error
(
"catch exception,err_msg:
%
s"
%
traceback
.
format_exc
())
return
list
()
linucb/views/collect_data.py
View file @
62ab70c8
...
...
@@ -74,18 +74,23 @@ class CollectData(object):
have_read_topic_id_list
.
extend
(
promote_recommend_topic_id_list
)
recommend_topic_id_list
=
list
()
recommend_topic_id_list_dict
=
dict
()
recommend_topic_id_list_click
=
list
()
recommend_topic_id_list_click_dict
=
dict
()
if
click_topic_tag_list
:
if
len
(
click_topic_tag_list
)
>
0
:
recommend_topic_id_list_click
=
ESPerform
.
get_tag_topic_lis
t
(
click_topic_tag_list
,
recommend_topic_id_list_click
,
recommend_topic_id_list_click_dict
=
ESPerform
.
get_tag_topic_list_dic
t
(
click_topic_tag_list
,
have_read_topic_id_list
,
size
=
2
)
if
len
(
recommend_topic_id_list_click
)
>
0
:
recommend_topic_id_list
.
extend
(
recommend_topic_id_list_click
)
have_read_topic_id_list
.
extend
(
recommend_topic_id_list
)
recommend_topic_id_list_dict
.
update
(
recommend_topic_id_list_click_dict
)
have_read_topic_id_list
.
extend
(
recommend_topic_id_list_click
)
click_recommend_redis_key
=
self
.
click_recommend_redis_key_prefix
+
str
(
device_id
)
click_redis_data_dict
=
{
"data"
:
json
.
dumps
(
recommend_topic_id_list
),
"datadict"
:
json
.
dumps
(
recommend_topic_id_list_dict
),
"cursor"
:
0
}
redis_client
.
hmset
(
click_recommend_redis_key
,
click_redis_data_dict
)
...
...
@@ -101,28 +106,32 @@ class CollectData(object):
b
"data"
]
else
[]
cursor
=
int
(
str
(
redis_topic_data_dict
[
b
"cursor"
],
encoding
=
"utf-8"
))
if
len
(
recommend_topic_id_list
)
==
0
and
cursor
==
0
and
len
(
redis_topic_list
)
>
0
:
have_read_topic_id_list
.
extend
(
redis_topic_list
[:
2
])
if
len
(
tag_id_list
)
>
0
:
if
len
(
new_user_click_tag_list
)
>
0
:
tag_topic_id_list
=
ESPerform
.
get_tag_topic_list
(
new_user_click_tag_list
,
have_read_topic_id_list
)
logging
.
warning
(
"tag_topic_id_list:
%
s"
%
str
(
new_user_click_tag_list
))
logging
.
warning
(
"tag_id_list:
%
s"
%
str
(
tag_id_list
))
else
:
tag_topic_id_list
=
ESPerform
.
get_tag_topic_list
(
tag_id_list
,
have_read_topic_id_list
)
tag_topic_dict
=
dict
()
if
len
(
new_user_click_tag_list
)
>
0
:
tag_topic_id_list
,
tag_topic_dict
=
ESPerform
.
get_tag_topic_list_dict
(
new_user_click_tag_list
,
have_read_topic_id_list
)
else
:
tag_topic_id_list
,
tag_topic_dict
=
ESPerform
.
get_tag_topic_list_dict
(
tag_id_list
,
have_read_topic_id_list
)
if
len
(
recommend_topic_id_list
)
>
0
or
len
(
new_user_click_tag_list
)
>
0
:
tag_topic_id_list
=
recommend_topic_id_list
+
tag_topic_id_list
tag_topic_dict
=
recommend_topic_id_list_dict
.
update
(
tag_topic_dict
)
logging
.
warning
(
"catch exception,err_msg:
%
s"
%
str
(
tag_topic_dict
))
redis_data_dict
=
{
"data"
:
json
.
dumps
(
tag_topic_id_list
),
"datadict"
:
json
.
dumps
(
tag_topic_dict
),
"cursor"
:
0
}
redis_client
.
hmset
(
topic_recommend_redis_key
,
redis_data_dict
)
else
:
if
cursor
<=
0
and
len
(
redis_topic_list
)
>
0
:
tag_topic_id_list
=
redis_topic_list
[:
2
]
+
tag_topic_id_list
tag_topic_id_list
=
list
(
set
(
tag_topic_id_list
))
tag_topic_dict
=
list
()
tag_topic_dict
=
redis_topic_list
[:
2
]
tag_topic_dict
=
list
(
set
(
tag_topic_dict
))
return
True
except
:
...
...
@@ -251,9 +260,7 @@ class CollectData(object):
else
:
tagid_list
=
list
()
logging
.
warning
(
"unknown type msg:
%
s"
%
raw_val_dict
.
get
(
"type"
,
"missing type"
))
logging
.
info
(
"consume click topic_id:
%
s,device_id:
%
s"
%
(
str
(
tagid_list
),
str
(
device_id
)))
device_id
=
raw_val_dict
[
"device"
][
"device_id"
]
user_id
=
raw_val_dict
[
"user_id"
]
if
"user_id"
in
raw_val_dict
else
None
...
...
search/utils/topic.py
View file @
62ab70c8
...
...
@@ -122,7 +122,7 @@ class TopicUtils(object):
@classmethod
def
get_recommend_topic_ids
(
cls
,
user_id
,
tag_id
,
offset
,
size
,
single_size
,
query
=
None
,
query_type
=
TopicPageType
.
FIND_PAGE
,
filter_topic_id_list
=
[],
test_score
=
False
,
must_topic_id_list
=
[],
recommend_tag_list
=
[],
user_similar_score_list
=
[],
index_type
=
"topic"
,
routing
=
None
,
attention_tag_list
=
[],
current_topic_id
=-
1
,
topic_tag_list
=
[],
topic_user_id
=-
1
):
user_similar_score_list
=
[],
index_type
=
"topic"
,
routing
=
None
,
attention_tag_list
=
[],
linucb_user_id_list
=
[]
):
"""
:remark:获取首页推荐帖子列表
:param user_id:
...
...
@@ -177,32 +177,28 @@ class TopicUtils(object):
},
{
"filter"
:
{
"term"
:
{
"content_level"
:
6
}
},
"weight"
:
1000
},
{
"filter"
:
{
"term"
:
{
"is_excellent"
:
1
"constant_score"
:{
"filter"
:{
"term"
:
{
"content_level"
:
6
}
}
}
},
"weight"
:
5
00
"weight"
:
6
00
}
]
if
len
(
user_similar_score_list
)
>
0
:
for
item
in
user_similar_score_list
[:
100
]:
score_item
=
2
+
item
[
1
]
functions_list
.
append
(
{
"filter"
:
{
"bool"
:
{
"should"
:
{
"term"
:
{
"user_id"
:
item
[
0
]}}}},
"weight"
:
score_item
,
}
)
#
if len(user_similar_score_list) > 0:
#
for item in user_similar_score_list[:100]:
#
score_item = 2 + item[1]
#
functions_list.append(
#
{
#
"filter": {"bool": {
#
"should": {"term": {"user_id": item[0]}}}},
#
"weight": score_item,
#
}
#
)
if
len
(
attention_user_id_list
)
>
0
:
functions_list
.
append
(
...
...
@@ -220,25 +216,7 @@ class TopicUtils(object):
"weight"
:
100
}
)
if
current_topic_id
!=
-
1
:
if
len
(
topic_tag_list
)
>
0
:
functions_list
.
append
(
{
"filter"
:
{
"bool"
:
{
"should"
:
{
"terms"
:
{
"tag_list"
:
topic_tag_list
}}}},
"weight"
:
2000
}
)
if
topic_user_id
!=
-
1
:
functions_list
.
append
(
{
"filter"
:
{
"bool"
:
{
"should"
:
{
"term"
:
{
"user_id"
:
topic_user_id
}}}},
"weight"
:
1500
}
)
query_function_score
=
{
"query"
:
{
"bool"
:
{
...
...
@@ -281,12 +259,14 @@ class TopicUtils(object):
query_function_score
[
"query"
][
"bool"
][
"must_not"
]
=
[
{
"terms"
:{
"id"
:
filter_topic_id_list
}}
]
if
current_topic_id
!=
-
1
:
query_function_score
[
"query"
][
"bool"
][
"must_not"
]
=
[{
"term"
:
{
"id"
:
current_topic_id
}
}]
if
"must_not"
in
query_function_score
[
"query"
][
"bool"
]:
query_function_score
[
"query"
][
"bool"
][
"must_not"
]
+=
[
{
"terms"
:
{
"user_id"
:
linucb_user_id_list
}}
]
else
:
query_function_score
[
"query"
][
"bool"
][
"must_not"
]
=
[
{
"term"
:
{
"user_id"
:
linucb_user_id_list
}}
]
if
query
is
not
None
:
# 搜索帖子
multi_fields
=
{
'description'
:
200
,
...
...
search/views/tag.py
View file @
62ab70c8
...
...
@@ -155,7 +155,7 @@ def choice_push_tag(device_id, user_id):
@bind
(
"physical/search/identity_tag_name"
)
def
identity_tag_name
(
topic_content
):
try
:
ret_tag_
list
=
lis
t
()
ret_tag_
set
=
se
t
()
redis_key_name
=
"physical:tag_name_set"
body
=
{
...
...
@@ -171,9 +171,9 @@ def identity_tag_name(topic_content):
token_word
=
item
[
"token"
]
is_member
=
redis_client
.
sismember
(
redis_key_name
,
token_word
)
if
is_member
:
ret_tag_
list
.
appen
d
(
token_word
)
ret_tag_
set
.
ad
d
(
token_word
)
return
{
"tag_name_list"
:
ret_tag_list
}
return
{
"tag_name_list"
:
list
(
ret_tag_set
)
}
except
:
logging
.
error
(
"catch exception,err_msg:
%
s"
%
traceback
.
format_exc
())
return
{
"tag_name_list"
:
[]}
...
...
search/views/topic.py
View file @
62ab70c8
...
...
@@ -73,10 +73,10 @@ def get_home_recommend_topic_ids(user_id, device_id, tag_id, offset, size, query
if
offset
>
0
:
# 首次搜索时不需要过滤已读
have_read_topic_id_list
=
list
(
json
.
loads
(
redis_field_val_list
[
0
]))
user_similar_score_redis_key
=
"physical:user_similar_score:user_id:"
+
str
(
user_id
)
redis_user_similar_score_redis_val
=
redis_client
.
get
(
user_similar_score_redis_key
)
user_similar_score_redis_list
=
json
.
loads
(
redis_user_similar_score_redis_val
)
if
redis_user_similar_score_redis_val
else
[]
#
user_similar_score_redis_key = "physical:user_similar_score:user_id:" + str(user_id)
#
redis_user_similar_score_redis_val = redis_client.get(user_similar_score_redis_key)
#
user_similar_score_redis_list = json.loads(
#
redis_user_similar_score_redis_val) if redis_user_similar_score_redis_val else []
attention_tag_list
=
list
()
recommend_topic_list
=
list
()
...
...
@@ -87,6 +87,8 @@ def get_home_recommend_topic_ids(user_id, device_id, tag_id, offset, size, query
recommend_topic_dict
=
redis_client
.
hgetall
(
topic_recommend_redis_key
)
if
b
"data"
in
recommend_topic_dict
:
recommend_topic_id_list
=
json
.
loads
(
recommend_topic_dict
[
b
"data"
])
# 推荐帖子是强插的,要保证推荐帖子不在已读里
recommend_topic_id_list
=
list
(
set
(
recommend_topic_id_list
)
-
set
(
have_read_topic_id_list
))
cursor
=
int
(
str
(
recommend_topic_dict
[
b
"cursor"
],
encoding
=
"utf-8"
))
...
...
@@ -95,6 +97,12 @@ def get_home_recommend_topic_ids(user_id, device_id, tag_id, offset, size, query
recommend_topic_list
=
recommend_topic_id_list
[
cursor
:
newcursor
]
redis_client
.
hset
(
topic_recommend_redis_key
,
"cursor"
,
newcursor
)
recommend_topic_user_list
=
list
()
if
b
"datadict"
in
recommend_topic_dict
:
recommend_topic_id_dict
=
json
.
loads
(
recommend_topic_dict
[
b
"datadict"
])
if
len
(
recommend_topic_list
)
==
6
:
for
i
in
recommend_topic_list
:
recommend_topic_user_list
.
append
(
recommend_topic_id_dict
[
str
(
i
)])
# 用户关注标签
redis_tag_data
=
redis_client
.
hget
(
"physical:linucb:register_user_tag_info"
,
user_id
)
attention_tag_list
=
json
.
loads
(
redis_tag_data
)
if
redis_tag_data
else
[]
...
...
@@ -114,7 +122,7 @@ def get_home_recommend_topic_ids(user_id, device_id, tag_id, offset, size, query
rank_topic_id_list
=
TopicUtils
.
get_recommend_topic_ids
(
user_id
=
user_id
,
tag_id
=
tag_id
,
offset
=
0
,
size
=
size
,
single_size
=
size
,
query
=
query
,
query_type
=
query_type
,
filter_topic_id_list
=
have_read_topic_id_list
,
user_similar_score_list
=
user_similar_score_redis_list
,
index_type
=
"topic-high-star"
,
routing
=
"4,5,6"
,
attention_tag_list
=
attention_tag
_list
)
index_type
=
"topic-high-star"
,
routing
=
"4,5,6"
,
attention_tag_list
=
attention_tag_list
,
linucb_user_id_list
=
recommend_topic_user
_list
)
if
len
(
recommend_topic_list
)
==
6
and
query
is
None
:
if
(
size
<
11
):
...
...
trans2es/models/topic.py
View file @
62ab70c8
...
...
@@ -209,7 +209,7 @@ class Topic(models.Model):
user_query_results
=
UserExtra
.
objects
.
using
(
settings
.
SLAVE_DB_NAME
)
.
filter
(
user_id
=
self
.
user_id
)
if
user_query_results
.
count
()
>
0
:
if
user_query_results
[
0
]
.
is_recommend
:
offline_score
+=
2.0
offline_score
+=
2.0
*
10
elif
user_query_results
[
0
]
.
is_shadow
:
user_is_shadow
=
True
...
...
@@ -219,18 +219,18 @@ class Topic(models.Model):
# 帖子等级
if
self
.
content_level
==
'5'
:
offline_score
+=
6.0
offline_score
+=
100.0
*
3
elif
self
.
content_level
==
'4'
:
offline_score
+=
5.0
offline_score
+=
60.0
*
3
elif
self
.
content_level
==
'6'
:
offline_score
+=
400.0
offline_score
+=
200.0
*
3
is_excellent
=
self
.
judge_if_excellent_topic
(
self
.
id
)
if
is_excellent
:
offline_score
+=
200.0
#
is_excellent = self.judge_if_excellent_topic(self.id)
#
if is_excellent:
#
offline_score += 200.0
if
self
.
language_type
==
1
:
offline_score
+=
10
0.0
offline_score
+=
6
0.0
# exposure_count = ActionSumAboutTopic.objects.using(settings.SLAVE_DB_NAME).filter(topic_id=self.id, data_type=1).count()
# click_count = ActionSumAboutTopic.objects.using(settings.SLAVE_DB_NAME).filter(topic_id=self.id, data_type=2).count()
# uv_num = ActionSumAboutTopic.objects.using(settings.SLAVE_DB_NAME).filter(topic_id=self.id, data_type=3).count()
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment