Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in
Toggle navigation
P
physical
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
alpha
physical
Commits
d10f2829
Commit
d10f2829
authored
Apr 16, 2019
by
haowang
Browse files
Options
Browse Files
Download
Plain Diff
Merge branch 'master' of
http://git.wanmeizhensuo.com/alpha/physical
into haow/dev
parents
c10eb54e
10ced1c0
Hide whitespace changes
Inline
Side-by-side
Showing
25 changed files
with
501 additions
and
556 deletions
+501
-556
misc.xml
.idea/misc.xml
+1
-1
physical.iml
.idea/physical.iml
+1
-1
workspace.xml
.idea/workspace.xml
+0
-332
tasks.py
injection/data_sync/tasks.py
+18
-14
es.py
libs/es.py
+50
-24
register_user_tag.py
linucb/utils/register_user_tag.py
+48
-40
collect_data.py
linucb/views/collect_data.py
+78
-29
linucb.py
linucb/views/linucb.py
+7
-2
group.py
search/utils/group.py
+33
-29
topic.py
search/utils/topic.py
+11
-12
group.py
search/views/group.py
+48
-4
tag.py
search/views/tag.py
+8
-28
topic.py
search/views/topic.py
+29
-5
pictorial.json
trans2es/mapping/pictorial.json
+3
-2
topic-high-star.json
trans2es/mapping/topic-high-star.json
+4
-2
topic-star-routing.json
trans2es/mapping/topic-star-routing.json
+4
-2
topic-v1.json
trans2es/mapping/topic-v1.json
+2
-0
topic.json
trans2es/mapping/topic.json
+3
-2
pictorial.py
trans2es/models/pictorial.py
+20
-0
tag.py
trans2es/models/tag.py
+2
-0
topic.py
trans2es/models/topic.py
+38
-16
type_info.py
trans2es/type_info.py
+47
-10
excellect_topic_transfer.py
trans2es/utils/excellect_topic_transfer.py
+34
-0
pictorial_transfer.py
trans2es/utils/pictorial_transfer.py
+1
-0
topic_transfer.py
trans2es/utils/topic_transfer.py
+11
-1
No files found.
.idea/misc.xml
View file @
d10f2829
<?xml version="1.0" encoding="UTF-8"?>
<project
version=
"4"
>
<component
name=
"ProjectRootManager"
version=
"2"
project-jdk-name=
"Python 3.6 (
venv
)"
project-jdk-type=
"Python SDK"
/>
<component
name=
"ProjectRootManager"
version=
"2"
project-jdk-name=
"Python 3.6 (
physical1
)"
project-jdk-type=
"Python SDK"
/>
<component
name=
"PyCharmProfessionalAdvertiser"
>
<option
name=
"shown"
value=
"true"
/>
</component>
...
...
.idea/physical.iml
View file @
d10f2829
...
...
@@ -2,7 +2,7 @@
<module
type=
"PYTHON_MODULE"
version=
"4"
>
<component
name=
"NewModuleRootManager"
>
<content
url=
"file://$MODULE_DIR$"
/>
<orderEntry
type=
"
inheritedJdk
"
/>
<orderEntry
type=
"
jdk"
jdkName=
"Python 3.6 (physical1)"
jdkType=
"Python SDK
"
/>
<orderEntry
type=
"sourceFolder"
forTests=
"false"
/>
</component>
<component
name=
"TestRunnerService"
>
...
...
.idea/workspace.xml
deleted
100644 → 0
View file @
c10eb54e
<?xml version="1.0" encoding="UTF-8"?>
<project
version=
"4"
>
<component
name=
"ChangeListManager"
>
<list
default=
"true"
id=
"d7dd36ca-85ef-4a59-9db5-8b1ee4993a4e"
name=
"Default Changelist"
comment=
""
>
<change
beforePath=
"$PROJECT_DIR$/.idea/workspace.xml"
beforeDir=
"false"
afterPath=
"$PROJECT_DIR$/.idea/workspace.xml"
afterDir=
"false"
/>
<change
beforePath=
"$PROJECT_DIR$/trans2es/models/topic.py"
beforeDir=
"false"
afterPath=
"$PROJECT_DIR$/trans2es/models/topic.py"
afterDir=
"false"
/>
</list>
<option
name=
"EXCLUDED_CONVERTED_TO_IGNORED"
value=
"true"
/>
<option
name=
"SHOW_DIALOG"
value=
"false"
/>
<option
name=
"HIGHLIGHT_CONFLICTS"
value=
"true"
/>
<option
name=
"HIGHLIGHT_NON_ACTIVE_CHANGELIST"
value=
"false"
/>
<option
name=
"LAST_RESOLUTION"
value=
"IGNORE"
/>
</component>
<component
name=
"FUSProjectUsageTrigger"
>
<session
id=
"-127591351"
>
<usages-collector
id=
"statistics.lifecycle.project"
>
<counts>
<entry
key=
"project.open.time.1"
value=
"1"
/>
<entry
key=
"project.opened"
value=
"1"
/>
</counts>
</usages-collector>
<usages-collector
id=
"statistics.file.extensions.open"
>
<counts>
<entry
key=
"py"
value=
"5"
/>
</counts>
</usages-collector>
<usages-collector
id=
"statistics.file.types.open"
>
<counts>
<entry
key=
"Python"
value=
"5"
/>
</counts>
</usages-collector>
<usages-collector
id=
"statistics.file.extensions.edit"
>
<counts>
<entry
key=
"py"
value=
"28"
/>
</counts>
</usages-collector>
<usages-collector
id=
"statistics.file.types.edit"
>
<counts>
<entry
key=
"Python"
value=
"28"
/>
</counts>
</usages-collector>
</session>
<session
id=
"1569122105"
>
<usages-collector
id=
"statistics.file.extensions.open"
>
<counts>
<entry
key=
"py"
value=
"2"
/>
<entry
key=
"template"
value=
"1"
/>
<entry
key=
"xml"
value=
"1"
/>
</counts>
</usages-collector>
<usages-collector
id=
"statistics.file.types.open"
>
<counts>
<entry
key=
"PLAIN_TEXT"
value=
"1"
/>
<entry
key=
"Python"
value=
"2"
/>
<entry
key=
"XML"
value=
"1"
/>
</counts>
</usages-collector>
</session>
<session
id=
"-609148713"
>
<usages-collector
id=
"statistics.file.extensions.edit"
>
<counts>
<entry
key=
"py"
value=
"3"
/>
</counts>
</usages-collector>
<usages-collector
id=
"statistics.file.types.edit"
>
<counts>
<entry
key=
"Python"
value=
"3"
/>
</counts>
</usages-collector>
</session>
</component>
<component
name=
"FileEditorManager"
>
<leaf>
<file
pinned=
"false"
current-in-tab=
"false"
>
<entry
file=
"file://$PROJECT_DIR$/trans2es/mapping/topic.json"
>
<provider
selected=
"true"
editor-type-id=
"text-editor"
>
<state
relative-caret-position=
"105"
>
<caret
line=
"7"
column=
"19"
selection-start-line=
"7"
selection-start-column=
"5"
selection-end-line=
"7"
selection-end-column=
"19"
/>
</state>
</provider>
</entry>
</file>
<file
pinned=
"false"
current-in-tab=
"true"
>
<entry
file=
"file://$PROJECT_DIR$/trans2es/models/topic.py"
>
<provider
selected=
"true"
editor-type-id=
"text-editor"
>
<state
relative-caret-position=
"270"
>
<caret
line=
"256"
column=
"35"
selection-start-line=
"256"
selection-start-column=
"35"
selection-end-line=
"256"
selection-end-column=
"35"
/>
</state>
</provider>
</entry>
</file>
<file
pinned=
"false"
current-in-tab=
"false"
>
<entry
file=
"file://$PROJECT_DIR$/trans2es/utils/topic_transfer.py"
>
<provider
selected=
"true"
editor-type-id=
"text-editor"
>
<state
relative-caret-position=
"243"
>
<caret
line=
"112"
selection-start-line=
"112"
selection-end-line=
"112"
/>
</state>
</provider>
</entry>
</file>
</leaf>
</component>
<component
name=
"Git.Settings"
>
<option
name=
"RECENT_GIT_ROOT_PATH"
value=
"$PROJECT_DIR$"
/>
</component>
<component
name=
"IdeDocumentHistory"
>
<option
name=
"CHANGED_PATHS"
>
<list>
<option
value=
"$PROJECT_DIR$/trans2es/management/commands/trans2es_data2es_parallel.py"
/>
<option
value=
"$PROJECT_DIR$/trans2es/models/face_user_contrast_similar.py"
/>
<option
value=
"$PROJECT_DIR$/search/views/topic.py"
/>
<option
value=
"$PROJECT_DIR$/search/utils/topic.py"
/>
<option
value=
"$PROJECT_DIR$/physical/settings.py"
/>
<option
value=
"$PROJECT_DIR$/trans2es/models/topic.py"
/>
</list>
</option>
</component>
<component
name=
"ProjectFrameBounds"
extendedState=
"1"
>
<option
name=
"y"
value=
"23"
/>
<option
name=
"width"
value=
"1280"
/>
<option
name=
"height"
value=
"724"
/>
</component>
<component
name=
"ProjectInspectionProfilesVisibleTreeState"
>
<entry
key=
"Project Default"
>
<profile-state>
<expanded-state>
<State
/>
<State>
<id>
General
</id>
</State>
</expanded-state>
<selected-state>
<State>
<id>
Buildout
</id>
</State>
</selected-state>
</profile-state>
</entry>
</component>
<component
name=
"ProjectView"
>
<navigator
proportions=
""
version=
"1"
>
<foldersAlwaysOnTop
value=
"true"
/>
</navigator>
<panes>
<pane
id=
"Scope"
/>
<pane
id=
"ProjectPane"
>
<subPane>
<expand>
<path>
<item
name=
"physical"
type=
"b2602c69:ProjectViewProjectNode"
/>
<item
name=
"physical"
type=
"462c0819:PsiDirectoryNode"
/>
</path>
<path>
<item
name=
"physical"
type=
"b2602c69:ProjectViewProjectNode"
/>
<item
name=
"physical"
type=
"462c0819:PsiDirectoryNode"
/>
<item
name=
"physical"
type=
"462c0819:PsiDirectoryNode"
/>
</path>
<path>
<item
name=
"physical"
type=
"b2602c69:ProjectViewProjectNode"
/>
<item
name=
"physical"
type=
"462c0819:PsiDirectoryNode"
/>
<item
name=
"trans2es"
type=
"462c0819:PsiDirectoryNode"
/>
</path>
<path>
<item
name=
"physical"
type=
"b2602c69:ProjectViewProjectNode"
/>
<item
name=
"physical"
type=
"462c0819:PsiDirectoryNode"
/>
<item
name=
"trans2es"
type=
"462c0819:PsiDirectoryNode"
/>
<item
name=
"models"
type=
"462c0819:PsiDirectoryNode"
/>
</path>
<path>
<item
name=
"physical"
type=
"b2602c69:ProjectViewProjectNode"
/>
<item
name=
"physical"
type=
"462c0819:PsiDirectoryNode"
/>
<item
name=
"trans2es"
type=
"462c0819:PsiDirectoryNode"
/>
<item
name=
"utils"
type=
"462c0819:PsiDirectoryNode"
/>
</path>
</expand>
<select
/>
</subPane>
</pane>
</panes>
</component>
<component
name=
"PropertiesComponent"
>
<property
name=
"last_opened_file_path"
value=
"$PROJECT_DIR$"
/>
<property
name=
"settings.editor.selected.configurable"
value=
"com.jetbrains.python.configuration.PyActiveSdkModuleConfigurable"
/>
</component>
<component
name=
"RecentsManager"
>
<key
name=
"MoveFile.RECENT_KEYS"
>
<recent
name=
"$PROJECT_DIR$/search/views"
/>
</key>
</component>
<component
name=
"RunDashboard"
>
<option
name=
"ruleStates"
>
<list>
<RuleState>
<option
name=
"name"
value=
"ConfigurationTypeDashboardGroupingRule"
/>
</RuleState>
<RuleState>
<option
name=
"name"
value=
"StatusDashboardGroupingRule"
/>
</RuleState>
</list>
</option>
</component>
<component
name=
"SvnConfiguration"
>
<configuration
/>
</component>
<component
name=
"TaskManager"
>
<task
active=
"true"
id=
"Default"
summary=
"Default task"
>
<changelist
id=
"d7dd36ca-85ef-4a59-9db5-8b1ee4993a4e"
name=
"Default Changelist"
comment=
""
/>
<created>
1548319196437
</created>
<option
name=
"number"
value=
"Default"
/>
<option
name=
"presentableId"
value=
"Default"
/>
<updated>
1548319196437
</updated>
</task>
<servers
/>
</component>
<component
name=
"ToolWindowManager"
>
<frame
x=
"0"
y=
"23"
width=
"1280"
height=
"724"
extended-state=
"1"
/>
<layout>
<window_info
content_ui=
"combo"
id=
"Project"
order=
"0"
visible=
"true"
weight=
"0.2494043"
/>
<window_info
id=
"Structure"
order=
"1"
side_tool=
"true"
weight=
"0.25"
/>
<window_info
id=
"DB Browser"
order=
"2"
/>
<window_info
id=
"Favorites"
order=
"3"
side_tool=
"true"
/>
<window_info
anchor=
"bottom"
id=
"Message"
order=
"0"
/>
<window_info
anchor=
"bottom"
id=
"Find"
order=
"1"
/>
<window_info
anchor=
"bottom"
id=
"Run"
order=
"2"
/>
<window_info
anchor=
"bottom"
id=
"Debug"
order=
"3"
weight=
"0.4"
/>
<window_info
anchor=
"bottom"
id=
"Cvs"
order=
"4"
weight=
"0.25"
/>
<window_info
anchor=
"bottom"
id=
"Inspection"
order=
"5"
weight=
"0.4"
/>
<window_info
anchor=
"bottom"
id=
"TODO"
order=
"6"
/>
<window_info
anchor=
"bottom"
id=
"Version Control"
order=
"7"
show_stripe_button=
"false"
/>
<window_info
anchor=
"bottom"
id=
"DB Execution Console"
order=
"8"
/>
<window_info
active=
"true"
anchor=
"bottom"
id=
"Terminal"
order=
"9"
visible=
"true"
weight=
"0.16420361"
/>
<window_info
anchor=
"bottom"
id=
"Python Console"
order=
"10"
/>
<window_info
anchor=
"bottom"
id=
"Event Log"
order=
"11"
side_tool=
"true"
/>
<window_info
anchor=
"right"
id=
"Commander"
internal_type=
"SLIDING"
order=
"0"
type=
"SLIDING"
weight=
"0.4"
/>
<window_info
anchor=
"right"
id=
"Ant Build"
order=
"1"
weight=
"0.25"
/>
<window_info
anchor=
"right"
content_ui=
"combo"
id=
"Hierarchy"
order=
"2"
weight=
"0.25"
/>
</layout>
</component>
<component
name=
"VcsContentAnnotationSettings"
>
<option
name=
"myLimit"
value=
"2678400000"
/>
</component>
<component
name=
"editorHistoryManager"
>
<entry
file=
"file://$PROJECT_DIR$/trans2es/models/face_user_contrast_similar.py"
>
<provider
selected=
"true"
editor-type-id=
"text-editor"
>
<state
relative-caret-position=
"240"
>
<caret
line=
"21"
column=
"26"
selection-start-line=
"21"
selection-start-column=
"26"
selection-end-line=
"21"
selection-end-column=
"26"
/>
</state>
</provider>
</entry>
<entry
file=
"file://$PROJECT_DIR$/trans2es/management/commands/trans2es_data2es_parallel.py"
>
<provider
selected=
"true"
editor-type-id=
"text-editor"
>
<state
relative-caret-position=
"364"
>
<caret
line=
"129"
column=
"63"
selection-start-line=
"129"
selection-start-column=
"54"
selection-end-line=
"129"
selection-end-column=
"63"
/>
</state>
</provider>
</entry>
<entry
file=
"file://$PROJECT_DIR$/search/utils/topic.py"
>
<provider
selected=
"true"
editor-type-id=
"text-editor"
>
<state
relative-caret-position=
"1760"
>
<caret
line=
"293"
column=
"66"
selection-start-line=
"293"
selection-start-column=
"66"
selection-end-line=
"293"
selection-end-column=
"66"
/>
<folding>
<element
signature=
"e#47#61#0"
expanded=
"true"
/>
<element
signature=
"e#363#1043#0"
/>
</folding>
</state>
</provider>
</entry>
<entry
file=
"file://$PROJECT_DIR$/app_conf.xml"
>
<provider
selected=
"true"
editor-type-id=
"text-editor"
/>
</entry>
<entry
file=
"file://$PROJECT_DIR$/physical/settings_local.py.template"
>
<provider
selected=
"true"
editor-type-id=
"text-editor"
>
<state
relative-caret-position=
"-1987"
/>
</provider>
</entry>
<entry
file=
"file://$PROJECT_DIR$/search/views/contrast_similar.py"
>
<provider
selected=
"true"
editor-type-id=
"text-editor"
/>
</entry>
<entry
file=
"file://$PROJECT_DIR$/search/views/topic.py"
>
<provider
selected=
"true"
editor-type-id=
"text-editor"
>
<state
relative-caret-position=
"209"
>
<caret
line=
"85"
column=
"46"
selection-start-line=
"85"
selection-start-column=
"46"
selection-end-line=
"85"
selection-end-column=
"46"
/>
<folding>
<element
signature=
"e#463#1573#0"
/>
<element
signature=
"e#6049#6240#0"
/>
<element
signature=
"e#7051#7614#0"
/>
<element
signature=
"e#8492#10033#0"
/>
<element
signature=
"e#10162#10820#0"
/>
<element
signature=
"e#10934#11363#0"
/>
</folding>
</state>
</provider>
</entry>
<entry
file=
"file://$PROJECT_DIR$/physical/settings.py"
>
<provider
selected=
"true"
editor-type-id=
"text-editor"
>
<state
relative-caret-position=
"-2448"
>
<caret
line=
"12"
column=
"9"
lean-forward=
"true"
selection-start-line=
"12"
selection-start-column=
"9"
selection-end-line=
"12"
selection-end-column=
"9"
/>
</state>
</provider>
</entry>
<entry
file=
"file://$PROJECT_DIR$/trans2es/mapping/topic.json"
>
<provider
selected=
"true"
editor-type-id=
"text-editor"
>
<state
relative-caret-position=
"105"
>
<caret
line=
"7"
column=
"19"
selection-start-line=
"7"
selection-start-column=
"5"
selection-end-line=
"7"
selection-end-column=
"19"
/>
</state>
</provider>
</entry>
<entry
file=
"file://$PROJECT_DIR$/trans2es/models/topic.py"
>
<provider
selected=
"true"
editor-type-id=
"text-editor"
>
<state
relative-caret-position=
"270"
>
<caret
line=
"256"
column=
"35"
selection-start-line=
"256"
selection-start-column=
"35"
selection-end-line=
"256"
selection-end-column=
"35"
/>
</state>
</provider>
</entry>
</component>
<component
name=
"masterDetails"
>
<states>
<state
key=
"ScopeChooserConfigurable.UI"
>
<settings>
<splitter-proportions>
<option
name=
"proportions"
>
<list>
<option
value=
"0.2"
/>
</list>
</option>
</splitter-proportions>
</settings>
</state>
</states>
</component>
</project>
\ No newline at end of file
injection/data_sync/tasks.py
View file @
d10f2829
...
...
@@ -11,22 +11,26 @@ import traceback
from
libs.cache
import
redis_client
from
trans2es.models.face_user_contrast_similar
import
FaceUserContrastSimilar
,
UserSimilarScore
import
json
from
linucb.utils.register_user_tag
import
RegisterUserTag
@shared_task
def
write_to_es
(
es_type
,
pk_list
,
use_batch_query_set
=
False
):
try
:
pk_list
=
list
(
frozenset
(
pk_list
))
type_info_map
=
get_type_info_map
()
type_info
=
type_info_map
[
es_type
]
logging
.
info
(
"duan add,es_type:
%
s"
%
str
(
es_type
))
type_info
.
insert_table_by_pk_list
(
sub_index_name
=
es_type
,
pk_list
=
pk_list
,
use_batch_query_set
=
use_batch_query_set
,
es
=
ESPerform
.
get_cli
()
)
if
es_type
==
"register_user_tag"
:
RegisterUserTag
.
get_register_user_tag
(
pk_list
)
else
:
type_info_map
=
get_type_info_map
()
type_info
=
type_info_map
[
es_type
]
logging
.
info
(
"consume es_type:
%
s"
%
str
(
es_type
))
type_info
.
insert_table_by_pk_list
(
sub_index_name
=
es_type
,
pk_list
=
pk_list
,
use_batch_query_set
=
use_batch_query_set
,
es
=
ESPerform
.
get_cli
()
)
except
:
logging
.
error
(
"catch exception,err_msg:
%
s"
%
traceback
.
format_exc
())
...
...
@@ -37,7 +41,7 @@ def sync_face_similar_data_to_redis():
result_items
=
FaceUserContrastSimilar
.
objects
.
filter
(
is_online
=
True
,
is_deleted
=
False
)
.
distinct
()
.
values
(
"participant_user_id"
)
.
values_list
(
"participant_user_id"
,
flat
=
True
)
logging
.
info
(
"
duan add,
begin sync_face_similar_data_to_redis!"
)
logging
.
info
(
"begin sync_face_similar_data_to_redis!"
)
redis_key_prefix
=
"physical:user_similar:participant_user_id:"
for
participant_user_id
in
result_items
:
...
...
@@ -58,7 +62,7 @@ def sync_face_similar_data_to_redis():
)
redis_client
.
set
(
redis_key
,
json
.
dumps
(
item_list
))
logging
.
info
(
"
duan add,
participant_user_id:
%
d set data done!"
%
participant_user_id
)
logging
.
info
(
"participant_user_id:
%
d set data done!"
%
participant_user_id
)
except
:
logging
.
error
(
"catch exception,err_msg:
%
s"
%
traceback
.
format_exc
())
...
...
@@ -68,7 +72,7 @@ def sync_user_similar_score():
results_items
=
UserSimilarScore
.
objects
.
filter
(
is_deleted
=
False
)
.
distinct
()
.
values
(
"user_id"
)
.
values_list
(
"user_id"
,
flat
=
True
)
redis_key_prefix
=
"physical:user_similar_score:user_id:"
logging
.
info
(
"
duan add,
begin sync user_similar_score!"
)
logging
.
info
(
"begin sync user_similar_score!"
)
for
user_id
in
results_items
:
redis_key
=
redis_key_prefix
+
str
(
user_id
)
similar_results_items
=
UserSimilarScore
.
objects
.
filter
(
is_deleted
=
False
,
user_id
=
user_id
)
.
order_by
(
"-score"
)
...
...
libs/es.py
View file @
d10f2829
...
...
@@ -155,25 +155,27 @@ class ESPerform(object):
bulk_actions
=
[]
if
sub_index_name
==
"topic"
:
if
sub_index_name
==
"topic"
or
sub_index_name
==
"topic-star-routing"
:
for
data
in
data_list
:
bulk_actions
.
append
({
'_op_type'
:
'index'
,
'_index'
:
official_index_name
,
'_type'
:
doc_type
,
'_id'
:
data
[
'id'
],
'_source'
:
data
,
'routing'
:
data
[
"content_level"
]
})
if
data
:
bulk_actions
.
append
({
'_op_type'
:
'index'
,
'_index'
:
official_index_name
,
'_type'
:
doc_type
,
'_id'
:
data
[
'id'
],
'_source'
:
data
,
'routing'
:
data
[
"content_level"
]
})
else
:
for
data
in
data_list
:
bulk_actions
.
append
({
'_op_type'
:
'index'
,
'_index'
:
official_index_name
,
'_type'
:
doc_type
,
'_id'
:
data
[
'id'
],
'_source'
:
data
,
})
if
data
:
bulk_actions
.
append
({
'_op_type'
:
'index'
,
'_index'
:
official_index_name
,
'_type'
:
doc_type
,
'_id'
:
data
[
'id'
],
'_source'
:
data
,
})
elasticsearch
.
helpers
.
bulk
(
es_cli
,
bulk_actions
)
return
True
...
...
@@ -262,30 +264,54 @@ class ESPerform(object):
return
True
@classmethod
def
get_tag_topic_list
(
cls
,
tag_id
):
def
get_tag_topic_list
(
cls
,
tag_id
,
have_read_topic_id_list
):
try
:
functions_list
=
list
()
for
id
in
tag_id
:
functions_list
.
append
(
{
"filter"
:
{
"term"
:
{
"tag_list"
:
id
}},
"weight"
:
1
}
)
q
=
{
"query"
:{
"bool"
:{
"must"
:[
{
"term"
:{
"is_online"
:
True
}},
{
"term"
:{
"is_deleted"
:
False
}},
{
"term"
:{
"tag_list"
:
tag_id
}}
]
"function_score"
:{
"query"
:
{
"bool"
:
{
"must"
:
[
{
"range"
:
{
"content_level"
:
{
"gte"
:
3
,
"lte"
:
5
}}},
{
"term"
:
{
"is_online"
:
True
}},
{
"term"
:
{
"is_deleted"
:
False
}},
{
"terms"
:
{
"tag_list"
:
tag_id
}}
]
}
},
"boost_mode"
:
"sum"
,
"score_mode"
:
"sum"
,
"functions"
:
functions_list
}
},
"_source"
:{
"include"
:[
"id"
]
},
"sort"
:[
{
"_score"
:
{
"order"
:
"desc"
}},
{
"create_time_val"
:{
"order"
:
"desc"
}},
{
"language_type"
:{
"order"
:
"asc"
}},
]
}
if
len
(
have_read_topic_id_list
)
>
0
:
q
[
"query"
][
"function_score"
][
"query"
][
"bool"
][
"must_not"
]
=
{
"terms"
:{
"id"
:
have_read_topic_id_list
}
}
result_dict
=
ESPerform
.
get_search_results
(
ESPerform
.
get_cli
(),
sub_index_name
=
"topic"
,
query_body
=
q
,
offset
=
0
,
size
=
5000
,
routing
=
"
4,5"
)
offset
=
0
,
size
=
100
,
routing
=
"3,
4,5"
)
topic_id_list
=
[
item
[
"_source"
][
"id"
]
for
item
in
result_dict
[
"hits"
]]
logging
.
info
(
"topic_id_list:
%
s"
%
str
(
topic_id_list
))
return
topic_id_list
except
:
logging
.
error
(
"catch exception,err_msg:
%
s"
%
traceback
.
format_exc
())
...
...
linucb/utils/register_user_tag.py
View file @
d10f2829
...
...
@@ -28,54 +28,62 @@ class RegisterUserTag(object):
linucb_device_id_register_tag_topic_id_prefix
=
"physical:linucb:register_tag_topic_recommend:device_id:"
linucb_user_id_register_tag_topic_id_prefix
=
"physical:linucb:register_tag_topic_recommend:user_id:"
linucb_register_user_tag_key
=
"physical:linucb:register_user_tag_info"
@classmethod
def
get_register_user_tag
(
cls
,
pk_list
):
try
:
user_id_set
=
set
()
# user_id_set = set()
user_id_dict
=
dict
()
query_results
=
AccountUserTag
.
objects
.
filter
(
pk__in
=
pk_list
)
for
item
in
query_results
:
tag_id
=
item
.
tag_id
user_id
=
item
.
user
user_tag_list
=
AccountUserTag
.
objects
.
filter
(
user
=
user_id
)
.
values_list
(
"tag_id"
,
flat
=
True
)
user_id_dict
[
user_id
]
=
user_tag_list
if
user_id
not
in
user_id_set
:
user_id_set
.
add
(
user_id
)
user_tag_list
=
AccountUserTag
.
objects
.
filter
(
user
=
user_id
)
.
values_list
(
"tag_id"
,
flat
=
True
)
have_read_topic_id_list
=
Tools
.
get_have_read_topic_id_list
(
-
1
,
user_id
,
TopicPageType
.
HOME_RECOMMEND
)
recommend_topic_id_list
=
list
()
cycle_num
=
int
(
10000
/
len
(
user_tag_list
))
for
index
in
range
(
0
,
cycle_num
):
for
tag_id
in
user_tag_list
:
redis_tag_id_key
=
cls
.
tag_topic_id_redis_prefix
+
str
(
tag_id
)
redis_tag_id_data
=
redis_client
.
get
(
redis_tag_id_key
)
tag_topic_id_list
=
json
.
loads
(
redis_tag_id_data
)
if
redis_tag_id_data
else
[]
if
not
redis_tag_id_data
:
tag_topic_id_list
=
ESPerform
.
get_tag_topic_list
(
tag_id
)
redis_client
.
set
(
redis_tag_id_key
,
json
.
dumps
(
tag_topic_id_list
))
redis_client
.
expire
(
redis_tag_id_key
,
1
*
24
*
60
*
60
)
if
len
(
tag_topic_id_list
)
>
index
:
for
topic_id
in
tag_topic_id_list
[
index
:]:
if
topic_id
not
in
have_read_topic_id_list
and
topic_id
not
in
recommend_topic_id_list
:
recommend_topic_id_list
.
append
(
topic_id
)
break
redis_register_tag_topic_data
=
{
"data"
:
json
.
dumps
(
recommend_topic_id_list
),
"cursor"
:
0
}
redis_client
.
hmset
(
cls
.
linucb_user_id_register_tag_topic_id_prefix
,
redis_register_tag_topic_data
)
redis_client
.
expire
(
cls
.
linucb_user_id_register_tag_topic_id_prefix
,
30
*
24
*
60
*
60
)
topic_recommend_redis_key
=
cls
.
linucb_user_id_recommend_topic_id_prefix
+
str
(
user_id
)
redis_data_dict
=
{
"data"
:
json
.
dumps
(
recommend_topic_id_list
),
"cursor"
:
0
}
redis_client
.
hmset
(
topic_recommend_redis_key
,
redis_data_dict
)
redis_client
.
expire
(
topic_recommend_redis_key
,
30
*
24
*
60
*
60
)
for
user_id
in
user_id_dict
:
redis_client
.
hset
(
cls
.
linucb_register_user_tag_key
,
user_id
,
json
.
dumps
(
list
(
user_id_dict
[
user_id
])))
# if user_id not in user_id_set:
# user_id_set.add(user_id)
#
# user_tag_list = AccountUserTag.objects.filter(user=user_id).values_list("tag_id",flat=True)
#
# have_read_topic_id_list = Tools.get_have_read_topic_id_list(-1, user_id,
# TopicPageType.HOME_RECOMMEND)
# recommend_topic_id_list = list()
# cycle_num = int(10000/len(user_tag_list))
# for index in range(0,cycle_num):
# for tag_id in user_tag_list:
# redis_tag_id_key = cls.tag_topic_id_redis_prefix + str(tag_id)
# redis_tag_id_data = redis_client.get(redis_tag_id_key)
# tag_topic_id_list = json.loads(redis_tag_id_data) if redis_tag_id_data else []
# if not redis_tag_id_data:
# tag_topic_id_list = ESPerform.get_tag_topic_list(tag_id)
# redis_client.set(redis_tag_id_key,json.dumps(tag_topic_id_list))
# redis_client.expire(redis_tag_id_key,1*24*60*60)
#
# if len(tag_topic_id_list)>index:
# for topic_id in tag_topic_id_list[index:]:
# if topic_id not in have_read_topic_id_list and topic_id not in recommend_topic_id_list:
# recommend_topic_id_list.append(topic_id)
# break
#
# redis_register_tag_topic_data = {
# "data": json.dumps(recommend_topic_id_list),
# "cursor": 0
# }
# redis_client.hmset(cls.linucb_user_id_register_tag_topic_id_prefix,redis_register_tag_topic_data)
# redis_client.expire(cls.linucb_user_id_register_tag_topic_id_prefix,30*24*60*60)
#
# topic_recommend_redis_key = cls.linucb_user_id_recommend_topic_id_prefix + str(user_id)
# redis_data_dict = {
# "data": json.dumps(recommend_topic_id_list),
# "cursor":0
# }
# redis_client.hmset(topic_recommend_redis_key,redis_data_dict)
# redis_client.expire(topic_recommend_redis_key,30*24*60*60)
#
except
:
logging
.
error
(
"catch exception,err_msg:
%
s"
%
traceback
.
format_exc
())
linucb/views/collect_data.py
View file @
d10f2829
...
...
@@ -2,11 +2,12 @@
# !/usr/bin/env python
from
kafka
import
KafkaConsumer
import
random
from
libs.cache
import
redis_client
import
logging
from
linucb.views.linucb
import
LinUCB
import
json
from
trans2es.models.tag
import
TopicTag
from
trans2es.models.tag
import
TopicTag
,
Tag
import
traceback
from
django.conf
import
settings
from
libs.es
import
ESPerform
...
...
@@ -33,6 +34,7 @@ class CollectData(object):
self
.
linucb_recommend_redis_prefix
=
"physical:linucb:tag_recommend:device_id:"
self
.
linucb_recommend_topic_id_prefix
=
"physical:linucb:topic_recommend:device_id:"
self
.
tag_topic_id_redis_prefix
=
"physical:tag_id:topic_id_list:"
self
.
click_recommend_redis_key_prefix
=
"physical:click_recommend:device_id:"
# 默认
self
.
user_feature
=
[
0
,
1
]
...
...
@@ -49,52 +51,78 @@ class CollectData(object):
logging
.
error
(
"catch exception,err_msg:
%
s"
%
traceback
.
format_exc
())
return
dict
()
def
update_recommend_tag_list
(
self
,
device_id
,
user_feature
=
None
):
def
update_recommend_tag_list
(
self
,
device_id
,
user_feature
=
None
,
user_id
=
None
,
click_topic_tag_list
=
None
):
try
:
recommend_tag_set
=
set
()
recommend_tag_list
=
list
()
recommend_tag_dict
=
dict
()
redis_linucb_tag_data_dict
=
self
.
_get_user_linucb_info
(
device_id
)
if
len
(
redis_linucb_tag_data_dict
)
==
0
:
recommend_tag_list
=
LinUCB
.
get_default_tag_list
()
recommend_tag_list
=
LinUCB
.
get_default_tag_list
(
user_id
)
LinUCB
.
init_device_id_linucb_info
(
redis_client
,
self
.
linucb_matrix_redis_prefix
,
device_id
,
recommend_tag_list
)
else
:
user_feature
=
user_feature
if
user_feature
else
self
.
user_feature
(
recommend_tag_dict
,
recommend_tag_set
)
=
LinUCB
.
linucb_recommend_tag
(
device_id
,
redis_linucb_tag_data_dict
,
user_feature
,
list
(
redis_linucb_tag_data_dict
.
keys
()))
recommend_tag_list
=
list
(
recommend_tag_dict
.
keys
())
if
len
(
recommend_tag_dict
)
>
0
:
recommend_tag_list
=
list
(
recommend_tag_set
)
if
len
(
recommend_tag_list
)
>
0
:
tag_recommend_redis_key
=
self
.
linucb_recommend_redis_prefix
+
str
(
device_id
)
redis_client
.
set
(
tag_recommend_redis_key
,
json
.
dumps
(
recommend_tag_list
))
# Todo:设置过期时间,调研set是否支持
redis_client
.
expire
(
tag_recommend_redis_key
,
7
*
24
*
60
*
60
)
have_read_topic_id_list
=
list
()
have_read_topic_id_list
=
Tools
.
get_have_read_topic_id_list
(
device_id
,
-
1
,
TopicPageType
.
HOME_RECOMMEND
)
recommend_topic_id_list
=
list
()
for
index
in
range
(
0
,
1000
):
for
tag_id
in
recommend_tag_list
[
0
:
5
]:
redis_tag_id_key
=
self
.
tag_topic_id_redis_prefix
+
str
(
tag_id
)
redis_tag_id_data
=
redis_client
.
get
(
redis_tag_id_key
)
tag_topic_id_list
=
json
.
loads
(
redis_tag_id_data
)
if
redis_tag_id_data
else
[]
if
not
redis_tag_id_data
:
tag_topic_id_list
=
ESPerform
.
get_tag_topic_list
(
tag_id
)
redis_client
.
set
(
redis_tag_id_key
,
json
.
dumps
(
tag_topic_id_list
))
redis_client
.
expire
(
redis_tag_id_key
,
1
*
24
*
60
*
60
)
if
len
(
tag_topic_id_list
)
>
index
:
for
topic_id
in
tag_topic_id_list
[
index
:]:
if
topic_id
not
in
have_read_topic_id_list
and
topic_id
not
in
recommend_topic_id_list
:
recommend_topic_id_list
.
append
(
topic_id
)
break
recommend_topic_id_list_click
=
list
()
if
click_topic_tag_list
:
if
len
(
click_topic_tag_list
)
>
0
:
recommend_topic_id_list_click
=
ESPerform
.
get_tag_topic_list
(
click_topic_tag_list
,
have_read_topic_id_list
)
if
len
(
recommend_topic_id_list_click
)
>
0
:
num
=
min
(
len
(
recommend_topic_id_list_click
),
2
)
logging
.
info
(
"recommend_topic_id_list:
%
s"
%
(
str
(
num
)))
for
i
in
range
(
0
,
num
):
recommend_topic_id_list
.
append
(
recommend_topic_id_list_click
[
i
])
have_read_topic_id_list
.
extend
(
recommend_topic_id_list
)
click_recommend_redis_key
=
self
.
click_recommend_redis_key_prefix
+
str
(
device_id
)
click_redis_data_dict
=
{
"data"
:
json
.
dumps
(
recommend_topic_id_list
),
"cursor"
:
0
}
redis_client
.
hmset
(
click_recommend_redis_key
,
click_redis_data_dict
)
total_topic_list
=
list
()
tag_topic_id_list
=
list
()
tag_id_list
=
recommend_tag_list
[
0
:
100
]
topic_recommend_redis_key
=
self
.
linucb_recommend_topic_id_prefix
+
str
(
device_id
)
redis_data_dict
=
{
"data"
:
json
.
dumps
(
recommend_topic_id_list
),
"cursor"
:
0
}
redis_client
.
hmset
(
topic_recommend_redis_key
,
redis_data_dict
)
redis_topic_data_dict
=
redis_client
.
hgetall
(
topic_recommend_redis_key
)
redis_topic_list
=
list
()
cursor
=
-
1
if
b
"data"
in
redis_topic_data_dict
:
redis_topic_list
=
json
.
loads
(
redis_topic_data_dict
[
b
"data"
])
if
redis_topic_data_dict
[
b
"data"
]
else
[]
cursor
=
int
(
str
(
redis_topic_data_dict
[
b
"cursor"
],
encoding
=
"utf-8"
))
if
len
(
recommend_topic_id_list
)
==
0
and
cursor
==
0
and
len
(
redis_topic_list
)
>
0
:
have_read_topic_id_list
.
extend
(
redis_topic_list
[:
2
])
tag_topic_id_list
=
ESPerform
.
get_tag_topic_list
(
tag_id_list
,
have_read_topic_id_list
)
if
len
(
recommend_topic_id_list
)
>
0
:
tag_topic_id_list
=
recommend_topic_id_list
+
tag_topic_id_list
redis_data_dict
=
{
"data"
:
json
.
dumps
(
tag_topic_id_list
),
"cursor"
:
0
}
redis_client
.
hmset
(
topic_recommend_redis_key
,
redis_data_dict
)
else
:
if
cursor
<=
0
and
len
(
redis_topic_list
)
>
0
:
tag_topic_id_list
=
redis_topic_list
[:
2
]
+
tag_topic_id_list
tag_topic_id_list
=
list
(
set
(
tag_topic_id_list
))
return
True
except
:
logging
.
error
(
"catch exception,err_msg:
%
s"
%
traceback
.
format_exc
())
...
...
@@ -127,14 +155,30 @@ class CollectData(object):
if
"type"
in
raw_val_dict
and
"on_click_feed_topic_card"
==
raw_val_dict
[
"type"
]:
topic_id
=
raw_val_dict
[
"params"
][
"business_id"
]
or
raw_val_dict
[
"params"
][
"topic_id"
]
device_id
=
raw_val_dict
[
"device"
][
"device_id"
]
user_id
=
raw_val_dict
[
"user_id"
]
if
"user_id"
in
raw_val_dict
else
None
logging
.
info
(
"consume topic_id:
%
s,device_id:
%
s"
%
(
str
(
topic_id
),
str
(
device_id
)))
tag_list
=
list
()
click_topic_tag_list
=
list
()
collection_tag_sql_query_results
=
TopicTag
.
objects
.
using
(
settings
.
SLAVE_DB_NAME
)
.
filter
(
topic_id
=
topic_id
)
.
values_list
(
"tag_id"
,
"is_online"
,
"is_collection"
)
if
len
(
collection_tag_sql_query_results
)
>
0
:
for
tag_id
,
is_online
,
is_collection
in
collection_tag_sql_query_results
:
if
is_online
and
is_collection
==
1
:
click_topic_tag_list
.
append
(
tag_id
)
logging
.
info
(
"click_topic_tag_list:
%
s"
%
(
str
(
click_topic_tag_list
)))
click_sql_query_results
=
TopicTag
.
objects
.
using
(
settings
.
SLAVE_DB_NAME
)
.
filter
(
topic_id
=
topic_id
)
.
values_list
(
"tag_id"
,
"is_online"
)
for
tag_id
,
is_online
in
click_sql_query_results
:
if
is_online
:
tag_list
.
append
(
tag_id
)
tag_sql_query_results
=
Tag
.
objects
.
using
(
settings
.
SLAVE_DB_NAME
)
.
filter
(
id
=
tag_id
)
.
values_list
(
"id"
,
"collection"
,
"is_ai"
)
for
id
,
collection
,
is_ai
in
tag_sql_query_results
:
if
(
is_ai
==
1
)
and
id
not
in
click_topic_tag_list
:
click_topic_tag_list
.
append
(
id
)
logging
.
info
(
"click_topic_tag_list:
%
s"
%
(
str
(
click_topic_tag_list
)))
is_click
=
1
is_vote
=
0
...
...
@@ -147,7 +191,7 @@ class CollectData(object):
self
.
update_user_linucb_tag_info
(
reward
,
device_id
,
tag_id
,
user_feature
)
# 更新该用户的推荐tag数据,放在 更新完成user tag行为信息之后
self
.
update_recommend_tag_list
(
device_id
,
user_feature
)
self
.
update_recommend_tag_list
(
device_id
,
user_feature
,
user_id
,
click_topic_tag_list
=
click_topic_tag_list
)
elif
"type"
in
raw_val_dict
and
"page_precise_exposure"
==
raw_val_dict
[
"type"
]:
if
isinstance
(
raw_val_dict
[
"params"
][
"exposure_cards"
],
str
):
exposure_cards_list
=
json
.
loads
(
raw_val_dict
[
"params"
][
"exposure_cards"
])
...
...
@@ -156,6 +200,7 @@ class CollectData(object):
else
:
exposure_cards_list
=
list
()
device_id
=
raw_val_dict
[
"device"
][
"device_id"
]
user_id
=
raw_val_dict
[
"user_id"
]
if
"user_id"
in
raw_val_dict
else
None
exposure_topic_id_list
=
list
()
for
item
in
exposure_cards_list
:
...
...
@@ -172,7 +217,11 @@ class CollectData(object):
exposure_sql_query_results
=
TopicTag
.
objects
.
using
(
settings
.
SLAVE_DB_NAME
)
.
filter
(
topic_id__in
=
exposure_topic_id_list
)
.
values_list
(
"topic_id"
,
"tag_id"
,
"is_online"
)
for
topic_id
,
tag_id
,
is_online
in
exposure_sql_query_results
:
if
is_online
:
tag_list
.
append
(
tag_id
)
# tag_sql_query_results = Tag.objects.using(settings.SLAVE_DB_NAME).filter(
# id=tag_id).values_list("id", "collection", "is_ai")
# for id, collection, is_ai in tag_sql_query_results:
# if collection == 1 or is_ai == 1:
tag_list
.
append
(
tag_id
)
if
topic_id
not
in
topic_tag_id_dict
:
topic_tag_id_dict
[
topic_id
]
=
list
()
...
...
@@ -189,7 +238,7 @@ class CollectData(object):
self
.
update_user_linucb_tag_info
(
reward
,
device_id
,
tag_id
,
user_feature
)
# 更新该用户的推荐tag数据,放在 更新完成user tag行为信息之后
self
.
update_recommend_tag_list
(
device_id
,
user_feature
)
self
.
update_recommend_tag_list
(
device_id
,
user_feature
,
user_id
)
else
:
logging
.
warning
(
"unknown type msg:
%
s"
%
raw_val_dict
.
get
(
"type"
,
"missing type"
))
except
:
...
...
linucb/views/linucb.py
View file @
d10f2829
...
...
@@ -20,11 +20,16 @@ class LinUCB:
default_tag_list
=
list
()
@classmethod
def
get_default_tag_list
(
cls
):
def
get_default_tag_list
(
cls
,
user_id
):
try
:
if
len
(
cls
.
default_tag_list
)
==
0
:
cls
.
default_tag_list
=
Tag
.
objects
.
using
(
settings
.
SLAVE_DB_NAME
)
.
filter
(
is_online
=
True
,
collection
=
1
)
.
values_list
(
"id"
,
flat
=
True
)[
0
:
100
]
if
user_id
:
redis_tag_data
=
redis_client
.
hget
(
"physical:linucb:register_user_tag_info"
,
user_id
)
cls
.
default_tag_list
=
json
.
loads
(
redis_tag_data
)
if
redis_tag_data
else
[]
if
len
(
cls
.
default_tag_list
)
==
0
:
cls
.
default_tag_list
=
Tag
.
objects
.
using
(
settings
.
SLAVE_DB_NAME
)
.
filter
(
is_online
=
True
,
collection
=
1
)
.
values_list
(
"id"
,
flat
=
True
)[
0
:
100
]
return
cls
.
default_tag_list
except
:
...
...
search/utils/group.py
View file @
d10f2829
...
...
@@ -9,7 +9,7 @@ from libs.es import ESPerform
class
GroupUtils
(
object
):
@classmethod
def
get_group_query_result
(
cls
,
query
,
offset
,
size
):
def
get_group_query_result
(
cls
,
query
,
offset
,
size
):
try
:
q
=
dict
()
...
...
@@ -38,13 +38,13 @@ class GroupUtils(object):
}
}
q
[
"_source"
]
=
{
"includes"
:[
"id"
]
"includes"
:
[
"id"
]
}
return
ESPerform
.
get_search_results
(
ESPerform
.
get_cli
(),
"group"
,
q
,
offset
,
size
)
except
:
logging
.
error
(
"catch exception,err_msg:
%
s"
%
traceback
.
format_exc
())
return
{
"total_count"
:
0
,
"hits"
:
[]}
return
{
"total_count"
:
0
,
"hits"
:
[]}
@classmethod
def
get_hot_pictorial_recommend_result_list
(
cls
,
offset
,
size
,
es_cli_obj
=
None
):
...
...
@@ -57,12 +57,14 @@ class GroupUtils(object):
"bool"
:{
"must"
:[
{
"term"
:
{
"is_online"
:
True
}},
{
"term"
:{
"is_deleted"
:
False
}}
{
"term"
:{
"is_deleted"
:
False
}},
{
"term"
:
{
"effective"
:
True
}}
]
}
}
q
[
"sort"
]
=
[
{
"high_quality_topic_num"
:{
"order"
:
"desc"
}}
{
"high_quality_topic_num"
:{
"order"
:
"desc"
}},
{
"create_time"
:{
"order"
:
"desc"
}}
]
q
[
"_source"
]
=
{
"includes"
:[
"id"
]
...
...
@@ -80,7 +82,7 @@ class GroupUtils(object):
return
[]
@classmethod
def
get_user_attention_pictorial_list
(
cls
,
user_id
,
offset
=
0
,
size
=
10
,
es_cli_obj
=
None
):
def
get_user_attention_pictorial_list
(
cls
,
user_id
,
offset
=
0
,
size
=
10
,
es_cli_obj
=
None
):
"""
:remark: 获取用户关注小组列表
:return:
...
...
@@ -91,29 +93,32 @@ class GroupUtils(object):
q
=
dict
()
q
[
"query"
]
=
{
"bool"
:{
"must"
:[
{
"term"
:{
"is_online"
:
True
}},
{
"term"
:{
"user_id"
:
user_id
}},
{
"term"
:{
"is_deleted"
:
False
}}
"bool"
:
{
"must"
:
[
{
"term"
:
{
"is_online"
:
True
}},
{
"term"
:
{
"user_id"
:
user_id
}},
{
"term"
:
{
"is_deleted"
:
False
}},
{
"term"
:{
"effective"
:
True
}}
]
}
}
q
[
"_source"
]
=
{
"includes"
:[
"attention_pictorial_id_list"
]
"includes"
:
[
"attention_pictorial_id_list"
]
}
result_dict
=
ESPerform
.
get_search_results
(
es_cli_obj
,
"user"
,
q
,
offset
,
size
)
if
len
(
result_dict
[
"hits"
])
>
0
:
result_dict
=
ESPerform
.
get_search_results
(
es_cli_obj
,
"user"
,
q
,
offset
,
size
)
if
len
(
result_dict
[
"hits"
])
>
0
:
return
result_dict
[
"hits"
][
0
][
"_source"
][
"attention_pictorial_id_list"
]
else
:
return
[]
except
:
logging
.
error
(
"catch exception,err_msg:
%
s"
%
traceback
.
format_exc
())
return
[]
@classmethod
def
get_pictorial_ids_by_aggs
(
cls
,
pictorial_ids_list
,
es_cli_obj
=
None
):
def
get_pictorial_ids_by_aggs
(
cls
,
pictorial_ids_list
,
es_cli_obj
=
None
):
"""
:remark:聚合查询获取小组列表
:param group_id_list:
...
...
@@ -124,34 +129,34 @@ class GroupUtils(object):
es_cli_obj
=
ESPerform
.
get_cli
()
q
=
dict
()
q
[
"size"
]
=
0
q
[
"size"
]
=
0
q
[
"query"
]
=
{
"terms"
:{
"pictorial_id"
:
pictorial_ids_list
"terms"
:
{
"pictorial_id"
:
pictorial_ids_list
}
}
q
[
"aggs"
]
=
{
"pictorial_ids"
:{
"terms"
:{
"field"
:
"pictorial_id"
"pictorial_ids"
:
{
"terms"
:
{
"field"
:
"pictorial_id"
},
"aggs"
:{
"max_date"
:{
"max"
:{
"field"
:
"update_time_val"
"aggs"
:
{
"max_date"
:
{
"max"
:
{
"field"
:
"update_time_val"
}
}
}
}
}
result_dict
=
ESPerform
.
get_search_results
(
es_cli_obj
,
"topic"
,
q
,
aggregations_query
=
True
)
result_dict
=
ESPerform
.
get_search_results
(
es_cli_obj
,
"topic"
,
q
,
aggregations_query
=
True
)
buckets_list
=
result_dict
[
"aggregations"
][
"pictorial_ids"
][
"buckets"
]
sorted_buckets_list
=
sorted
(
buckets_list
,
key
=
lambda
item
:
item
[
"max_date"
][
"value"
],
reverse
=
True
)
sorted_buckets_list
=
sorted
(
buckets_list
,
key
=
lambda
item
:
item
[
"max_date"
][
"value"
],
reverse
=
True
)
sorted_pictorial_id_list
=
[
item
[
"key"
]
for
item
in
sorted_buckets_list
]
return
sorted_pictorial_id_list
except
:
logging
.
error
(
"catch exception,err_msg:
%
s"
%
traceback
.
format_exc
())
return
[]
\ No newline at end of file
return
[]
search/utils/topic.py
View file @
d10f2829
...
...
@@ -122,7 +122,7 @@ class TopicUtils(object):
@classmethod
def
get_recommend_topic_ids
(
cls
,
user_id
,
tag_id
,
offset
,
size
,
single_size
,
query
=
None
,
query_type
=
TopicPageType
.
FIND_PAGE
,
filter_topic_id_list
=
[],
test_score
=
False
,
must_topic_id_list
=
[],
recommend_tag_list
=
[],
user_similar_score_list
=
[],
index_type
=
"topic"
,
routing
=
None
):
user_similar_score_list
=
[],
index_type
=
"topic"
,
routing
=
None
,
attention_tag_list
=
[]
):
"""
:需增加打散逻辑
:remark:获取首页推荐帖子列表
...
...
@@ -165,7 +165,7 @@ class TopicUtils(object):
"language_type"
:
1
}
},
"weight"
:
6
"weight"
:
6
0
},
{
"gauss"
:
{
...
...
@@ -174,7 +174,7 @@ class TopicUtils(object):
"decay"
:
0.99
}
},
"weight"
:
5
"weight"
:
60
}
]
...
...
@@ -194,7 +194,7 @@ class TopicUtils(object):
{
"filter"
:
{
"bool"
:
{
"should"
:
{
"terms"
:
{
"user_id"
:
attention_user_id_list
}}}},
"weight"
:
3
,
"weight"
:
3
0
,
}
)
# if len(pick_user_id_list) > 0:
...
...
@@ -216,12 +216,12 @@ class TopicUtils(object):
# )
# query_tag_term_list = cls.___get_should_term_list(user_tag_list)
if
len
(
user
_tag_list
)
>
0
:
if
len
(
attention
_tag_list
)
>
0
:
functions_list
.
append
(
{
"filter"
:
{
"bool"
:
{
"should"
:
{
"terms"
:
{
"tag_list"
:
user
_tag_list
}}}},
"weight"
:
1
"should"
:
{
"terms"
:
{
"tag_list"
:
attention
_tag_list
}}}},
"weight"
:
1
000000
}
)
# if len(recommend_tag_list)>0:
...
...
@@ -257,7 +257,7 @@ class TopicUtils(object):
"query"
:
{
"bool"
:
{
"filter"
:
[
# {"range": {"content_level": {"gte": low_content_level
, "lte": 5}}},
{
"range"
:
{
"content_level"
:
{
"gte"
:
4
,
"lte"
:
5
}}},
# {"term": {"has_image":True}},
{
"term"
:
{
"is_online"
:
True
}},
{
"term"
:
{
"is_deleted"
:
False
}}
...
...
@@ -380,7 +380,7 @@ class TopicUtils(object):
# topic_id_list.append(topic_id)
# if len(topic_id_list)>=single_size:
# break
logging
.
info
(
"topic_id_list:
%
s,attention_tag_list
%
s"
%
(
str
(
topic_id_list
),
str
(
attention_tag_list
)))
return
topic_id_list
except
:
logging
.
error
(
"catch exception,err_msg:
%
s"
%
traceback
.
format_exc
())
...
...
@@ -435,7 +435,7 @@ class TopicUtils(object):
"query"
:
{
"bool"
:
{
"must"
:
[
# {"range": {"content_level": {"gte": 3
, "lte": 5}}},
{
"range"
:
{
"content_level"
:
{
"gte"
:
4
,
"lte"
:
5
}}},
{
"term"
:
{
"is_online"
:
True
}},
{
"term"
:
{
"is_deleted"
:
False
}}
],
...
...
@@ -621,8 +621,7 @@ class TopicUtils(object):
"term"
:
{
k
:
v
},
})
if
not
v
:
if
v
in
(
None
,
''
,
[]):
continue
if
k
in
[
"create_time_gte"
,
"create_time_lte"
]:
...
...
search/views/group.py
View file @
d10f2829
...
...
@@ -53,7 +53,6 @@ def pictorial_sort(user_id=-1, sort_type=GroupSortTypes.HOT_RECOMMEND, offset=0,
if
sort_type
==
GroupSortTypes
.
HOT_RECOMMEND
:
pictorial_ids_list
=
GroupUtils
.
get_hot_pictorial_recommend_result_list
(
offset
,
size
,
es_cli_obj
)
return
{
"pictorial_recommend_ids"
:
pictorial_ids_list
}
elif
sort_type
==
GroupSortTypes
.
ATTENTION_RECOMMEND
:
...
...
@@ -61,6 +60,7 @@ def pictorial_sort(user_id=-1, sort_type=GroupSortTypes.HOT_RECOMMEND, offset=0,
es_cli_obj
=
es_cli_obj
)
if
len
(
attention_pictorial_list
)
==
0
:
return
{
"pictorial_recommend_ids"
:
[]}
else
:
attention_pictorial_id_list
=
[
item
[
"pictorial_id"
]
for
item
in
attention_pictorial_list
]
sorted_pictorial_ids_list
=
GroupUtils
.
get_pictorial_ids_by_aggs
(
attention_pictorial_id_list
,
...
...
@@ -73,7 +73,6 @@ def pictorial_sort(user_id=-1, sort_type=GroupSortTypes.HOT_RECOMMEND, offset=0,
for
item
in
sorted_attention_pictorial_list
:
if
item
[
"pictorial_id"
]
not
in
pictorial_recommend_ids_list
:
pictorial_recommend_ids_list
.
append
(
item
[
"pictorial_id"
])
return
{
"pictorial_recommend_ids"
:
pictorial_recommend_ids_list
[
offset
:(
offset
+
size
)]}
except
:
...
...
@@ -106,7 +105,6 @@ def pictorial_topic(topic_id=-1, offset=0, size=10):
"includes"
:
[
"id"
,
"pictorial_id"
,
"tag_list"
]
}
result_dict
=
ESPerform
.
get_search_results
(
es_cli_obj
,
"topic"
,
q
,
offset
,
size
)
logging
.
info
(
"get result_dict:
%
s"
%
result_dict
)
pict_pictorial_ids_list
=
[]
topic_tag_list
=
[]
pictorial_id_list
=
[]
...
...
@@ -165,7 +163,7 @@ def pictorial_topic(topic_id=-1, offset=0, size=10):
"term"
:
{
"is_online"
:
True
}
},{
},
{
"term"
:
{
"is_deleted"
:
False
}
...
...
@@ -200,3 +198,49 @@ def pictorial_topic(topic_id=-1, offset=0, size=10):
except
:
logging
.
error
(
"catch exception,err_msg:
%
s"
%
traceback
.
format_exc
())
return
{
"pictorial_ids_list"
:
[]}
@bind("physical/search/pictorial_topic_sort")
def pictorial_topic_sort(pictorial_id=-1, offset=0, size=10):
    """Return the ids of the topics in one pictorial, most-voted first.

    Searches the "topic" index for documents whose ``pictorial_id`` matches
    and that are online and not deleted, sorted by ``total_vote_num``
    descending.

    :param pictorial_id: id of the pictorial; any non-int value is replaced
        by -1 before it is used in the query.
    :param offset: paging offset forwarded to ``ESPerform.get_search_results``.
    :param size: page size forwarded to ``ESPerform.get_search_results``.
    :return: ``{"pict_pictorial_ids_list": [topic_id, ...]}``; the list is
        empty when the search raises.
    """
    try:
        # Fix: the original assigned ``user_id = -1`` here, which left an
        # invalid ``pictorial_id`` untouched and flowing into the term query.
        if not isinstance(pictorial_id, int):
            pictorial_id = -1

        q = {
            "query": {
                "bool": {
                    "must": [
                        {"term": {"pictorial_id": pictorial_id}},
                        {"term": {"is_online": True}},
                        {"term": {"is_deleted": False}},
                    ]
                }
            },
            "sort": {
                "total_vote_num": {
                    "order": "desc"
                }
            }
        }

        # Get the ES client object.
        es_cli_obj = ESPerform.get_cli()
        result_dict = ESPerform.get_search_results(es_cli_obj, "topic", q, offset, size)
        logging.info("get pictorial_topic_sort res:%s" % result_dict)

        # Despite the key name, the returned list holds topic ids.
        pict_pictorial_ids_list = [item["_source"]["id"] for item in result_dict["hits"]]
        return {"pict_pictorial_ids_list": pict_pictorial_ids_list}
    except Exception:
        logging.error("catch exception,err_msg:%s" % traceback.format_exc())
        return {"pict_pictorial_ids_list": []}
search/views/tag.py
View file @
d10f2829
...
...
@@ -24,34 +24,6 @@ def get_highlight(fields=[]):
@bind
(
"physical/search/query_tag"
)
def
query_tag
(
query
,
offset
,
size
):
try
:
"""
q = {
"query":{
"bool":{
"must":[
{"term":{"is_online":True}},
{"term": {"is_deleted": False}}
],
"should":[
{"multi_match":{
"query": query,
"fields":["name"],
"operator":"and"}}
],
"minimum_should_match":1
}
},
"sort":[
{"near_new_topic_num":{"order":"desc"}},
{'_score': {"order": "desc"}}
],
"_source": {
"includes": ["id", "name"]
}
}
q["highlight"] = get_highlight(["name"])
"""
q
=
{
"suggest"
:{
"tips-suggest"
:{
...
...
@@ -121,3 +93,11 @@ def query_by_tag_type(tag_type_id,offset,size):
logging
.
error
(
"catch exception,err_msg:
%
s"
%
traceback
.
format_exc
())
return
{
"tag_list"
:
[]}
@bind("physical/search/choice_push_tag")
def choice_push_tag(device_id, user_id):
    """Placeholder endpoint for choosing the tags to push to a user.

    No selection logic is implemented yet, so the result is always an empty
    tag list. The success path now returns the same ``{"tag_list": [...]}``
    shape as the error path instead of an implicit ``None``.

    :param device_id: device identifier (currently unused).
    :param user_id: user identifier (currently unused).
    :return: ``{"tag_list": []}``.
    """
    try:
        # TODO: implement the tag selection; until then return an empty list.
        return {"tag_list": []}
    except Exception:
        logging.error("catch exception,err_msg:%s" % traceback.format_exc())
        return {"tag_list": []}
search/views/topic.py
View file @
d10f2829
...
...
@@ -59,6 +59,7 @@ def get_home_recommend_topic_ids(user_id, device_id, tag_id, offset, size, query
redis_field_val_list
=
redis_client
.
hmget
(
redis_key
,
redis_field_list
)
topic_recommend_redis_key
=
"physical:linucb:topic_recommend:device_id:"
+
str
(
device_id
)
# click_recommend_redis_key = "physical:click_recommend:device_id:" + str(device_id)
# recommend_tag_dict = dict()
...
...
@@ -76,6 +77,20 @@ def get_home_recommend_topic_ids(user_id, device_id, tag_id, offset, size, query
recommend_topic_list
=
recommend_topic_id_list
[
cursor
:
newcursor
]
redis_client
.
hset
(
topic_recommend_redis_key
,
"cursor"
,
newcursor
)
# click_recommend_topic_id_list = list()
# click_recommend_topic_list = list()
#
# click_recommend_topic_dict = redis_client.hgetall(click_recommend_redis_key)
# if b"data" in click_recommend_topic_dict:
# click_recommend_topic_id_list = json.loads(click_recommend_topic_dict[b"data"])
# cursor = int(str(click_recommend_topic_dict[b"cursor"], encoding="utf-8"))
# newcursor = cursor + 2
# if newcursor < 4 and len(click_recommend_topic_id_list) ==2:
# for i in range(0,2):
# click_recommend_topic_list.append(click_recommend_topic_id_list[i])
# redis_client.hset(click_recommend_redis_key, "cursor", newcursor)
# combine_recommend_topic_list_tmp = click_recommend_topic_list.extend(recommend_topic_list)
# combine_recommend_topic_list = combine_recommend_topic_list_tmp[0:5]
have_read_topic_id_list
=
list
()
if
redis_field_val_list
[
0
]:
...
...
@@ -90,15 +105,23 @@ def get_home_recommend_topic_ids(user_id, device_id, tag_id, offset, size, query
user_similar_score_redis_list
=
json
.
loads
(
redis_user_similar_score_redis_val
)
if
redis_user_similar_score_redis_val
else
[]
size
=
size
-
len
(
recommend_topic_list
)
redis_tag_data
=
redis_client
.
hget
(
"physical:linucb:register_user_tag_info"
,
user_id
)
attention_tag_list
=
json
.
loads
(
redis_tag_data
)
if
redis_tag_data
else
[]
logging
.
info
(
"attention_tag_list:
%
s"
%
(
str
(
attention_tag_list
)))
if
len
(
recommend_topic_list
)
>
0
:
size
=
size
-
len
(
recommend_topic_list
)
have_read_topic_id_list
.
extend
(
recommend_topic_list
)
topic_id_list
=
TopicUtils
.
get_recommend_topic_ids
(
user_id
=
user_id
,
tag_id
=
tag_id
,
offset
=
offset
,
size
=
size
,
single_size
=
size
,
query
=
query
,
query_type
=
query_type
,
filter_topic_id_list
=
have_read_topic_id_list
,
recommend_tag_list
=
recommend_topic_list
,
user_similar_score_list
=
user_similar_score_redis_list
,
index_type
=
"topic"
,
routing
=
"4,5"
)
user_similar_score_list
=
user_similar_score_redis_list
,
index_type
=
"topic"
,
routing
=
"4,5"
,
attention_tag_list
=
attention_tag_list
)
have_read_group_id_set
=
set
()
have_read_user_id_set
=
set
()
unread_topic_id_dict
=
dict
()
logging
.
info
(
"attention_tag_list:
%
s"
%
(
str
(
topic_id_list
)))
# # 当前页小组数量
# cur_page_group_num = 0
...
...
@@ -136,8 +159,9 @@ def get_home_recommend_topic_ids(user_id, device_id, tag_id, offset, size, query
# else:
# break
recommend_topic_list
.
extend
(
topic_id_list
)
have_read_topic_id_list
.
extend
(
recommend_topic_list
)
topic_id_list
.
extend
(
recommend_topic_list
)
# recommend_topic_list.extend(topic_id_list)
have_read_topic_id_list
.
extend
(
topic_id_list
)
if
len
(
have_read_topic_id_list
)
>
30000
:
cut_len
=
len
(
have_read_topic_id_list
)
-
30000
have_read_topic_id_list
=
have_read_topic_id_list
[
cut_len
:]
...
...
@@ -148,7 +172,7 @@ def get_home_recommend_topic_ids(user_id, device_id, tag_id, offset, size, query
# Keep each session key for 30 days (the TTL below is 60 * 60 * 24 * 30 seconds)
redis_client
.
expire
(
redis_key
,
60
*
60
*
24
*
30
)
return
recommend_topic
_list
return
topic_id
_list
except
:
logging
.
error
(
"catch exception,err_msg:
%
s"
%
traceback
.
format_exc
())
return
[]
...
...
trans2es/mapping/pictorial.json
View file @
d10f2829
...
...
@@ -15,7 +15,7 @@
"update_time"
:{
"type"
:
"date"
,
"format"
:
"date_time_no_millis"
},
"tag_id"
:{
"type"
:
"long"
},
"tag_name"
:{
"type"
:
"text"
,
"analyzer"
:
"gm_default_index"
,
"search_analyzer"
:
"gm_default_index"
},
"topic_id_list"
:{
"type"
:
"long"
}
"topic_id_list"
:{
"type"
:
"long"
}
,
"effective"
:{
"type"
:
"boolean"
}
}
}
\ No newline at end of file
trans2es/mapping/topic-high-star.json
View file @
d10f2829
{
"dynamic"
:
"strict"
,
"_routing"
:
{
"required"
:
true
},
"properties"
:
{
"id"
:{
"type"
:
"long"
},
"is_online"
:{
"type"
:
"boolean"
},
//上线
"is_deleted"
:{
"type"
:
"boolean"
},
"vote_num"
:{
"type"
:
"long"
},
"total_vote_num"
:{
"type"
:
"long"
},
"reply_num"
:{
"type"
:
"long"
},
"name"
:{
"type"
:
"text"
,
"analyzer"
:
"gm_default_index"
,
"search_analyzer"
:
"gm_default_index"
},
"description"
:{
"type"
:
"text"
,
"analyzer"
:
"gm_default_index"
,
"search_analyzer"
:
"gm_default_index"
},
...
...
@@ -48,7 +50,7 @@
"type"
:
"text"
,
"analyzer"
:
"gm_default_index"
,
"search_analyzer"
:
"gm_default_index"
}
}
,
"is_excellent"
:{
"type"
:
"long"
}
}
}
trans2es/mapping/topic-star-routing.json
View file @
d10f2829
{
"dynamic"
:
"strict"
,
"_routing"
:
{
"required"
:
true
},
"properties"
:
{
"id"
:{
"type"
:
"long"
},
"is_online"
:{
"type"
:
"boolean"
},
//上线
"is_deleted"
:{
"type"
:
"boolean"
},
"vote_num"
:{
"type"
:
"long"
},
"total_vote_num"
:{
"type"
:
"long"
},
"reply_num"
:{
"type"
:
"long"
},
"name"
:{
"type"
:
"text"
,
"analyzer"
:
"gm_default_index"
,
"search_analyzer"
:
"gm_default_index"
},
"description"
:{
"type"
:
"text"
,
"analyzer"
:
"gm_default_index"
,
"search_analyzer"
:
"gm_default_index"
},
...
...
@@ -48,7 +50,7 @@
"type"
:
"text"
,
"analyzer"
:
"gm_default_index"
,
"search_analyzer"
:
"gm_default_index"
}
}
,
"is_excellent"
:{
"type"
:
"long"
}
}
}
trans2es/mapping/topic-v1.json
View file @
d10f2829
...
...
@@ -5,12 +5,14 @@
"is_online"
:{
"type"
:
"boolean"
},
//上线
"is_deleted"
:{
"type"
:
"boolean"
},
"vote_num"
:{
"type"
:
"long"
},
"total_vote_num"
:{
"type"
:
"long"
,
"default"
:
0
},
"reply_num"
:{
"type"
:
"long"
},
"name"
:{
"type"
:
"text"
,
"analyzer"
:
"gm_default_index"
,
"search_analyzer"
:
"gm_default_index"
},
"description"
:{
"type"
:
"text"
,
"analyzer"
:
"gm_default_index"
,
"search_analyzer"
:
"gm_default_index"
},
"content"
:{
"type"
:
"text"
,
"analyzer"
:
"gm_default_index"
,
"search_analyzer"
:
"gm_default_index"
},
"content_level"
:{
"type"
:
"text"
},
"user_id"
:{
"type"
:
"long"
},
"user_nick_name"
:{
"type"
:
"text"
,
"analyzer"
:
"gm_default_index"
,
"search_analyzer"
:
"gm_default_index"
},
//帖子用户名
"group_id"
:{
"type"
:
"long"
},
//所在组ID
"tag_list"
:{
"type"
:
"long"
},
//标签属性
"edit_tag_list"
:{
"type"
:
"long"
},
//编辑标签
...
...
trans2es/mapping/topic.json
View file @
d10f2829
{
"dynamic"
:
"strict"
,
"_routing"
:
{
"required"
:
true
},
"properties"
:
{
"id"
:{
"type"
:
"long"
},
"is_online"
:{
"type"
:
"boolean"
},
//上线
...
...
@@ -49,7 +50,7 @@
"type"
:
"text"
,
"analyzer"
:
"gm_default_index"
,
"search_analyzer"
:
"gm_default_index"
}
}
,
"is_excellent"
:{
"type"
:
"long"
}
}
}
trans2es/models/pictorial.py
View file @
d10f2829
...
...
@@ -72,6 +72,26 @@ class Pictorial(models.Model):
logging
.
error
(
"catch exception,err_msg:
%
s"
%
traceback
.
format_exc
())
return
False
def get_effective(self, topic_id_list):
    """Tell whether at least five of the given topics count as effective.

    A topic counts when it exists, is online, and its ``content_level``
    converts to one of 0, 3, 4 or 5. Counting stops as soon as the fifth
    such topic is found.

    :param topic_id_list: iterable of topic ids (anything ``int()`` accepts).
    :return: True when five effective topics were found, otherwise False.
        False is also returned when an unexpected error occurs.
    """
    effective_levels = (0, 3, 4, 5)
    required_num = 5
    try:
        effective_num = 0
        for topic_id in topic_id_list:
            topic_obj = Topic.objects.filter(id=int(topic_id)).first()
            if not topic_obj or not topic_obj.is_online:
                continue
            try:
                content_level = int(topic_obj.content_level)
            except (TypeError, ValueError):
                # An empty or non-numeric level only disqualifies this topic.
                # Previously the error aborted the whole count and made the
                # method return False.
                continue
            if content_level in effective_levels:
                effective_num += 1
                if effective_num >= required_num:
                    return True
        return False
    except Exception:
        logging.error("catch exception,err_msg:%s" % traceback.format_exc())
        return False
# 获取前一天4,5星发帖数
def
get_high_quality_topic_num
(
self
):
try
:
...
...
trans2es/models/tag.py
View file @
d10f2829
...
...
@@ -19,6 +19,8 @@ class TopicTag(models.Model):
tag_id
=
models
.
IntegerField
(
verbose_name
=
u"标签ID"
)
topic_id
=
models
.
IntegerField
(
verbose_name
=
u"帖子ID"
)
is_online
=
models
.
BooleanField
(
verbose_name
=
u"是否在线"
)
is_collection
=
models
.
IntegerField
(
verbose_name
=
u"是否编辑标签"
)
class
AccountUserTag
(
models
.
Model
):
...
...
trans2es/models/topic.py
View file @
d10f2829
...
...
@@ -85,9 +85,12 @@ class Topic(models.Model):
def
get_virtual_vote_num
(
self
):
try
:
topic_extra
=
TopicExtra
.
object
.
get
(
topic_id
=
self
.
id
)
return
topic_extra
.
virtual_vote_num
topic_extra
=
TopicExtra
.
objects
.
filter
(
topic_id
=
self
.
id
)
.
first
()
virtual_vote_num
=
topic_extra
.
virtual_vote_num
if
topic_extra
else
0
return
virtual_vote_num
except
:
logging
.
error
(
"catch exception,err_msg:
%
s"
%
traceback
.
format_exc
())
return
0
def
get_pictorial_id
(
self
):
...
...
@@ -104,11 +107,14 @@ class Topic(models.Model):
@property
def
is_complaint
(
self
):
"""是否被举报"""
try
:
if
TopicComplaint
.
objects
.
filter
(
topic_id
=
self
.
id
,
is_online
=
True
)
.
exists
():
return
True
if
TopicComplaint
.
objects
.
filter
(
topic_id
=
self
.
id
,
is_online
=
True
)
.
exists
():
return
True
return
False
return
False
except
:
logging
.
error
(
"catch exception,err_msg:
%
s"
%
traceback
.
format_exc
())
return
False
def
topic_has_image
(
self
):
try
:
...
...
@@ -194,14 +200,14 @@ class Topic(models.Model):
elif
self
.
content_level
==
'3'
:
offline_score
+=
2.0
exposure_count
=
ActionSumAboutTopic
.
objects
.
using
(
settings
.
SLAVE_DB_NAME
)
.
filter
(
topic_id
=
self
.
id
,
data_type
=
1
)
.
count
()
click_count
=
ActionSumAboutTopic
.
objects
.
using
(
settings
.
SLAVE_DB_NAME
)
.
filter
(
topic_id
=
self
.
id
,
data_type
=
2
)
.
count
()
uv_num
=
ActionSumAboutTopic
.
objects
.
using
(
settings
.
SLAVE_DB_NAME
)
.
filter
(
topic_id
=
self
.
id
,
data_type
=
3
)
.
count
()
if
exposure_count
>
0
:
offline_score
+=
click_count
/
exposure_count
if
uv_num
>
0
:
offline_score
+=
(
self
.
vote_num
/
uv_num
+
self
.
reply_num
/
uv_num
)
#
exposure_count = ActionSumAboutTopic.objects.using(settings.SLAVE_DB_NAME).filter(topic_id=self.id, data_type=1).count()
#
click_count = ActionSumAboutTopic.objects.using(settings.SLAVE_DB_NAME).filter(topic_id=self.id, data_type=2).count()
#
uv_num = ActionSumAboutTopic.objects.using(settings.SLAVE_DB_NAME).filter(topic_id=self.id, data_type=3).count()
#
#
if exposure_count > 0:
#
offline_score += click_count / exposure_count
#
if uv_num > 0:
#
offline_score += (self.vote_num / uv_num + self.reply_num / uv_num)
"""
1:马甲账号是否对总分降权?
...
...
@@ -244,7 +250,6 @@ class PictorialTopic(models.Model):
pictorial_id
=
models
.
BigIntegerField
(
verbose_name
=
u'画报ID'
)
topic_id
=
models
.
BigIntegerField
(
verbose_name
=
u'帖子ID'
)
is_online
=
models
.
BooleanField
(
verbose_name
=
u"是否有效"
,
default
=
True
)
is_online
=
models
.
BooleanField
(
verbose_name
=
u'是否上线'
)
is_deleted
=
models
.
BooleanField
(
verbose_name
=
u'是否删除'
)
...
...
@@ -254,8 +259,25 @@ class TopicExtra(models.Model):
class
Meta
:
verbose_name
=
'帖子额外信息'
app_label
=
'community'
db_table
=
'topic_extra'
waq
db_table
=
'topic_extra'
id
=
models
.
IntegerField
(
verbose_name
=
u'ID'
,
primary_key
=
True
)
topic_id
=
models
.
IntegerField
(
verbose_name
=
u"帖子ID"
,
db_index
=
True
)
virtual_vote_num
=
models
.
IntegerField
(
verbose_name
=
"帖子虚拟点赞"
)
class ExcellentTopic(models.Model):
    """High-quality ("excellent") topic record, stored in the ``excellent_topic`` table."""

    class Meta:
        verbose_name = "优质帖子"
        db_table = "excellent_topic"

    # Primary key of the record.
    id = models.IntegerField(verbose_name=u'ID', primary_key=True)
    # Id of the topic this record refers to (indexed).
    topic_id = models.IntegerField(verbose_name=u"帖子ID", db_index=True)
    # Whether the record is online.
    is_online = models.BooleanField(verbose_name=u'是否上线')
    # Whether the record has been deleted.
    is_deleted = models.BooleanField(verbose_name=u'是否删除')
    # Kind of "excellent" mark (indexed); the value meanings are not visible here.
    excellent_type = models.IntegerField(verbose_name=u"优质类型", db_index=True)
    # Creation / update timestamps. NOTE(review): the default is evaluated once,
    # when the class is defined, and ``fromtimestamp(0)`` yields the epoch in the
    # host's local time zone - confirm this is intended.
    create_time = models.DateTimeField(verbose_name=u'创建时间', default=datetime.datetime.fromtimestamp(0))
    update_time = models.DateTimeField(verbose_name=u'更新时间', default=datetime.datetime.fromtimestamp(0))
trans2es/type_info.py
View file @
d10f2829
...
...
@@ -11,17 +11,19 @@ from libs.es import ESPerform
import
elasticsearch
import
elasticsearch.helpers
import
sys
from
libs.cache
import
redis_client
import
copy
from
trans2es.models
import
topic
,
user
,
pick_celebrity
,
group
,
celebrity
,
tag
,
contrast_similar
,
pictorial
from
trans2es.utils.user_transfer
import
UserTransfer
from
trans2es.utils.pick_celebrity_transfer
import
PickCelebrityTransfer
from
trans2es.utils.group_transfer
import
GroupTransfer
from
trans2es.utils.topic_transfer
import
TopicTransfer
from
trans2es.utils.excellect_topic_transfer
import
ExcellectTopicTransfer
from
trans2es.utils.pictorial_transfer
import
PictorialTransfer
from
trans2es.utils.celebrity_transfer
import
CelebrityTransfer
from
trans2es.utils.tag_transfer
import
TagTransfer
from
trans2es.utils.contrast_similar_transfer
import
Contrast_Similar_Transfer
from
libs.es
import
ESPerform
__es
=
None
...
...
@@ -62,6 +64,7 @@ class TypeInfo(object):
self
.
round_insert_chunk_size
=
round_insert_chunk_size
self
.
round_insert_period
=
round_insert_period
self
.
logic_database_id
=
logic_database_id
self
.
physical_topic_star
=
"physical:topic_star"
@property
def
query
(
self
):
...
...
@@ -139,9 +142,25 @@ class TypeInfo(object):
pk
,
))
else
:
data_list
.
append
(
data
)
if
self
.
type
==
"topic"
and
instance
.
content_level
and
int
(
instance
.
content_level
)
>=
4
:
topic_data_high_star_list
.
append
(
data
)
if
data
:
if
self
.
type
==
"topic"
:
ori_topic_star
=
redis_client
.
hget
(
self
.
physical_topic_star
,
data
[
"id"
])
if
not
ori_topic_star
:
redis_client
.
hset
(
self
.
physical_topic_star
,
data
[
"id"
],
data
[
"content_level"
])
else
:
int_ori_topic_star
=
int
(
str
(
ori_topic_star
,
encoding
=
"utf-8"
))
if
int_ori_topic_star
!=
data
[
"content_level"
]:
old_data
=
copy
.
deepcopy
(
data
)
old_data
[
"is_online"
]
=
False
old_data
[
"is_deleted"
]
=
True
old_data
[
"content_level"
]
=
int_ori_topic_star
data_list
.
append
(
old_data
)
redis_client
.
hset
(
self
.
physical_topic_star
,
data
[
"id"
],
data
[
"content_level"
])
data_list
.
append
(
data
)
# if self.type=="topic" and instance.content_level and int(instance.content_level)>=4:
# topic_data_high_star_list.append(data)
return
(
data_list
,
topic_data_high_star_list
)
...
...
@@ -199,22 +218,30 @@ class TypeInfo(object):
time2
=
end
-
begin
begin
=
time
.
time
()
logging
.
info
(
"get sub_index_name:
%
s"
%
sub_index_name
)
logging
.
info
(
"get data_list:
%
s"
%
data_list
)
#
logging.info("get sub_index_name:%s"%sub_index_name)
#
logging.info("get data_list:%s"%data_list)
self
.
elasticsearch_bulk_insert_data
(
sub_index_name
=
sub_index_name
,
data_list
=
data_list
,
es
=
es
,
)
# 同时写4星及以上的帖子
if
len
(
topic_data_high_star_list
)
>
0
:
if
sub_index_name
==
"topic"
:
self
.
elasticsearch_bulk_insert_data
(
sub_index_name
=
"topic-
high-star
"
,
data_list
=
topic_data_high_star
_list
,
sub_index_name
=
"topic-
star-routing
"
,
data_list
=
data
_list
,
es
=
es
,
)
# # 同时写4星及以上的帖子
# if len(topic_data_high_star_list)>0:
# self.elasticsearch_bulk_insert_data(
# sub_index_name="topic-high-star",
# data_list=topic_data_high_star_list,
# es=es,
# )
end
=
time
.
time
()
time3
=
end
-
begin
logging
.
info
(
"duan add,insert_table_by_pk_list time cost:
%
ds,
%
ds,
%
ds,
%
ds"
%
(
time0
,
time1
,
time2
,
time3
))
...
...
@@ -294,6 +321,16 @@ def get_type_info_map():
round_insert_chunk_size
=
5
,
round_insert_period
=
2
,
),
TypeInfo
(
name
=
'excellect-topic'
,
# 优质帖子
type
=
'excellect-topic'
,
model
=
topic
.
ExcellentTopic
,
query_deferred
=
lambda
:
topic
.
ExcellentTopic
.
objects
.
all
()
.
query
,
get_data_func
=
ExcellectTopicTransfer
.
get_excellect_topic_data
,
bulk_insert_chunk_size
=
100
,
round_insert_chunk_size
=
5
,
round_insert_period
=
2
,
),
TypeInfo
(
name
=
'topic'
,
# 日记
type
=
'topic'
,
...
...
trans2es/utils/excellect_topic_transfer.py
0 → 100644
View file @
d10f2829
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import
os
import
sys
import
logging
import
traceback
from
libs.tools
import
tzlc
import
time
import
re
import
datetime
from
trans2es.models.user
import
User
from
trans2es.models.topic
import
Topic
from
trans2es.utils.topic_transfer
import
TopicTransfer
class ExcellectTopicTransfer(object):
    """Builds the document data for an excellent-topic record."""

    @classmethod
    def get_excellect_topic_data(cls, instance):
        """Return the topic data for an excellent-topic record.

        :param instance: object exposing ``topic_id``, ``is_online`` and
            ``is_deleted``.
        :return: the result of ``TopicTransfer.get_topic_data(topic,
            is_excellect=True)`` for the referenced topic, or None when the
            record is offline or deleted, the topic does not exist, or an
            error occurs.
        """
        try:
            # Offline or deleted records produce no document.
            if not instance.is_online or instance.is_deleted:
                return None

            topic_ins = Topic.objects.filter(id=instance.topic_id).first()
            if not topic_ins:
                return None

            return TopicTransfer.get_topic_data(topic_ins, is_excellect=True)
        except Exception:
            # Narrowed from a bare ``except:`` so that KeyboardInterrupt and
            # SystemExit are no longer swallowed.
            logging.error("catch exception,err_msg:%s" % traceback.format_exc())
            return None
trans2es/utils/pictorial_transfer.py
View file @
d10f2829
...
...
@@ -37,6 +37,7 @@ class PictorialTransfer(object):
res
[
"tag_id"
]
=
tag_id
res
[
"tag_name"
]
=
instance
.
get_tag_by_name
(
tag_id
)
res
[
"topic_id_list"
]
=
instance
.
get_topic_id
()
res
[
"effective"
]
=
instance
.
get_effective
(
res
[
"topic_id_list"
])
return
res
except
:
...
...
trans2es/utils/topic_transfer.py
View file @
d10f2829
...
...
@@ -9,11 +9,12 @@ import time
import
re
import
datetime
from
trans2es.models.user
import
User
from
trans2es.models.topic
import
ExcellentTopic
class
TopicTransfer
(
object
):
@classmethod
def
get_topic_data
(
cls
,
instance
):
def
get_topic_data
(
cls
,
instance
,
is_excellect
=
False
):
try
:
res
=
dict
()
...
...
@@ -111,6 +112,15 @@ class TopicTransfer(object):
res
[
"total_vote_num"
]
=
instance
.
get_virtual_vote_num
()
+
instance
.
vote_num
if
is_excellect
:
res
[
"is_excellent"
]
=
1
else
:
excelllect_object
=
ExcellentTopic
.
objects
.
filter
(
topic_id
=
instance
.
id
)
.
first
()
if
excelllect_object
and
excelllect_object
.
is_online
and
not
excelllect_object
.
is_deleted
:
res
[
"is_excellent"
]
=
1
else
:
res
[
"is_excellent"
]
=
0
logging
.
info
(
"test topic transfer time cost,time0:
%
d,time1:
%
d,time2:
%
d,time3:
%
d,time4:
%
d"
%
(
time0
,
time1
,
time2
,
time3
,
time4
))
return
res
except
:
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment