Commit 165eded0 authored by lixiaofang

add

parents 79122e95 34a89a76
<!-- Project code-style scheme. The DBN-PSQL and DBN-SQL sections were each
     present twice with identical content; one copy of each is kept. -->
<component name="ProjectCodeStyleConfiguration">
  <code_scheme name="Project" version="173">
    <DBN-PSQL>
      <case-options enabled="false">
        <option name="KEYWORD_CASE" value="lower" />
        <option name="FUNCTION_CASE" value="lower" />
        <option name="PARAMETER_CASE" value="lower" />
        <option name="DATATYPE_CASE" value="lower" />
        <option name="OBJECT_CASE" value="preserve" />
      </case-options>
      <formatting-settings enabled="false" />
    </DBN-PSQL>
    <DBN-SQL>
      <case-options enabled="false">
        <option name="KEYWORD_CASE" value="lower" />
        <option name="FUNCTION_CASE" value="lower" />
        <option name="PARAMETER_CASE" value="lower" />
        <option name="DATATYPE_CASE" value="lower" />
        <option name="OBJECT_CASE" value="preserve" />
      </case-options>
      <formatting-settings enabled="false">
        <option name="STATEMENT_SPACING" value="one_line" />
        <option name="CLAUSE_CHOP_DOWN" value="chop_down_if_statement_long" />
        <option name="ITERATION_ELEMENTS_WRAPPING" value="chop_down_if_not_single" />
      </formatting-settings>
    </DBN-SQL>
  </code_scheme>
</component>
\ No newline at end of file
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="ChangeListManager">
<list default="true" id="d7dd36ca-85ef-4a59-9db5-8b1ee4993a4e" name="Default Changelist" comment="" />
<option name="EXCLUDED_CONVERTED_TO_IGNORED" value="true" />
<option name="SHOW_DIALOG" value="false" />
<option name="HIGHLIGHT_CONFLICTS" value="true" />
<option name="HIGHLIGHT_NON_ACTIVE_CHANGELIST" value="false" />
<option name="LAST_RESOLUTION" value="IGNORE" />
</component>
<component name="FUSProjectUsageTrigger">
<session id="-127591351">
<usages-collector id="statistics.lifecycle.project">
<counts>
<entry key="project.open.time.1" value="1" />
<entry key="project.opened" value="1" />
</counts>
</usages-collector>
<usages-collector id="statistics.file.extensions.open">
<counts>
<entry key="py" value="5" />
</counts>
</usages-collector>
<usages-collector id="statistics.file.types.open">
<counts>
<entry key="Python" value="5" />
</counts>
</usages-collector>
<usages-collector id="statistics.file.extensions.edit">
<counts>
<entry key="py" value="28" />
</counts>
</usages-collector>
<usages-collector id="statistics.file.types.edit">
<counts>
<entry key="Python" value="28" />
</counts>
</usages-collector>
</session>
<session id="1569122105">
<usages-collector id="statistics.file.extensions.open">
<counts>
<entry key="py" value="2" />
<entry key="template" value="1" />
<entry key="xml" value="1" />
</counts>
</usages-collector>
<usages-collector id="statistics.file.types.open">
<counts>
<entry key="PLAIN_TEXT" value="1" />
<entry key="Python" value="2" />
<entry key="XML" value="1" />
</counts>
</usages-collector>
</session>
<session id="-609148713">
<usages-collector id="statistics.lifecycle.project">
<counts>
<entry key="project.closed" value="1" />
<entry key="project.open.time.2" value="1" />
<entry key="project.open.time.8" value="1" />
<entry key="project.opened" value="2" />
</counts>
</usages-collector>
<usages-collector id="statistics.file.extensions.open">
<counts>
<entry key="json" value="8" />
<entry key="py" value="69" />
<entry key="template" value="3" />
<entry key="xml" value="1" />
</counts>
</usages-collector>
<usages-collector id="statistics.file.types.open">
<counts>
<entry key="JSON" value="8" />
<entry key="PLAIN_TEXT" value="3" />
<entry key="Python" value="69" />
<entry key="XML" value="1" />
</counts>
</usages-collector>
<usages-collector id="statistics.file.extensions.edit">
<counts>
<entry key="json" value="56" />
<entry key="py" value="888" />
<entry key="template" value="5" />
</counts>
</usages-collector>
<usages-collector id="statistics.file.types.edit">
<counts>
<entry key="JSON" value="56" />
<entry key="PLAIN_TEXT" value="5" />
<entry key="Python" value="888" />
</counts>
</usages-collector>
</session>
</component>
<component name="FileEditorManager">
<leaf SIDE_TABS_SIZE_LIMIT_KEY="300">
<!-- Git conflict markers removed; the incoming (huabao) editor state is kept. -->
<file pinned="false" current-in-tab="false">
  <entry file="file://$PROJECT_DIR$/search/views/topic.py">
    <provider selected="true" editor-type-id="text-editor">
      <state relative-caret-position="823">
        <caret line="103" column="37" selection-start-line="103" selection-start-column="32" selection-end-line="103" selection-end-column="37" />
        <folding>
          <element signature="e#466#1607#0" />
          <element signature="e#7074#7716#0" />
          <element signature="e#7816#8386#0" />
          <element signature="e#8511#9086#0" />
        </folding>
      </state>
    </provider>
  </entry>
</file>
<file pinned="false" current-in-tab="false">
<entry file="file://$PROJECT_DIR$/search/views/group.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="510">
<caret line="42" column="16" lean-forward="true" selection-start-line="42" selection-start-column="16" selection-end-line="42" selection-end-column="16" />
</state>
</provider>
</entry>
</file>
<file pinned="false" current-in-tab="false">
<entry file="file://$PROJECT_DIR$/trans2es/type_info.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="265">
<caret line="337" column="24" selection-start-line="337" selection-start-column="24" selection-end-line="337" selection-end-column="24" />
<folding>
<element signature="e#15#87#0" expanded="true" />
</folding>
</state>
</provider>
</entry>
</file>
<!-- Git conflict markers removed; the incoming (huabao) side is kept, which
     lists pictorial.py and pictorial_transfer.py as two separate tabs. -->
<file pinned="false" current-in-tab="false">
  <entry file="file://$PROJECT_DIR$/trans2es/models/pictorial.py">
    <provider selected="true" editor-type-id="text-editor">
      <state>
        <caret line="31" column="4" selection-start-line="31" selection-start-column="4" selection-end-line="31" selection-end-column="4" />
      </state>
    </provider>
  </entry>
</file>
<file pinned="false" current-in-tab="false">
  <entry file="file://$PROJECT_DIR$/trans2es/utils/pictorial_transfer.py">
    <provider selected="true" editor-type-id="text-editor">
      <state relative-caret-position="322">
        <caret line="28" column="48" lean-forward="true" selection-start-line="28" selection-start-column="48" selection-end-line="28" selection-end-column="48" />
        <folding>
          <element signature="e#46#55#0" expanded="true" />
        </folding>
      </state>
    </provider>
  </entry>
</file>
<!-- Git conflict markers removed; the incoming (huabao) editor state is kept. -->
<file pinned="false" current-in-tab="false">
  <entry file="file://$PROJECT_DIR$/trans2es/mapping/topic.json">
    <provider selected="true" editor-type-id="text-editor">
      <state relative-caret-position="319">
        <caret line="27" column="36" lean-forward="true" selection-start-line="27" selection-start-column="36" selection-end-line="27" selection-end-column="36" />
      </state>
    </provider>
  </entry>
</file>
<!-- Git conflict markers removed; the incoming (huabao) editor state is kept. -->
<file pinned="false" current-in-tab="true">
  <entry file="file://$PROJECT_DIR$/trans2es/utils/topic_transfer.py">
    <provider selected="true" editor-type-id="text-editor">
      <state relative-caret-position="1129">
        <caret line="88" column="48" lean-forward="true" selection-start-line="88" selection-start-column="48" selection-end-line="88" selection-end-column="48" />
        <folding>
          <element signature="e#46#55#0" expanded="true" />
        </folding>
      </state>
    </provider>
  </entry>
</file>
<file pinned="false" current-in-tab="false">
<entry file="file://$PROJECT_DIR$/trans2es/models/topic.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="698">
<caret line="98" column="80" selection-start-line="98" selection-start-column="12" selection-end-line="98" selection-end-column="80" />
<folding>
<element signature="e#46#118#0" expanded="true" />
</folding>
</state>
</provider>
</entry>
</file>
<file pinned="false" current-in-tab="false">
<entry file="file://$PROJECT_DIR$/trans2es/mapping/pictorial.json">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="240">
<caret line="16" column="1" lean-forward="true" selection-start-line="16" selection-start-column="1" selection-end-line="16" selection-end-column="1" />
</state>
</provider>
</entry>
</file>
<file pinned="false" current-in-tab="false">
<entry file="file://$PROJECT_DIR$/search/utils/topic.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="1290">
<caret line="94" column="8" selection-start-line="94" selection-start-column="8" selection-end-line="94" selection-end-column="8" />
<folding>
<element signature="e#47#61#0" expanded="true" />
</folding>
</state>
</provider>
</entry>
</file>
</leaf>
</component>
<component name="FileTemplateManagerImpl">
<option name="RECENT_TEMPLATES">
<list>
<option value="Python Script" />
</list>
</option>
</component>
<component name="FindInProjectRecents">
<findStrings>
<find>grop</find>
<find>contrast_similar</find>
<find>topic</find>
<find>GroupTransfer</find>
<find>get_group_query_result</find>
<find>get_group_ids_by_aggs</find>
<find>pictorial_id</find>
<find>pictorial_name</find>
<find>get_recommend_topic_ids</find>
<find>ES_INDEX_PREFIX</find>
<find>physical/search/query_tag_id_by_topic</find>
<find>tag_name_list</find>
<find>pictorial</find>
<find>group</find>
<find>update_time</find>
</findStrings>
</component>
<component name="Git.Settings">
<option name="RECENT_GIT_ROOT_PATH" value="$PROJECT_DIR$" />
</component>
<component name="IdeDocumentHistory">
<option name="CHANGED_PATHS">
<list>
<option value="$PROJECT_DIR$/trans2es/models/face_user_contrast_similar.py" />
<option value="$PROJECT_DIR$/physical/settings.py" />
<option value="$PROJECT_DIR$/trans2es/utils/pictorial_transfer.py" />
<option value="$PROJECT_DIR$/trans2es/mapping/group.json" />
<option value="$PROJECT_DIR$/trans2es/mapping/pictorial.py" />
<option value="$PROJECT_DIR$/trans2es/mapping/pictorial.json" />
<option value="$PROJECT_DIR$/trans2es/type_info.py" />
<option value="$PROJECT_DIR$/trans2es/models/group.py" />
<option value="$PROJECT_DIR$/trans2es/utils/group_transfer.py" />
<option value="$PROJECT_DIR$/trans2es/models/pictorial.py" />
<option value="$PROJECT_DIR$/trans2es/mapping/user.json" />
<option value="$PROJECT_DIR$/trans2es/utils/user_transfer.py" />
<option value="$PROJECT_DIR$/trans2es/models/user.py" />
<option value="$PROJECT_DIR$/trans2es/management/commands/trans2es_data2es_parallel.py" />
<option value="$PROJECT_DIR$/search/views/topic.py" />
<option value="$PROJECT_DIR$/search/views/group.py" />
<option value="$PROJECT_DIR$/search/utils/topic.py" />
<option value="$PROJECT_DIR$/physical/settings_local.py.template" />
<option value="$PROJECT_DIR$/search/utils/group.py" />
<option value="$PROJECT_DIR$/trans2es/models/topic.py" />
<option value="$PROJECT_DIR$/trans2es/mapping/topic.json" />
<option value="$PROJECT_DIR$/trans2es/utils/topic_transfer.py" />
</list>
</option>
</component>
<!-- Git conflict markers removed; the incoming (huabao) x offset is kept. -->
<component name="ProjectFrameBounds">
  <option name="x" value="-56" />
  <option name="y" value="23" />
  <option name="width" value="1920" />
  <option name="height" value="724" />
</component>
<component name="ProjectInspectionProfilesVisibleTreeState">
<entry key="Project Default">
<profile-state>
<expanded-state>
<State />
<State>
<id>General</id>
</State>
</expanded-state>
<selected-state>
<State>
<id>Buildout</id>
</State>
</selected-state>
</profile-state>
</entry>
</component>
<component name="ProjectView">
<navigator proportions="" version="1">
<foldersAlwaysOnTop value="true" />
</navigator>
<panes>
<pane id="Scope" />
<pane id="ProjectPane">
<subPane>
<expand>
<path>
<item name="physical" type="b2602c69:ProjectViewProjectNode" />
<item name="physical" type="462c0819:PsiDirectoryNode" />
</path>
<path>
<item name="physical" type="b2602c69:ProjectViewProjectNode" />
<item name="physical" type="462c0819:PsiDirectoryNode" />
<item name="injection" type="462c0819:PsiDirectoryNode" />
</path>
<path>
<item name="physical" type="b2602c69:ProjectViewProjectNode" />
<item name="physical" type="462c0819:PsiDirectoryNode" />
<item name="physical" type="462c0819:PsiDirectoryNode" />
</path>
<path>
<item name="physical" type="b2602c69:ProjectViewProjectNode" />
<item name="physical" type="462c0819:PsiDirectoryNode" />
<item name="search" type="462c0819:PsiDirectoryNode" />
</path>
<path>
<item name="physical" type="b2602c69:ProjectViewProjectNode" />
<item name="physical" type="462c0819:PsiDirectoryNode" />
<item name="trans2es" type="462c0819:PsiDirectoryNode" />
</path>
<path>
<item name="physical" type="b2602c69:ProjectViewProjectNode" />
<item name="physical" type="462c0819:PsiDirectoryNode" />
<item name="trans2es" type="462c0819:PsiDirectoryNode" />
<item name="mapping" type="462c0819:PsiDirectoryNode" />
</path>
</expand>
<select />
</subPane>
</pane>
</panes>
</component>
<component name="PropertiesComponent">
<property name="last_opened_file_path" value="$PROJECT_DIR$" />
<property name="settings.editor.selected.configurable" value="com.jetbrains.python.configuration.PyActiveSdkModuleConfigurable" />
</component>
<component name="RecentsManager">
<key name="CopyFile.RECENT_KEYS">
<recent name="$PROJECT_DIR$/trans2es/mapping" />
</key>
<key name="MoveFile.RECENT_KEYS">
<recent name="$PROJECT_DIR$/search/views" />
</key>
</component>
<component name="RunDashboard">
<option name="ruleStates">
<list>
<RuleState>
<option name="name" value="ConfigurationTypeDashboardGroupingRule" />
</RuleState>
<RuleState>
<option name="name" value="StatusDashboardGroupingRule" />
</RuleState>
</list>
</option>
</component>
<component name="SvnConfiguration">
<configuration />
</component>
<component name="TaskManager">
<task active="true" id="Default" summary="Default task">
<changelist id="d7dd36ca-85ef-4a59-9db5-8b1ee4993a4e" name="Default Changelist" comment="" />
<created>1548319196437</created>
<option name="number" value="Default" />
<option name="presentableId" value="Default" />
<updated>1548319196437</updated>
</task>
<servers />
</component>
<!-- Git conflict markers removed from the <frame> and Terminal <window_info>
     lines; the incoming (huabao) values are kept for both. -->
<component name="ToolWindowManager">
  <frame x="-56" y="23" width="1920" height="724" extended-state="0" />
  <editor active="true" />
  <layout>
    <window_info active="true" content_ui="combo" id="Project" order="0" visible="true" weight="0.18114798" />
    <window_info id="Structure" order="1" side_tool="true" weight="0.25" />
    <window_info id="DB Browser" order="2" />
    <window_info id="Favorites" order="3" side_tool="true" />
    <window_info anchor="bottom" id="Message" order="0" />
    <window_info anchor="bottom" id="Find" order="1" />
    <window_info anchor="bottom" id="Run" order="2" />
    <window_info anchor="bottom" id="Debug" order="3" weight="0.4" />
    <window_info anchor="bottom" id="Cvs" order="4" weight="0.25" />
    <window_info anchor="bottom" id="Inspection" order="5" weight="0.4" />
    <window_info anchor="bottom" id="TODO" order="6" />
    <window_info anchor="bottom" id="Version Control" order="7" show_stripe_button="false" />
    <window_info anchor="bottom" id="DB Execution Console" order="8" />
    <window_info anchor="bottom" id="Terminal" order="9" visible="true" weight="0.0" />
    <window_info anchor="bottom" id="Python Console" order="10" />
    <window_info anchor="bottom" id="Event Log" order="11" side_tool="true" />
    <window_info anchor="right" id="Commander" internal_type="SLIDING" order="0" type="SLIDING" weight="0.4" />
    <window_info anchor="right" id="Ant Build" order="1" weight="0.25" />
    <window_info anchor="right" content_ui="combo" id="Hierarchy" order="2" weight="0.25" />
  </layout>
</component>
<component name="VcsContentAnnotationSettings">
<option name="myLimit" value="2678400000" />
</component>
<component name="XDebuggerManager">
<breakpoint-manager>
<breakpoints>
<line-breakpoint enabled="true" suspend="THREAD" type="python-line">
<url>file://$PROJECT_DIR$/trans2es/models/user.py</url>
<line>148</line>
<option name="timeStamp" value="1" />
</line-breakpoint>
<line-breakpoint enabled="true" suspend="THREAD" type="python-line">
<url>file://$PROJECT_DIR$/trans2es/models/user.py</url>
<line>139</line>
<option name="timeStamp" value="2" />
</line-breakpoint>
<line-breakpoint enabled="true" suspend="THREAD" type="python-line">
<url>file://$PROJECT_DIR$/search/views/group.py</url>
<line>36</line>
<option name="timeStamp" value="3" />
</line-breakpoint>
<line-breakpoint enabled="true" suspend="THREAD" type="python-line">
<url>file://$PROJECT_DIR$/search/utils/group.py</url>
<line>11</line>
<option name="timeStamp" value="4" />
</line-breakpoint>
<line-breakpoint enabled="true" suspend="THREAD" type="python-line">
<url>file://$PROJECT_DIR$/trans2es/models/topic.py</url>
<line>86</line>
<option name="timeStamp" value="5" />
</line-breakpoint>
<line-breakpoint enabled="true" suspend="THREAD" type="python-line">
<url>file://$PROJECT_DIR$/trans2es/models/topic.py</url>
<line>110</line>
<option name="timeStamp" value="7" />
</line-breakpoint>
<line-breakpoint enabled="true" suspend="THREAD" type="python-line">
<url>file://$PROJECT_DIR$/trans2es/models/topic.py</url>
<line>136</line>
<option name="timeStamp" value="8" />
</line-breakpoint>
</breakpoints>
</breakpoint-manager>
</component>
<component name="editorHistoryManager">
<entry file="file://$PROJECT_DIR$/search/views/contrast_similar.py">
<provider selected="true" editor-type-id="text-editor" />
</entry>
<entry file="file://$PROJECT_DIR$/trans2es/mapping/pictorial.py" />
<entry file="file://$PROJECT_DIR$/trans2es/mapping/group.json">
<provider selected="true" editor-type-id="text-editor">
<state>
<caret column="1" lean-forward="true" selection-start-column="1" selection-end-column="1" />
</state>
</provider>
</entry>
<entry file="file://$PROJECT_DIR$/trans2es/management/commands/trans2es_mapping2es.py">
<provider selected="true" editor-type-id="text-editor" />
</entry>
<entry file="file://$PROJECT_DIR$/trans2es/models/face_user_contrast_similar.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="240">
<caret line="21" column="26" selection-start-line="21" selection-start-column="26" selection-end-line="21" selection-end-column="26" />
</state>
</provider>
</entry>
<entry file="file://$PROJECT_DIR$/trans2es/models/contrast_similar.py">
<provider selected="true" editor-type-id="text-editor" />
</entry>
<entry file="file://$PROJECT_DIR$/trans2es/models/group_user_role.py">
<provider selected="true" editor-type-id="text-editor" />
</entry>
<entry file="file://$PROJECT_DIR$/trans2es/models/pick_celebrity.py">
<provider selected="true" editor-type-id="text-editor" />
</entry>
<entry file="file://$PROJECT_DIR$/trans2es/models/pick_topic.py">
<provider selected="true" editor-type-id="text-editor" />
</entry>
<entry file="file://$PROJECT_DIR$/trans2es/models/pickuserrecord.py">
<provider selected="true" editor-type-id="text-editor" />
</entry>
<entry file="file://$PROJECT_DIR$/trans2es/models/user_extra.py">
<provider selected="true" editor-type-id="text-editor" />
</entry>
<entry file="file://$PROJECT_DIR$/trans2es/models/user_follow.py">
<provider selected="true" editor-type-id="text-editor" />
</entry>
<entry file="file://$PROJECT_DIR$/trans2es/utils/group_transfer.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="150">
<caret line="14" column="25" selection-start-line="14" selection-start-column="25" selection-end-line="14" selection-end-column="25" />
</state>
</provider>
</entry>
<entry file="file://$PROJECT_DIR$/trans2es/models/group.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="210">
<caret line="14" column="26" lean-forward="true" selection-start-line="14" selection-start-column="26" selection-end-line="14" selection-end-column="26" />
</state>
</provider>
</entry>
<entry file="file://$PROJECT_DIR$/trans2es/utils/user_transfer.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="644">
<caret line="76" selection-start-line="76" selection-end-line="76" />
</state>
</provider>
</entry>
<entry file="file://$PROJECT_DIR$/trans2es/mapping/user.json">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="448">
<caret line="51" column="32" selection-start-line="51" selection-start-column="5" selection-end-line="51" selection-end-column="32" />
</state>
</provider>
</entry>
<entry file="file://$PROJECT_DIR$/search/views.py">
<provider selected="true" editor-type-id="text-editor" />
</entry>
<entry file="file://$PROJECT_DIR$/physical/django_init.py">
<provider selected="true" editor-type-id="text-editor" />
</entry>
<entry file="file://$PROJECT_DIR$/physical/settings.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="180">
<caret line="12" column="9" lean-forward="true" selection-start-line="12" selection-start-column="9" selection-end-line="12" selection-end-column="9" />
</state>
</provider>
</entry>
<entry file="file://$PROJECT_DIR$/trans2es/management/commands/trans2es_data2es_parallel.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="585">
<caret line="97" lean-forward="true" selection-start-line="97" selection-end-line="97" />
</state>
</provider>
</entry>
<entry file="file://$PROJECT_DIR$/trans2es/models/user.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="285">
<caret line="19" column="25" selection-start-line="19" selection-start-column="25" selection-end-line="19" selection-end-column="25" />
<folding>
<element signature="e#47#79#0" expanded="true" />
<element signature="e#460#7466#0" />
<element signature="e#1396#1736#0" />
<element signature="e#1787#2318#0" />
<element signature="e#2367#3238#0" />
<element signature="e#3291#3907#0" />
<element signature="e#3960#4576#0" />
<element signature="e#3954#5060#0" />
<element signature="e#5114#6318#0" />
<element signature="e#6364#6780#0" />
</folding>
</state>
</provider>
</entry>
<entry file="file://$PROJECT_DIR$/search/views/user.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="-395">
<caret line="1" column="12" selection-start-line="1" selection-start-column="12" selection-end-line="1" selection-end-column="12" />
</state>
</provider>
</entry>
<entry file="file://$PROJECT_DIR$/trans2es/models/tag.py">
<provider selected="true" editor-type-id="text-editor" />
</entry>
<entry file="file://$PROJECT_DIR$/search/views/tag.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="-28" />
</provider>
</entry>
<entry file="file://$PROJECT_DIR$/search/views/pick.py">
<provider selected="true" editor-type-id="text-editor" />
</entry>
<entry file="file://$PROJECT_DIR$/app_conf.xml">
<provider selected="true" editor-type-id="text-editor" />
</entry>
<entry file="file://$PROJECT_DIR$/search/utils/common.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="72">
<caret line="6" column="6" selection-start-line="6" selection-start-column="6" selection-end-line="6" selection-end-column="6" />
</state>
</provider>
</entry>
<entry file="file://$PROJECT_DIR$/physical/celery.py">
<provider selected="true" editor-type-id="text-editor" />
</entry>
<entry file="file://$PROJECT_DIR$/physical/celery_task_router.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="300">
<caret line="25" column="23" lean-forward="true" selection-start-line="25" selection-start-column="23" selection-end-line="25" selection-end-column="23" />
</state>
</provider>
</entry>
<entry file="file://$PROJECT_DIR$/physical/urls.py">
<provider selected="true" editor-type-id="text-editor" />
</entry>
<entry file="file://$PROJECT_DIR$/physical/wsgi.py">
<provider selected="true" editor-type-id="text-editor" />
</entry>
<entry file="file://$PROJECT_DIR$/physical/settings_local.py.template">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="1837">
<caret line="146" column="26" selection-start-line="146" selection-start-column="26" selection-end-line="146" selection-end-column="26" />
</state>
</provider>
</entry>
<entry file="file://$PROJECT_DIR$/physical/views.py">
<provider selected="true" editor-type-id="text-editor" />
</entry>
<entry file="file://$PROJECT_DIR$/libs/es.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="120">
<caret line="17" column="43" selection-start-line="17" selection-start-column="28" selection-end-line="17" selection-end-column="43" />
</state>
</provider>
</entry>
<!-- Git conflict markers removed; the incoming (huabao) history entries are
     kept so each <entry> pairs with its own <provider> state. -->
<entry file="file://$PROJECT_DIR$/search/utils/group.py">
  <provider selected="true" editor-type-id="text-editor">
    <state relative-caret-position="147">
      <caret line="115" column="21" selection-start-line="115" selection-start-column="21" selection-end-line="115" selection-end-column="21" />
      <folding>
        <element signature="e#218#1347#0" />
      </folding>
    </state>
  </provider>
</entry>
<entry file="file://$PROJECT_DIR$/injection/data_sync/tasks.py">
  <provider selected="true" editor-type-id="text-editor">
    <state relative-caret-position="180">
      <caret line="22" column="22" selection-start-line="22" selection-start-column="22" selection-end-line="22" selection-end-column="38" />
    </state>
  </provider>
</entry>
<entry file="file://$PROJECT_DIR$/search/utils/topic.py">
  <provider selected="true" editor-type-id="text-editor">
    <state relative-caret-position="1290">
      <caret line="94" column="8" selection-start-line="94" selection-start-column="8" selection-end-line="94" selection-end-column="8" />
      <folding>
        <element signature="e#47#61#0" expanded="true" />
      </folding>
    </state>
  </provider>
</entry>
<entry file="file://$PROJECT_DIR$/search/views/group.py">
  <provider selected="true" editor-type-id="text-editor">
    <state relative-caret-position="510">
      <caret line="42" column="16" lean-forward="true" selection-start-line="42" selection-start-column="16" selection-end-line="42" selection-end-column="16" />
    </state>
  </provider>
</entry>
<entry file="file://$PROJECT_DIR$/trans2es/mapping/topic.json">
  <provider selected="true" editor-type-id="text-editor">
    <state relative-caret-position="319">
      <caret line="27" column="36" lean-forward="true" selection-start-line="27" selection-start-column="36" selection-end-line="27" selection-end-column="36" />
    </state>
  </provider>
</entry>
<entry file="file://$PROJECT_DIR$/search/views/topic.py">
  <provider selected="true" editor-type-id="text-editor">
    <state relative-caret-position="823">
      <caret line="103" column="37" selection-start-line="103" selection-start-column="32" selection-end-line="103" selection-end-column="37" />
      <folding>
        <element signature="e#466#1607#0" />
        <element signature="e#7074#7716#0" />
        <element signature="e#7816#8386#0" />
        <element signature="e#8511#9086#0" />
      </folding>
    </state>
  </provider>
</entry>
<entry file="file://$PROJECT_DIR$/trans2es/mapping/pictorial.json">
  <provider selected="true" editor-type-id="text-editor">
    <state relative-caret-position="240">
      <caret line="16" column="1" lean-forward="true" selection-start-line="16" selection-start-column="1" selection-end-line="16" selection-end-column="1" />
    </state>
  </provider>
</entry>
<!-- Git conflict markers removed; the incoming (huabao) history entries are kept. -->
<entry file="file://$PROJECT_DIR$/trans2es/type_info.py">
  <provider selected="true" editor-type-id="text-editor">
    <state relative-caret-position="265">
      <caret line="337" column="24" selection-start-line="337" selection-start-column="24" selection-end-line="337" selection-end-column="24" />
      <folding>
        <element signature="e#15#87#0" expanded="true" />
      </folding>
    </state>
  </provider>
</entry>
<entry file="file://$PROJECT_DIR$/trans2es/utils/pictorial_transfer.py">
  <provider selected="true" editor-type-id="text-editor">
    <state relative-caret-position="322">
      <caret line="28" column="48" lean-forward="true" selection-start-line="28" selection-start-column="48" selection-end-line="28" selection-end-column="48" />
      <folding>
        <element signature="e#46#55#0" expanded="true" />
      </folding>
    </state>
  </provider>
</entry>
<entry file="file://$PROJECT_DIR$/trans2es/models/topic.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="698">
<caret line="98" column="80" selection-start-line="98" selection-start-column="12" selection-end-line="98" selection-end-column="80" />
<folding>
<element signature="e#46#118#0" expanded="true" />
</folding>
</state>
</provider>
</entry>
<entry file="file://$PROJECT_DIR$/trans2es/models/pictorial.py">
<provider selected="true" editor-type-id="text-editor">
<state>
<caret line="31" column="4" selection-start-line="31" selection-start-column="4" selection-end-line="31" selection-end-column="4" />
</state>
</provider>
</entry>
<!-- Git conflict markers removed; the incoming (huabao) side, which adds no
     entries after this one, is kept. The HEAD side added a second
     topic_transfer.py entry with a different caret state. -->
<entry file="file://$PROJECT_DIR$/trans2es/utils/topic_transfer.py">
  <provider selected="true" editor-type-id="text-editor">
    <state relative-caret-position="1129">
      <caret line="88" column="48" lean-forward="true" selection-start-line="88" selection-start-column="48" selection-end-line="88" selection-end-column="48" />
      <folding>
        <element signature="e#46#55#0" expanded="true" />
      </folding>
    </state>
  </provider>
</entry>
</component>
<component name="masterDetails">
<states>
<state key="ScopeChooserConfigurable.UI">
<settings>
<splitter-proportions>
<option name="proportions">
<list>
<option value="0.2" />
</list>
</option>
</splitter-proportions>
</settings>
</state>
</states>
</component>
</project>
\ No newline at end of file
...@@ -9,11 +9,13 @@ ...@@ -9,11 +9,13 @@
<config name="initializer_list"> <config name="initializer_list">
<element value="physical.django_init"/> <element value="physical.django_init"/>
<element value="search.views.topic"/> <element value="search.views.topic"/>
<element value="search.views.business_topic"/>
<element value="search.views.pick"/> <element value="search.views.pick"/>
<element value="search.views.group"/> <element value="search.views.group"/>
<element value="search.views.user"/> <element value="search.views.user"/>
<element value="search.views.tag"/> <element value="search.views.tag"/>
<element value="search.views.contrast_similar"/> <element value="search.views.contrast_similar"/>
<element value="injection.data_sync.tasks"/> <element value="injection.data_sync.tasks"/>
<element value="search.views.contrast_similar"/>
</config> </config>
</gm_rpcd_config> </gm_rpcd_config>
...@@ -4,14 +4,15 @@ from celery import shared_task ...@@ -4,14 +4,15 @@ from celery import shared_task
from django.conf import settings from django.conf import settings
from django.core import serializers from django.core import serializers
from trans2es.type_info import get_type_info_map from trans2es.type_info import get_type_info_map
#from rpc.all import get_rpc_remote_invoker # from rpc.all import get_rpc_remote_invoker
from libs.es import ESPerform from libs.es import ESPerform
import logging import logging
import traceback import traceback
from libs.cache import redis_client from libs.cache import redis_client
from trans2es.models.face_user_contrast_similar import FaceUserContrastSimilar from trans2es.models.face_user_contrast_similar import FaceUserContrastSimilar,UserSimilarScore
import json import json
@shared_task @shared_task
def write_to_es(es_type, pk_list, use_batch_query_set=False): def write_to_es(es_type, pk_list, use_batch_query_set=False):
try: try:
...@@ -20,6 +21,7 @@ def write_to_es(es_type, pk_list, use_batch_query_set=False): ...@@ -20,6 +21,7 @@ def write_to_es(es_type, pk_list, use_batch_query_set=False):
type_info = type_info_map[es_type] type_info = type_info_map[es_type]
logging.info("duan add,es_type:%s" % str(es_type)) logging.info("duan add,es_type:%s" % str(es_type))
logging.info("get es_type:%s"%es_type)
type_info.insert_table_by_pk_list( type_info.insert_table_by_pk_list(
sub_index_name=es_type, sub_index_name=es_type,
pk_list=pk_list, pk_list=pk_list,
...@@ -33,7 +35,8 @@ def write_to_es(es_type, pk_list, use_batch_query_set=False): ...@@ -33,7 +35,8 @@ def write_to_es(es_type, pk_list, use_batch_query_set=False):
@shared_task @shared_task
def sync_face_similar_data_to_redis(): def sync_face_similar_data_to_redis():
try: try:
result_items = FaceUserContrastSimilar.objects.filter(is_online=True,is_deleted=False).distinct().values("participant_user_id").values_list("participant_user_id",flat=True) result_items = FaceUserContrastSimilar.objects.filter(is_online=True, is_deleted=False).distinct().values(
"participant_user_id").values_list("participant_user_id", flat=True)
logging.info("duan add,begin sync_face_similar_data_to_redis!") logging.info("duan add,begin sync_face_similar_data_to_redis!")
...@@ -41,19 +44,45 @@ def sync_face_similar_data_to_redis(): ...@@ -41,19 +44,45 @@ def sync_face_similar_data_to_redis():
for participant_user_id in result_items: for participant_user_id in result_items:
redis_key = redis_key_prefix + str(participant_user_id) redis_key = redis_key_prefix + str(participant_user_id)
similar_result_items = FaceUserContrastSimilar.objects.filter(is_online=True,is_deleted=False,participant_user_id=participant_user_id,similarity__gt=0.4).order_by("-similarity").limit(100) similar_result_items = FaceUserContrastSimilar.objects.filter(is_online=True, is_deleted=False,
participant_user_id=participant_user_id,
similarity__gt=0.3).order_by(
"-similarity")
item_list = list() item_list = list()
for item in similar_result_items: for item in similar_result_items:
item_list.append( item_list.append(
{ {
"contrast_user_id":item.contrast_user_id, "contrast_user_id": item.contrast_user_id,
"similarity":item.similarity "similarity": item.similarity
} }
) )
redis_client.set(redis_key,json.dumps(item_list)) redis_client.set(redis_key, json.dumps(item_list))
logging.info("duan add,participant_user_id:%d set data done!" % participant_user_id) logging.info("duan add,participant_user_id:%d set data done!" % participant_user_id)
except: except:
logging.error("catch exception,err_msg:%s" % traceback.format_exc()) logging.error("catch exception,err_msg:%s" % traceback.format_exc())
@shared_task
def sync_user_similar_score():
    """Celery task: mirror every user's similarity scores into Redis.

    For each distinct ``user_id`` that has non-deleted ``UserSimilarScore``
    rows, the rows are read in descending ``score`` order and stored as a JSON
    list of ``[contrast_user_id, score]`` pairs under the key
    ``physical:user_similar_score:user_id:<user_id>``.

    Any failure is logged with its traceback; nothing is re-raised and the
    task returns ``None`` in every case.
    """
    try:
        key_prefix = "physical:user_similar_score:user_id:"
        user_ids = (
            UserSimilarScore.objects.filter(is_deleted=False)
            .distinct()
            .values("user_id")
            .values_list("user_id", flat=True)
        )
        logging.info("duan add,begin sync user_similar_score!")

        for uid in user_ids:
            # One query per user, highest score first.
            ranked_rows = UserSimilarScore.objects.filter(is_deleted=False, user_id=uid).order_by("-score")
            payload = [[row.contrast_user_id, row.score] for row in ranked_rows]
            redis_client.set(key_prefix + str(uid), json.dumps(payload))
    except:
        logging.error("catch exception,err_msg:%s" % traceback.format_exc())
...@@ -94,7 +94,7 @@ class ESPerform(object): ...@@ -94,7 +94,7 @@ class ESPerform(object):
return False return False
@classmethod @classmethod
def put_index_mapping(cls,es_cli,sub_index_name,mapping_type="_doc"): def put_index_mapping(cls,es_cli,sub_index_name,mapping_type="_doc",force_sync=False):
""" """
:remark: put index mapping :remark: put index mapping
:param es_cli: :param es_cli:
...@@ -107,10 +107,14 @@ class ESPerform(object): ...@@ -107,10 +107,14 @@ class ESPerform(object):
write_alias_name = cls.get_official_index_name(sub_index_name,"write") write_alias_name = cls.get_official_index_name(sub_index_name,"write")
index_exist = es_cli.indices.exists(write_alias_name) index_exist = es_cli.indices.exists(write_alias_name)
if not index_exist: if not index_exist and not force_sync:
return False return False
mapping_dict = cls.__load_mapping(sub_index_name) mapping_dict = cls.__load_mapping(sub_index_name)
logging.info("get write_alias_name:%s"%write_alias_name)
logging.info("get mapping_dict:%s"%mapping_dict)
logging.info("get mapping_type:%s"%mapping_type)
es_cli.indices.put_mapping(index=write_alias_name,body=mapping_dict,doc_type=mapping_type) es_cli.indices.put_mapping(index=write_alias_name,body=mapping_dict,doc_type=mapping_type)
return True return True
...@@ -143,7 +147,7 @@ class ESPerform(object): ...@@ -143,7 +147,7 @@ class ESPerform(object):
try: try:
assert (es_cli is not None) assert (es_cli is not None)
official_index_name = cls.get_official_index_name(sub_index_name) official_index_name = cls.get_official_index_name(sub_index_name, "write")
index_exists = es_cli.indices.exists(official_index_name) index_exists = es_cli.indices.exists(official_index_name)
if not index_exists: if not index_exists:
if not auto_create_index: if not auto_create_index:
...@@ -208,3 +212,34 @@ class ESPerform(object): ...@@ -208,3 +212,34 @@ class ESPerform(object):
except: except:
logging.error("catch exception,err_msg:%s" % traceback.format_exc()) logging.error("catch exception,err_msg:%s" % traceback.format_exc())
return {"total_count":0,"hits":[]} return {"total_count":0,"hits":[]}
@classmethod
def if_es_node_load_high(cls, es_cli, cpu_threshold=60, max_high_nodes=3):
    """Report whether the Elasticsearch cluster looks CPU-saturated.

    Reads the plain-text output of ``es_cli.cat.nodes()`` and takes the fourth
    whitespace-separated column of every row as that node's CPU percentage.
    NOTE(review): this relies on the default ``_cat/nodes`` column order
    (ip, heap.percent, ram.percent, cpu, ...) - confirm for the deployed
    Elasticsearch version.

    :param es_cli: Elasticsearch client exposing ``cat.nodes()``.
    :param cpu_threshold: a node counts as busy when its CPU value is
        strictly greater than this (default 60, the former constant).
    :param max_high_nodes: the cluster counts as loaded when strictly more
        than this many nodes are busy (default 3, the former constant).
    :return: ``True`` when the load is high, or when the client is missing
        or the output cannot be read or parsed (fail-safe answer);
        ``False`` otherwise.
    """
    try:
        if es_cli is None:
            logging.error("catch exception,err_msg:es_cli is None")
            return True

        high_num = 0
        es_nodes_info_list = es_cli.cat.nodes().split("\n")
        for item in es_nodes_info_list:
            try:
                # split() with no argument collapses the padding the cat API
                # uses to align columns; split(" ") produced empty fields and
                # made int() fail on every padded row.
                item_list = item.split()
                if len(item_list) > 4:
                    if int(item_list[3]) > cpu_threshold:
                        high_num += 1
            except Exception:
                # An unparsable row is treated as "load high" on purpose:
                # callers should back off when node state is unknown.
                logging.error("catch exception,item:%s,err_msg:%s" % (str(item), traceback.format_exc()))
                return True

        if high_num > max_high_nodes:
            logging.info("check es_nodes_load high,cpu load:%s" % str(es_nodes_info_list))
            return True
        return False
    except Exception:
        logging.error("catch exception,err_msg:%s" % traceback.format_exc())
        return True
...@@ -5,6 +5,8 @@ from __future__ import unicode_literals, print_function, absolute_import ...@@ -5,6 +5,8 @@ from __future__ import unicode_literals, print_function, absolute_import
import six import six
import random import random
from django.db import models from django.db import models
import logging
import traceback
class ITableChunk(object): class ITableChunk(object):
...@@ -147,13 +149,18 @@ class TableSlicerChunk(ITableChunk): ...@@ -147,13 +149,18 @@ class TableSlicerChunk(ITableChunk):
class TableSlicer(object): class TableSlicer(object):
def __init__(self, queryset, chunk_size=None, chunk_count=None, sep_list=None): def __init__(self, queryset, chunk_size=None, chunk_count=None, sep_list=None):
try:
assert isinstance(queryset, models.QuerySet) assert isinstance(queryset, models.QuerySet)
assert chunk_size is None or isinstance(chunk_size, six.integer_types) assert chunk_size is None or isinstance(chunk_size, six.integer_types)
assert chunk_count is None or isinstance(chunk_count, six.integer_types) assert chunk_count is None or isinstance(chunk_count, six.integer_types)
assert sep_list is None or isinstance(sep_list, list) assert sep_list is None or isinstance(sep_list, list)
assert (chunk_size is not None) + (chunk_count is not None) + (sep_list is not None) == 1 assert (chunk_size is not None) + (chunk_count is not None) + (sep_list is not None) == 1
if sep_list is not None: if sep_list is not None:
sep_list = list(sep_list) sep_list = list(sep_list)
else: else:
...@@ -169,14 +176,20 @@ class TableSlicer(object): ...@@ -169,14 +176,20 @@ class TableSlicer(object):
self._model = queryset.model self._model = queryset.model
self._query = queryset.query self._query = queryset.query
self._sep_list = [None] + sep_list + [None] self._sep_list = [None] + sep_list + [None]
except:
logging.error("catch exception,err_msg:%s" % traceback.format_exc())
def chunks(self): def chunks(self):
try:
reversed_sep_list = list(reversed(self._sep_list)) reversed_sep_list = list(reversed(self._sep_list))
logging.info("duan add,reversed_sep_list:%d" % (len(self._sep_list) - 1))
for i in range(len(self._sep_list) - 1): for i in range(len(self._sep_list) - 1):
pk_start = reversed_sep_list[i+1] pk_start = reversed_sep_list[i + 1]
pk_stop = reversed_sep_list[i] pk_stop = reversed_sep_list[i]
yield TableSlicerChunk(model=self._model, query=self._query, pk_start=pk_start, pk_stop=pk_stop) yield TableSlicerChunk(model=self._model, query=self._query, pk_start=pk_start, pk_stop=pk_stop)
except:
logging.error("catch exception,err_msg:%s" % traceback.format_exc())
class TableStreamingSlicer(object): class TableStreamingSlicer(object):
......
from django.contrib import admin
# Register your models here.
from django.db import models
# Create your models here.
from django.test import TestCase
# Create your tests here.
from django.shortcuts import render
# Create your views here.
# -*- coding: UTF-8 -*-
# !/usr/bin/env python
from kafka import KafkaConsumer
from libs.cache import redis_client
import logging
from linucb.views.linucb import LinUCB
import json
from trans2es.models.tag import TopicTag
import traceback
from django.conf import settings
class KafkaManager(object):
    """Holds one process-wide KafkaConsumer, created on first request."""

    # Cached consumer; stays None until get_kafka_consumer_ins() builds it.
    consumser_obj = None

    @classmethod
    def get_kafka_consumer_ins(cls, topic_name=None):
        """Return the cached consumer, building it on the first call.

        :param topic_name: topic to consume; a falsy value selects
            ``settings.KAFKA_TOPIC_NAME``. Only the first call's topic is
            used - once a consumer is cached, later calls get that same
            instance whatever ``topic_name`` they pass.
        :return: the shared ``KafkaConsumer`` instance.
        """
        if cls.consumser_obj:
            return cls.consumser_obj

        chosen_topic = topic_name or settings.KAFKA_TOPIC_NAME
        cls.consumser_obj = KafkaConsumer(chosen_topic, bootstrap_servers=settings.KAFKA_BROKER_LIST)
        # cls.consumser_obj.subscribe([topic_name])
        return cls.consumser_obj
class CollectData(object):
    """Feed Kafka click / exposure events into the per-device LinUCB tag model.

    Two Redis key families are used, built from the prefixes set in
    ``__init__``: a hash with the LinUCB state of every tag for a device, and
    a JSON list with the tag ids currently recommended for that device.
    """

    def __init__(self):
        self.linucb_matrix_redis_prefix = "physical:linucb:device_id:"
        self.linucb_recommend_redis_prefix = "physical:linucb:tag_recommend:device_id:"
        # Default user feature vector, used when a caller passes none.
        self.user_feature = [0, 1]

    def _get_user_linucb_info(self, device_id):
        """Return the device's LinUCB hash from Redis, or ``{}`` on failure.

        The hash is keyed by tag id; each value holds that tag's four
        matrices.
        """
        try:
            redis_key = self.linucb_matrix_redis_prefix + str(device_id)
            return redis_client.hgetall(redis_key)
        except Exception:
            logging.error("catch exception,err_msg:%s" % traceback.format_exc())
            return dict()

    def update_recommend_tag_list(self, device_id, user_feature=None):
        """Recompute and store the recommended tag list of one device.

        A device without LinUCB state gets the default tag list and has its
        state initialised for those tags; otherwise the tags are ranked by
        ``LinUCB.linucb_recommend_tag``. A non-empty result is written to
        Redis as JSON with a 7-day expiry.

        :param device_id: device whose recommendation is refreshed.
        :param user_feature: feature vector; falls back to ``self.user_feature``.
        :return: ``True`` on success, ``False`` when an exception was caught.
        """
        try:
            redis_linucb_tag_data_dict = self._get_user_linucb_info(device_id)
            if len(redis_linucb_tag_data_dict) == 0:
                recommend_tag_list = LinUCB.get_default_tag_list()
                LinUCB.init_device_id_linucb_info(redis_client, self.linucb_matrix_redis_prefix,
                                                  device_id, recommend_tag_list)
            else:
                user_feature = user_feature if user_feature else self.user_feature
                recommend_tag_list = LinUCB.linucb_recommend_tag(device_id, redis_linucb_tag_data_dict,
                                                                 user_feature,
                                                                 list(redis_linucb_tag_data_dict.keys()))

            logging.info("duan add,device_id:%s,recommend_tag_list:%s" % (str(device_id), str(recommend_tag_list)))
            if len(recommend_tag_list) > 0:
                tag_recommend_redis_key = self.linucb_recommend_redis_prefix + str(device_id)
                redis_client.set(tag_recommend_redis_key, json.dumps(recommend_tag_list))
                # TODO: check whether set() can take the expiry directly so the
                # write and the TTL become a single command.
                redis_client.expire(tag_recommend_redis_key, 7 * 24 * 60 * 60)
            return True
        except Exception:
            logging.error("catch exception,err_msg:%s" % traceback.format_exc())
            return False

    def update_user_linucb_tag_info(self, reward, device_id, tag_id, user_feature=None):
        """Apply one reward observation to a (device, tag) LinUCB model.

        :return: whatever ``LinUCB.update_linucb_info`` returns, or ``False``
            when an exception was caught.
        """
        try:
            user_feature = user_feature if user_feature else self.user_feature
            return LinUCB.update_linucb_info(user_feature, reward, tag_id, device_id,
                                             self.linucb_matrix_redis_prefix, redis_client)
        except Exception:
            # Include the traceback like every other handler in this class.
            logging.error("update_user_linucb_tag_info error!,err_msg:%s" % traceback.format_exc())
            return False

    def _handle_click_event(self, raw_val_dict, user_feature):
        """Reward (reward=1) every online tag of the clicked topic."""
        params = raw_val_dict["params"]
        # Either key may be absent; indexing params["business_id"] directly
        # raised KeyError before the topic_id fallback could be tried.
        topic_id = params.get("business_id") or params.get("topic_id")
        device_id = raw_val_dict["device"]["device_id"]
        if not topic_id:
            logging.warning("click msg without business_id/topic_id,device_id:%s" % str(device_id))
            return
        logging.info("consume topic_id:%s,device_id:%s" % (str(topic_id), str(device_id)))

        tag_list = [sql_item.tag_id
                    for sql_item in TopicTag.objects.filter(is_online=True, topic_id=topic_id)]

        reward = 1  # a click is a positive signal
        logging.info("positive tag_list,device_id:%s,topic_id:%s,tag_list:%s" % (
            str(device_id), str(topic_id), str(tag_list)))
        for tag_id in tag_list:
            self.update_user_linucb_tag_info(reward, device_id, tag_id, user_feature)

        # Refresh the recommendation only after all tag updates are stored.
        self.update_recommend_tag_list(device_id, user_feature)

    def _handle_exposure_event(self, raw_val_dict, user_feature):
        """Penalise (reward=0) every online tag of the exposed topics."""
        exposure_cards = raw_val_dict["params"]["exposure_cards"]
        if isinstance(exposure_cards, str):
            exposure_cards_list = json.loads(exposure_cards)
        elif isinstance(exposure_cards, list):
            exposure_cards_list = exposure_cards
        else:
            exposure_cards_list = list()
        device_id = raw_val_dict["device"]["device_id"]

        exposure_topic_id_list = list()
        for item in exposure_cards_list:
            if "card_id" not in item:
                continue
            exposure_topic_id = item["card_id"]
            logging.info(
                "consume exposure topic_id:%s,device_id:%s" % (str(exposure_topic_id), str(device_id)))
            exposure_topic_id_list.append(exposure_topic_id)

        topic_tag_id_dict = dict()
        tag_list = list()
        for sql_item in TopicTag.objects.filter(is_online=True, topic_id__in=exposure_topic_id_list):
            tag_list.append(sql_item.tag_id)
            topic_tag_id_dict.setdefault(sql_item.topic_id, list()).append(sql_item.tag_id)

        reward = 0  # exposure without a click is a negative signal
        logging.info("negative tag_list,device_id:%s,topic_tag_id_dict:%s" % (
            str(device_id), str(topic_tag_id_dict)))
        for tag_id in tag_list:
            self.update_user_linucb_tag_info(reward, device_id, tag_id, user_feature)

        # Refresh the recommendation only after all tag updates are stored.
        self.update_recommend_tag_list(device_id, user_feature)

    def consume_data_from_kafka(self, topic_name=None):
        """Poll Kafka forever and dispatch each message by its ``type`` field.

        ``on_click_feed_topic_card`` messages are treated as positive
        feedback, ``page_precise_exposure`` messages as negative feedback;
        any other type is logged and skipped. A failure while handling one
        message is logged and does not stop the loop.

        :param topic_name: forwarded to ``KafkaManager.get_kafka_consumer_ins``.
        :return: ``False`` when obtaining the consumer or polling fails.
            The loop has no normal exit, so no other value is returned.
        """
        try:
            user_feature = [1, 1]
            kafka_consumer_obj = KafkaManager.get_kafka_consumer_ins(topic_name)
            while True:
                msg_dict = kafka_consumer_obj.poll(timeout_ms=100)
                for consume_msg in msg_dict.values():
                    for ori_msg in consume_msg:
                        # `except Exception` (not a bare except) so that
                        # KeyboardInterrupt / SystemExit can stop the worker.
                        try:
                            logging.info(ori_msg)
                            raw_val_dict = json.loads(ori_msg.value)
                            msg_type = raw_val_dict.get("type")
                            if msg_type == "on_click_feed_topic_card":
                                self._handle_click_event(raw_val_dict, user_feature)
                            elif msg_type == "page_precise_exposure":
                                self._handle_exposure_event(raw_val_dict, user_feature)
                            else:
                                logging.warning(
                                    "unknown type msg:%s" % raw_val_dict.get("type", "missing type"))
                        except Exception:
                            logging.error("catch exception,err_msg:%s" % traceback.format_exc())
        except Exception:
            logging.error("catch exception,err_msg:%s" % traceback.format_exc())
            return False
# -*- coding: UTF-8 -*-
# !/usr/bin/env python
import numpy as np
import redis
from libs.cache import redis_client
from trans2es.models.tag import Tag
import logging
import traceback
import json
import pickle
class LinUCB:
    """Disjoint LinUCB contextual bandit over tags, one model per (device, tag).

    Per-tag state is a dict with four numpy arrays, pickled into a Redis hash
    field: ``Aa`` (d x d), its inverse ``AaI`` (d x d), ``ba`` (d x 1) and
    ``theta = AaI . ba`` (d x 1).

    SECURITY NOTE(review): state is read back with ``pickle.loads``; that is
    only safe while nothing untrusted can write to these Redis keys.
    """

    d = 2          # length of the user feature vector
    alpha = 0.25   # weight of the exploration (confidence) term
    r1 = 1         # payoff applied for reward == 1
    r0 = -0.5      # payoff applied for reward == 0
    # Cache of online tag ids, filled on first use and shared by the process.
    default_tag_list = list()

    @classmethod
    def get_default_tag_list(cls):
        """Return up to 20 online tag ids, loading and caching them on first use.

        :return: list of tag ids, or an empty list when the lookup fails.
        """
        try:
            if len(cls.default_tag_list) == 0:
                cls.default_tag_list.extend(item.id for item in Tag.objects.filter(is_online=True))
            return cls.default_tag_list[:20]
        except Exception:
            logging.error("catch exception,err_msg:%s" % traceback.format_exc())
            return list()

    @staticmethod
    def _tag_to_text(tag_id):
        """Render a tag id as text; bytes (raw Redis hash keys) are decoded as UTF-8."""
        if isinstance(tag_id, bytes):
            return str(tag_id, encoding="utf-8")
        return str(tag_id)

    @classmethod
    def linucb_recommend_tag(cls, device_id, redis_linucb_tag_data_dict, user_features_list, tag_list):
        """Rank tags by their LinUCB upper confidence bound and return the top 10.

        Score of a tag: ``x^T . theta + alpha * sqrt(x^T . AaI . x)``.

        :param device_id: only used in the log line.
        :param redis_linucb_tag_data_dict: mapping tag id -> pickled state dict.
        :param user_features_list: feature vector ``x`` of length ``d``.
        :param tag_list: tag ids to rank; each must be a key of the mapping.
        :return: at most 10 tag ids as text, best score first, ties kept in
            ``tag_list`` order; ``[]`` for empty input or on any error.
        """
        try:
            if len(tag_list) == 0:
                return []

            inverse_list = list()
            theta_list = list()
            for tag_id in tag_list:
                tag_dict = pickle.loads(redis_linucb_tag_data_dict[tag_id])
                # The confidence width needs the INVERSE matrix. Feeding in
                # "Aa" made the exploration bonus grow with every observation
                # instead of shrinking.
                inverse_list.append(tag_dict["AaI"])
                theta_list.append(tag_dict["theta"])

            xaT = np.array([user_features_list])
            xa = np.transpose(xaT)
            exploit = np.dot(xaT, np.array(theta_list))
            explore = cls.alpha * np.sqrt(np.dot(np.dot(xaT, np.array(inverse_list)), xa))
            score_list = (exploit + explore).reshape(-1).tolist()

            # sorted() is stable, also with reverse=True, so equal scores keep
            # their tag_list order.
            ranked_index_list = sorted(range(len(score_list)), key=lambda idx: score_list[idx], reverse=True)

            # An ordered list plus a "seen" set keeps the ranking; collecting
            # straight into a set threw the score order away.
            # NOTE(review): the nesting of the original ">= 10" check could not
            # be recovered from the flattened source; a strict cap is applied.
            top_tag_list = list()
            seen_tag_set = set()
            for idx in ranked_index_list:
                tag_text = cls._tag_to_text(tag_list[idx])
                if tag_text in seen_tag_set:
                    continue
                seen_tag_set.add(tag_text)
                top_tag_list.append(tag_text)
                if len(top_tag_list) >= 10:
                    break

            logging.info("duan add,device_id:%s,ranked_tag_scores:%s" % (
                str(device_id), str([(tag_list[idx], score_list[idx]) for idx in ranked_index_list])))
            return top_tag_list
        except Exception:
            logging.error("catch exception,err_msg:%s" % traceback.format_exc())
            return []

    @classmethod
    def init_device_id_linucb_info(cls, redis_cli, redis_prefix, device_id, tag_list):
        """Write a fresh state (identity matrices, zero vectors) for every tag.

        :param redis_cli: client exposing ``hmset``.
        :param redis_prefix: key prefix; the key is ``redis_prefix + str(device_id)``.
        :param tag_list: tag ids that become the hash fields.
        :return: ``True`` on success, ``False`` when an exception was caught.
        """
        try:
            redis_key = redis_prefix + str(device_id)
            user_tag_linucb_dict = dict()
            for tag_id in tag_list:
                init_dict = {
                    "Aa": np.identity(cls.d),
                    "theta": np.zeros((cls.d, 1)),
                    "ba": np.zeros((cls.d, 1)),
                    "AaI": np.identity(cls.d)
                }
                user_tag_linucb_dict[tag_id] = pickle.dumps(init_dict)

            redis_cli.hmset(redis_key, user_tag_linucb_dict)
            return True
        except Exception:
            logging.error("catch exception,err_msg:%s" % traceback.format_exc())
            return False

    @classmethod
    def update_linucb_info(cls, user_features, reward, tag_id, device_id, redis_prefix, redis_cli):
        """Fold one observation into the stored model of (device, tag).

        ``reward`` 1 applies payoff ``r1`` and 0 applies ``r0``; ``-1`` only
        logs a warning and any other value is ignored. A tag without stored
        state starts from the initial state and still receives this
        observation (it used to be initialised and the observation dropped).
        All Redis access goes through ``redis_cli``.

        :return: ``True`` unless an exception was caught (then ``False``).
            NOTE(review): the flattened source did not show whether the
            original ``return True`` also covered the ignored-reward paths;
            it does here.
        """
        try:
            if reward == -1:
                logging.warning("reward val error!")
            elif reward == 1 or reward == 0:
                r = cls.r1 if reward == 1 else cls.r0
                xaT = np.array([user_features])
                xa = np.transpose(xaT)

                redis_key = redis_prefix + str(device_id)
                ori_redis_tag_data = redis_cli.hget(redis_key, tag_id)
                if ori_redis_tag_data:
                    ori_redis_tag_dict = pickle.loads(ori_redis_tag_data)
                    old_Aa_matrix = ori_redis_tag_dict["Aa"]
                    old_ba_matrix = ori_redis_tag_dict["ba"]
                else:
                    old_Aa_matrix = np.identity(cls.d)
                    old_ba_matrix = np.zeros((cls.d, 1))

                new_Aa_matrix = old_Aa_matrix + np.dot(xa, xaT)
                # Solving Aa . X = I yields the inverse of Aa.
                new_AaI_matrix = np.linalg.solve(new_Aa_matrix, np.identity(cls.d))
                new_ba_matrix = old_ba_matrix + r * xa

                user_tag_dict = {
                    "Aa": new_Aa_matrix,
                    "ba": new_ba_matrix,
                    "AaI": new_AaI_matrix,
                    "theta": np.dot(new_AaI_matrix, new_ba_matrix)
                }
                redis_cli.hset(redis_key, tag_id, pickle.dumps(user_tag_dict))
            return True
        except Exception:
            logging.error("catch exception,err_msg:%s" % traceback.format_exc())
            return False
\ No newline at end of file
...@@ -6,9 +6,10 @@ import itertools ...@@ -6,9 +6,10 @@ import itertools
from django.conf import settings from django.conf import settings
import logging import logging
class CeleryTaskRouter(object): class CeleryTaskRouter(object):
queue_task_map = { queue_task_map = {
"tapir-alpha":[ "tapir-alpha": [
'injection.data_sync.tasks.write_to_es', 'injection.data_sync.tasks.write_to_es',
] ]
} }
......
...@@ -14,188 +14,4 @@ import os ...@@ -14,188 +14,4 @@ import os
from .log_settings import * from .log_settings import *
from datetime import timedelta from datetime import timedelta
from celery.schedules import crontab from celery.schedules import crontab
from .settings_local import *
# Build paths inside the project like this: os.path.join(BASE_DIR, ...)
BASE_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
# Quick-start development settings - unsuitable for production
# See https://docs.djangoproject.com/en/1.10/howto/deployment/checklist/
# SECURITY WARNING: keep the secret key used in production secret!
SECRET_KEY = '^j3sg)sj8rc@du74%fb$c2926tv!!4g(kp-=rx1)c5!1&1(dq='
# SECURITY WARNING: don't run with debug turned on in production!
DEBUG = False
ALLOWED_HOSTS = []
# Application definition
SENTRY_CELERY_ENDPOINT = "http://60b0004c8884420f8067fb32fc3ed244:20f97fc73ffa4aad9735d0e6542a6d78@sentry.igengmei.com/140"
BROKER_URL = "redis://127.0.0.1:6379/8"
# CELERY_SEND_EVENTS = True
# CELERY_SEND_TASK_SENT_EVENT = True
#
# CELERY_DEFAULT_EXCHANGE = 'celery'
# CELERY_DEFAULT_EXCHANGE_TYPE = 'direct'
# CELERY_DEFAULT_ROUTING_KEY = 'celery'
#
# CELERY_QUEUES = {
# 'celery': {
# 'exchange': CELERY_DEFAULT_EXCHANGE,
# 'routing_key': CELERY_DEFAULT_ROUTING_KEY,
# },
# 'order': {
# 'exchange': 'order',
# 'routing_key': 'order',
# },
# }
CELERY_BROKER_URL = "redis://127.0.0.1:6379/8"
CELERY_TIMEZONE = 'Asia/Shanghai'
CELERY_ROUTES = ['physical.celery_task_router.CeleryTaskRouter']
INSTALLED_APPS = (
'django.contrib.admin',
'django.contrib.auth',
'django.contrib.contenttypes',
'django.contrib.sessions',
'django.contrib.messages',
'django.contrib.staticfiles',
'trans2es',
'search',
'injection.data_sync',
)
CELERYBEAT_SCHEDULE = {
'sync_face_similar_data_to_redis': {
'task': 'injection.data_sync.tasks.sync_face_similar_data_to_redis',
'schedule': timedelta(seconds=120),
'args': ()
},
}
"""
MIDDLEWARE = [
'django.middleware.security.SecurityMiddleware',
'django.contrib.sessions.middleware.SessionMiddleware',
'django.middleware.common.CommonMiddleware',
'django.middleware.csrf.CsrfViewMiddleware',
'django.contrib.auth.middleware.AuthenticationMiddleware',
'django.contrib.messages.middleware.MessageMiddleware',
'django.middleware.clickjacking.XFrameOptionsMiddleware',
]
TEMPLATES = [
{
'BACKEND': 'django.template.backends.django.DjangoTemplates',
'DIRS': [],
'APP_DIRS': True,
'OPTIONS': {
'context_processors': [
'django.template.context_processors.debug',
'django.template.context_processors.request',
'django.contrib.auth.context_processors.auth',
'django.contrib.messages.context_processors.messages',
],
},
},
]
AUTH_PASSWORD_VALIDATORS = [
{
'NAME': 'django.contrib.auth.password_validation.UserAttributeSimilarityValidator',
},
{
'NAME': 'django.contrib.auth.password_validation.MinimumLengthValidator',
},
{
'NAME': 'django.contrib.auth.password_validation.CommonPasswordValidator',
},
{
'NAME': 'django.contrib.auth.password_validation.NumericPasswordValidator',
},
]
"""
ROOT_URLCONF = 'physical.urls'
WSGI_APPLICATION = 'physical.wsgi.application'
REDIS_URL = "redis://127.0.0.1:6379"
# Database
# https://docs.djangoproject.com/en/1.10/ref/settings/#databases
DATABASE_ROUTERS = ['physical.DBRouter.DBRouter']
DATABASES = {
'default': {
'ENGINE': 'django.db.backends.mysql',
'NAME': 'alpha',
'USER': 'work',
'PASSWORD': 'Gengmei123',
# 'HOST': 'rm-2ze5k2we69904787l.mysql.rds.aliyuncs.com',
"HOST": 'rm-2zeaut61u9sm21m0bjo.mysql.rds.aliyuncs.com',
#'HOST': 'rm-2zeaut61u9sm21m0b.mysql.rds.aliyuncs.com',
'PORT': '3306',
'OPTIONS': {
"init_command": "SET foreign_key_checks = 0;",
"charset": "utf8mb4",
},
},
'face': {
'ENGINE': 'django.db.backends.mysql',
'NAME': 'face',
'USER': 'work',
'PASSWORD': 'Gengmei123',
# 'HOST': 'rm-2ze5k2we69904787l.mysql.rds.aliyuncs.com',
"HOST": "rm-2zeaut61u9sm21m0bjo.mysql.rds.aliyuncs.com",
# 'HOST': 'rm-2zeaut61u9sm21m0b.mysql.rds.aliyuncs.com',
'PORT': '3306',
# 'CONN_MAX_AGE': None,
'OPTIONS': {
"init_command": "SET foreign_key_checks = 0;",
"charset": "utf8mb4",
},
}
}
ES_INFO_LIST = [
{
"host": "10.29.130.141",
"port": 9200
}
]
ES_INDEX_PREFIX = "gm-dbmw"
MIDDLEWARE_CLASSES = (
'gm_tracer.middleware.TracerMiddleware',
'django.contrib.sessions.middleware.SessionMiddleware',
'django.middleware.common.CommonMiddleware',
'django.middleware.csrf.CsrfViewMiddleware',
'django.contrib.auth.middleware.AuthenticationMiddleware',
'django.contrib.auth.middleware.SessionAuthenticationMiddleware',
'django.contrib.messages.middleware.MessageMiddleware',
'django.middleware.clickjacking.XFrameOptionsMiddleware',
'helios.DjangoL5dMiddleware',
)
# Password validation
# https://docs.djangoproject.com/en/1.10/ref/settings/#auth-password-validators
# Internationalization
# https://docs.djangoproject.com/en/1.10/topics/i18n/
# LANGUAGE_CODE = 'en-us'
#
TIME_ZONE = 'Asia/Shanghai'
#
USE_I18N = True
USE_L10N = True
#
# USE_TZ = True
# Static files (CSS, JavaScript, Images)
# https://docs.djangoproject.com/en/1.10/howto/static-files/
STATIC_URL = '/static/'
# Build paths inside the project like this: os.path.join(BASE_DIR, ...)
BASE_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
# Quick-start development settings - unsuitable for production
# See https://docs.djangoproject.com/en/1.10/howto/deployment/checklist/
# SECURITY WARNING: keep the secret key used in production secret!
SECRET_KEY = '^j3sg)sj8rc@du74%fb$c2926tv!!4g(kp-=rx1)c5!1&1(dq='
# SECURITY WARNING: don't run with debug turned on in production!
DEBUG = False
ALLOWED_HOSTS = []
# Application definition
SENTRY_CELERY_ENDPOINT = "http://60b0004c8884420f8067fb32fc3ed244:20f97fc73ffa4aad9735d0e6542a6d78@sentry.igengmei.com/140"
BROKER_URL = "redis://127.0.0.1:6379/8"
# CELERY_SEND_EVENTS = True
# CELERY_SEND_TASK_SENT_EVENT = True
#
# CELERY_DEFAULT_EXCHANGE = 'celery'
# CELERY_DEFAULT_EXCHANGE_TYPE = 'direct'
# CELERY_DEFAULT_ROUTING_KEY = 'celery'
#
# CELERY_QUEUES = {
# 'celery': {
# 'exchange': CELERY_DEFAULT_EXCHANGE,
# 'routing_key': CELERY_DEFAULT_ROUTING_KEY,
# },
# 'order': {
# 'exchange': 'order',
# 'routing_key': 'order',
# },
# }
CELERY_BROKER_URL = "redis://127.0.0.1:6379/8"
CELERY_TIMEZONE = 'Asia/Shanghai'
CELERY_ROUTES = ['physical.celery_task_router.CeleryTaskRouter']
INSTALLED_APPS = (
'django.contrib.admin',
'django.contrib.auth',
'django.contrib.contenttypes',
'django.contrib.sessions',
'django.contrib.messages',
'django.contrib.staticfiles',
'trans2es',
'search',
'injection.data_sync',
)
CELERYBEAT_SCHEDULE = {
'sync_face_similar_data_to_redis': {
'task': 'injection.data_sync.tasks.sync_face_similar_data_to_redis',
'schedule': 120.0,
'args': ()
},
}
"""
MIDDLEWARE = [
'django.middleware.security.SecurityMiddleware',
'django.contrib.sessions.middleware.SessionMiddleware',
'django.middleware.common.CommonMiddleware',
'django.middleware.csrf.CsrfViewMiddleware',
'django.contrib.auth.middleware.AuthenticationMiddleware',
'django.contrib.messages.middleware.MessageMiddleware',
'django.middleware.clickjacking.XFrameOptionsMiddleware',
]
TEMPLATES = [
{
'BACKEND': 'django.template.backends.django.DjangoTemplates',
'DIRS': [],
'APP_DIRS': True,
'OPTIONS': {
'context_processors': [
'django.template.context_processors.debug',
'django.template.context_processors.request',
'django.contrib.auth.context_processors.auth',
'django.contrib.messages.context_processors.messages',
],
},
},
]
AUTH_PASSWORD_VALIDATORS = [
{
'NAME': 'django.contrib.auth.password_validation.UserAttributeSimilarityValidator',
},
{
'NAME': 'django.contrib.auth.password_validation.MinimumLengthValidator',
},
{
'NAME': 'django.contrib.auth.password_validation.CommonPasswordValidator',
},
{
'NAME': 'django.contrib.auth.password_validation.NumericPasswordValidator',
},
]
"""
ROOT_URLCONF = 'physical.urls'
WSGI_APPLICATION = 'physical.wsgi.application'
REDIS_URL = "redis://127.0.0.1:6379"
# Database
# https://docs.djangoproject.com/en/1.10/ref/settings/#databases
DATABASES = {
'default': {
'ENGINE': 'django.db.backends.mysql',
'NAME': 'alpha',
'USER': 'work',
'PASSWORD': 'Gengmei123',
# 'HOST': 'rm-2ze5k2we69904787l.mysql.rds.aliyuncs.com',
'HOST': 'rm-2zeaut61u9sm21m0b.mysql.rds.aliyuncs.com',
'PORT': '3306',
'OPTIONS': {
"init_command": "SET foreign_key_checks = 0;",
"charset": "utf8mb4",
},
},
'face': {
'ENGINE': 'django.db.backends.mysql',
'NAME': 'face',
'USER': 'work',
'PASSWORD': 'Gengmei123',
# 'HOST': 'rm-2ze5k2we69904787l.mysql.rds.aliyuncs.com',
'HOST': 'rm-2zeaut61u9sm21m0b.mysql.rds.aliyuncs.com',
'PORT': '3306',
# 'CONN_MAX_AGE': None,
'OPTIONS': {
"init_command": "SET foreign_key_checks = 0;",
"charset": "utf8mb4",
},
}
}
ES_INFO_LIST = [
{
"host": "10.29.130.141",
"port": 9200
}
]
ES_INDEX_PREFIX = "gm-dbmw"
MIDDLEWARE_CLASSES = (
'gm_tracer.middleware.TracerMiddleware',
'django.contrib.sessions.middleware.SessionMiddleware',
'django.middleware.common.CommonMiddleware',
'django.middleware.csrf.CsrfViewMiddleware',
'django.contrib.auth.middleware.AuthenticationMiddleware',
'django.contrib.auth.middleware.SessionAuthenticationMiddleware',
'django.contrib.messages.middleware.MessageMiddleware',
'django.middleware.clickjacking.XFrameOptionsMiddleware',
'helios.DjangoL5dMiddleware',
)
# Password validation
# https://docs.djangoproject.com/en/1.10/ref/settings/#auth-password-validators
# Internationalization
# https://docs.djangoproject.com/en/1.10/topics/i18n/
# LANGUAGE_CODE = 'en-us'
#
TIME_ZONE = 'Asia/Shanghai'
#
USE_I18N = True
USE_L10N = True
#
# USE_TZ = True
# Static files (CSS, JavaScript, Images)
# https://docs.djangoproject.com/en/1.10/howto/static-files/
STATIC_URL = '/static/'
...@@ -12,6 +12,8 @@ PyMySQL==0.9.2 ...@@ -12,6 +12,8 @@ PyMySQL==0.9.2
gunicorn==19.9.0 gunicorn==19.9.0
gevent==1.3.7 gevent==1.3.7
pypinyin==0.34.1 pypinyin==0.34.1
numpy==1.16.2
lz4==2.1.6
git+ssh://git@git.wanmeizhensuo.com/backend/gm-rpcd.git@master git+ssh://git@git.wanmeizhensuo.com/backend/gm-rpcd.git@master
git+ssh://git@git.wanmeizhensuo.com/backend/helios.git@master git+ssh://git@git.wanmeizhensuo.com/backend/helios.git@master
......
...@@ -47,7 +47,7 @@ class GroupUtils(object): ...@@ -47,7 +47,7 @@ class GroupUtils(object):
return {"total_count":0, "hits":[]} return {"total_count":0, "hits":[]}
@classmethod @classmethod
def get_hot_group_recommend_result_list(cls,offset,size,es_cli_obj=None): def get_hot_pictorial_recommend_result_list(cls,offset,size,es_cli_obj=None):
try: try:
if not es_cli_obj: if not es_cli_obj:
es_cli_obj = ESPerform.get_cli() es_cli_obj = ESPerform.get_cli()
...@@ -68,19 +68,19 @@ class GroupUtils(object): ...@@ -68,19 +68,19 @@ class GroupUtils(object):
"includes":["id"] "includes":["id"]
} }
result_dict = ESPerform.get_search_results(es_cli_obj,"group",q,offset,size) result_dict = ESPerform.get_search_results(es_cli_obj,"pictorial",q,offset,size)
group_ids_list = [] pictorial_ids_list = []
if len(result_dict["hits"]) > 0: if len(result_dict["hits"]) > 0:
group_ids_list = [item["_source"]["id"] for item in result_dict["hits"]] pictorial_ids_list = [item["_source"]["id"] for item in result_dict["hits"]]
return group_ids_list return pictorial_ids_list
except: except:
logging.error("catch exception,err_msg:%s" % traceback.format_exc()) logging.error("catch exception,err_msg:%s" % traceback.format_exc())
return [] return []
@classmethod @classmethod
def get_user_attention_group_list(cls,user_id,offset=0,size=10,es_cli_obj=None): def get_user_attention_pictorial_list(cls,user_id,offset=0,size=10,es_cli_obj=None):
""" """
:remark: 获取用户关注小组列表 :remark: 获取用户关注小组列表
:return: :return:
...@@ -100,12 +100,12 @@ class GroupUtils(object): ...@@ -100,12 +100,12 @@ class GroupUtils(object):
} }
} }
q["_source"] = { q["_source"] = {
"includes":["attention_group_id_list"] "includes":["attention_pictorial_id_list"]
} }
result_dict = ESPerform.get_search_results(es_cli_obj,"user",q,offset,size) result_dict = ESPerform.get_search_results(es_cli_obj,"user",q,offset,size)
if len(result_dict["hits"])>0: if len(result_dict["hits"])>0:
return result_dict["hits"][0]["_source"]["attention_group_id_list"] return result_dict["hits"][0]["_source"]["attention_pictorial_id_list"]
else: else:
return [] return []
except: except:
...@@ -113,7 +113,7 @@ class GroupUtils(object): ...@@ -113,7 +113,7 @@ class GroupUtils(object):
return [] return []
@classmethod @classmethod
def get_group_ids_by_aggs(cls,group_id_list,es_cli_obj=None): def get_pictorial_ids_by_aggs(cls,pictorial_ids_list,es_cli_obj=None):
""" """
:remark:聚合查询获取小组列表 :remark:聚合查询获取小组列表
:param group_id_list: :param group_id_list:
...@@ -127,13 +127,13 @@ class GroupUtils(object): ...@@ -127,13 +127,13 @@ class GroupUtils(object):
q["size"]=0 q["size"]=0
q["query"] = { q["query"] = {
"terms":{ "terms":{
"group_id":group_id_list "pictorial_id":pictorial_ids_list
} }
} }
q["aggs"] = { q["aggs"] = {
"group_ids":{ "pictorial_ids":{
"terms":{ "terms":{
"field":"group_id" "field":"pictorial_id"
}, },
"aggs":{ "aggs":{
"max_date":{ "max_date":{
...@@ -146,12 +146,12 @@ class GroupUtils(object): ...@@ -146,12 +146,12 @@ class GroupUtils(object):
} }
result_dict = ESPerform.get_search_results(es_cli_obj,"topic",q,aggregations_query=True) result_dict = ESPerform.get_search_results(es_cli_obj,"topic",q,aggregations_query=True)
buckets_list = result_dict["aggregations"]["group_ids"]["buckets"] buckets_list = result_dict["aggregations"]["pictorial_ids"]["buckets"]
sorted_buckets_list = sorted(buckets_list,key=lambda item:item["max_date"]["value"],reverse=True) sorted_buckets_list = sorted(buckets_list,key=lambda item:item["max_date"]["value"],reverse=True)
sorted_group_id_list = [item["key"] for item in sorted_buckets_list] sorted_pictorial_id_list = [item["key"] for item in sorted_buckets_list]
return sorted_group_id_list return sorted_pictorial_id_list
except: except:
logging.error("catch exception,err_msg:%s" % traceback.format_exc()) logging.error("catch exception,err_msg:%s" % traceback.format_exc())
return [] return []
\ No newline at end of file
...@@ -12,7 +12,6 @@ from .common import TopicDocumentField ...@@ -12,7 +12,6 @@ from .common import TopicDocumentField
from search.utils.common import * from search.utils.common import *
class TopicUtils(object): class TopicUtils(object):
@classmethod @classmethod
...@@ -27,22 +26,23 @@ class TopicUtils(object): ...@@ -27,22 +26,23 @@ class TopicUtils(object):
try: try:
q = dict() q = dict()
q["query"] = { q["query"] = {
"term":{ "term": {
"user_id": user_id "user_id": user_id
} }
} }
q["_source"] = ["tag_list","attention_user_id_list", "pick_user_id_list", "same_group_user_id_list"] q["_source"] = {
"include": ["tag_list", "attention_user_id_list", "pick_user_id_list", "same_pictorial_user_id_list"]
}
result_dict = ESPerform.get_search_results(ESPerform.get_cli(), "user", q, offset, size) result_dict = ESPerform.get_search_results(ESPerform.get_cli(), "user", q, offset, size)
return result_dict return result_dict
except: except:
logging.error("catch exception,err_msg:%s" % traceback.format_exc()) logging.error("catch exception,err_msg:%s" % traceback.format_exc())
return {"total_count":0,"hits":[]} return {"total_count": 0, "hits": []}
@classmethod @classmethod
def analyze_related_user_id_list(cls,related_user_id_list): def analyze_related_user_id_list(cls, related_user_id_list):
""" """
:remark:获取指定用户关联的 用户列表 :remark:获取指定用户关联的 用户列表
:param related_user_id_list: :param related_user_id_list:
...@@ -61,13 +61,13 @@ class TopicUtils(object): ...@@ -61,13 +61,13 @@ class TopicUtils(object):
elif item["country_id"] == 2: elif item["country_id"] == 2:
korea_user_id_list.append(item["user_id"]) korea_user_id_list.append(item["user_id"])
return (chinese_user_id_list,japan_user_id_list,korea_user_id_list) return (chinese_user_id_list, japan_user_id_list, korea_user_id_list)
except: except:
logging.error("catch exception,err_msg:%s" % traceback.format_exc()) logging.error("catch exception,err_msg:%s" % traceback.format_exc())
return ([],[],[]) return ([], [], [])
@classmethod @classmethod
def refresh_redis_hash_data(cls, redis_cli,redis_key,redis_data_dict): def refresh_redis_hash_data(cls, redis_cli, redis_key, redis_data_dict):
try: try:
redis_cli.hmset(redis_key, redis_data_dict) redis_cli.hmset(redis_key, redis_data_dict)
return True return True
...@@ -76,13 +76,13 @@ class TopicUtils(object): ...@@ -76,13 +76,13 @@ class TopicUtils(object):
return False return False
@classmethod @classmethod
def ___get_should_term_list(cls,ori_list,field_name="tag_list"): def ___get_should_term_list(cls, ori_list, field_name="tag_list"):
try: try:
should_term_list = list() should_term_list = list()
for term_id in ori_list: for term_id in ori_list:
term_dict = { term_dict = {
"term":{ "term": {
field_name:{"value":term_id} field_name: {"value": term_id}
} }
} }
should_term_list.append(term_dict) should_term_list.append(term_dict)
...@@ -92,7 +92,38 @@ class TopicUtils(object): ...@@ -92,7 +92,38 @@ class TopicUtils(object):
return [] return []
@classmethod @classmethod
def get_topic_tag_info(cls, offset, size, topic_id_list, user_id):
    """
    Look up the edit tags of the given topics in the "topic" index.

    :param offset: paging offset forwarded to ESPerform.get_search_results
    :param size: page size forwarded to ESPerform.get_search_results
    :param topic_id_list: topic ids matched with a terms query on "id"
    :param user_id: not used by this method; kept so existing callers keep working
    :return: dict mapping str(topic id) to that hit's "edit_tag_list"
             (an empty list when the hit has no such field); {} when the
             lookup raises
    """
    try:
        q = {
            "query": {
                "terms": {
                    "id": topic_id_list
                }
            },
            "_source": {
                "includes": ["id", "pictorial_id", "offline_score", "user_id", "edit_tag_list"]
            }
        }
        result_dict = ESPerform.get_search_results(ESPerform.get_cli(), sub_index_name="topic", query_body=q,
                                                   offset=offset, size=size)

        # Keys are stringified ids; a hit without "edit_tag_list" gets its own fresh empty list.
        return {
            str(item["_source"]["id"]): item["_source"].get("edit_tag_list", list())
            for item in result_dict["hits"]
        }
    except Exception:
        # Exception (not a bare except) so SystemExit / KeyboardInterrupt still propagate.
        logging.error("catch exception,err_msg:%s" % traceback.format_exc())
        return {}
@classmethod
def get_recommend_topic_ids(cls, user_id, tag_id, offset, size, single_size, query=None,
query_type=TopicPageType.FIND_PAGE,
filter_topic_id_list=[], test_score=False, must_topic_id_list=[], recommend_tag_list=[],
user_similar_score_list=[]):
""" """
:需增加打散逻辑 :需增加打散逻辑
:remark:获取首页推荐帖子列表 :remark:获取首页推荐帖子列表
...@@ -105,7 +136,8 @@ class TopicUtils(object): ...@@ -105,7 +136,8 @@ class TopicUtils(object):
try: try:
attention_user_id_list = list() attention_user_id_list = list()
pick_user_id_list = list() pick_user_id_list = list()
same_group_id_list = list() # same_group_id_list = list()
same_pictorial_id_list = list()
user_tag_list = list() user_tag_list = list()
result_dict = TopicUtils.get_related_user_info(user_id, 0, 1) result_dict = TopicUtils.get_related_user_info(user_id, 0, 1)
...@@ -118,85 +150,129 @@ class TopicUtils(object): ...@@ -118,85 +150,129 @@ class TopicUtils(object):
pick_user_info_list = result_dict["hits"][0]["_source"]["pick_user_id_list"] pick_user_info_list = result_dict["hits"][0]["_source"]["pick_user_id_list"]
pick_user_id_list = [item["user_id"] for item in pick_user_info_list] pick_user_id_list = [item["user_id"] for item in pick_user_info_list]
same_group_user_info_list = result_dict["hits"][0]["_source"]["same_group_user_id_list"] # same_group_user_info_list = result_dict["hits"][0]["_source"]["same_group_user_id_list"]
same_group_id_list = [item["user_id"] for item in same_group_user_info_list] # same_group_id_list = [item["user_id"] for item in same_group_user_info_list]
# same_group_id_list = same_group_id_list[:100]
same_pictorial_user_info_list = result_dict["hits"][0]["_source"]["same_pictorial_user_id_list"]
user_tag_list = result_dict["hits"][0]["_source"]["tag_list"] same_pictorial_id_list = [item["user_id"] for item in same_pictorial_user_info_list]
same_pictorial_id_list = same_pictorial_id_list[:100]
# attention_user_id_term_list = cls.___get_should_term_list(attention_user_id_list,field_name="user_id") user_tag_list = result_dict["hits"][0]["_source"]["tag_list"]
# pick_user_id_term_list = cls.___get_should_term_list(pick_user_id_list,field_name="user_id")
# same_group_user_id_term_list = cls.___get_should_term_list(same_group_id_list,field_name="user_id")
q = dict() q = dict()
q["query"] = dict() q["query"] = dict()
functions_list = [ functions_list = [
{ {
"gauss": { "filter": {
"update_time": { "term": {
"scale": "1d", "language_type": 1
"decay": 0.9
} }
}, },
"weight": 5 "weight": 3
}, },
{ {
"gauss": { "linear": {
"create_time": { "create_time": {
"scale": "1d", "scale": "1d",
"decay": 0.9 "decay": 0.99
} }
}, },
"weight": 10 "weight": 500
} }
] ]
if len(attention_user_id_list)>0: if len(user_similar_score_list) > 0:
for item in user_similar_score_list[:100]:
score_item = 3 * 10 * item[1]
functions_list.append( functions_list.append(
{ {
"filter": {"bool": { "filter": {"bool": {
"should": {"terms":{"user_id":attention_user_id_list}}}}, "should": {"term": {"user_id": item[0]}}}},
"weight": score_item,
}
)
if len(attention_user_id_list) > 0:
functions_list.append(
{
"filter": {"bool": {
"should": {"terms": {"user_id": attention_user_id_list}}}},
"weight": 3, "weight": 3,
} }
) )
if len(pick_user_id_list)>0: if len(pick_user_id_list) > 0:
functions_list.append( functions_list.append(
{ {
"filter": {"bool": { "filter": {"bool": {
"should": {"terms":{"user_id":pick_user_id_list}}}}, "should": {"terms": {"user_id": pick_user_id_list}}}},
"weight": 2 "weight": 2
} }
) )
if len(same_group_id_list)>0: # if len(same_group_id_list)>0:
# functions_list.append(
# {
# "filter": {"bool": {
# "should": {"terms":{"user_id":same_group_id_list}}}},
# "weight": 1
# }
# )
if len(same_pictorial_id_list) > 0:
functions_list.append( functions_list.append(
{ {
"filter": {"bool": { "filter": {"bool": {
"should": {"terms":{"user_id":same_group_id_list}}}}, "should": {"terms": {"user_id": same_pictorial_id_list}}}},
"weight": 1 "weight": 1
} }
) )
# query_tag_term_list = cls.___get_should_term_list(user_tag_list) # query_tag_term_list = cls.___get_should_term_list(user_tag_list)
if len(user_tag_list)>0: if len(user_tag_list) > 0:
functions_list.append( functions_list.append(
{ {
"filter":{"bool":{ "filter": {"bool": {
"should":{"terms":{"tag_list":user_tag_list}}}}, "should": {"terms": {"tag_list": user_tag_list}}}},
"weight": 1 "weight": 1
} }
) )
if len(recommend_tag_list) > 0:
functions_list.append(
{
"filter": {"bool": {
"should": {"terms": {"edit_tag_list": recommend_tag_list}}}},
"weight": 3
}
)
low_content_level = 4 if query_type==TopicPageType.FIND_PAGE else 3 low_content_level = 4 if query_type == TopicPageType.FIND_PAGE else 3
query_function_score = { query_function_score = {
"query": { "query": {
"bool": { "bool": {
"filter": [ "filter": [
{"range": {"content_level": {"gte": low_content_level, "lte": 5}}}, {"range": {"content_level": {"gte": low_content_level, "lte": 5}}},
{"term": {"has_image":True}}, {"term": {"has_image": True}},
{"term": {"is_online": True}}, {"term": {"is_online": True}},
{"term": {"is_deleted": False}} {"term": {"is_deleted": False}}
],
"should": [
{
"bool": {
"must": [
{"term": {"has_image": True}},
{"term": {"has_video": False}}
] ]
} }
}, {
"bool": {
"must": {
"term": {"has_video": True}
}
}
}
],
"minimum_should_match": 1
}
}, },
"score_mode": "sum", "score_mode": "sum",
"boost_mode": "sum", "boost_mode": "sum",
...@@ -204,23 +280,23 @@ class TopicUtils(object): ...@@ -204,23 +280,23 @@ class TopicUtils(object):
} }
if len(must_topic_id_list) > 0: if len(must_topic_id_list) > 0:
query_function_score["query"]["bool"]["must"] = { query_function_score["query"]["bool"]["must"] = {
"terms":{ "terms": {
"id": must_topic_id_list "id": must_topic_id_list
} }
} }
if query_type==TopicPageType.FIND_PAGE and len(filter_topic_id_list)>0: if len(filter_topic_id_list) > 0:
query_function_score["query"]["bool"]["must_not"] = { query_function_score["query"]["bool"]["must_not"] = {
"terms":{ "terms": {
"id": filter_topic_id_list "id": filter_topic_id_list
} }
} }
if query is not None:#搜索帖子 if query is not None: # 搜索帖子
multi_fields = { multi_fields = {
'description': 200, 'description': 200,
'content': 300, 'content': 300,
'name': 400, 'name': 400,
'tag_name_list':300, 'tag_name_list': 300,
} }
query_fields = ['^'.join((k, str(v))) for (k, v) in multi_fields.items()] query_fields = ['^'.join((k, str(v))) for (k, v) in multi_fields.items()]
multi_match = { multi_match = {
...@@ -229,26 +305,31 @@ class TopicUtils(object): ...@@ -229,26 +305,31 @@ class TopicUtils(object):
'operator': 'and', 'operator': 'and',
'fields': query_fields, 'fields': query_fields,
} }
query_function_score["query"]["bool"]["should"] = [ query_function_score["query"]["bool"]["should"] = [
{'multi_match': multi_match} {'multi_match': multi_match},
{"term": {"tag_list": tag_id}}
] ]
query_function_score["query"]["bool"]["minimum_should_match"] = 1 query_function_score["query"]["bool"]["minimum_should_match"] = 1
q["query"]["function_score"] = query_function_score q["query"]["function_score"] = query_function_score
q["collapse"] = {
"field": "user_id"
}
q["_source"] = { q["_source"] = {
"includes":["id","group_id","offline_score"] "includes": ["id", "pictorial_id", "offline_score", "user_id", "edit_tag_list"]
} }
q["sort"] = [ q["sort"] = [
{ {
"_script":{ "_script": {
"type":"number", "type": "number",
"script":{ "script": {
"lang": "expression", "lang": "expression",
"source": "_score*doc['offline_score']" "source": "_score+doc['offline_score']"
# "lang":"painless", # "lang":"painless",
# "source":"_score+params._source.offline_score" # "source":"_score+params._source.offline_score"
}, },
"order":"desc" "order": "desc"
} }
}, },
"_score" "_score"
...@@ -256,17 +337,46 @@ class TopicUtils(object): ...@@ -256,17 +337,46 @@ class TopicUtils(object):
result_dict = ESPerform.get_search_results(ESPerform.get_cli(), sub_index_name="topic", query_body=q, result_dict = ESPerform.get_search_results(ESPerform.get_cli(), sub_index_name="topic", query_body=q,
offset=offset, size=size) offset=offset, size=size)
topic_id_dict = dict() topic_id_list = list()
for item in result_dict["hits"]: same_pictorial_id_set = set()
topic_id_dict[item["_source"]["id"]] = item["_source"]["group_id"] same_user_id_set = set()
return topic_id_dict for item in result_dict["hits"]:
topic_id_list.append(item["_source"]["id"])
# for item in result_dict["hits"]:
# if item["_source"]["group_id"]>0 and item["_source"]["group_id"] not in same_group_id_set:
# same_group_id_set.add(item["_source"]["id"])
# topic_id_list.append(item["_source"]["id"])
# else:
# same_group_id_set.add(item["_source"]["id"])
#
# if item["_source"]["user_id"] not in same_user_id_set:
# same_user_id_set.add(item["_source"]["id"])
# topic_id_list.append(item["_source"]["id"])
# else:
# same_user_id_set.add(item["_source"]["id"])
#
# if len(topic_id_list) >= single_size:
# break
#
# if len(topic_id_list) < single_size:
# for topic_id in same_group_id_set:
# topic_id_list.append(topic_id)
# if len(topic_id_list)>=single_size:
# break
# for topic_id in same_user_id_set:
# topic_id_list.append(topic_id)
# if len(topic_id_list)>=single_size:
# break
return topic_id_list
except: except:
logging.error("catch exception,err_msg:%s" % traceback.format_exc()) logging.error("catch exception,err_msg:%s" % traceback.format_exc())
return dict() return list()
@classmethod @classmethod
def get_topic_detail_recommend_list(cls,user_id,topic_id,topic_tag_list,topic_group_id,topic_user_id,filter_topic_user_id,offset,size,es_cli_obj=None): def get_topic_detail_recommend_list(cls, user_id, topic_id, topic_tag_list, topic_pictorial_id, topic_user_id,
filter_topic_user_id, have_read_topic_list, offset, size, es_cli_obj=None):
""" """
:remark 帖子详情页推荐列表,缺少按时间衰减 :remark 帖子详情页推荐列表,缺少按时间衰减
:param user_id: :param user_id:
...@@ -291,33 +401,35 @@ class TopicUtils(object): ...@@ -291,33 +401,35 @@ class TopicUtils(object):
"weight": 1000 "weight": 1000
}, },
{ {
"gauss": { "linear": {
"update_time": { "create_time": {
"scale": "1d", "scale": "1d",
"decay": 0.5 "decay": 0.5
} }
} }
} }
] ]
if isinstance(topic_group_id,int) and topic_group_id > 0: if isinstance(topic_pictorial_id, int) and topic_pictorial_id > 0:
functions_list.append( functions_list.append(
{ {
"filter": {"term": { "filter": {"term": {
"group_id": topic_group_id}}, "pictorial_id": topic_pictorial_id}},
"weight": 1, "weight": 1,
} }
) )
# query_tag_term_list = cls.___get_should_term_list(topic_tag_list) have_read_topic_list.append(topic_id)
query_function_score = { query_function_score = {
"query":{ "query": {
"bool":{ "bool": {
"must": [ "must": [
{"range": {"content_level": {"gte": 3, "lte": 5}}} {"range": {"content_level": {"gte": 3, "lte": 5}}},
{"term": {"is_online": True}},
{"term": {"is_deleted": False}}
], ],
"must_not":{ "must_not": {
"term":{ "terms": {
"id":topic_id "id": have_read_topic_list
} }
} }
} }
...@@ -329,15 +441,15 @@ class TopicUtils(object): ...@@ -329,15 +441,15 @@ class TopicUtils(object):
if filter_topic_user_id: if filter_topic_user_id:
query_function_score["query"]["bool"]["must"].append({"term": {"user_id": topic_user_id}}) query_function_score["query"]["bool"]["must"].append({"term": {"user_id": topic_user_id}})
if len(topic_tag_list)>0: if len(topic_tag_list) > 0:
query_function_score["query"]["bool"]["should"]={ query_function_score["query"]["bool"]["should"] = {
"terms":{ "terms": {
"tag_list":topic_tag_list "tag_list": topic_tag_list
} }
} }
q["query"]["function_score"] = query_function_score q["query"]["function_score"] = query_function_score
q["_source"] = { q["_source"] = {
"includes":["id","group_id","user_id","_score"] "includes": ["id", "pictorial_id", "user_id", "_score"]
} }
result_dict = ESPerform.get_search_results(es_cli_obj, sub_index_name="topic", query_body=q, result_dict = ESPerform.get_search_results(es_cli_obj, sub_index_name="topic", query_body=q,
...@@ -349,7 +461,7 @@ class TopicUtils(object): ...@@ -349,7 +461,7 @@ class TopicUtils(object):
return [] return []
@classmethod @classmethod
def get_topic_tag_id_list(cls,topic_id,es_cli_obj=None): def get_topic_tag_id_list(cls, topic_id, es_cli_obj=None):
""" """
:remark 获取帖子标签列表 :remark 获取帖子标签列表
:param topic_id: :param topic_id:
...@@ -361,18 +473,18 @@ class TopicUtils(object): ...@@ -361,18 +473,18 @@ class TopicUtils(object):
q = dict() q = dict()
q["query"] = { q["query"] = {
"term":{ "term": {
"id": topic_id "id": topic_id
} }
} }
q["_source"] = { q["_source"] = {
"includes":[TopicDocumentField.TAG_LIST] "includes": [TopicDocumentField.TAG_LIST]
} }
result_dict = ESPerform.get_search_results(es_cli_obj,sub_index_name="topic",query_body=q,size=1) result_dict = ESPerform.get_search_results(es_cli_obj, sub_index_name="topic", query_body=q, size=1)
tag_id_list = [] tag_id_list = []
if len(result_dict["hits"])>0: if len(result_dict["hits"]) > 0:
tag_id_list = result_dict["hits"][0]["_source"][TopicDocumentField.TAG_LIST] tag_id_list = result_dict["hits"][0]["_source"][TopicDocumentField.TAG_LIST]
return tag_id_list return tag_id_list
...@@ -380,9 +492,8 @@ class TopicUtils(object): ...@@ -380,9 +492,8 @@ class TopicUtils(object):
logging.error("catch exception,err_msg:%s" % traceback.format_exc()) logging.error("catch exception,err_msg:%s" % traceback.format_exc())
return list() return list()
@classmethod @classmethod
def get_tag_aggregation_topic_id_list(cls,user_id,tag_id,offset,size): def get_tag_aggregation_topic_id_list(cls, user_id, tag_id, offset, size):
try: try:
attention_user_id_list = list() attention_user_id_list = list()
pick_user_id_list = list() pick_user_id_list = list()
...@@ -399,8 +510,8 @@ class TopicUtils(object): ...@@ -399,8 +510,8 @@ class TopicUtils(object):
functions_list = [ functions_list = [
{ {
"gauss": { "linear": {
"update_time": { "create_time": {
"scale": "1d", "scale": "1d",
"decay": 0.5 "decay": 0.5
} }
...@@ -408,34 +519,34 @@ class TopicUtils(object): ...@@ -408,34 +519,34 @@ class TopicUtils(object):
} }
] ]
if len(attention_user_id_list)>0: if len(attention_user_id_list) > 0:
functions_list.append( functions_list.append(
{ {
"filter": {"bool": { "filter": {"bool": {
"should": {"terms":{"user_id":attention_user_id_list}}}}, "should": {"terms": {"user_id": attention_user_id_list}}}},
"weight": 3, "weight": 3,
} }
) )
if len(pick_user_id_list)>0: if len(pick_user_id_list) > 0:
functions_list.append( functions_list.append(
{ {
"filter": {"bool": { "filter": {"bool": {
"should": {"terms":{"user_id":pick_user_id_list}}}}, "should": {"terms": {"user_id": pick_user_id_list}}}},
"weight": 2 "weight": 2
} }
) )
query_function_score = { query_function_score = {
"query":{ "query": {
"bool":{ "bool": {
"must": [ "must": [
#{"range": {"content_level": {"gte": 3, "lte": 5}}}, # {"range": {"content_level": {"gte": 3, "lte": 5}}},
{"term": {"is_online": True}}, {"term": {"is_online": True}},
{"term": {"is_deleted": False}}, {"term": {"is_deleted": False}},
{"term": {"tag_list":tag_id}} {"term": {"tag_list": tag_id}}
], ],
"must_not":[ "must_not": [
{"terms": {"content_level": [1,2]}} {"terms": {"content_level": [1, 2]}}
] ]
} }
}, },
...@@ -446,22 +557,22 @@ class TopicUtils(object): ...@@ -446,22 +557,22 @@ class TopicUtils(object):
q = dict() q = dict()
q["query"] = { q["query"] = {
"function_score":query_function_score "function_score": query_function_score
} }
q["_source"] = { q["_source"] = {
"includes":["id","group_id","user_id","_score","offline_score","manual_score"] "includes": ["id", "pictorial_id", "user_id", "_score", "offline_score", "manual_score"]
} }
q["sort"] = [ q["sort"] = [
{ {
"_script":{ "_script": {
"type":"number", "type": "number",
"script":{ "script": {
"lang": "expression", "lang": "expression",
"source": "_score+doc['offline_score']+doc['manual_score']" "source": "_score+doc['offline_score']+doc['manual_score']"
# "lang":"painless", # "lang":"painless",
# "source":"_score+params._source.offline_score+params._source.manual_score" # "source":"_score+params._source.offline_score+params._source.manual_score"
}, },
"order":"desc" "order": "desc"
} }
} }
] ]
...@@ -475,22 +586,69 @@ class TopicUtils(object): ...@@ -475,22 +586,69 @@ class TopicUtils(object):
return list() return list()
@classmethod @classmethod
def process_filters(cls, filters, filter_online=True):
    """
    Build the list of ES clauses used as the "must" part of a topic query.

    The result always starts with the is_deleted=False term. Each entry of
    ``filters`` is then translated as follows:

    * "is_complaint"                        -> term clause, added even when the value is falsy
    * "create_time_gte" / "create_time_lte" -> range clause on "create_time_val"
    * "id_gte" / "id_lte"                   -> range clause on "id"
    * any other key                         -> "terms" clause for a list value, "term" otherwise

    Entries with a falsy value are skipped (except "is_complaint").

    :param filters: dict of filter name -> value; may be None or empty
    :param filter_online: when true, an is_online=True term is appended last
    :return: list of ES clause dicts
    """
    # filter key -> (document field, range operator)
    range_filters = {
        "create_time_gte": ("create_time_val", "gte"),
        "create_time_lte": ("create_time_val", "lte"),
        "id_gte": ("id", "gte"),
        "id_lte": ("id", "lte"),
    }

    f = [
        {"term": {"is_deleted": False}},
    ]

    logging.info("get filters:%s" % filters)

    # No early return on empty filters: the is_online clause below must still
    # be appended when filter_online is set.
    for k, v in (filters or {}).items():
        if k == "is_complaint":
            f.append({
                "term": {k: v},
            })
            # Already handled; without this a truthy value would be appended
            # a second time by the generic branch below.
            continue

        if not v:
            continue

        if k in range_filters:
            field, op = range_filters[k]
            f.append({
                "range": {
                    field: {
                        op: v,
                    }
                }
            })
        elif isinstance(v, list):
            f.append({
                "terms": {k: v},
            })
        else:
            f.append({
                "term": {k: v},
            })

    if filter_online:
        f.append({"term": {"is_online": True}})

    return f
@classmethod @classmethod
...@@ -511,43 +669,144 @@ class TopicUtils(object): ...@@ -511,43 +669,144 @@ class TopicUtils(object):
"""处理排序部分。""" """处理排序部分。"""
sort_rule = [] sort_rule = []
if isinstance(sorts_by, int):
if sorts_by == TOPIC_SEARCH_SORT.VOTE_NUM: if sorts_by == TOPIC_SEARCH_SORT.VOTE_NUM:
sort_rule.append({ sort_rule.append({
"vote_num":{ "vote_num": {
"order":"desc" "order": "desc"
},
"update_time": {
"order": "desc"
},
})
elif isinstance(sorts_by, list):
for sort_by in sorts_by:
if sort_by == TOPIC_SEARCH_SORT.ID_AEC:
sort_rule.append({
"id": {
"order": "asc"
},
})
elif sort_by == TOPIC_SEARCH_SORT.ID_DESC:
sort_rule.append({
"id": {
"order": "desc"
}, },
"update_time":{ })
"order":"desc" elif sort_by == TOPIC_SEARCH_SORT.SCORE_AEC:
sort_rule.append({
"sort_score": {
"order": "asc"
},
})
elif sort_by == TOPIC_SEARCH_SORT.SCORE_DESC:
sort_rule.append({
"sort_score": {
"order": "desc"
}, },
}) })
return sort_rule return sort_rule
@classmethod @classmethod
def list_topic_ids(cls, filters, nfilters, sorts_by, offset=0, size=10, index_name="topic", filter_online=True):
    """
    Run a filtered (non full-text) search and return the raw hits.

    :param filters: passed to cls.process_filters to build the bool "must" clauses
    :param nfilters: passed to cls.process_nfilters to build the bool "must_not" clauses
    :param sorts_by: passed to cls.process_sort; a "sort" section is only added
                     when that call returns a non-empty value
    :param offset: paging offset forwarded to ESPerform.get_search_results
    :param size: page size forwarded to ESPerform.get_search_results
    :param index_name: sub index to query
    :param filter_online: forwarded to cls.process_filters
    :return: {"hits": [...], "total_count": n}; an empty result when the
             search raises
    """
    q = {
        "query": {
            "bool": {
                "must": cls.process_filters(filters, filter_online=filter_online),
                "must_not": cls.process_nfilters(nfilters),
            }
        }
    }

    if sorts_by:
        sorts = cls.process_sort(sorts_by)
        if sorts:
            q["sort"] = sorts

    try:
        result_dict = ESPerform.get_search_results(
            ESPerform.get_cli(), sub_index_name=index_name,
            query_body=q, offset=offset, size=size
        )

        return {
            "hits": result_dict["hits"],
            "total_count": result_dict["total_count"]
        }
    except Exception:
        # Exception (not a bare except) so SystemExit / KeyboardInterrupt still propagate.
        logging.error("catch exception,err_msg:%s" % traceback.format_exc())
        return {
            "hits": [],
            "total_count": 0
        }
@classmethod
def business_topic_ids(cls, filters, nfilters, sorts_by, offset=0, size=10, index_name="topic", filter_online=True):
    """
    Topic search for the business side: filtered search, optionally combined
    with a full-text match on "content".

    When ``filters`` carries a non-empty "content" value, that value is used as
    a multi_match query on the "content" field inside a function_score query;
    otherwise a plain bool query is issued.

    :param filters: dict passed to cls.process_filters; may be None or empty
    :param nfilters: passed to cls.process_nfilters to build the "must_not" clauses
    :param sorts_by: passed to cls.process_sort; a "sort" section is only added
                     when that call returns a non-empty value
    :param offset: paging offset forwarded to ESPerform.get_search_results
    :param size: page size forwarded to ESPerform.get_search_results
    :param index_name: sub index to query
    :param filter_online: forwarded to cls.process_filters
    :return: {"hits": [...], "total_count": n}; an empty result when the
             search raises
    """
    must = cls.process_filters(filters, filter_online=filter_online)
    must_not = cls.process_nfilters(nfilters)
    bool_clause = {
        "bool": {
            "must": must,
            "must_not": must_not,
        }
    }

    # process_filters accepts None / empty filters, so this lookup has to as well.
    query = (filters or {}).get("content") or ''

    q = {}
    if query:
        # NOTE(review): a function_score "functions[].filter" only selects which
        # documents receive the weight; it does not exclude non-matching
        # documents. Confirm whether the filters are meant to restrict results here.
        q["query"] = {
            "function_score": {
                "functions": [{
                    "filter": bool_clause,
                    "weight": 1
                }],
                "query": {
                    "multi_match": {
                        "fields": ["content"],
                        "type": "cross_fields",
                        "operator": "and",
                        "query": query
                    }
                }
            }
        }
    else:
        q["query"] = bool_clause

    logging.info("get query business_topic:%s" % q)

    if sorts_by:
        sorts = cls.process_sort(sorts_by)
        if sorts:
            q["sort"] = sorts

    try:
        result_dict = ESPerform.get_search_results(
            ESPerform.get_cli(), sub_index_name=index_name,
            query_body=q, offset=offset, size=size
        )

        return {
            "hits": result_dict["hits"],
            "total_count": result_dict["total_count"]
        }
    except Exception:
        # Exception (not a bare except) so SystemExit / KeyboardInterrupt still propagate.
        logging.error("catch exception,err_msg:%s" % traceback.format_exc())
        return {
            "hits": [],
            "total_count": 0
        }
#!/usr/bin/env python
# -*- coding: utf-8 -*-
from __future__ import unicode_literals, absolute_import, print_function
from gm_rpcd.all import bind
import logging
import traceback
import json
from search.utils.topic import TopicUtils
from libs.es import ESPerform
from libs.cache import redis_client
from search.utils.common import *
from libs.es import ESPerform
@bind("physical/search/business/topic")
def business_topic_search(filters, nfilters=None, sorts_by=None, offset=0, size=10):
    """
    Topic search endpoint.

    Delegates to TopicUtils.business_topic_ids on the "topic" index with
    filter_online=False, then reduces the hits to their ids.

    :param filters: filter dict forwarded to TopicUtils.business_topic_ids
    :param nfilters: exclusion filters forwarded unchanged
    :param sorts_by: sort specification forwarded unchanged
    :param offset: paging offset
    :param size: page size
    :return: {"topic_ids": [...], "total_count": n}; empty ids and a zero
             count when anything raises
    """
    try:
        search_result = TopicUtils.business_topic_ids(
            filters=filters, nfilters=nfilters, sorts_by=sorts_by,
            offset=offset, size=size, filter_online=False,
            index_name="topic"
        )
        # Log label corrected from the misspelt "result_lsit".
        logging.info("get result_list:%s" % search_result)
        topic_ids = [item["_source"]["id"] for item in search_result["hits"]]
        return {"topic_ids": topic_ids, "total_count": search_result["total_count"]}
    except Exception:
        # Exception (not a bare except) so SystemExit / KeyboardInterrupt still propagate.
        logging.error("catch exception, err_msg:%s" % traceback.format_exc())
        return {"topic_ids": [], "total_count": 0}
...@@ -12,8 +12,8 @@ from search.utils.common import GroupSortTypes ...@@ -12,8 +12,8 @@ from search.utils.common import GroupSortTypes
from libs.es import ESPerform from libs.es import ESPerform
@bind("physical/search/query_group") @bind("physical/search/query_pictorial")
def query_group(query="",offset=0,size=10): def query_group(query="", offset=0, size=10):
""" """
:remark:小组搜索排序策略,缺少排序策略 :remark:小组搜索排序策略,缺少排序策略
:param query: :param query:
...@@ -22,7 +22,7 @@ def query_group(query="",offset=0,size=10): ...@@ -22,7 +22,7 @@ def query_group(query="",offset=0,size=10):
:return: :return:
""" """
try: try:
result_dict = GroupUtils.get_group_query_result(query,offset,size) result_dict = GroupUtils.get_group_query_result(query, offset, size)
group_ids_list = [] group_ids_list = []
if len(result_dict["hits"]) > 0: if len(result_dict["hits"]) > 0:
...@@ -31,10 +31,11 @@ def query_group(query="",offset=0,size=10): ...@@ -31,10 +31,11 @@ def query_group(query="",offset=0,size=10):
return {"group_ids": group_ids_list} return {"group_ids": group_ids_list}
except: except:
logging.error("catch exception,err_msg:%s" % traceback.format_exc()) logging.error("catch exception,err_msg:%s" % traceback.format_exc())
return {"group_ids":[]} return {"group_ids": []}
@bind("physical/search/group_sort")
def group_sort(user_id=-1,sort_type=GroupSortTypes.HOT_RECOMMEND,offset=0,size=10): @bind("physical/search/pictorial_sort")
def pictorial_sort(user_id=-1, sort_type=GroupSortTypes.HOT_RECOMMEND, offset=0, size=10):
""" """
:remark 小组排序,缺少:前1天发评论人数*x :remark 小组排序,缺少:前1天发评论人数*x
:param user_id: :param user_id:
...@@ -44,34 +45,165 @@ def group_sort(user_id=-1,sort_type=GroupSortTypes.HOT_RECOMMEND,offset=0,size=1 ...@@ -44,34 +45,165 @@ def group_sort(user_id=-1,sort_type=GroupSortTypes.HOT_RECOMMEND,offset=0,size=1
:return: :return:
""" """
try: try:
if not isinstance(user_id,int): if not isinstance(user_id, int):
user_id = -1 user_id = -1
#获取es链接对象 # 获取es链接对象
es_cli_obj = ESPerform.get_cli() es_cli_obj = ESPerform.get_cli()
if sort_type==GroupSortTypes.HOT_RECOMMEND: if sort_type == GroupSortTypes.HOT_RECOMMEND:
group_ids_list = GroupUtils.get_hot_group_recommend_result_list(offset,size,es_cli_obj) pictorial_ids_list = GroupUtils.get_hot_pictorial_recommend_result_list(offset, size, es_cli_obj)
return {"group_recommend_ids":group_ids_list} return {"pictorial_recommend_ids": pictorial_ids_list}
elif sort_type==GroupSortTypes.ATTENTION_RECOMMEND: elif sort_type == GroupSortTypes.ATTENTION_RECOMMEND:
attention_group_list = GroupUtils.get_user_attention_group_list(user_id,offset=0,size=1,es_cli_obj=es_cli_obj) attention_pictorial_list = GroupUtils.get_user_attention_pictorial_list(user_id, offset=0, size=1,
if len(attention_group_list)==0: es_cli_obj=es_cli_obj)
return {"group_recommend_ids": []} if len(attention_pictorial_list) == 0:
return {"pictorial_recommend_ids": []}
else: else:
attention_group_id_list = [item["group_id"] for item in attention_group_list] attention_pictorial_id_list = [item["pictorial_id"] for item in attention_pictorial_list]
sorted_group_ids_list = GroupUtils.get_group_ids_by_aggs(attention_group_id_list,es_cli_obj) sorted_pictorial_ids_list = GroupUtils.get_pictorial_ids_by_aggs(attention_pictorial_id_list,
es_cli_obj)
pictorial_recommend_ids_list = sorted_pictorial_ids_list
# if len(group_recommend_ids_list) < size and len(group_recommend_ids_list)<len(attention_group_list):
sorted_attention_pictorial_list = sorted(attention_pictorial_list,
key=lambda item: item["update_time_val"], reverse=True)
for item in sorted_attention_pictorial_list:
if item["pictorial_id"] not in pictorial_recommend_ids_list:
pictorial_recommend_ids_list.append(item["pictorial_id"])
return {"pictorial_recommend_ids": pictorial_recommend_ids_list[offset:(offset + size)]}
group_recommend_ids_list = sorted_group_ids_list except:
#if len(group_recommend_ids_list) < size and len(group_recommend_ids_list)<len(attention_group_list): logging.error("catch exception,err_msg:%s" % traceback.format_exc())
sorted_attention_group_list = sorted(attention_group_list,key=lambda item:item["update_time_val"],reverse=True) return {"pictorial_recommend_ids": []}
for item in sorted_attention_group_list:
if item["group_id"] not in group_recommend_ids_list:
group_recommend_ids_list.append(item["group_id"]) @bind("physical/search/pictorial_topic")
def pictorial_topic(topic_id=-1, offset=0, size=10):
    """
    :remark Pictorials related to a topic. First collects the online, non-deleted
            pictorials the topic belongs to (most recently updated first). If fewer
            than 10 were found, tops the list up with online, non-deleted pictorials
            whose tag_id matches one of the topic's tags.
    :param topic_id: id of the topic; any non-int value is replaced by -1
    :param offset: paging offset forwarded to each ES search
    :param size: page size forwarded to each ES search
    :return: {"pictorial_ids_list": [...]}; the list is empty when an exception occurs
    """
    try:
        if not isinstance(topic_id, int):
            # Fix: the original assigned `user_id = -1` here, which left the
            # invalid topic_id untouched and sent it to ES as-is.
            topic_id = -1

        # Obtain the ES client object.
        es_cli_obj = ESPerform.get_cli()

        # Look up the pictorials and tags attached to the topic.
        q = {}
        q["query"] = {
            "term": {
                "id": topic_id
            }
        }
        q["_source"] = {
            "include": ["id", "pictorial_id", "tag_list"]
        }
        result_dict = ESPerform.get_search_results(es_cli_obj, "topic", q, offset, size)
        logging.info("get result_dict:%s" % result_dict)

        pict_pictorial_ids_list = []
        topic_tag_list = []
        # If several hits come back, the last one wins (same as the original loop).
        for item in result_dict["hits"]:
            pict_pictorial_ids_list = item["_source"]["pictorial_id"]
            topic_tag_list = item["_source"]["tag_list"]

        # Pictorials the topic belongs to, newest update first.
        q["query"] = {
            "bool": {
                "must": [
                    {"terms": {"id": pict_pictorial_ids_list}},
                    {"term": {"is_online": True}},
                    {"term": {"is_deleted": False}},
                ]
            }
        }
        q["_source"] = {
            "include": ["id", "update_time"]
        }
        q["sort"] = {
            'update_time': {
                'order': 'desc'
            }
        }
        result_dict = ESPerform.get_search_results(es_cli_obj, "pictorial", q, offset, size)
        pictorial_id_list = [item["_source"]["id"] for item in result_dict["hits"]]
        logging.info("get pictorial_id_list:%s" % pictorial_id_list)
        logging.info("get topic_tag_list:%s" % topic_tag_list)

        if len(pictorial_id_list) >= 10:
            return {"pictorial_ids_list": pictorial_id_list}

        # Fewer than 10 pictorials: top up with pictorials sharing a tag with the topic.
        q["query"] = {
            "bool": {
                "must": [
                    {"terms": {"tag_id": topic_tag_list}},
                    {"term": {"is_online": True}},
                    {"term": {"is_deleted": False}},
                ]
            }
        }
        q["_source"] = {
            "include": ["id", "tag_id"]
        }
        q["sort"] = {
            'update_time': {
                'order': 'desc'
            }
        }
        # NOTE(review): kept from the original; whether get_search_results honours
        # these keys or its own offset/size arguments is not visible here - confirm.
        q["from"] = 0
        q["size"] = 10
        result_dict = ESPerform.get_search_results(es_cli_obj, "pictorial", q, offset, size)
        for item in result_dict["hits"]:
            tag_pictorial_id = item["_source"]["id"]
            if tag_pictorial_id not in pictorial_id_list:
                pictorial_id_list.append(tag_pictorial_id)
        logging.info("get result_dict tag:%s" % result_dict)
        logging.info("get pictorial_id_list tag:%s" % pictorial_id_list)

        # Slicing already handles lists shorter than 10.
        return {"pictorial_ids_list": pictorial_id_list[:10]}
    except Exception:
        logging.error("catch exception,err_msg:%s" % traceback.format_exc())
        return {"pictorial_ids_list": []}
...@@ -13,7 +13,7 @@ from search.utils.common import * ...@@ -13,7 +13,7 @@ from search.utils.common import *
from libs.es import ESPerform from libs.es import ESPerform
def get_discover_page_topic_ids(user_id,device_id,size,query_type=TopicPageType.FIND_PAGE): def get_discover_page_topic_ids(user_id, device_id, size, query_type=TopicPageType.FIND_PAGE):
try: try:
if user_id == -1: if user_id == -1:
redis_key = "physical:discover_page" + ":user_id:" + str(user_id) + ":device_id:" + device_id redis_key = "physical:discover_page" + ":user_id:" + str(user_id) + ":device_id:" + device_id
...@@ -21,112 +21,138 @@ def get_discover_page_topic_ids(user_id,device_id,size,query_type=TopicPageType. ...@@ -21,112 +21,138 @@ def get_discover_page_topic_ids(user_id,device_id,size,query_type=TopicPageType.
redis_key = "physical:discover_page" + ":user_id:" + str(user_id) redis_key = "physical:discover_page" + ":user_id:" + str(user_id)
redis_field_list = [b'have_read_topic_id'] redis_field_list = [b'have_read_topic_id']
redis_field_val_list = redis_client.hmget(redis_key,redis_field_list) redis_field_val_list = redis_client.hmget(redis_key, redis_field_list)
have_read_topic_id_list = json.loads(redis_field_val_list[0]) if redis_field_val_list[0] else [] have_read_topic_id_list = json.loads(redis_field_val_list[0]) if redis_field_val_list[0] else []
recommend_topic_ids = [] recommend_topic_ids = TopicUtils.get_recommend_topic_ids(user_id=user_id, tag_id=0, offset=0, size=size,single_size=size,
topic_id_dict = TopicUtils.get_recommend_topic_ids(user_id=user_id, offset=0, size=size,query_type=query_type,filter_topic_id_list=have_read_topic_id_list) query_type=query_type,
filter_topic_id_list=have_read_topic_id_list)
for topic_id in topic_id_dict:
recommend_topic_ids.append(topic_id)
have_read_topic_id_list.extend(recommend_topic_ids) have_read_topic_id_list.extend(recommend_topic_ids)
redis_dict = { redis_dict = {
"have_read_topic_id": json.dumps(have_read_topic_id_list) "have_read_topic_id": json.dumps(have_read_topic_id_list)
} }
redis_client.hmset(redis_key,redis_dict) redis_client.hmset(redis_key, redis_dict)
return recommend_topic_ids return recommend_topic_ids
except: except:
logging.error("catch exception,err_msg:%s" % traceback.format_exc()) logging.error("catch exception,err_msg:%s" % traceback.format_exc())
return [] return []
def get_home_recommend_topic_ids(user_id,device_id,offset,size,query=None,query_type=TopicPageType.HOME_RECOMMEND):
def get_home_recommend_topic_ids(user_id, device_id, tag_id, offset, size, query=None,
query_type=TopicPageType.HOME_RECOMMEND):
try: try:
if query is None: if query is None:
if user_id == -1: if user_id == -1:
redis_key = "physical:home_recommend" + ":user_id:" + str(user_id) + ":device_id:" + device_id + ":query_type:" + str(query_type) redis_key = "physical:home_recommend" + ":user_id:" + str(
user_id) + ":device_id:" + device_id + ":query_type:" + str(query_type)
else: else:
redis_key = "physical:home_recommend" + ":user_id:" + str(user_id) + ":query_type:" + str(query_type) redis_key = "physical:home_recommend" + ":user_id:" + str(user_id) + ":query_type:" + str(query_type)
else: else:
if user_id == -1: if user_id == -1:
redis_key = "physical:home_query" + ":user_id:" + str(user_id) + ":device_id:" + device_id + ":query:" + str(query) + ":query_type:" + str(query_type) redis_key = "physical:home_query" + ":user_id:" + str(
user_id) + ":device_id:" + device_id + ":query:" + str(query) + ":query_type:" + str(query_type)
else: else:
redis_key = "physical:home_query" + ":user_id:" + str(user_id) + ":query:" + str(query) + ":query_type:" + str(query_type) redis_key = "physical:home_query" + ":user_id:" + str(user_id) + ":query:" + str(
query) + ":query_type:" + str(query_type)
redis_field_list = [b'last_offset_num', b'unread_topic_id'] redis_field_list = [b'have_read_topic_list']
for page_id in range(0,offset,size): redis_field_val_list = redis_client.hmget(redis_key, redis_field_list)
redis_field_list.append(str(page_id))
redis_field_val_list = redis_client.hmget(redis_key,redis_field_list)
last_offset_num = int(redis_field_val_list[0]) if redis_field_val_list[0] else -1 tag_recommend_redis_key = "physical:linucb:tag_recommend:device_id:" + str(device_id)
recommend_topic_ids = []
topic_id_dict = TopicUtils.get_recommend_topic_ids(user_id, offset, size*size,query,query_type=query_type) recommend_tag_list = []
have_read_group_id_set = set() tag_recommend_val = redis_client.get(tag_recommend_redis_key)
unread_topic_id_dict = dict() if tag_recommend_val:
have_read_topic_id_set = set() recommend_tag_list = json.loads(str(tag_recommend_val, encoding="utf-8"))
if redis_field_val_list[1] and offset>0:
if (user_id>0 and offset==last_offset_num) or user_id==-1:
ori_unread_topic_id_dict = json.loads(redis_field_val_list[1])
if len(ori_unread_topic_id_dict) > 0:
topic_id_dict.update(ori_unread_topic_id_dict)
for have_read_item in redis_field_val_list[2:]:
if have_read_item:
have_read_topic_id_set=have_read_topic_id_set.union(json.loads(have_read_item))
# 当前页小组数量
cur_page_group_num = 0
# 当前页用户数量
cur_page_user_num = 0
for topic_id in topic_id_dict:
if topic_id_dict[topic_id] in have_read_group_id_set:
unread_topic_id_dict[topic_id] = topic_id_dict[topic_id]
else:
if topic_id not in have_read_topic_id_set:
if isinstance(topic_id_dict[topic_id],int) and topic_id_dict[topic_id]>0 and cur_page_group_num<(size*0.9):
have_read_group_id_set.add(topic_id_dict[topic_id])
have_read_topic_id_set.add(topic_id)
cur_page_group_num += 1
recommend_topic_ids.append(topic_id)
elif topic_id_dict[topic_id] and cur_page_user_num<(size*0.1):
cur_page_user_num += 1
recommend_topic_ids.append(topic_id)
else:
unread_topic_id_dict[topic_id] = topic_id_dict[topic_id]
if len(recommend_topic_ids) >= size: recommend_topic_ids = []
break have_read_topic_id_list = list()
if redis_field_val_list[0] and query is None:
have_read_topic_id_list = list(json.loads(redis_field_val_list[0]))
offi_unread_topic_id_dict = dict() user_similar_score_redis_key = "physical:user_similar_score:user_id:" + str(user_id)
if len(recommend_topic_ids) < size and len(unread_topic_id_dict)>0: redis_user_similar_score_redis_val = redis_client.get(user_similar_score_redis_key)
for unread_topic_id in unread_topic_id_dict: user_similar_score_redis_list = json.loads(
if len(recommend_topic_ids)<size: redis_user_similar_score_redis_val) if redis_user_similar_score_redis_val else []
recommend_topic_ids.append(unread_topic_id)
else:
offi_unread_topic_id_dict[unread_topic_id] = unread_topic_id_dict[unread_topic_id]
topic_id_list = TopicUtils.get_recommend_topic_ids(user_id=user_id, tag_id=tag_id, offset=offset, size=size,
single_size=size,query=query, query_type=query_type,
filter_topic_id_list=have_read_topic_id_list,
recommend_tag_list=recommend_tag_list,
user_similar_score_list=user_similar_score_redis_list)
have_read_group_id_set = set()
have_read_user_id_set = set()
unread_topic_id_dict = dict()
# # 当前页小组数量
# cur_page_group_num = 0
# # 当前页用户数量
# cur_page_user_num = 0
#
# for topic_id in topic_id_dict:
# if topic_id_dict[topic_id][0] in have_read_group_id_set or topic_id_dict[topic_id][
# 1] in have_read_user_id_set:
# unread_topic_id_dict[topic_id] = topic_id_dict[topic_id]
# else:
# if isinstance(topic_id_dict[topic_id][0], int) and topic_id_dict[topic_id][
# 0] > 0 and cur_page_group_num < (size * 0.9):
# have_read_group_id_set.add(topic_id_dict[topic_id][0])
# have_read_user_id_set.add(topic_id_dict[topic_id][1])
# have_read_topic_id_list.append(topic_id)
# cur_page_group_num += 1
# recommend_topic_ids.append(topic_id)
# elif topic_id_dict[topic_id] and cur_page_user_num < (size * 0.1):
# have_read_user_id_set.add(topic_id_dict[topic_id][1])
# cur_page_user_num += 1
# recommend_topic_ids.append(topic_id)
# have_read_topic_id_list.append(topic_id)
# else:
# unread_topic_id_dict[topic_id] = topic_id_dict[topic_id]
#
# if len(recommend_topic_ids) >= size:
# break
# if len(recommend_topic_ids) < size and len(unread_topic_id_dict) > 0:
# for unread_topic_id in unread_topic_id_dict:
# if len(recommend_topic_ids) < size:
# recommend_topic_ids.append(unread_topic_id)
# have_read_topic_id_list.append(unread_topic_id)
# else:
# break
have_read_topic_id_list.extend(topic_id_list)
if len(have_read_topic_id_list) > 5000:
cut_len = len(have_read_topic_id_list)-5000
have_read_topic_id_list = have_read_topic_id_list[cut_len:]
redis_dict = { redis_dict = {
"unread_topic_id":json.dumps(offi_unread_topic_id_dict), "have_read_topic_list": json.dumps(have_read_topic_id_list),
"last_offset_num":offset+size,
offset: json.dumps(recommend_topic_ids)
} }
redis_client.hmset(redis_key,redis_dict) redis_client.hmset(redis_key, redis_dict)
# 每个session key保存15分钟 # 每个session key保存15分钟
redis_client.expire(redis_key,15*60) redis_client.expire(redis_key, 60 * 60 * 24 * 3)
return recommend_topic_ids return topic_id_list
except: except:
logging.error("catch exception,err_msg:%s" % traceback.format_exc()) logging.error("catch exception,err_msg:%s" % traceback.format_exc())
return [] return []
@bind("physical/search/query_tag_id_by_topic")
def query_tag_id_by_topic(offset=0, size=10, topic_id_list=None, user_id=-1):
    """
    :remark Thin RPC wrapper around TopicUtils.get_topic_tag_info.
    :param offset: forwarded unchanged to TopicUtils.get_topic_tag_info
    :param size: forwarded unchanged to TopicUtils.get_topic_tag_info
    :param topic_id_list: list of topic ids; omitted or None is treated as an empty list
    :param user_id: forwarded unchanged to TopicUtils.get_topic_tag_info
    :return: whatever TopicUtils.get_topic_tag_info returns; {} when an exception occurs
    """
    # A `[]` default is created once and shared by every call; build a fresh
    # list per call so the callee can never leak state between requests.
    if topic_id_list is None:
        topic_id_list = []
    try:
        return TopicUtils.get_topic_tag_info(offset, size, topic_id_list, user_id)
    except Exception:
        logging.error("catch exception,err_msg:%s" % traceback.format_exc())
        return {}
@bind("physical/search/home_recommend") @bind("physical/search/home_recommend")
def home_recommend(device_id="",user_id=-1,offset=0,size=10,query_type=TopicPageType.HOME_RECOMMEND): def home_recommend(device_id="", user_id=-1, offset=0, size=10, query_type=TopicPageType.HOME_RECOMMEND):
""" """
:remark:首页推荐,目前只推荐日记 :remark:首页推荐,目前只推荐日记
:param session_id: :param session_id:
...@@ -137,20 +163,43 @@ def home_recommend(device_id="",user_id=-1,offset=0,size=10,query_type=TopicPage ...@@ -137,20 +163,43 @@ def home_recommend(device_id="",user_id=-1,offset=0,size=10,query_type=TopicPage
""" """
try: try:
if not user_id: if not user_id:
user_id=-1 user_id = -1
if not isinstance(device_id,str): if not isinstance(device_id, str):
device_id = "" device_id = ""
recommend_topic_ids = get_home_recommend_topic_ids(user_id,device_id,offset,size,query_type=query_type) recommend_topic_ids = list()
return {"recommend_topic_ids":recommend_topic_ids} es_node_load_high_flag = False
try:
es_node_load_high_flag = ESPerform.if_es_node_load_high(ESPerform.get_cli())
except:
logging.error("catch exception,err_msg:%s" % traceback.format_exc())
es_node_load_high_flag = True
if es_node_load_high_flag:
temp_downgrading_key = "physical:home_recommend:user_id:241407656:query_type:1"
redis_field_list = [b'have_read_topic_list']
redis_field_val_list = redis_client.hmget(temp_downgrading_key, redis_field_list)
if redis_field_val_list[0]:
have_read_topic_id_list = list(json.loads(redis_field_val_list[0]))
if len(have_read_topic_id_list) > offset:
recommend_topic_ids = have_read_topic_id_list[offset:offset+size]
else:
recommend_topic_ids = have_read_topic_id_list[0:size]
else:
recommend_topic_ids = get_home_recommend_topic_ids(user_id, device_id, tag_id=0, offset=0, size=size,
query_type=query_type)
return {"recommend_topic_ids": recommend_topic_ids}
except: except:
logging.error("catch exception,err_msg:%s" % traceback.format_exc()) logging.error("catch exception,err_msg:%s" % traceback.format_exc())
return {"recommend_topic_ids": []} return {"recommend_topic_ids": []}
@bind("physical/search/discover_page") @bind("physical/search/discover_page")
def discover_page(device_id="",user_id=-1,size=10): def discover_page(device_id="", user_id=-1, size=10):
""" """
:remark:首页推荐,目前只推荐日记 :remark:首页推荐,目前只推荐日记
:param session_id: :param session_id:
...@@ -161,20 +210,20 @@ def discover_page(device_id="",user_id=-1,size=10): ...@@ -161,20 +210,20 @@ def discover_page(device_id="",user_id=-1,size=10):
""" """
try: try:
if not user_id: if not user_id:
user_id=-1 user_id = -1
if not isinstance(device_id,str): if not isinstance(device_id, str):
device_id = "" device_id = ""
recommend_topic_ids = get_discover_page_topic_ids(user_id,device_id,size,query_type=TopicPageType.FIND_PAGE) recommend_topic_ids = get_discover_page_topic_ids(user_id, device_id, size, query_type=TopicPageType.FIND_PAGE)
return {"recommend_topic_ids":recommend_topic_ids} return {"recommend_topic_ids": recommend_topic_ids}
except: except:
logging.error("catch exception,err_msg:%s" % traceback.format_exc()) logging.error("catch exception,err_msg:%s" % traceback.format_exc())
return {"recommend_topic_ids": []} return {"recommend_topic_ids": []}
@bind("physical/search/home_query") @bind("physical/search/home_query")
def home_query(device_id="",user_id=-1,query="",offset=0,size=10): def home_query(device_id="", tag_id=-1, user_id=-1, query="", offset=0, size=10):
""" """
:remark:首页搜索,目前只推荐日记 :remark:首页搜索,目前只推荐日记
:param session_id: :param session_id:
...@@ -186,19 +235,20 @@ def home_query(device_id="",user_id=-1,query="",offset=0,size=10): ...@@ -186,19 +235,20 @@ def home_query(device_id="",user_id=-1,query="",offset=0,size=10):
""" """
try: try:
if not user_id: if not user_id:
user_id=-1 user_id = -1
if not isinstance(device_id,str): if not isinstance(device_id, str):
device_id = "" device_id = ""
recommend_topic_ids = get_home_recommend_topic_ids(user_id,device_id,offset,size,query) recommend_topic_ids = get_home_recommend_topic_ids(user_id, device_id, tag_id, offset, size, query)
return {"recommend_topic_ids":recommend_topic_ids} return {"recommend_topic_ids": recommend_topic_ids}
except: except:
logging.error("catch exception,err_msg:%s" % traceback.format_exc()) logging.error("catch exception,err_msg:%s" % traceback.format_exc())
return {"recommend_topic_ids": []} return {"recommend_topic_ids": []}
@bind("physical/search/topic_detail_page_recommend") @bind("physical/search/topic_detail_page_recommend")
def topic_detail_page_recommend(user_id=-1,topic_id=-1,topic_group_id=-1,topic_user_id=-1,filter_topic_user_id=False,offset=0,size=10): def topic_detail_page_recommend(device_id="", user_id=-1, topic_id=-1, topic_pictorial_id=-1, topic_user_id=-1,
filter_topic_user_id=False, offset=0, size=10):
""" """
:remark:帖子详情页推荐策略,缺少第一个卡片策略 :remark:帖子详情页推荐策略,缺少第一个卡片策略
:param user_id: :param user_id:
...@@ -208,20 +258,33 @@ def topic_detail_page_recommend(user_id=-1,topic_id=-1,topic_group_id=-1,topic_u ...@@ -208,20 +258,33 @@ def topic_detail_page_recommend(user_id=-1,topic_id=-1,topic_group_id=-1,topic_u
:return: :return:
""" """
try: try:
if not isinstance(user_id,int): if not isinstance(user_id, int):
user_id = -1 user_id = -1
#获取es链接对象 redis_key = "physical:topic_detail_page_recommend" + ":user_id:" + str(user_id) + ":device_id:" + str(device_id)
have_read_topic_redis_data = redis_client.get(redis_key)
have_read_topic_list = json.loads(have_read_topic_redis_data) if have_read_topic_redis_data else []
# 获取es链接对象
es_cli_obj = ESPerform.get_cli() es_cli_obj = ESPerform.get_cli()
# 获取帖子标签列表 # 获取帖子标签列表
topic_tag_list = TopicUtils.get_topic_tag_id_list(topic_id,es_cli_obj) topic_tag_list = TopicUtils.get_topic_tag_id_list(topic_id, es_cli_obj)
result_list = TopicUtils.get_topic_detail_recommend_list(user_id,topic_id,topic_tag_list,topic_group_id,topic_user_id,filter_topic_user_id,offset,size,es_cli_obj) result_list = TopicUtils.get_topic_detail_recommend_list(user_id, topic_id, topic_tag_list, topic_pictorial_id,
topic_user_id, filter_topic_user_id,
have_read_topic_list, offset, size, es_cli_obj)
recommend_topic_ids_list = list() recommend_topic_ids_list = list()
if len(result_list)>0: if len(result_list) > 0:
recommend_topic_ids_list = [item["_source"]["id"] for item in result_list] recommend_topic_ids_list = [item["_source"]["id"] for item in result_list]
have_read_topic_list.extend(recommend_topic_ids_list)
have_read_topic_len = len(have_read_topic_list)
if have_read_topic_len > 5000:
have_read_topic_list = have_read_topic_list[(have_read_topic_len - 5000):]
redis_client.set(redis_key, json.dumps(have_read_topic_list))
return {"recommend_topic_ids": recommend_topic_ids_list} return {"recommend_topic_ids": recommend_topic_ids_list}
except: except:
logging.error("catch exception,err_msg:%s" % traceback.format_exc()) logging.error("catch exception,err_msg:%s" % traceback.format_exc())
...@@ -229,7 +292,7 @@ def topic_detail_page_recommend(user_id=-1,topic_id=-1,topic_group_id=-1,topic_u ...@@ -229,7 +292,7 @@ def topic_detail_page_recommend(user_id=-1,topic_id=-1,topic_group_id=-1,topic_u
@bind("physical/search/query_topic_by_tag_aggregation") @bind("physical/search/query_topic_by_tag_aggregation")
def query_topic_by_tag_aggregation(user_id,tag_id, offset, size): def query_topic_by_tag_aggregation(user_id, tag_id, offset, size):
""" """
:remark 按标签聚合召回帖子 :remark 按标签聚合召回帖子
:param tag_id: :param tag_id:
...@@ -243,9 +306,9 @@ def query_topic_by_tag_aggregation(user_id,tag_id, offset, size): ...@@ -243,9 +306,9 @@ def query_topic_by_tag_aggregation(user_id,tag_id, offset, size):
if not tag_id: if not tag_id:
tag_id = -1 tag_id = -1
result_list = TopicUtils.get_tag_aggregation_topic_id_list(user_id,tag_id,offset,size) result_list = TopicUtils.get_tag_aggregation_topic_id_list(user_id, tag_id, offset, size)
recommend_topic_ids_list = list() recommend_topic_ids_list = list()
if len(result_list)>0: if len(result_list) > 0:
recommend_topic_ids_list = [item["_source"]["id"] for item in result_list] recommend_topic_ids_list = [item["_source"]["id"] for item in result_list]
return {"recommend_topic_id": recommend_topic_ids_list} return {"recommend_topic_id": recommend_topic_ids_list}
...@@ -261,11 +324,15 @@ def topic_search(filters, nfilters=None, sorts_by=None, offset=0, size=10): ...@@ -261,11 +324,15 @@ def topic_search(filters, nfilters=None, sorts_by=None, offset=0, size=10):
try: try:
result_list = TopicUtils.list_topic_ids(filters=filters, nfilters=nfilters, result_list = TopicUtils.list_topic_ids(filters=filters, nfilters=nfilters,
sorts_by=sorts_by, offset=offset, size=size) sorts_by=sorts_by, offset=offset, size=size)
topic_ids = [item["_source"]["id"] for item in result_list] logging.info("get result_list:%s"%result_list)
return {"topic_ids": topic_ids} topic_ids = [item["_source"]["id"] for item in result_list["hits"]]
return {
"topic_ids": topic_ids,
"total_count": result_list["total_count"]
}
except: except:
logging.error("catch exception, err_msg:%s" % traceback.format_exc()) logging.error("catch exception, err_msg:%s" % traceback.format_exc())
return {"topic_ids": []} return {"topic_ids": [], "total_count": 0}
@bind("physical/search/query_topic_by_user_similarity") @bind("physical/search/query_topic_by_user_similarity")
...@@ -279,10 +346,10 @@ def query_topic_by_user_similarity(topic_similarity_score_dict, offset=0, size=1 ...@@ -279,10 +346,10 @@ def query_topic_by_user_similarity(topic_similarity_score_dict, offset=0, size=1
""" """
try: try:
must_topic_id_list = list(topic_similarity_score_dict.keys()) must_topic_id_list = list(topic_similarity_score_dict.keys())
topic_id_dict = TopicUtils.get_recommend_topic_ids(user_id=-1, offset=offset, size=size,must_topic_id_list=must_topic_id_list) topic_id_list = TopicUtils.get_recommend_topic_ids(tag_id=0, user_id=-1, offset=offset, size=size,single_size=size,
must_topic_id_list=must_topic_id_list)
recommend_topic_ids = list(topic_id_dict.keys()) return {"recommend_topic_ids": topic_id_list}
return {"recommend_topic_ids":recommend_topic_ids}
except: except:
logging.error("catch exception,err_msg:%s" % traceback.format_exc()) logging.error("catch exception,err_msg:%s" % traceback.format_exc())
return {"recommend_topic_id": []} return {"recommend_topic_id": []}
...@@ -24,8 +24,10 @@ def recommend_user(self_user_id,interesting_user_id,offset=0,size=10): ...@@ -24,8 +24,10 @@ def recommend_user(self_user_id,interesting_user_id,offset=0,size=10):
:return: :return:
""" """
try: try:
if not isinstance(self_user_id,int): if not isinstance(self_user_id, int):
self_user_id = -1 self_user_id = -1
if not isinstance(interesting_user_id, int):
interesting_user_id = -1
#获取es链接对象 #获取es链接对象
es_cli_obj = ESPerform.get_cli() es_cli_obj = ESPerform.get_cli()
......
...@@ -19,6 +19,15 @@ from libs.cache import redis_client ...@@ -19,6 +19,15 @@ from libs.cache import redis_client
from trans2es.models.face_user_contrast_similar import FaceUserContrastSimilar from trans2es.models.face_user_contrast_similar import FaceUserContrastSimilar
import json import json
from search.utils.topic import TopicUtils
from trans2es.models.pick_topic import PickTopic
from trans2es.models.tag import TopicTag,Tag
from trans2es.models.user_extra import UserExtra
from trans2es.models.group import Group
from trans2es.models.topic import Topic,ActionSumAboutTopic
from search.utils.common import *
from linucb.views.collect_data import CollectData
from injection.data_sync.tasks import sync_user_similar_score
class Job(object): class Job(object):
__es = None __es = None
...@@ -40,7 +49,6 @@ class Job(object): ...@@ -40,7 +49,6 @@ class Job(object):
def __call__(self): def __call__(self):
type_info = get_type_info_map()[self._type_name] type_info = get_type_info_map()[self._type_name]
assert isinstance(type_info, TypeInfo) assert isinstance(type_info, TypeInfo)
result = type_info.insert_table_chunk( result = type_info.insert_table_chunk(
sub_index_name=self._sub_index_name, sub_index_name=self._sub_index_name,
table_chunk=self._chunk, table_chunk=self._chunk,
...@@ -97,7 +105,9 @@ class Command(BaseCommand): ...@@ -97,7 +105,9 @@ class Command(BaseCommand):
make_option('-s', '--pks', dest='pks', help='specify sync pks, comma separated', metavar='PKS', default=''), make_option('-s', '--pks', dest='pks', help='specify sync pks, comma separated', metavar='PKS', default=''),
make_option('--streaming-slicing', dest='streaming_slicing', action='store_true', default=True), make_option('--streaming-slicing', dest='streaming_slicing', action='store_true', default=True),
make_option('--no-streaming-slicing', dest='streaming_slicing', action='store_false', default=True), make_option('--no-streaming-slicing', dest='streaming_slicing', action='store_false', default=True),
make_option('-S', '--sync_type',dest='sync_type', help='sync data to es',metavar='TYPE',default='') make_option('-S', '--sync_type',dest='sync_type', help='sync data to es',metavar='TYPE',default=''),
make_option('-T', '--test_score', dest='test_score', help='test_score', metavar='TYPE', default='')
) )
def __sync_data_by_type(self, type_name): def __sync_data_by_type(self, type_name):
...@@ -116,6 +126,68 @@ class Command(BaseCommand): ...@@ -116,6 +126,68 @@ class Command(BaseCommand):
except: except:
logging.error("catch exception,err_msg:%s" % traceback.format_exc()) logging.error("catch exception,err_msg:%s" % traceback.format_exc())
def generate_topic_score_detail(self):
try:
topic_id_dict = TopicUtils.get_recommend_topic_ids(241432787,0, 0, 500,query_type=TopicPageType.HOME_RECOMMEND,test_score=True)
for topic_id in topic_id_dict:
offline_score = 0.0
user_is_shadow = False
topic_sql_item = Topic.objects.filter(id=topic_id)
user_is_recommend=0.0
# 是否官方推荐用户
user_query_results = UserExtra.objects.filter(user_id=topic_sql_item[0].user_id)
if user_query_results.count() > 0:
if user_query_results[0].is_recommend:
offline_score += 2.0
user_is_recommend = 2.0
elif user_query_results[0].is_shadow:
user_is_shadow = True
group_is_recommend=0.0
# 是否官方推荐小组
# if topic_sql_item[0].group and topic_sql_item[0].group.is_recommend:
# offline_score += 4.0
# group_is_recommend = 4.0
topic_level_score = 0.0
# 帖子等级
if topic_sql_item[0].content_level == '5':
offline_score += 6.0
topic_level_score = 6.0
elif topic_sql_item[0].content_level == '4':
offline_score += 5.0
topic_level_score = 5.0
elif topic_sql_item[0].content_level == '3':
offline_score += 2.0
topic_level_score = 2.0
exposure_count = ActionSumAboutTopic.objects.filter(topic_id=topic_id, data_type=1).count()
click_count = ActionSumAboutTopic.objects.filter(topic_id=topic_id, data_type=2).count()
uv_num = ActionSumAboutTopic.objects.filter(topic_id=topic_id, data_type=3).count()
exposure_score = 0.0
uv_score = 0.0
if exposure_count > 0:
offline_score += click_count / exposure_count
exposure_score = click_count / exposure_count
if uv_num > 0:
offline_score += (topic_sql_item[0].vote_num / uv_num + topic_sql_item[0].reply_num / uv_num)
uv_score = (topic_sql_item[0].vote_num / uv_num + topic_sql_item[0].reply_num / uv_num)
"""
1:马甲账号是否对总分降权?
"""
if user_is_shadow:
offline_score = offline_score * 0.5
logging.info("test_score######topic_id:%d,score:%f,offline_score:%f,user_is_recommend:%f,group_is_recommend:%f,topic_level_score:%f,exposure_score:%f,uv_score:%f"
% (topic_id,topic_id_dict[topic_id][2],offline_score,user_is_recommend,group_is_recommend,topic_level_score,exposure_score,uv_score))
except:
logging.error("catch exception,err_msg:%s" % traceback.format_exc())
def handle(self, *args, **options): def handle(self, *args, **options):
try: try:
type_name_list = get_type_info_map().keys() type_name_list = get_type_info_map().keys()
...@@ -129,6 +201,14 @@ class Command(BaseCommand): ...@@ -129,6 +201,14 @@ class Command(BaseCommand):
if len(options["sync_type"]) and options["sync_type"]=="sync_data_to_es": if len(options["sync_type"]) and options["sync_type"]=="sync_data_to_es":
SyncDataToRedis.sync_face_similar_data_to_redis() SyncDataToRedis.sync_face_similar_data_to_redis()
if len(options["test_score"]):
self.generate_topic_score_detail()
if len(options["sync_type"]) and options["sync_type"]=="linucb":
collect_obj = CollectData()
collect_obj.consume_data_from_kafka()
if len(options["sync_type"]) and options["sync_type"]=="similar":
sync_user_similar_score()
except: except:
logging.error("catch exception,err_msg:%s" % traceback.format_exc()) logging.error("catch exception,err_msg:%s" % traceback.format_exc())
...@@ -41,11 +41,12 @@ class Command(BaseCommand): ...@@ -41,11 +41,12 @@ class Command(BaseCommand):
official_index_name = ESPerform.get_official_index_name(type_name) official_index_name = ESPerform.get_official_index_name(type_name)
index_exists = es_cli.indices.exists(official_index_name) index_exists = es_cli.indices.exists(official_index_name)
if not index_exists: if not index_exists:
logging.info("begin create [%s] index and mapping!" % type_name) logging.info("begin create [%s] index!" % type_name)
ESPerform.create_index(es_cli, type_name) ESPerform.create_index(es_cli, type_name)
ESPerform.put_index_mapping(es_cli, type_name)
else: logging.info("begin create [%s] mapping!" % type_name)
logging.warning("index:[%s] has already existing!" % type_name) ESPerform.put_index_mapping(es_cli, type_name, force_sync=True)
if len(options["indices_template"]): if len(options["indices_template"]):
template_file_name = options["indices_template"] template_file_name = options["indices_template"]
......
{ {
"index_patterns": ["*"], "index_patterns": ["*"],
"settings":{ "settings":{
"number_of_shards": 9, "number_of_shards": 5,
"number_of_replicas": 2, "number_of_replicas": 3,
"index":{ "index":{
"analysis":{ "analysis":{
"filter":{ "filter":{
......
{
"dynamic":"strict",
"properties": {
"id":{"type":"long"},
"is_online":{"type":"boolean"},//上线
"is_deleted":{"type":"boolean"},
"is_recommend":{"type":"boolean"},
"name":{"type":"text","analyzer":"gm_default_index","search_analyzer":"gm_default_index"},
"description":{"type":"text","analyzer":"gm_default_index","search_analyzer":"gm_default_index"},
"topic_num":{"type":"long"},
"creator_id":{"type":"long"},
"icon":{"type":"text"},
"high_quality_topic_num":{"type":"long"},//前一天该小组4&5星帖子数量
"create_time":{"type":"date", "format":"date_time_no_millis"},
"update_time":{"type":"date", "format":"date_time_no_millis"},
"tag_id":{"type":"long"},
"tag_name":{"type":"text","analyzer":"gm_default_index","search_analyzer":"gm_default_index"},
"topic_id_list":{"type":"long"}
}
}
\ No newline at end of file
...@@ -19,6 +19,9 @@ ...@@ -19,6 +19,9 @@
}, },
"name":{"type":"text","analyzer":"gm_default_index","search_analyzer":"gm_default_index"}, "name":{"type":"text","analyzer":"gm_default_index","search_analyzer":"gm_default_index"},
"tag_type":{"type":"long"}, "tag_type":{"type":"long"},
"collection":{"type":"long"},
"is_ai":{"type":"long"},
"is_own":{"type":"long"},
"is_online":{"type":"keyword"},//上线 "is_online":{"type":"keyword"},//上线
"is_deleted":{"type":"keyword"}, "is_deleted":{"type":"keyword"},
"near_new_topic_num":{"type":"long","store": true} "near_new_topic_num":{"type":"long","store": true}
......
{
"dynamic":"strict",
"properties": {
"id":{"type":"long"},
"is_online":{"type":"boolean"},//上线
"is_deleted":{"type":"boolean"},
"vote_num":{"type":"long"},
"reply_num":{"type":"long"},
"name":{"type":"text","analyzer":"gm_default_index","search_analyzer":"gm_default_index"},
"description":{"type":"text","analyzer":"gm_default_index","search_analyzer":"gm_default_index"},
"content":{"type":"text","analyzer":"gm_default_index","search_analyzer":"gm_default_index"},
"content_level":{"type":"text"},
"user_id":{"type":"long"},
"group_id":{"type":"long"}, //所在组ID
"tag_list":{"type":"long"},//标签属性
"edit_tag_list":{"type":"long"},//编辑标签
"tag_name_list":{"type":"text","analyzer":"gm_default_index","search_analyzer":"gm_default_index"},
"share_num":{"type":"long"},
"pick_id_list":{"type":"long"},
"offline_score":{"type":"double"},//离线算分
"manual_score":{"type":"double"},//人工赋分
"has_image":{"type":"boolean"},//是否有图
"has_video":{"type":"boolean"},//是否是视频
"create_time":{"type":"date", "format":"date_time_no_millis"},
"update_time":{"type":"date", "format":"date_time_no_millis"},
"create_time_val":{"type":"long"},
"update_time_val":{"type":"long"},
"language_type":{"type":"long"},
"is_shadow": {"type": "boolean"},
"is_recommend": {"type": "boolean"},
"is_complaint": {"type": "boolean"}, // 是否被举报
"virtual_content_level":{"type": "text"},
"like_num_crawl": {"type": "long"}, // 爬取点赞数
"comment_num_crawl": {"type": "long"}, // 爬取评论数
"is_crawl": {"type": "boolean"},
"platform": {"type": "long"},
"platform_id": {"type": "long"},
"drop_score":{"type": "double"}, // 人工降分
"sort_score":{"type": "double"}, // 排序分
"pictorial_id":{"type": "long"}, //所在组ID
"pictorial_name":{ // 所在组名称
"type": "text",
"analyzer": "gm_default_index",
"search_analyzer": "gm_default_index"
}
}
}
...@@ -13,15 +13,41 @@ ...@@ -13,15 +13,41 @@
"user_id":{"type":"long"}, "user_id":{"type":"long"},
"group_id":{"type":"long"}, //所在组ID "group_id":{"type":"long"}, //所在组ID
"tag_list":{"type":"long"},//标签属性 "tag_list":{"type":"long"},//标签属性
"edit_tag_list":{"type":"long"},//编辑标签
"tag_name_list":{"type":"text","analyzer":"gm_default_index","search_analyzer":"gm_default_index"}, "tag_name_list":{"type":"text","analyzer":"gm_default_index","search_analyzer":"gm_default_index"},
"share_num":{"type":"long"}, "share_num":{"type":"long"},
"pick_id_list":{"type":"long"}, "pick_id_list":{"type":"long"},
"offline_score":{"type":"double"},//离线算分 "offline_score":{"type":"double"},//离线算分
"manual_score":{"type":"double"},//人工赋分 "manual_score":{"type":"double"},//人工赋分
"has_image":{"type":"boolean"},//是否有图 "has_image":{"type":"boolean"},//是否有图
"has_video":{"type":"boolean"},//是否是视频
"create_time":{"type":"date", "format":"date_time_no_millis"}, "create_time":{"type":"date", "format":"date_time_no_millis"},
"update_time":{"type":"date", "format":"date_time_no_millis"}, "update_time":{"type":"date", "format":"date_time_no_millis"},
"create_time_val":{"type":"long"}, "create_time_val":{"type":"long"},
"update_time_val":{"type":"long"} "update_time_val":{"type":"long"},
"language_type":{"type":"long"},
"is_shadow": {"type": "boolean"},
"is_recommend": {"type": "boolean"},
"is_complaint": {"type": "boolean"}, // 是否被举报
"virtual_content_level":{"type": "text"},
"like_num_crawl": {"type": "long"}, // 爬取点赞数
"comment_num_crawl": {"type": "long"}, // 爬取评论数
"is_crawl": {"type": "boolean"},
"platform": {"type": "long"},
"platform_id": {"type": "long"},
"drop_score":{"type": "double"}, // 人工降分
"sort_score":{"type": "double"}, // 排序分
"pictorial_id":{"type": "long"}, //所在组ID
"pictorial_name":{ // 所在组名称
"type": "text",
"analyzer": "gm_default_index",
"search_analyzer": "gm_default_index"
}
} }
} }
...@@ -28,17 +28,31 @@ ...@@ -28,17 +28,31 @@
"country_id":{"type":"text"} "country_id":{"type":"text"}
} }
}, },
"same_group_user_id_list":{//同组用户列表 // "same_group_user_id_list":{//同组用户列表
// "type":"nested",
// "properties":{
// "user_id":{"type":"long"},
// "country_id":{"type":"text"}
// }
// },
// "attention_group_id_list":{//关注小组列表
// "type":"nested",
// "properties":{
// "group_id":{"type":"long"},
// "update_time_val":{"type":"long"}
// }
// },
"same_pictorial_user_id_list":{//同画报用户列表
"type":"nested", "type":"nested",
"properties":{ "properties":{
"user_id":{"type":"long"}, "user_id":{"type":"long"},
"country_id":{"type":"text"} "country_id":{"type":"text"}
} }
}, },
"attention_group_id_list":{//关注小组列表 "attention_pictorial_id_list":{//关注画报列表
"type":"nested", "type":"nested",
"properties":{ "properties":{
"group_id":{"type":"long"}, "pictorial_id":{"type":"long"},
"update_time_val":{"type":"long"} "update_time_val":{"type":"long"}
} }
}, },
......
...@@ -12,4 +12,5 @@ from .tag import AccountUserTag ...@@ -12,4 +12,5 @@ from .tag import AccountUserTag
from .user import User from .user import User
from .group import Group from .group import Group
from .topic import Topic from .pictorial import PictorialFollow
from .pictorial import Pictorial
...@@ -30,7 +30,7 @@ class Celebrity(models.Model): ...@@ -30,7 +30,7 @@ class Celebrity(models.Model):
def get_pick_id_list(self): def get_pick_id_list(self):
try: try:
pick_id_list = list() pick_id_list = list()
query_results = PickCelebrity.objects.filter(celebrity_id=self.id,is_deleted=False) query_results = PickCelebrity.objects.using(settings.SLAVE_DB_NAME).filter(celebrity_id=self.id,is_deleted=False)
for data_item in query_results: for data_item in query_results:
pick_id_list.append(data_item.pick_id) pick_id_list.append(data_item.pick_id)
......
...@@ -27,3 +27,19 @@ class FaceUserContrastSimilar(models.Model): ...@@ -27,3 +27,19 @@ class FaceUserContrastSimilar(models.Model):
create_time = models.DateTimeField(verbose_name=u'创建时间', default=datetime.datetime.fromtimestamp(0)) create_time = models.DateTimeField(verbose_name=u'创建时间', default=datetime.datetime.fromtimestamp(0))
update_time = models.DateTimeField(verbose_name=u'更新时间', default=datetime.datetime.fromtimestamp(0)) update_time = models.DateTimeField(verbose_name=u'更新时间', default=datetime.datetime.fromtimestamp(0))
contrast_user_id = models.IntegerField(verbose_name=u'对照者用户ID') contrast_user_id = models.IntegerField(verbose_name=u'对照者用户ID')
class UserSimilarScore(models.Model):
    # Similarity score between a user and a "contrast" user, stored in the
    # user_similar_score table.

    class Meta:
        db_table = "user_similar_score"
        verbose_name = u"首页推荐用"

    # Primary key.
    id = models.IntegerField(
        verbose_name=u"主键ID",
        primary_key=True,
    )
    # Soft-delete flag.
    is_deleted = models.BooleanField(
        verbose_name=u"是否删除",
    )
    # User the score belongs to.
    user_id = models.IntegerField(
        verbose_name=u"用户ID",
    )
    # User that user_id is compared against; indexed.
    contrast_user_id = models.BigIntegerField(
        verbose_name="参数对比的用户id",
        db_index=True,
    )
    # Similarity value; defaults to 0.
    score = models.FloatField(
        verbose_name='相似度',
        default=0,
    )
    # Both timestamps default to the Unix epoch (local time); the default is
    # computed once, when this class body is executed.
    create_time = models.DateTimeField(
        verbose_name=u'创建时间',
        default=datetime.datetime.fromtimestamp(0),
    )
    update_time = models.DateTimeField(
        verbose_name=u'更新时间',
        default=datetime.datetime.fromtimestamp(0),
    )
...@@ -27,15 +27,15 @@ class Group(models.Model): ...@@ -27,15 +27,15 @@ class Group(models.Model):
create_time = models.DateTimeField(verbose_name=u'创建时间',default=datetime.datetime.fromtimestamp(0)) create_time = models.DateTimeField(verbose_name=u'创建时间',default=datetime.datetime.fromtimestamp(0))
update_time = models.DateTimeField(verbose_name=u'更新时间', default=datetime.datetime.fromtimestamp(0)) update_time = models.DateTimeField(verbose_name=u'更新时间', default=datetime.datetime.fromtimestamp(0))
#获取前一天4,5星发帖数 # #获取前一天4,5星发帖数
def get_high_quality_topic_num(self): # def get_high_quality_topic_num(self):
yesterday = datetime.datetime.now()-datetime.timedelta(days=1) # yesterday = datetime.datetime.now()-datetime.timedelta(days=1)
yesterday_begin_time = "%s-%s-%s 00:00:00" % (yesterday.year, yesterday.month, yesterday.day) # yesterday_begin_time = "%s-%s-%s 00:00:00" % (yesterday.year, yesterday.month, yesterday.day)
yesterday_end_time = "%s-%s-%s 23:59:59" % (yesterday.year, yesterday.month, yesterday.day) # yesterday_end_time = "%s-%s-%s 23:59:59" % (yesterday.year, yesterday.month, yesterday.day)
#
topic_num = self.group_topics.filter(content_level__in=("4","5"),create_time__gte=yesterday_begin_time,create_time__lte=yesterday_end_time).count() # topic_num = self.group_topics.filter(content_level__in=("4","5"),create_time__gte=yesterday_begin_time,create_time__lte=yesterday_end_time).count()
#
return topic_num # return topic_num
def detail(self): def detail(self):
result = { result = {
......
from django.db import models
import datetime
import logging
import traceback
from .tag import Tag
from .topic import Topic
class PictorialFollow(models.Model):
    """Follow relation between a user and a pictorial."""

    class Meta:
        db_table = "community_pictorial_follow"
        app_label = "community"
        verbose_name = u"画报用户关系"

    # Primary key of the follow record.
    id = models.IntegerField(
        verbose_name=u'关注ID',
        primary_key=True,
    )
    # Both timestamps default to the Unix epoch (local time); the default is
    # computed once, when this class body is executed.
    create_time = models.DateTimeField(
        verbose_name=u'创建时间',
        default=datetime.datetime.fromtimestamp(0),
    )
    update_time = models.DateTimeField(
        verbose_name=u'更新时间',
        default=datetime.datetime.fromtimestamp(0),
    )
    # Online / soft-delete flags of the follow record.
    is_online = models.BooleanField(
        verbose_name=u'是否上线',
    )
    is_deleted = models.BooleanField(
        verbose_name=u'是否删除',
    )
    # The followed pictorial and the following user.
    pictorial_id = models.BigIntegerField(
        verbose_name=u'画报ID',
    )
    user_id = models.BigIntegerField(
        verbose_name=u'用户ID',
    )
class PictorialTopics(models.Model):
    """Link between a pictorial and a topic (``community_pictorial_topic``)."""

    class Meta:
        verbose_name = u'画报帖子关系'
        app_label = 'community'
        db_table = 'community_pictorial_topic'

    # Primary key of the link row.
    id = models.IntegerField(verbose_name=u'日记ID', primary_key=True)
    # Pictorial side of the relation.
    pictorial_id = models.BigIntegerField(verbose_name=u'画报ID')
    # Topic side of the relation.
    topic_id = models.BigIntegerField(verbose_name=u'帖子ID')
    # Declared exactly once. A class body keeps only the last assignment to a
    # name, so an earlier duplicate declaration (with default=True) never took
    # effect; this is the definition that was actually in force.
    is_online = models.BooleanField(verbose_name=u'是否上线')
    # Soft-delete flag.
    is_deleted = models.BooleanField(verbose_name=u'是否删除')
class Pictorial(models.Model):
    """A pictorial (画报), stored in the ``community_pictorial`` table.

    The helper methods below never raise for ordinary errors: on failure they
    log the traceback and return an empty value of the same type as the
    success value (``[]`` or ``0``).
    """

    class Meta:
        verbose_name = u"画报"
        app_label = "community"
        db_table = "community_pictorial"

    id = models.IntegerField(verbose_name=u'关注ID', primary_key=True)
    # Both timestamps default to the Unix epoch (local time); the default is
    # computed once, when this class body is executed.
    create_time = models.DateTimeField(verbose_name=u'创建时间', default=datetime.datetime.fromtimestamp(0))
    update_time = models.DateTimeField(verbose_name=u'更新时间', default=datetime.datetime.fromtimestamp(0))
    is_online = models.BooleanField(verbose_name=u'是否上线')
    is_deleted = models.BooleanField(verbose_name=u'是否删除')
    is_recommend = models.BooleanField(verbose_name=u'推荐')
    name = models.CharField(verbose_name=u'画报名称', max_length=100)
    description = models.CharField(verbose_name=u'画报描述', max_length=200)
    creator_id = models.BigIntegerField(verbose_name=u'画报用户ID')
    # NOTE(review): verbose_name repeats the label of `name`; looks like a
    # copy-paste slip -- confirm before changing the label.
    icon = models.CharField(verbose_name=u'画报名称', max_length=255)
    topic_num = models.IntegerField(verbose_name=u'次数')

    def get_topic_id(self):
        """Return the IDs of all topics linked to this pictorial.

        Returns:
            list: ``topic_id`` values from ``PictorialTopics`` rows whose
            ``pictorial_id`` equals this pictorial's id; ``[]`` if the lookup
            fails (the error is logged).
        """
        try:
            return list(
                PictorialTopics.objects.filter(pictorial_id=self.id).values_list("topic_id", flat=True))
        except Exception:
            logging.error("catch exception,err_msg:%s" % traceback.format_exc())
            return []

    def get_high_quality_topic_num(self):
        """Count this pictorial's topics of content level 4 or 5 created yesterday.

        "Yesterday" is the calendar day before ``datetime.datetime.now()``,
        bounded by 00:00:00 and 23:59:59 (both inclusive).

        Returns:
            int: number of matching topics; ``0`` if the lookup fails (the
            error is logged).
        """
        try:
            yesterday = datetime.datetime.now() - datetime.timedelta(days=1)
            day_prefix = "%s-%s-%s" % (yesterday.year, yesterday.month, yesterday.day)
            yesterday_begin_time = "%s 00:00:00" % day_prefix
            yesterday_end_time = "%s 23:59:59" % day_prefix

            topic_id_list = PictorialTopics.objects.filter(pictorial_id=self.id).values_list("topic_id", flat=True)
            return Topic.objects.filter(
                content_level__in=["4", "5"],
                create_time__gte=yesterday_begin_time,
                create_time__lte=yesterday_end_time,
                id__in=topic_id_list,
            ).count()
        except Exception:
            logging.error("catch exception,err_msg:%s" % traceback.format_exc())
            return 0

    def get_tag_by_id(self):
        """Return the IDs of the online tags attached to this pictorial.

        Returns:
            list: ``tag_id`` values from ``PictorialTag`` rows with
            ``is_online=True``; ``[]`` if the lookup fails (the error is
            logged).
        """
        try:
            return list(
                PictorialTag.objects.filter(pictorial_id=self.id, is_online=True).values_list("tag_id", flat=True))
        except Exception:
            logging.error("catch exception,err_msg:%s" % traceback.format_exc())
            return []

    def get_tag_by_name(self, tag_id):
        """Return the names of the online tags whose id is in ``tag_id``.

        Args:
            tag_id: collection of tag IDs, used with an ``id__in`` lookup.

        Returns:
            list: tag names; ``[]`` if the lookup fails (the error is logged).
        """
        try:
            tag_name_list = []
            tags = Tag.objects.filter(id__in=tag_id, is_online=True).values_list("name", flat=True)
            for tag_name in tags:
                tag_name_list.append(tag_name)
                logging.info("get tags name i:%s" % tag_name)
            return tag_name_list
        except Exception:
            logging.error("catch exception,err_msg:%s" % traceback.format_exc())
            return []
class PictorialTag(models.Model):
    """Tag attached to a pictorial (``community_pictorial_tag``)."""

    class Meta:
        verbose_name = u"画报标签"
        app_label = "community"
        db_table = "community_pictorial_tag"

    id = models.IntegerField(verbose_name=u'关注ID', primary_key=True)
    # Both timestamps default to the Unix epoch (local time); the default is
    # computed once, when this class body is executed.
    create_time = models.DateTimeField(verbose_name=u'创建时间', default=datetime.datetime.fromtimestamp(0))
    update_time = models.DateTimeField(verbose_name=u'更新时间', default=datetime.datetime.fromtimestamp(0))
    # max_length has no meaning for integer/boolean fields (Django ignores it
    # and reports fields.W122 for integer fields), so it is not passed here.
    pictorial_id = models.BigIntegerField(verbose_name=u'画报ID')
    tag_id = models.BigIntegerField(verbose_name=u'标签ID')
    is_online = models.BooleanField(verbose_name=u'是否上线')
...@@ -45,6 +45,9 @@ class Tag(models.Model): ...@@ -45,6 +45,9 @@ class Tag(models.Model):
name = models.CharField(verbose_name=u"标签名称",max_length=128) name = models.CharField(verbose_name=u"标签名称",max_length=128)
description = models.TextField(verbose_name=u"标签描述") description = models.TextField(verbose_name=u"标签描述")
icon_url=models.CharField(verbose_name=u"icon_url",max_length=120) icon_url=models.CharField(verbose_name=u"icon_url",max_length=120)
collection = models.IntegerField(verbose_name=u"是否编辑")
is_ai = models.IntegerField(verbose_name=u"是否ai")
is_own = models.IntegerField(verbose_name=u"是否ins上自带")
create_time = models.DateTimeField(verbose_name=u'创建时间',default=datetime.datetime.fromtimestamp(0)) create_time = models.DateTimeField(verbose_name=u'创建时间',default=datetime.datetime.fromtimestamp(0))
update_time = models.DateTimeField(verbose_name=u'更新时间', default=datetime.datetime.fromtimestamp(0)) update_time = models.DateTimeField(verbose_name=u'更新时间', default=datetime.datetime.fromtimestamp(0))
......
#!/usr/bin/env python #!/usr/bin/env python
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
from __future__ import unicode_literals, absolute_import, print_function
from django.conf import settings from django.conf import settings
from django.core.management.base import BaseCommand, CommandError from django.core.management.base import BaseCommand, CommandError
...@@ -9,35 +10,37 @@ from libs.es import ESPerform ...@@ -9,35 +10,37 @@ from libs.es import ESPerform
from django.db import models from django.db import models
import datetime import datetime
from alpha_types.venus import GRAP_PLATFORM
from .pick_topic import PickTopic from .pick_topic import PickTopic
from .tag import TopicTag,Tag from .tag import TopicTag, Tag
from .user_extra import UserExtra from .user_extra import UserExtra
from .group import Group from .group import Group
class ActionSumAboutTopic(models.Model): class ActionSumAboutTopic(models.Model):
class Meta: class Meta:
verbose_name=u"帖子埋点数据汇总" verbose_name = u"帖子埋点数据汇总"
db_table="action_sum_about_topic" db_table = "action_sum_about_topic"
partiton_date = models.CharField(verbose_name=u"日期",max_length=20) partiton_date = models.CharField(verbose_name=u"日期", max_length=20)
device_id = models.CharField(verbose_name=u"用户设备号",max_length=50) device_id = models.CharField(verbose_name=u"用户设备号", max_length=50)
topic_id = models.CharField(verbose_name=u"帖子ID",max_length=50) topic_id = models.CharField(verbose_name=u"帖子ID", max_length=50)
user_id = models.CharField(verbose_name=u"用户ID",max_length=50) user_id = models.CharField(verbose_name=u"用户ID", max_length=50)
data_type = models.IntegerField(verbose_name=u"动作类型") data_type = models.IntegerField(verbose_name=u"动作类型")
data_value = models.BigIntegerField(verbose_name=u"值") data_value = models.BigIntegerField(verbose_name=u"值")
class TopicImage(models.Model): class TopicImage(models.Model):
class Meta: class Meta:
verbose_name = u'日记图片' verbose_name = u'日记图片'
db_table = 'topic_image' db_table = 'topic_image'
id = models.IntegerField(verbose_name='日记图片ID',primary_key=True) id = models.IntegerField(verbose_name='日记图片ID', primary_key=True)
topic_id = models.IntegerField(verbose_name=u'日记ID') topic_id = models.IntegerField(verbose_name=u'日记ID')
url = models.CharField(verbose_name=u'图片URL',max_length=300) url = models.CharField(verbose_name=u'图片URL', max_length=300)
is_online = models.BooleanField(verbose_name='是否上线') is_online = models.BooleanField(verbose_name='是否上线')
is_deleted = models.BooleanField(verbose_name='是否删除') is_deleted = models.BooleanField(verbose_name='是否删除')
create_time = models.DateTimeField(verbose_name=u'创建时间',default=datetime.datetime.fromtimestamp(0)) create_time = models.DateTimeField(verbose_name=u'创建时间', default=datetime.datetime.fromtimestamp(0))
update_time = models.DateTimeField(verbose_name=u'更新时间', default=datetime.datetime.fromtimestamp(0)) update_time = models.DateTimeField(verbose_name=u'更新时间', default=datetime.datetime.fromtimestamp(0))
...@@ -46,33 +49,67 @@ class Topic(models.Model): ...@@ -46,33 +49,67 @@ class Topic(models.Model):
verbose_name = u'日记' verbose_name = u'日记'
db_table = 'topic' db_table = 'topic'
id = models.IntegerField(verbose_name=u'日记ID',primary_key=True) id = models.IntegerField(verbose_name=u'日记ID', primary_key=True)
name = models.CharField(verbose_name=u'日记名称',max_length=100) name = models.CharField(verbose_name=u'日记名称', max_length=100)
#group_id = models.IntegerField(verbose_name='用户所在组ID',default=-1) # group_id = models.IntegerField(verbose_name='用户所在组ID',default=-1)
group = models.ForeignKey( # group = models.ForeignKey(
Group, verbose_name=u"关联的小组", related_name=u"group_topics",null=True, blank=True, default=None, on_delete=models.CASCADE) # Group, verbose_name=u"关联的小组", related_name=u"group_topics", null=True, blank=True, default=None,
# on_delete=models.CASCADE)
user_id = models.IntegerField(verbose_name=u'用户ID') user_id = models.IntegerField(verbose_name=u'用户ID')
drop_score = models.IntegerField(verbose_name=u'人工赋分',default=0) has_video = models.BooleanField(verbose_name=u'是否是视频日记')
description = models.CharField(verbose_name=u'日记本描述',max_length=200) drop_score = models.IntegerField(verbose_name=u'人工赋分', default=0)
content = models.CharField(verbose_name=u'日记本内容',max_length=1000) description = models.CharField(verbose_name=u'日记本描述', max_length=200)
content = models.CharField(verbose_name=u'日记本内容', max_length=1000)
share_num = models.IntegerField(verbose_name='') share_num = models.IntegerField(verbose_name='')
vote_num = models.IntegerField(verbose_name=u'点赞数') vote_num = models.IntegerField(verbose_name=u'点赞数')
reply_num = models.IntegerField(verbose_name=u'回复数') reply_num = models.IntegerField(verbose_name=u'回复数')
cover = models.CharField(verbose_name='',max_length=200) cover = models.CharField(verbose_name='', max_length=200)
is_online = models.BooleanField(verbose_name=u'是否上线') is_online = models.BooleanField(verbose_name=u'是否上线')
is_deleted = models.BooleanField(verbose_name=u'是否删除') is_deleted = models.BooleanField(verbose_name=u'是否删除')
content_level = models.CharField(verbose_name=u'内容等级',max_length=3) content_level = models.CharField(verbose_name=u'内容等级', max_length=3)
create_time = models.DateTimeField(verbose_name=u'日记创建时间',default=datetime.datetime.fromtimestamp(0)) language_type = models.IntegerField(verbose_name=u'语种类型')
create_time = models.DateTimeField(verbose_name=u'日记创建时间', default=datetime.datetime.fromtimestamp(0))
update_time = models.DateTimeField(verbose_name=u'日记更新时间', default=datetime.datetime.fromtimestamp(0)) update_time = models.DateTimeField(verbose_name=u'日记更新时间', default=datetime.datetime.fromtimestamp(0))
is_crawl = models.BooleanField(verbose_name="是否是爬回的数据", default=False)
has_image = models.BooleanField(verbose_name=u"是否有图片", default=False)
virtual_content_level = models.CharField(verbose_name=u'模拟内容等级', max_length=100, default='')
like_num_crawl = models.IntegerField(verbose_name=u'爬取点赞数', default=0)
comment_num_crawl = models.IntegerField(verbose_name=u'爬取评论数', default=0)
is_recommend = models.IntegerField(verbose_name=u'是否推荐 0 非推荐 1 推荐', default=0)
sort_score = models.FloatField(verbose_name='排序分', default=0)
is_shadow = models.BooleanField(verbose_name="是否是马甲账户", default=False)
platform = models.IntegerField(verbose_name=u'平台来源', choices=GRAP_PLATFORM, default=GRAP_PLATFORM.ALPHA)
platform_id = models.BigIntegerField(verbose_name='用平台ID', null=True)
def get_pictorial_id(self):
    """Return the IDs of the pictorials this topic is linked to.

    Reads ``PictorialTopic`` rows whose ``topic_id`` equals this topic's id.
    Any failure is logged and an empty list is returned instead.
    """
    try:
        relations = PictorialTopic.objects.filter(topic_id=self.id)
        return [pid for pid in relations.values_list("pictorial_id", flat=True)]
    except:
        logging.error("catch exception,err_msg:%s" % traceback.format_exc())
        return []
@property
def is_complaint(self):
    """Whether at least one online complaint exists for this topic."""
    online_complaints = TopicComplaint.objects.filter(topic_id=self.id, is_online=True)
    return online_complaints.exists()
def topic_has_image(self): def topic_has_image(self):
try: try:
has_image = False has_image = False
query_list = TopicImage.objects.filter(topic_id=self.id,is_deleted=False,is_online=True) query_list = TopicImage.objects.using(settings.SLAVE_DB_NAME).filter(topic_id=self.id, is_deleted=False,
if len(query_list)>0: is_online=True)
has_image=True if len(query_list) > 0:
has_image = True
return has_image return has_image
except: except:
...@@ -82,7 +119,7 @@ class Topic(models.Model): ...@@ -82,7 +119,7 @@ class Topic(models.Model):
def get_pick_id_info(self): def get_pick_id_info(self):
try: try:
pick_id_list = list() pick_id_list = list()
query_list = PickTopic.objects.filter(topic_id=self.id,is_deleted=False) query_list = PickTopic.objects.using(settings.SLAVE_DB_NAME).filter(topic_id=self.id, is_deleted=False)
for item in query_list: for item in query_list:
pick_id_list.append(item.pick_id) pick_id_list.append(item.pick_id)
...@@ -94,23 +131,33 @@ class Topic(models.Model): ...@@ -94,23 +131,33 @@ class Topic(models.Model):
def get_topic_tag_id_list(self): def get_topic_tag_id_list(self):
try: try:
topic_tag_id_list = list() topic_tag_id_list = list()
edit_tag_id_list = list()
query_results = TopicTag.objects.filter(topic_id=self.id)
for item in query_results: tag_id_list = TopicTag.objects.using(settings.SLAVE_DB_NAME).filter(topic_id=self.id).values_list("tag_id",
topic_tag_id_list.append(item.tag_id) flat=True)
tag_query_results = Tag.objects.using(settings.SLAVE_DB_NAME).filter(id__in=tag_id_list)
return topic_tag_id_list for tag_item in tag_query_results:
is_online = tag_item.is_online
is_deleted = tag_item.is_deleted
collection = tag_item.collection
if is_online and not is_deleted:
topic_tag_id_list.append(tag_item.id)
if collection:
edit_tag_id_list.append(tag_item.id)
return (topic_tag_id_list, edit_tag_id_list)
except: except:
logging.error("catch exception,err_msg:%s" % traceback.format_exc()) logging.error("catch exception,err_msg:%s" % traceback.format_exc())
return [] return ([], [])
def get_tag_name_list(self,tag_id_list): def get_tag_name_list(self, tag_id_list):
try: try:
tag_name_list = list() tag_name_list = list()
query_results = Tag.objects.filter(id__in=tag_id_list).values_list("name",flat=True) for i in range(0, len(tag_name_list), 1000):
query_results = Tag.objects.using(settings.SLAVE_DB_NAME).filter(id__in=tag_id_list[i:i + 1000])
for item in query_results: for item in query_results:
tag_name_list.append(item) tag_name_list.append(item)
return tag_name_list return tag_name_list
except: except:
logging.error("catch exception,err_msg:%s" % traceback.format_exc()) logging.error("catch exception,err_msg:%s" % traceback.format_exc())
...@@ -121,8 +168,8 @@ class Topic(models.Model): ...@@ -121,8 +168,8 @@ class Topic(models.Model):
offline_score = 0.0 offline_score = 0.0
user_is_shadow = False user_is_shadow = False
#是否官方推荐用户 # 是否官方推荐用户
user_query_results = UserExtra.objects.filter(user_id=self.user_id) user_query_results = UserExtra.objects.using(settings.SLAVE_DB_NAME).filter(user_id=self.user_id)
if user_query_results.count() > 0: if user_query_results.count() > 0:
if user_query_results[0].is_recommend: if user_query_results[0].is_recommend:
offline_score += 2.0 offline_score += 2.0
...@@ -130,32 +177,70 @@ class Topic(models.Model): ...@@ -130,32 +177,70 @@ class Topic(models.Model):
user_is_shadow = True user_is_shadow = True
# 是否官方推荐小组 # 是否官方推荐小组
if self.group and self.group.is_recommend: # if self.group and self.group.is_recommend:
offline_score += 4.0 # offline_score += 4.0
#帖子等级 # 帖子等级
if self.content_level == '5': if self.content_level == '5':
offline_score += 5.0 offline_score += 6.0
elif self.content_level == '4': elif self.content_level == '4':
offline_score += 3.0 offline_score += 5.0
elif self.content_level == '3': elif self.content_level == '3':
offline_score += 2.0 offline_score += 2.0
exposure_count = ActionSumAboutTopic.objects.filter(topic_id=self.id,data_type=1).count() exposure_count = ActionSumAboutTopic.objects.using(settings.SLAVE_DB_NAME).filter(topic_id=self.id,
click_count = ActionSumAboutTopic.objects.filter(topic_id=self.id, data_type=2).count() data_type=1).count()
uv_num = ActionSumAboutTopic.objects.filter(topic_id=self.id,data_type=3).count() click_count = ActionSumAboutTopic.objects.using(settings.SLAVE_DB_NAME).filter(topic_id=self.id,
data_type=2).count()
uv_num = ActionSumAboutTopic.objects.using(settings.SLAVE_DB_NAME).filter(topic_id=self.id,
data_type=3).count()
if exposure_count>0: if exposure_count > 0:
offline_score += click_count/exposure_count offline_score += click_count / exposure_count
if uv_num>0: if uv_num > 0:
offline_score += (self.vote_num/uv_num + self.reply_num/uv_num) offline_score += (self.vote_num / uv_num + self.reply_num / uv_num)
""" """
1:马甲账号是否对总分降权? 1:马甲账号是否对总分降权?
""" """
if user_is_shadow: if user_is_shadow:
offline_score = offline_score*0.5 offline_score = offline_score * 0.5
offline_score -= self.drop_score
return offline_score return offline_score
except: except:
logging.error("catch exception,err_msg:%s" % traceback.format_exc()) logging.error("catch exception,err_msg:%s" % traceback.format_exc())
return 0.0 return 0.0
class TopicComplaint(models.Model):
    """A complaint (report) filed against a topic."""

    class Meta:
        db_table = 'topic_complaint'
        app_label = 'community'
        verbose_name = u'帖子举报'

    # Primary key of the complaint row.
    id = models.IntegerField(
        verbose_name='日记图片ID',
        primary_key=True,
    )
    # Indexed user id stored on the complaint row.
    user_id = models.BigIntegerField(
        verbose_name=u'用户ID',
        db_index=True,
    )
    # Optional link to the topic; deleting the topic cascades to its
    # complaints, which are reachable from a Topic as `complaints`.
    topic = models.ForeignKey(
        Topic,
        verbose_name=u"关联的帖子",
        null=True,
        blank=True,
        default=None,
        on_delete=models.CASCADE,
        related_name='complaints',
    )
    # Defaults to True.
    is_online = models.BooleanField(
        verbose_name=u"是否有效",
        default=True,
    )
class PictorialTopic(models.Model):
    """Link between a pictorial and a topic (``community_pictorial_topic``)."""

    # NOTE(review): a `PictorialTopics` model declared alongside the other
    # pictorial models appears to map the same table with the same app_label;
    # confirm whether both declarations are needed.
    class Meta:
        verbose_name = u'画报帖子关系'
        app_label = 'community'
        db_table = 'community_pictorial_topic'

    # Primary key of the link row.
    id = models.IntegerField(verbose_name=u'日记ID', primary_key=True)
    # Pictorial side of the relation.
    pictorial_id = models.BigIntegerField(verbose_name=u'画报ID')
    # Topic side of the relation.
    topic_id = models.BigIntegerField(verbose_name=u'帖子ID')
    # Declared exactly once. A class body keeps only the last assignment to a
    # name, so an earlier duplicate declaration (with default=True) never took
    # effect; this is the definition that was actually in force.
    is_online = models.BooleanField(verbose_name=u'是否上线')
    # Soft-delete flag.
    is_deleted = models.BooleanField(verbose_name=u'是否删除')
...@@ -15,14 +15,14 @@ from .group_user_role import GroupUserRole ...@@ -15,14 +15,14 @@ from .group_user_role import GroupUserRole
from .tag import AccountUserTag from .tag import AccountUserTag
from .topic import Topic from .topic import Topic
from .user_extra import UserExtra from .user_extra import UserExtra
from .pictorial import PictorialFollow
class User(models.Model): class User(models.Model):
class Meta: class Meta:
verbose_name = u'用户' verbose_name = u'用户'
db_table = 'account_user' db_table = 'account_user'
id = models.IntegerField(verbose_name="主键ID",primary_key=True) id = models.IntegerField(verbose_name="主键ID", primary_key=True)
user_id = models.BigIntegerField(verbose_name=u'用户id', unique=True) user_id = models.BigIntegerField(verbose_name=u'用户id', unique=True)
nick_name = models.CharField(verbose_name=u'昵称', max_length=255, default='') nick_name = models.CharField(verbose_name=u'昵称', max_length=255, default='')
profile_pic = models.CharField(verbose_name=u'头像', max_length=300) profile_pic = models.CharField(verbose_name=u'头像', max_length=300)
...@@ -31,43 +31,45 @@ class User(models.Model): ...@@ -31,43 +31,45 @@ class User(models.Model):
country_id = models.CharField(verbose_name=u'国家id', max_length=40) country_id = models.CharField(verbose_name=u'国家id', max_length=40)
is_online = models.BooleanField(verbose_name="是否上线") is_online = models.BooleanField(verbose_name="是否上线")
is_deleted = models.BooleanField(verbose_name='是否删除') is_deleted = models.BooleanField(verbose_name='是否删除')
create_time = models.DateTimeField(verbose_name=u'创建时间',default=datetime.datetime.fromtimestamp(0)) create_time = models.DateTimeField(verbose_name=u'创建时间', default=datetime.datetime.fromtimestamp(0))
update_time = models.DateTimeField(verbose_name=u'更新时间', default=datetime.datetime.fromtimestamp(0)) update_time = models.DateTimeField(verbose_name=u'更新时间', default=datetime.datetime.fromtimestamp(0))
def get_is_recommend_flag(self): def get_is_recommend_flag(self):
is_shadow = False is_shadow = False
is_recommend = False is_recommend = False
query_sql = UserExtra.objects.filter(user_id=self.user_id,is_deleted=False,is_online=True) query_sql = UserExtra.objects.using(settings.SLAVE_DB_NAME).filter(user_id=self.user_id, is_deleted=False, is_online=True)
for record in query_sql: for record in query_sql:
is_recommend = record.is_recommend is_recommend = record.is_recommend
is_shadow = record.is_shadow is_shadow = record.is_shadow
return (is_recommend,is_shadow) return (is_recommend, is_shadow)
def get_latest_topic_time_val(self): def get_latest_topic_time_val(self):
latest_topic_time_val = -1 latest_topic_time_val = -1
# 获取该用户最新发帖时间 # 获取该用户最新发帖时间
topic_records = Topic.objects.filter(user_id=self.user_id).order_by("-update_time").values_list("update_time",flat=True).first() topic_records = Topic.objects.using(settings.SLAVE_DB_NAME).filter(user_id=self.user_id).order_by("-update_time").values_list("update_time",
flat=True).first()
if topic_records: if topic_records:
tzlc_topic_update_time = tzlc(topic_records) tzlc_topic_update_time = tzlc(topic_records)
latest_topic_time_val = int(time.mktime(tzlc_topic_update_time.timetuple())) latest_topic_time_val = int(time.mktime(tzlc_topic_update_time.timetuple()))
return latest_topic_time_val return latest_topic_time_val
def get_follow_user_id_list(self): def get_follow_user_id_list(self):
follow_user_id_list = list() follow_user_id_list = list()
user_follows = self.userfollow.filter(is_online=True) user_follows = self.userfollow.filter(is_online=True)
for user_follow in user_follows: for user_follow in user_follows:
follow_user_id_list.append(user_follow.follow_id) follow_user_id_list.append(user_follow.follow_id)
follow_user_detail_list = list() follow_user_detail_list = list()
sql_data_list = User.objects.filter(user_id__in=follow_user_id_list) logging.info("get follow_user_id_list :%s" % follow_user_id_list)
for i in range(0, len(follow_user_id_list), 1000):
logging.info("get follow_user_id_list :%s" % follow_user_id_list[i:i + 1000])
sql_data_list = User.objects.using(settings.SLAVE_DB_NAME).filter(user_id__in=follow_user_id_list[i:i + 1000])
for detail_data in sql_data_list: for detail_data in sql_data_list:
item = { item = {
"user_id":detail_data.user_id, "user_id": detail_data.user_id,
"country_id":detail_data.country_id "country_id": detail_data.country_id
} }
follow_user_detail_list.append(item) follow_user_detail_list.append(item)
...@@ -76,11 +78,11 @@ class User(models.Model): ...@@ -76,11 +78,11 @@ class User(models.Model):
def get_attention_group_id_list(self): def get_attention_group_id_list(self):
try: try:
attention_group_id_list = list() attention_group_id_list = list()
query_results = GroupUserRole.objects.filter(is_online=True,user_id=self.user_id) query_results = GroupUserRole.objects.using(settings.SLAVE_DB_NAME).filter(is_online=True, user_id=self.user_id)
for item in query_results: for item in query_results:
item_dict = { item_dict = {
"group_id": item.group_id, "group_id": item.group_id,
"update_time_val":time.mktime(tzlc(item.update_time).timetuple()) "update_time_val": time.mktime(tzlc(item.update_time).timetuple())
} }
attention_group_id_list.append(item_dict) attention_group_id_list.append(item_dict)
...@@ -89,55 +91,87 @@ class User(models.Model): ...@@ -89,55 +91,87 @@ class User(models.Model):
logging.error("catch exception,err_msg:%s" % traceback.format_exc()) logging.error("catch exception,err_msg:%s" % traceback.format_exc())
return [] return []
def get_attention_pictorial_id_list(self):
    """Return the pictorials this user follows, with each follow's timestamp.

    Reads the ``PictorialFollow`` rows that are online for ``self.user_id``
    from the ``settings.SLAVE_DB_NAME`` database.

    Returns:
        list[dict]: one ``{"pictorial_id": ..., "update_time_val": ...}``
        entry per row, where ``update_time_val`` is ``time.mktime`` applied
        to the ``tzlc``-converted ``update_time``. An empty list is returned
        (and the traceback logged) if anything goes wrong, so one bad user
        does not abort the caller.
    """
    try:
        # Materialise the rows once. Formatting a lazy queryset into the log
        # message and then iterating it evaluates it twice.
        follow_rows = list(
            PictorialFollow.objects.using(settings.SLAVE_DB_NAME).filter(
                is_online=True, user_id=self.user_id))
        logging.info("get PictorialFollow:%s" % follow_rows)

        attention_pictorial_id_list = [
            {
                "pictorial_id": row.pictorial_id,
                "update_time_val": time.mktime(tzlc(row.update_time).timetuple()),
            }
            for row in follow_rows
        ]

        logging.info("get user_id:%s" % self.user_id)
        # The label now names the list that is actually logged; it previously
        # said "same_pictorial_user_id_list".
        logging.info("get attention_pictorial_id_list:%s" % attention_pictorial_id_list)
        return attention_pictorial_id_list
    except Exception:
        # Best-effort: log and fall back to "follows nothing". Exception (not
        # a bare except) lets KeyboardInterrupt/SystemExit propagate.
        logging.error("catch exception,err_msg:%s" % traceback.format_exc())
        return []
def get_pick_user_id_list(self): def get_pick_user_id_list(self):
pick_topic_id_list = list() pick_topic_id_list = list()
user_picks = self.user_pick.filter(is_deleted=False,is_pick=True) user_picks = self.user_pick.using(settings.SLAVE_DB_NAME).filter(is_deleted=False, is_pick=True)
for user_pick in user_picks: for user_pick in user_picks:
pick_topic_id_list.append(user_pick.picktopic_id) pick_topic_id_list.append(user_pick.picktopic_id)
pick_user_id_list = [] pick_user_id_list = []
topic_sql_list = Topic.objects.filter(id__in=pick_topic_id_list) for i in range(0, len(pick_topic_id_list), 1000):
topic_sql_list = Topic.objects.using(settings.SLAVE_DB_NAME).filter(id__in=pick_topic_id_list[i:i + 1000])
for topic_data in topic_sql_list: for topic_data in topic_sql_list:
pick_user_id_list.append(topic_data.user_id) pick_user_id_list.append(topic_data.user_id)
pick_user_id_list = tuple(pick_user_id_list) pick_user_id_list = tuple(pick_user_id_list)
pick_user_detail_list = list() pick_user_detail_list = list()
sql_data_list = User.objects.filter(user_id__in=pick_user_id_list)
for i in range(0, len(pick_user_id_list), 1000):
sql_data_list = User.objects.using(settings.SLAVE_DB_NAME).filter(user_id__in=pick_user_id_list[i:i + 1000])
for detail_data in sql_data_list: for detail_data in sql_data_list:
item = { item = {
"user_id":detail_data.user_id, "user_id": detail_data.user_id,
"country_id":detail_data.country_id "country_id": detail_data.country_id
} }
pick_user_detail_list.append(item) pick_user_detail_list.append(item)
return pick_user_detail_list return pick_user_detail_list
def get_same_pictorial_user_id_list(self):
    """Return details of users who follow the same pictorials as this user.

    Collects ``user_id`` values from ``PictorialFollow`` rows that share a
    ``pictorial_id`` with any row of ``self.user_id``, stopping once 100 ids
    have been gathered. The ids are not de-duplicated and nothing excludes
    this user's own id. Matching ``User`` rows are then loaded from the
    ``settings.SLAVE_DB_NAME`` database.

    Returns:
        list[dict]: ``{"user_id": ..., "country_id": ...}`` for each ``User``
        row found.
    """
    # TODO: some shared groups are very large and pushed Celery CPU usage too
    # high, so the amount of same-group data is capped for now; this data may
    # be dropped entirely later.
    max_same_users = 100
    # One constant drives both the loop step and the slice width. The loop
    # used to step by 200 while slicing 1000 wide, so chunks would overlap and
    # repeat users once the list grew past 200.
    chunk_size = 1000

    same_pictorial_user_id_list = list()
    pictorial_items_list = list(
        PictorialFollow.objects.using(settings.SLAVE_DB_NAME).filter(
            user_id=self.user_id).values_list("pictorial_id", flat=True))
    for pictorial_id in pictorial_items_list:
        user_items_list = list(
            PictorialFollow.objects.using(settings.SLAVE_DB_NAME).filter(
                pictorial_id=pictorial_id).values_list("user_id", flat=True))
        for user_id in user_items_list:
            same_pictorial_user_id_list.append(user_id)
            if len(same_pictorial_user_id_list) >= max_same_users:
                break
        if len(same_pictorial_user_id_list) >= max_same_users:
            break

    logging.info("get same user_id:%s" % self.user_id)
    logging.info("get same_pictorial_user_id_list:%s" % same_pictorial_user_id_list)

    same_pictorial_detail_list = list()
    for start in range(0, len(same_pictorial_user_id_list), chunk_size):
        id_chunk = same_pictorial_user_id_list[start:start + chunk_size]
        sql_data_list = User.objects.using(settings.SLAVE_DB_NAME).filter(user_id__in=id_chunk)
        for detail_data in sql_data_list:
            same_pictorial_detail_list.append({
                "user_id": detail_data.user_id,
                "country_id": detail_data.country_id,
            })

    return same_pictorial_detail_list
def get_user_tag_id_list(self): def get_user_tag_id_list(self):
try: try:
user_tag_id_list = list() user_tag_id_list = list()
query_results = AccountUserTag.objects.filter(user=self.user_id,is_deleted=False) query_results = AccountUserTag.objects.using(settings.SLAVE_DB_NAME).filter(user=self.user_id, is_deleted=False)
for item in query_results: for item in query_results:
user_tag_id_list.append(item.tag_id) user_tag_id_list.append(item.tag_id)
...@@ -145,4 +179,3 @@ class User(models.Model): ...@@ -145,4 +179,3 @@ class User(models.Model):
except: except:
logging.error("catch exception,err_msg:%s" % traceback.format_exc()) logging.error("catch exception,err_msg:%s" % traceback.format_exc())
return [] return []
...@@ -12,11 +12,12 @@ import elasticsearch ...@@ -12,11 +12,12 @@ import elasticsearch
import elasticsearch.helpers import elasticsearch.helpers
import sys import sys
from trans2es.models import topic, user, pick_celebrity, group, celebrity, tag, contrast_similar from trans2es.models import topic, user, pick_celebrity, group, celebrity, tag, contrast_similar,pictorial
from trans2es.utils.user_transfer import UserTransfer from trans2es.utils.user_transfer import UserTransfer
from trans2es.utils.pick_celebrity_transfer import PickCelebrityTransfer from trans2es.utils.pick_celebrity_transfer import PickCelebrityTransfer
from trans2es.utils.group_transfer import GroupTransfer from trans2es.utils.group_transfer import GroupTransfer
from trans2es.utils.topic_transfer import TopicTransfer from trans2es.utils.topic_transfer import TopicTransfer
from trans2es.utils.pictorial_transfer import PictorialTransfer
from trans2es.utils.celebrity_transfer import CelebrityTransfer from trans2es.utils.celebrity_transfer import CelebrityTransfer
from trans2es.utils.tag_transfer import TagTransfer from trans2es.utils.tag_transfer import TagTransfer
from trans2es.utils.contrast_similar_transfer import Contrast_Similar_Transfer from trans2es.utils.contrast_similar_transfer import Contrast_Similar_Transfer
...@@ -172,21 +173,41 @@ class TypeInfo(object): ...@@ -172,21 +173,41 @@ class TypeInfo(object):
) )
def insert_table_by_pk_list(self, sub_index_name, pk_list, es=None, use_batch_query_set=False):
    """Load the rows with the given primary keys and bulk-insert them.

    Args:
        sub_index_name: passed through to ``elasticsearch_bulk_insert_data``.
        pk_list: primary keys to select via ``pk__in``.
        es: optional client object, passed through unchanged.
        use_batch_query_set: when true start from ``self.queryset``,
            otherwise from ``self.model.objects.all()``.

    Each of the four stages (pick queryset, filter, build documents, insert)
    is timed with ``time.time()`` and the durations are logged at the end.
    """
    begin = time.time()
    if use_batch_query_set:
        qs = self.queryset
    else:
        qs = self.model.objects.all()
    time0 = time.time() - begin

    begin = time.time()
    instance_list = qs.filter(pk__in=pk_list)
    time1 = time.time() - begin

    begin = time.time()
    data_list = self.bulk_get_data(instance_list)
    time2 = time.time() - begin

    begin = time.time()
    logging.info("get sub_index_name:%s" % sub_index_name)
    logging.info("get data_list:%s" % data_list)
    self.elasticsearch_bulk_insert_data(
        sub_index_name=sub_index_name,
        data_list=data_list,
        es=es,
    )
    time3 = time.time() - begin

    # The durations are float seconds. "%d" truncated every sub-second stage
    # to "0s", so they are logged with millisecond precision instead.
    logging.info("duan add,insert_table_by_pk_list time cost:%.3fs,%.3fs,%.3fs,%.3fs" % (time0, time1, time2, time3))
def insert_table_chunk(self, sub_index_name, table_chunk, es=None):
def insert_table_chunk(self, sub_index_name, table_chunk, es=None):
try:
start_clock = time.clock() start_clock = time.clock()
start_time = time.time() start_time = time.time()
...@@ -205,6 +226,8 @@ class TypeInfo(object): ...@@ -205,6 +226,8 @@ class TypeInfo(object):
auto_create_index=True auto_create_index=True
) )
logging.info("es_helpers_bulk,sub_index_name:%s,data_list len:%d" % (sub_index_name,len(data_list)))
stage_3_time = time.time() stage_3_time = time.time()
end_clock = time.clock() end_clock = time.clock()
...@@ -223,6 +246,9 @@ class TypeInfo(object): ...@@ -223,6 +246,9 @@ class TypeInfo(object):
clock_duration=end_clock - start_clock, clock_duration=end_clock - start_clock,
response=es_result, response=es_result,
) )
except:
logging.error("catch exception,err_msg:%s" % traceback.format_exc())
return None
_get_type_info_map_result = None _get_type_info_map_result = None
...@@ -234,6 +260,7 @@ def get_type_info_map(): ...@@ -234,6 +260,7 @@ def get_type_info_map():
return _get_type_info_map_result return _get_type_info_map_result
type_info_list = [ type_info_list = [
TypeInfo( TypeInfo(
name='topic', # 日记 name='topic', # 日记
type='topic', type='topic',
...@@ -244,6 +271,16 @@ def get_type_info_map(): ...@@ -244,6 +271,16 @@ def get_type_info_map():
round_insert_chunk_size=5, round_insert_chunk_size=5,
round_insert_period=2, round_insert_period=2,
), ),
TypeInfo(
name='topic-v1', # 日记
type='topic-v1',
model=topic.Topic,
query_deferred=lambda: topic.Topic.objects.all().query,
get_data_func=TopicTransfer.get_topic_data,
bulk_insert_chunk_size=100,
round_insert_chunk_size=5,
round_insert_period=2,
),
TypeInfo( TypeInfo(
name="user", # 用户 name="user", # 用户
type="user", type="user",
...@@ -308,7 +345,18 @@ def get_type_info_map(): ...@@ -308,7 +345,18 @@ def get_type_info_map():
bulk_insert_chunk_size=100, bulk_insert_chunk_size=100,
round_insert_chunk_size=5, round_insert_chunk_size=5,
round_insert_period=2 round_insert_period=2
),
TypeInfo(
name="pictorial", # 画报
type="pictorial",
model=pictorial.Pictorial,
query_deferred=lambda: pictorial.Pictorial.objects.all().query,
get_data_func=PictorialTransfer.get_poctorial_data,
bulk_insert_chunk_size=100,
round_insert_chunk_size=5,
round_insert_period=2,
) )
] ]
type_info_map = { type_info_map = {
...@@ -318,3 +366,4 @@ def get_type_info_map(): ...@@ -318,3 +366,4 @@ def get_type_info_map():
_get_type_info_map_result = type_info_map _get_type_info_map_result = type_info_map
return type_info_map return type_info_map
...@@ -33,7 +33,7 @@ class GroupTransfer(object): ...@@ -33,7 +33,7 @@ class GroupTransfer(object):
update_time = instance.update_time update_time = instance.update_time
tzlc_udpate_time = tzlc(update_time) tzlc_udpate_time = tzlc(update_time)
res["update_time"] = tzlc_udpate_time res["update_time"] = tzlc_udpate_time
res["high_quality_topic_num"] = instance.get_high_quality_topic_num() # res["high_quality_topic_num"] = instance.get_high_quality_topic_num()
return res return res
except: except:
......
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import os
import sys
import logging
import traceback
from libs.tools import tzlc
class PictorialTransfer(object):
    """Turns a pictorial model instance into a plain ``dict`` of its fields."""

    @classmethod
    def get_poctorial_data(cls, instance):
        """Build the data dict for one pictorial instance.

        The misspelled method name is kept on purpose: it is referenced
        elsewhere as ``PictorialTransfer.get_poctorial_data``.

        Args:
            instance: object exposing the attributes and helper methods read
                below (``id``, ``name``, ``create_time``,
                ``get_tag_by_id()``, ...).

        Returns:
            dict: the collected fields, with ``create_time`` and
            ``update_time`` passed through ``tzlc``. An empty dict is
            returned (and the traceback logged) when any field cannot be
            read.
        """
        try:
            res = dict()
            # Plain attributes are copied across under the same key.
            for field in ("id", "is_online", "is_deleted", "is_recommend",
                          "name", "description", "topic_num", "creator_id",
                          "icon"):
                res[field] = getattr(instance, field)

            res["create_time"] = tzlc(instance.create_time)
            res["update_time"] = tzlc(instance.update_time)

            res["high_quality_topic_num"] = instance.get_high_quality_topic_num()

            # The tag names are looked up from the tag ids fetched just above.
            tag_id = instance.get_tag_by_id()
            res["tag_id"] = tag_id
            res["tag_name"] = instance.get_tag_by_name(tag_id)
            res["topic_id_list"] = instance.get_topic_id()

            return res
        except Exception:
            # Best-effort: a broken row yields an empty dict. Exception (not
            # a bare except) lets KeyboardInterrupt/SystemExit stop the job.
            logging.error("catch exception,err_msg:%s" % traceback.format_exc())
            return dict()
...@@ -10,6 +10,9 @@ from libs.tools import tzlc ...@@ -10,6 +10,9 @@ from libs.tools import tzlc
from trans2es.models.topic import Topic from trans2es.models.topic import Topic
from trans2es.models.tag import TopicTag,CommunityTagType,CommunityTagTypeRelation from trans2es.models.tag import TopicTag,CommunityTagType,CommunityTagTypeRelation
import datetime import datetime
from django.conf import settings
class TagTransfer(object): class TagTransfer(object):
...@@ -38,26 +41,34 @@ class TagTransfer(object): ...@@ -38,26 +41,34 @@ class TagTransfer(object):
res["is_online"] = instance.is_online res["is_online"] = instance.is_online
res["is_deleted"] = instance.is_deleted res["is_deleted"] = instance.is_deleted
res["near_new_topic_num"] = 0 topic_num = 0
res["near_new_topic_num"] = topic_num
if instance.is_online==True and instance.is_deleted==False: if instance.is_online==True and instance.is_deleted==False:
topic_id_list = list() topic_id_list = list()
sql_result_list = TopicTag.objects.filter(tag_id=instance.id).values_list("topic_id",flat=True) sql_result_results = list(TopicTag.objects.using(settings.SLAVE_DB_NAME).filter(
for item_id in sql_result_list: tag_id=instance.id).values_list("topic_id", "is_online"))
topic_id_list.append(item_id) for topic_id,is_online in sql_result_results:
if is_online:
topic_id_list.append(topic_id)
time_base_val = datetime.datetime.strftime(datetime.datetime.now()+datetime.timedelta(-7), "%Y-%m-%d") time_base_val = datetime.datetime.strftime(datetime.datetime.now()+datetime.timedelta(-7), "%Y-%m-%d")
topic_num = Topic.objects.filter(id__in=topic_id_list,create_time__gte=time_base_val).count()
for topic_begin_index in range(0,len(topic_id_list),100):
cur_topic_num = Topic.objects.using(settings.SLAVE_DB_NAME).filter(id__in=topic_id_list[topic_begin_index:topic_begin_index+100],create_time__gte=time_base_val).count()
topic_num += cur_topic_num
res["near_new_topic_num"] = topic_num res["near_new_topic_num"] = topic_num
tag_type_sql_list = CommunityTagTypeRelation.objects.filter(tag_id=instance.id).values_list("tag_type_id",flat=True) tag_type_sql_list = CommunityTagTypeRelation.objects.using(settings.SLAVE_DB_NAME).filter(tag_id=instance.id).values_list("tag_type_id",flat=True)
tag_type_list = list() tag_type_list = list()
for tag_type_id in tag_type_sql_list: for tag_type_id in tag_type_sql_list:
tag_type_list.append(tag_type_id) tag_type_list.append(tag_type_id)
res["tag_type"] = tag_type_list res["tag_type"] = tag_type_list
res["collection"] = instance.collection
res["is_ai"] = instance.is_ai
res["is_own"] = instance.is_own
return res return res
except: except:
logging.error("catch exception,err_msg:%s" % traceback.format_exc()) logging.error("catch exception,err_msg:%s" % traceback.format_exc())
......
...@@ -6,6 +6,8 @@ import logging ...@@ -6,6 +6,8 @@ import logging
import traceback import traceback
from libs.tools import tzlc from libs.tools import tzlc
import time import time
import re
import datetime
class TopicTransfer(object): class TopicTransfer(object):
...@@ -26,19 +28,60 @@ class TopicTransfer(object): ...@@ -26,19 +28,60 @@ class TopicTransfer(object):
res["content_level"] = instance.content_level res["content_level"] = instance.content_level
res["user_id"] = instance.user_id res["user_id"] = instance.user_id
if instance.group: # if instance.group:
res["group_id"] = instance.group.id # res["group_id"] = instance.group.id
else: # else:
res["group_id"] = -1 # res["group_id"] = -1
res["pictorial_id"] = instance.get_pictorial_id()
res["share_num"] = instance.share_num res["share_num"] = instance.share_num
begin = time.time()
res["pick_id_list"] = instance.get_pick_id_info() res["pick_id_list"] = instance.get_pick_id_info()
res["tag_list"] = instance.get_topic_tag_id_list() end = time.time()
time0 = (end-begin)
begin = time.time()
(topic_tag_id_list, edit_tag_id_list) = instance.get_topic_tag_id_list()
res["tag_list"] = topic_tag_id_list
res["edit_tag_list"] = edit_tag_id_list
end = time.time()
time1 = (end-begin)
begin = time.time()
res["tag_name_list"] = instance.get_tag_name_list(res["tag_list"]) res["tag_name_list"] = instance.get_tag_name_list(res["tag_list"])
end = time.time()
time2 = (end-begin)
begin = time.time()
res["offline_score"] = instance.get_topic_offline_score() res["offline_score"] = instance.get_topic_offline_score()
end = time.time()
time3 = (end-begin)
begin = time.time()
res["manual_score"] = instance.drop_score res["manual_score"] = instance.drop_score
res["has_image"] = instance.topic_has_image() res["has_image"] = instance.topic_has_image()
res["has_video"] = instance.has_video
res["language_type"] = instance.language_type
end = time.time()
time4 = (end-begin)
# # 片假名
# re_jp_pian_words = re.compile(u"[\u30a0-\u30ff]+")
# m_pian = re_jp_pian_words.search(instance.content, 0)
#
# # 平假名
# re_jp_ping_words = re.compile(u"[\u3040-\u309f]+")
# m_ping = re_jp_ping_words.search(instance.content, 0)
# if m_pian or m_ping:
# res["language_type"] = 10
# else:
# res["language_type"] = instance.language_type
create_time = instance.create_time create_time = instance.create_time
tzlc_create_time = tzlc(create_time) tzlc_create_time = tzlc(create_time)
...@@ -48,9 +91,10 @@ class TopicTransfer(object): ...@@ -48,9 +91,10 @@ class TopicTransfer(object):
update_time = instance.update_time update_time = instance.update_time
tzlc_update_time = tzlc(update_time) tzlc_update_time = tzlc(update_time)
res["update_time"] = tzlc_update_time # res["update_time"] = tzlc_update_time
res["update_time_val"] = int(time.mktime(tzlc_update_time.timetuple())) res["update_time_val"] = int(time.mktime(tzlc_update_time.timetuple()))
logging.info("test topic transfer time cost,time0:%d,time1:%d,time2:%d,time3:%d,time4:%d" % (time0,time1,time2,time3,time4))
return res return res
except: except:
logging.error("catch exception,err_msg:%s" % traceback.format_exc()) logging.error("catch exception,err_msg:%s" % traceback.format_exc())
......
...@@ -7,30 +7,33 @@ import traceback ...@@ -7,30 +7,33 @@ import traceback
import time import time
from libs.tools import tzlc from libs.tools import tzlc
from trans2es.models.user import User from trans2es.models.user import User
from django.conf import settings
class UserTransfer(object): class UserTransfer(object):
@classmethod @classmethod
def get_follow_user_id_list(cls,userInstance): def get_follow_user_id_list(cls, userInstance):
follow_user_id_list = list() follow_user_id_list = list()
user_follows = userInstance.userfollow.filter(is_online=True) user_follows = userInstance.userfollow.filter(is_online=True)
for user_follow in user_follows: for user_follow in user_follows:
follow_user_id_list.append(user_follow.follow_id) follow_user_id_list.append(user_follow.follow_id)
follow_user_detail_list = list() follow_user_detail_list = list()
sql_data_list = User.objects.filter(user_id__in=follow_user_id_list,is_online=True,is_deleted=False) for i in range(0, len(follow_user_id_list), 1000):
sql_data_list = User.objects.using(settings.SLAVE_DB_NAME).filter(user_id__in=follow_user_id_list[i:i + 1000], is_online=True,
is_deleted=False)
for detail_data in sql_data_list: for detail_data in sql_data_list:
item = { item = {
"user_id":detail_data.user_id, "user_id": detail_data.user_id,
"country_id":detail_data.country_id "country_id": detail_data.country_id
} }
follow_user_detail_list.append(item) follow_user_detail_list.append(item)
return follow_user_detail_list return follow_user_detail_list
@classmethod @classmethod
def get_user_data(cls,instance): def get_user_data(cls, instance):
try: try:
res = dict() res = dict()
...@@ -71,16 +74,25 @@ class UserTransfer(object): ...@@ -71,16 +74,25 @@ class UserTransfer(object):
try: try:
res["tag_list"] = instance.get_user_tag_id_list() res["tag_list"] = instance.get_user_tag_id_list()
res["attention_user_id_list"] = cls.get_follow_user_id_list(userInstance=instance) res["attention_user_id_list"] = cls.get_follow_user_id_list(userInstance=instance)
res["attention_group_id_list"] = instance.get_attention_group_id_list()
# res["attention_group_id_list"] = instance.get_attention_group_id_list()
res["pick_user_id_list"] = instance.get_pick_user_id_list() res["pick_user_id_list"] = instance.get_pick_user_id_list()
res["same_group_user_id_list"] = instance.get_same_group_user_id_list()
res["attention_pictorial_id_list"] = instance.get_attention_pictorial_id_list()
# res["same_group_user_id_list"] = instance.get_same_group_user_id_list()
res["same_pictorial_user_id_list"] = instance.get_same_pictorial_user_id_list()
except: except:
logging.error("catch exception,err_msg:%s" % traceback.format_exc()) logging.error("catch exception,err_msg:%s" % traceback.format_exc())
res["tag_list"] = [] res["tag_list"] = []
res["attention_user_id_list"] = [] res["attention_user_id_list"] = []
res["attention_group_id_list"] = [] # res["attention_group_id_list"] = []
res["pick_user_id_list"] = [] res["pick_user_id_list"] = []
res["same_group_user_id_list"] = [] # res["same_group_user_id_list"] = []
res["attention_pictorial_id_list"] = []
res["same_pictorial_user_id_list"] = []
return res return res
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment