Commit 165eded0 authored by lixiaofang's avatar lixiaofang

add

parents 79122e95 34a89a76
<component name="ProjectCodeStyleConfiguration">
  <!-- Project code style scheme: one DBN-PSQL and one DBN-SQL section, each with its case options and formatting settings. -->
  <code_scheme name="Project" version="173">
    <DBN-PSQL>
      <case-options enabled="false">
        <option name="KEYWORD_CASE" value="lower" />
        <option name="FUNCTION_CASE" value="lower" />
        <option name="PARAMETER_CASE" value="lower" />
        <option name="DATATYPE_CASE" value="lower" />
        <option name="OBJECT_CASE" value="preserve" />
      </case-options>
      <formatting-settings enabled="false" />
    </DBN-PSQL>
    <DBN-SQL>
      <case-options enabled="false">
        <option name="KEYWORD_CASE" value="lower" />
        <option name="FUNCTION_CASE" value="lower" />
        <option name="PARAMETER_CASE" value="lower" />
        <option name="DATATYPE_CASE" value="lower" />
        <option name="OBJECT_CASE" value="preserve" />
      </case-options>
      <formatting-settings enabled="false">
        <option name="STATEMENT_SPACING" value="one_line" />
        <option name="CLAUSE_CHOP_DOWN" value="chop_down_if_statement_long" />
        <option name="ITERATION_ELEMENTS_WRAPPING" value="chop_down_if_not_single" />
      </formatting-settings>
    </DBN-SQL>
  </code_scheme>
</component>
\ No newline at end of file
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="ChangeListManager">
<list default="true" id="d7dd36ca-85ef-4a59-9db5-8b1ee4993a4e" name="Default Changelist" comment="" />
<option name="EXCLUDED_CONVERTED_TO_IGNORED" value="true" />
<option name="SHOW_DIALOG" value="false" />
<option name="HIGHLIGHT_CONFLICTS" value="true" />
<option name="HIGHLIGHT_NON_ACTIVE_CHANGELIST" value="false" />
<option name="LAST_RESOLUTION" value="IGNORE" />
</component>
<component name="FUSProjectUsageTrigger">
<session id="-127591351">
<usages-collector id="statistics.lifecycle.project">
<counts>
<entry key="project.open.time.1" value="1" />
<entry key="project.opened" value="1" />
</counts>
</usages-collector>
<usages-collector id="statistics.file.extensions.open">
<counts>
<entry key="py" value="5" />
</counts>
</usages-collector>
<usages-collector id="statistics.file.types.open">
<counts>
<entry key="Python" value="5" />
</counts>
</usages-collector>
<usages-collector id="statistics.file.extensions.edit">
<counts>
<entry key="py" value="28" />
</counts>
</usages-collector>
<usages-collector id="statistics.file.types.edit">
<counts>
<entry key="Python" value="28" />
</counts>
</usages-collector>
</session>
<session id="1569122105">
<usages-collector id="statistics.file.extensions.open">
<counts>
<entry key="py" value="2" />
<entry key="template" value="1" />
<entry key="xml" value="1" />
</counts>
</usages-collector>
<usages-collector id="statistics.file.types.open">
<counts>
<entry key="PLAIN_TEXT" value="1" />
<entry key="Python" value="2" />
<entry key="XML" value="1" />
</counts>
</usages-collector>
</session>
<session id="-609148713">
<usages-collector id="statistics.lifecycle.project">
<counts>
<entry key="project.closed" value="1" />
<entry key="project.open.time.2" value="1" />
<entry key="project.open.time.8" value="1" />
<entry key="project.opened" value="2" />
</counts>
</usages-collector>
<usages-collector id="statistics.file.extensions.open">
<counts>
<entry key="json" value="8" />
<entry key="py" value="69" />
<entry key="template" value="3" />
<entry key="xml" value="1" />
</counts>
</usages-collector>
<usages-collector id="statistics.file.types.open">
<counts>
<entry key="JSON" value="8" />
<entry key="PLAIN_TEXT" value="3" />
<entry key="Python" value="69" />
<entry key="XML" value="1" />
</counts>
</usages-collector>
<usages-collector id="statistics.file.extensions.edit">
<counts>
<entry key="json" value="56" />
<entry key="py" value="888" />
<entry key="template" value="5" />
</counts>
</usages-collector>
<usages-collector id="statistics.file.types.edit">
<counts>
<entry key="JSON" value="56" />
<entry key="PLAIN_TEXT" value="5" />
<entry key="Python" value="888" />
</counts>
</usages-collector>
</session>
</component>
<component name="FileEditorManager">
<leaf SIDE_TABS_SIZE_LIMIT_KEY="300">
<file pinned="false" current-in-tab="false">
  <!-- Editor entry for search/views/topic.py: a single state element with the caret position and folded regions. -->
  <entry file="file://$PROJECT_DIR$/search/views/topic.py">
    <provider selected="true" editor-type-id="text-editor">
      <state relative-caret-position="823">
        <caret line="103" column="37" selection-start-line="103" selection-start-column="32" selection-end-line="103" selection-end-column="37" />
        <folding>
          <element signature="e#466#1607#0" />
          <element signature="e#7074#7716#0" />
          <element signature="e#7816#8386#0" />
          <element signature="e#8511#9086#0" />
        </folding>
      </state>
    </provider>
  </entry>
</file>
<file pinned="false" current-in-tab="false">
<entry file="file://$PROJECT_DIR$/search/views/group.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="510">
<caret line="42" column="16" lean-forward="true" selection-start-line="42" selection-start-column="16" selection-end-line="42" selection-end-column="16" />
</state>
</provider>
</entry>
</file>
<file pinned="false" current-in-tab="false">
<entry file="file://$PROJECT_DIR$/trans2es/type_info.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="265">
<caret line="337" column="24" selection-start-line="337" selection-start-column="24" selection-end-line="337" selection-end-column="24" />
<folding>
<element signature="e#15#87#0" expanded="true" />
</folding>
</state>
</provider>
</entry>
</file>
<file pinned="false" current-in-tab="false">
  <!-- Editor entry for trans2es/models/pictorial.py (caret only, no folding). -->
  <entry file="file://$PROJECT_DIR$/trans2es/models/pictorial.py">
    <provider selected="true" editor-type-id="text-editor">
      <state>
        <caret line="31" column="4" selection-start-line="31" selection-start-column="4" selection-end-line="31" selection-end-column="4" />
      </state>
    </provider>
  </entry>
</file>
<file pinned="false" current-in-tab="false">
  <!-- Editor entry for trans2es/utils/pictorial_transfer.py (caret plus one expanded folding region). -->
  <entry file="file://$PROJECT_DIR$/trans2es/utils/pictorial_transfer.py">
    <provider selected="true" editor-type-id="text-editor">
      <state relative-caret-position="322">
        <caret line="28" column="48" lean-forward="true" selection-start-line="28" selection-start-column="48" selection-end-line="28" selection-end-column="48" />
        <folding>
          <element signature="e#46#55#0" expanded="true" />
        </folding>
      </state>
    </provider>
  </entry>
</file>
<file pinned="false" current-in-tab="false">
  <!-- Editor entry for trans2es/mapping/topic.json: a single state element with the caret position. -->
  <entry file="file://$PROJECT_DIR$/trans2es/mapping/topic.json">
    <provider selected="true" editor-type-id="text-editor">
      <state relative-caret-position="319">
        <caret line="27" column="36" lean-forward="true" selection-start-line="27" selection-start-column="36" selection-end-line="27" selection-end-column="36" />
      </state>
    </provider>
  </entry>
</file>
<file pinned="false" current-in-tab="true">
  <!-- Editor entry for trans2es/utils/topic_transfer.py (the tab marked current-in-tab): caret plus one expanded folding region. -->
  <entry file="file://$PROJECT_DIR$/trans2es/utils/topic_transfer.py">
    <provider selected="true" editor-type-id="text-editor">
      <state relative-caret-position="1129">
        <caret line="88" column="48" lean-forward="true" selection-start-line="88" selection-start-column="48" selection-end-line="88" selection-end-column="48" />
        <folding>
          <element signature="e#46#55#0" expanded="true" />
        </folding>
      </state>
    </provider>
  </entry>
</file>
<file pinned="false" current-in-tab="false">
<entry file="file://$PROJECT_DIR$/trans2es/models/topic.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="698">
<caret line="98" column="80" selection-start-line="98" selection-start-column="12" selection-end-line="98" selection-end-column="80" />
<folding>
<element signature="e#46#118#0" expanded="true" />
</folding>
</state>
</provider>
</entry>
</file>
<file pinned="false" current-in-tab="false">
<entry file="file://$PROJECT_DIR$/trans2es/mapping/pictorial.json">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="240">
<caret line="16" column="1" lean-forward="true" selection-start-line="16" selection-start-column="1" selection-end-line="16" selection-end-column="1" />
</state>
</provider>
</entry>
</file>
<file pinned="false" current-in-tab="false">
<entry file="file://$PROJECT_DIR$/search/utils/topic.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="1290">
<caret line="94" column="8" selection-start-line="94" selection-start-column="8" selection-end-line="94" selection-end-column="8" />
<folding>
<element signature="e#47#61#0" expanded="true" />
</folding>
</state>
</provider>
</entry>
</file>
</leaf>
</component>
<component name="FileTemplateManagerImpl">
<option name="RECENT_TEMPLATES">
<list>
<option value="Python Script" />
</list>
</option>
</component>
<component name="FindInProjectRecents">
<findStrings>
<find>grop</find>
<find>contrast_similar</find>
<find>topic</find>
<find>GroupTransfer</find>
<find>get_group_query_result</find>
<find>get_group_ids_by_aggs</find>
<find>pictorial_id</find>
<find>pictorial_name</find>
<find>get_recommend_topic_ids</find>
<find>ES_INDEX_PREFIX</find>
<find>physical/search/query_tag_id_by_topic</find>
<find>tag_name_list</find>
<find>pictorial</find>
<find>group</find>
<find>update_time</find>
</findStrings>
</component>
<component name="Git.Settings">
<option name="RECENT_GIT_ROOT_PATH" value="$PROJECT_DIR$" />
</component>
<component name="IdeDocumentHistory">
<option name="CHANGED_PATHS">
<list>
<option value="$PROJECT_DIR$/trans2es/models/face_user_contrast_similar.py" />
<option value="$PROJECT_DIR$/physical/settings.py" />
<option value="$PROJECT_DIR$/trans2es/utils/pictorial_transfer.py" />
<option value="$PROJECT_DIR$/trans2es/mapping/group.json" />
<option value="$PROJECT_DIR$/trans2es/mapping/pictorial.py" />
<option value="$PROJECT_DIR$/trans2es/mapping/pictorial.json" />
<option value="$PROJECT_DIR$/trans2es/type_info.py" />
<option value="$PROJECT_DIR$/trans2es/models/group.py" />
<option value="$PROJECT_DIR$/trans2es/utils/group_transfer.py" />
<option value="$PROJECT_DIR$/trans2es/models/pictorial.py" />
<option value="$PROJECT_DIR$/trans2es/mapping/user.json" />
<option value="$PROJECT_DIR$/trans2es/utils/user_transfer.py" />
<option value="$PROJECT_DIR$/trans2es/models/user.py" />
<option value="$PROJECT_DIR$/trans2es/management/commands/trans2es_data2es_parallel.py" />
<option value="$PROJECT_DIR$/search/views/topic.py" />
<option value="$PROJECT_DIR$/search/views/group.py" />
<option value="$PROJECT_DIR$/search/utils/topic.py" />
<option value="$PROJECT_DIR$/physical/settings_local.py.template" />
<option value="$PROJECT_DIR$/search/utils/group.py" />
<option value="$PROJECT_DIR$/trans2es/models/topic.py" />
<option value="$PROJECT_DIR$/trans2es/mapping/topic.json" />
<option value="$PROJECT_DIR$/trans2es/utils/topic_transfer.py" />
</list>
</option>
</component>
<component name="ProjectFrameBounds">
  <!-- Frame position and size; each of x, y, width and height is given exactly once. -->
  <option name="x" value="-56" />
  <option name="y" value="23" />
  <option name="width" value="1920" />
  <option name="height" value="724" />
</component>
<component name="ProjectInspectionProfilesVisibleTreeState">
<entry key="Project Default">
<profile-state>
<expanded-state>
<State />
<State>
<id>General</id>
</State>
</expanded-state>
<selected-state>
<State>
<id>Buildout</id>
</State>
</selected-state>
</profile-state>
</entry>
</component>
<component name="ProjectView">
<navigator proportions="" version="1">
<foldersAlwaysOnTop value="true" />
</navigator>
<panes>
<pane id="Scope" />
<pane id="ProjectPane">
<subPane>
<expand>
<path>
<item name="physical" type="b2602c69:ProjectViewProjectNode" />
<item name="physical" type="462c0819:PsiDirectoryNode" />
</path>
<path>
<item name="physical" type="b2602c69:ProjectViewProjectNode" />
<item name="physical" type="462c0819:PsiDirectoryNode" />
<item name="injection" type="462c0819:PsiDirectoryNode" />
</path>
<path>
<item name="physical" type="b2602c69:ProjectViewProjectNode" />
<item name="physical" type="462c0819:PsiDirectoryNode" />
<item name="physical" type="462c0819:PsiDirectoryNode" />
</path>
<path>
<item name="physical" type="b2602c69:ProjectViewProjectNode" />
<item name="physical" type="462c0819:PsiDirectoryNode" />
<item name="search" type="462c0819:PsiDirectoryNode" />
</path>
<path>
<item name="physical" type="b2602c69:ProjectViewProjectNode" />
<item name="physical" type="462c0819:PsiDirectoryNode" />
<item name="trans2es" type="462c0819:PsiDirectoryNode" />
</path>
<path>
<item name="physical" type="b2602c69:ProjectViewProjectNode" />
<item name="physical" type="462c0819:PsiDirectoryNode" />
<item name="trans2es" type="462c0819:PsiDirectoryNode" />
<item name="mapping" type="462c0819:PsiDirectoryNode" />
</path>
</expand>
<select />
</subPane>
</pane>
</panes>
</component>
<component name="PropertiesComponent">
<property name="last_opened_file_path" value="$PROJECT_DIR$" />
<property name="settings.editor.selected.configurable" value="com.jetbrains.python.configuration.PyActiveSdkModuleConfigurable" />
</component>
<component name="RecentsManager">
<key name="CopyFile.RECENT_KEYS">
<recent name="$PROJECT_DIR$/trans2es/mapping" />
</key>
<key name="MoveFile.RECENT_KEYS">
<recent name="$PROJECT_DIR$/search/views" />
</key>
</component>
<component name="RunDashboard">
<option name="ruleStates">
<list>
<RuleState>
<option name="name" value="ConfigurationTypeDashboardGroupingRule" />
</RuleState>
<RuleState>
<option name="name" value="StatusDashboardGroupingRule" />
</RuleState>
</list>
</option>
</component>
<component name="SvnConfiguration">
<configuration />
</component>
<component name="TaskManager">
<task active="true" id="Default" summary="Default task">
<changelist id="d7dd36ca-85ef-4a59-9db5-8b1ee4993a4e" name="Default Changelist" comment="" />
<created>1548319196437</created>
<option name="number" value="Default" />
<option name="presentableId" value="Default" />
<updated>1548319196437</updated>
</task>
<servers />
</component>
<component name="ToolWindowManager">
  <!-- Tool window layout: one frame element, then one window_info per tool window id. -->
  <frame x="-56" y="23" width="1920" height="724" extended-state="0" />
  <editor active="true" />
  <layout>
    <window_info active="true" content_ui="combo" id="Project" order="0" visible="true" weight="0.18114798" />
    <window_info id="Structure" order="1" side_tool="true" weight="0.25" />
    <window_info id="DB Browser" order="2" />
    <window_info id="Favorites" order="3" side_tool="true" />
    <window_info anchor="bottom" id="Message" order="0" />
    <window_info anchor="bottom" id="Find" order="1" />
    <window_info anchor="bottom" id="Run" order="2" />
    <window_info anchor="bottom" id="Debug" order="3" weight="0.4" />
    <window_info anchor="bottom" id="Cvs" order="4" weight="0.25" />
    <window_info anchor="bottom" id="Inspection" order="5" weight="0.4" />
    <window_info anchor="bottom" id="TODO" order="6" />
    <window_info anchor="bottom" id="Version Control" order="7" show_stripe_button="false" />
    <window_info anchor="bottom" id="DB Execution Console" order="8" />
    <window_info anchor="bottom" id="Terminal" order="9" visible="true" weight="0.0" />
    <window_info anchor="bottom" id="Python Console" order="10" />
    <window_info anchor="bottom" id="Event Log" order="11" side_tool="true" />
    <window_info anchor="right" id="Commander" internal_type="SLIDING" order="0" type="SLIDING" weight="0.4" />
    <window_info anchor="right" id="Ant Build" order="1" weight="0.25" />
    <window_info anchor="right" content_ui="combo" id="Hierarchy" order="2" weight="0.25" />
  </layout>
</component>
<component name="VcsContentAnnotationSettings">
<option name="myLimit" value="2678400000" />
</component>
<component name="XDebuggerManager">
<breakpoint-manager>
<breakpoints>
<line-breakpoint enabled="true" suspend="THREAD" type="python-line">
<url>file://$PROJECT_DIR$/trans2es/models/user.py</url>
<line>148</line>
<option name="timeStamp" value="1" />
</line-breakpoint>
<line-breakpoint enabled="true" suspend="THREAD" type="python-line">
<url>file://$PROJECT_DIR$/trans2es/models/user.py</url>
<line>139</line>
<option name="timeStamp" value="2" />
</line-breakpoint>
<line-breakpoint enabled="true" suspend="THREAD" type="python-line">
<url>file://$PROJECT_DIR$/search/views/group.py</url>
<line>36</line>
<option name="timeStamp" value="3" />
</line-breakpoint>
<line-breakpoint enabled="true" suspend="THREAD" type="python-line">
<url>file://$PROJECT_DIR$/search/utils/group.py</url>
<line>11</line>
<option name="timeStamp" value="4" />
</line-breakpoint>
<line-breakpoint enabled="true" suspend="THREAD" type="python-line">
<url>file://$PROJECT_DIR$/trans2es/models/topic.py</url>
<line>86</line>
<option name="timeStamp" value="5" />
</line-breakpoint>
<line-breakpoint enabled="true" suspend="THREAD" type="python-line">
<url>file://$PROJECT_DIR$/trans2es/models/topic.py</url>
<line>110</line>
<option name="timeStamp" value="7" />
</line-breakpoint>
<line-breakpoint enabled="true" suspend="THREAD" type="python-line">
<url>file://$PROJECT_DIR$/trans2es/models/topic.py</url>
<line>136</line>
<option name="timeStamp" value="8" />
</line-breakpoint>
</breakpoints>
</breakpoint-manager>
</component>
<component name="editorHistoryManager">
<entry file="file://$PROJECT_DIR$/search/views/contrast_similar.py">
<provider selected="true" editor-type-id="text-editor" />
</entry>
<entry file="file://$PROJECT_DIR$/trans2es/mapping/pictorial.py" />
<entry file="file://$PROJECT_DIR$/trans2es/mapping/group.json">
<provider selected="true" editor-type-id="text-editor">
<state>
<caret column="1" lean-forward="true" selection-start-column="1" selection-end-column="1" />
</state>
</provider>
</entry>
<entry file="file://$PROJECT_DIR$/trans2es/management/commands/trans2es_mapping2es.py">
<provider selected="true" editor-type-id="text-editor" />
</entry>
<entry file="file://$PROJECT_DIR$/trans2es/models/face_user_contrast_similar.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="240">
<caret line="21" column="26" selection-start-line="21" selection-start-column="26" selection-end-line="21" selection-end-column="26" />
</state>
</provider>
</entry>
<entry file="file://$PROJECT_DIR$/trans2es/models/contrast_similar.py">
<provider selected="true" editor-type-id="text-editor" />
</entry>
<entry file="file://$PROJECT_DIR$/trans2es/models/group_user_role.py">
<provider selected="true" editor-type-id="text-editor" />
</entry>
<entry file="file://$PROJECT_DIR$/trans2es/models/pick_celebrity.py">
<provider selected="true" editor-type-id="text-editor" />
</entry>
<entry file="file://$PROJECT_DIR$/trans2es/models/pick_topic.py">
<provider selected="true" editor-type-id="text-editor" />
</entry>
<entry file="file://$PROJECT_DIR$/trans2es/models/pickuserrecord.py">
<provider selected="true" editor-type-id="text-editor" />
</entry>
<entry file="file://$PROJECT_DIR$/trans2es/models/user_extra.py">
<provider selected="true" editor-type-id="text-editor" />
</entry>
<entry file="file://$PROJECT_DIR$/trans2es/models/user_follow.py">
<provider selected="true" editor-type-id="text-editor" />
</entry>
<entry file="file://$PROJECT_DIR$/trans2es/utils/group_transfer.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="150">
<caret line="14" column="25" selection-start-line="14" selection-start-column="25" selection-end-line="14" selection-end-column="25" />
</state>
</provider>
</entry>
<entry file="file://$PROJECT_DIR$/trans2es/models/group.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="210">
<caret line="14" column="26" lean-forward="true" selection-start-line="14" selection-start-column="26" selection-end-line="14" selection-end-column="26" />
</state>
</provider>
</entry>
<entry file="file://$PROJECT_DIR$/trans2es/utils/user_transfer.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="644">
<caret line="76" selection-start-line="76" selection-end-line="76" />
</state>
</provider>
</entry>
<entry file="file://$PROJECT_DIR$/trans2es/mapping/user.json">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="448">
<caret line="51" column="32" selection-start-line="51" selection-start-column="5" selection-end-line="51" selection-end-column="32" />
</state>
</provider>
</entry>
<entry file="file://$PROJECT_DIR$/search/views.py">
<provider selected="true" editor-type-id="text-editor" />
</entry>
<entry file="file://$PROJECT_DIR$/physical/django_init.py">
<provider selected="true" editor-type-id="text-editor" />
</entry>
<entry file="file://$PROJECT_DIR$/physical/settings.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="180">
<caret line="12" column="9" lean-forward="true" selection-start-line="12" selection-start-column="9" selection-end-line="12" selection-end-column="9" />
</state>
</provider>
</entry>
<entry file="file://$PROJECT_DIR$/trans2es/management/commands/trans2es_data2es_parallel.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="585">
<caret line="97" lean-forward="true" selection-start-line="97" selection-end-line="97" />
</state>
</provider>
</entry>
<entry file="file://$PROJECT_DIR$/trans2es/models/user.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="285">
<caret line="19" column="25" selection-start-line="19" selection-start-column="25" selection-end-line="19" selection-end-column="25" />
<folding>
<element signature="e#47#79#0" expanded="true" />
<element signature="e#460#7466#0" />
<element signature="e#1396#1736#0" />
<element signature="e#1787#2318#0" />
<element signature="e#2367#3238#0" />
<element signature="e#3291#3907#0" />
<element signature="e#3960#4576#0" />
<element signature="e#3954#5060#0" />
<element signature="e#5114#6318#0" />
<element signature="e#6364#6780#0" />
</folding>
</state>
</provider>
</entry>
<entry file="file://$PROJECT_DIR$/search/views/user.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="-395">
<caret line="1" column="12" selection-start-line="1" selection-start-column="12" selection-end-line="1" selection-end-column="12" />
</state>
</provider>
</entry>
<entry file="file://$PROJECT_DIR$/trans2es/models/tag.py">
<provider selected="true" editor-type-id="text-editor" />
</entry>
<entry file="file://$PROJECT_DIR$/search/views/tag.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="-28" />
</provider>
</entry>
<entry file="file://$PROJECT_DIR$/search/views/pick.py">
<provider selected="true" editor-type-id="text-editor" />
</entry>
<entry file="file://$PROJECT_DIR$/app_conf.xml">
<provider selected="true" editor-type-id="text-editor" />
</entry>
<entry file="file://$PROJECT_DIR$/search/utils/common.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="72">
<caret line="6" column="6" selection-start-line="6" selection-start-column="6" selection-end-line="6" selection-end-column="6" />
</state>
</provider>
</entry>
<entry file="file://$PROJECT_DIR$/physical/celery.py">
<provider selected="true" editor-type-id="text-editor" />
</entry>
<entry file="file://$PROJECT_DIR$/physical/celery_task_router.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="300">
<caret line="25" column="23" lean-forward="true" selection-start-line="25" selection-start-column="23" selection-end-line="25" selection-end-column="23" />
</state>
</provider>
</entry>
<entry file="file://$PROJECT_DIR$/physical/urls.py">
<provider selected="true" editor-type-id="text-editor" />
</entry>
<entry file="file://$PROJECT_DIR$/physical/wsgi.py">
<provider selected="true" editor-type-id="text-editor" />
</entry>
<entry file="file://$PROJECT_DIR$/physical/settings_local.py.template">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="1837">
<caret line="146" column="26" selection-start-line="146" selection-start-column="26" selection-end-line="146" selection-end-column="26" />
</state>
</provider>
</entry>
<entry file="file://$PROJECT_DIR$/physical/views.py">
<provider selected="true" editor-type-id="text-editor" />
</entry>
<entry file="file://$PROJECT_DIR$/libs/es.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="120">
<caret line="17" column="43" selection-start-line="17" selection-start-column="28" selection-end-line="17" selection-end-column="43" />
</state>
</provider>
</entry>
<!-- Trailing editor-history entries: one entry per file, each with at most one provider/state. -->
<entry file="file://$PROJECT_DIR$/search/utils/group.py">
  <provider selected="true" editor-type-id="text-editor">
    <state relative-caret-position="147">
      <caret line="115" column="21" selection-start-line="115" selection-start-column="21" selection-end-line="115" selection-end-column="21" />
      <folding>
        <element signature="e#218#1347#0" />
      </folding>
    </state>
  </provider>
</entry>
<entry file="file://$PROJECT_DIR$/injection/data_sync/tasks.py">
  <provider selected="true" editor-type-id="text-editor">
    <state relative-caret-position="180">
      <caret line="22" column="22" selection-start-line="22" selection-start-column="22" selection-end-line="22" selection-end-column="38" />
    </state>
  </provider>
</entry>
<entry file="file://$PROJECT_DIR$/search/utils/topic.py">
  <provider selected="true" editor-type-id="text-editor">
    <state relative-caret-position="1290">
      <caret line="94" column="8" selection-start-line="94" selection-start-column="8" selection-end-line="94" selection-end-column="8" />
      <folding>
        <element signature="e#47#61#0" expanded="true" />
      </folding>
    </state>
  </provider>
</entry>
<entry file="file://$PROJECT_DIR$/search/views/group.py">
  <provider selected="true" editor-type-id="text-editor">
    <state relative-caret-position="510">
      <caret line="42" column="16" lean-forward="true" selection-start-line="42" selection-start-column="16" selection-end-line="42" selection-end-column="16" />
    </state>
  </provider>
</entry>
<entry file="file://$PROJECT_DIR$/trans2es/mapping/topic.json">
  <provider selected="true" editor-type-id="text-editor">
    <state relative-caret-position="319">
      <caret line="27" column="36" lean-forward="true" selection-start-line="27" selection-start-column="36" selection-end-line="27" selection-end-column="36" />
    </state>
  </provider>
</entry>
<entry file="file://$PROJECT_DIR$/search/views/topic.py">
  <provider selected="true" editor-type-id="text-editor">
    <state relative-caret-position="823">
      <caret line="103" column="37" selection-start-line="103" selection-start-column="32" selection-end-line="103" selection-end-column="37" />
      <folding>
        <element signature="e#466#1607#0" />
        <element signature="e#7074#7716#0" />
        <element signature="e#7816#8386#0" />
        <element signature="e#8511#9086#0" />
      </folding>
    </state>
  </provider>
</entry>
<entry file="file://$PROJECT_DIR$/trans2es/mapping/pictorial.json">
  <provider selected="true" editor-type-id="text-editor">
    <state relative-caret-position="240">
      <caret line="16" column="1" lean-forward="true" selection-start-line="16" selection-start-column="1" selection-end-line="16" selection-end-column="1" />
    </state>
  </provider>
</entry>
<entry file="file://$PROJECT_DIR$/trans2es/type_info.py">
  <provider selected="true" editor-type-id="text-editor">
    <state relative-caret-position="265">
      <caret line="337" column="24" selection-start-line="337" selection-start-column="24" selection-end-line="337" selection-end-column="24" />
      <folding>
        <element signature="e#15#87#0" expanded="true" />
      </folding>
    </state>
  </provider>
</entry>
<entry file="file://$PROJECT_DIR$/trans2es/utils/pictorial_transfer.py">
  <provider selected="true" editor-type-id="text-editor">
    <state relative-caret-position="322">
      <caret line="28" column="48" lean-forward="true" selection-start-line="28" selection-start-column="48" selection-end-line="28" selection-end-column="48" />
      <folding>
        <element signature="e#46#55#0" expanded="true" />
      </folding>
    </state>
  </provider>
</entry>
<entry file="file://$PROJECT_DIR$/trans2es/models/topic.py">
  <provider selected="true" editor-type-id="text-editor">
    <state relative-caret-position="698">
      <caret line="98" column="80" selection-start-line="98" selection-start-column="12" selection-end-line="98" selection-end-column="80" />
      <folding>
        <element signature="e#46#118#0" expanded="true" />
      </folding>
    </state>
  </provider>
</entry>
<entry file="file://$PROJECT_DIR$/trans2es/models/pictorial.py">
  <provider selected="true" editor-type-id="text-editor">
    <state>
      <caret line="31" column="4" selection-start-line="31" selection-start-column="4" selection-end-line="31" selection-end-column="4" />
    </state>
  </provider>
</entry>
<entry file="file://$PROJECT_DIR$/trans2es/utils/topic_transfer.py">
  <provider selected="true" editor-type-id="text-editor">
    <state relative-caret-position="1129">
      <caret line="88" column="48" lean-forward="true" selection-start-line="88" selection-start-column="48" selection-end-line="88" selection-end-column="48" />
      <folding>
        <element signature="e#46#55#0" expanded="true" />
      </folding>
    </state>
  </provider>
</entry>
</component>
<component name="masterDetails">
<states>
<state key="ScopeChooserConfigurable.UI">
<settings>
<splitter-proportions>
<option name="proportions">
<list>
<option value="0.2" />
</list>
</option>
</splitter-proportions>
</settings>
</state>
</states>
</component>
</project>
\ No newline at end of file
......@@ -9,11 +9,13 @@
<!-- NOTE(review): presumably the modules named below are loaded in listed order at start-up; confirm against the consumer of this list. Each module is listed once. -->
<config name="initializer_list">
    <element value="physical.django_init"/>
    <element value="search.views.topic"/>
    <element value="search.views.business_topic"/>
    <element value="search.views.pick"/>
    <element value="search.views.group"/>
    <element value="search.views.user"/>
    <element value="search.views.tag"/>
    <element value="search.views.contrast_similar"/>
    <element value="injection.data_sync.tasks"/>
</config>
</gm_rpcd_config>
......@@ -4,14 +4,15 @@ from celery import shared_task
from django.conf import settings
from django.core import serializers
from trans2es.type_info import get_type_info_map
#from rpc.all import get_rpc_remote_invoker
# from rpc.all import get_rpc_remote_invoker
from libs.es import ESPerform
import logging
import traceback
from libs.cache import redis_client
from trans2es.models.face_user_contrast_similar import FaceUserContrastSimilar
from trans2es.models.face_user_contrast_similar import FaceUserContrastSimilar,UserSimilarScore
import json
@shared_task
def write_to_es(es_type, pk_list, use_batch_query_set=False):
try:
......@@ -20,6 +21,7 @@ def write_to_es(es_type, pk_list, use_batch_query_set=False):
type_info = type_info_map[es_type]
logging.info("duan add,es_type:%s" % str(es_type))
logging.info("get es_type:%s"%es_type)
type_info.insert_table_by_pk_list(
sub_index_name=es_type,
pk_list=pk_list,
......@@ -33,7 +35,8 @@ def write_to_es(es_type, pk_list, use_batch_query_set=False):
@shared_task
def sync_face_similar_data_to_redis():
try:
result_items = FaceUserContrastSimilar.objects.filter(is_online=True,is_deleted=False).distinct().values("participant_user_id").values_list("participant_user_id",flat=True)
result_items = FaceUserContrastSimilar.objects.filter(is_online=True, is_deleted=False).distinct().values(
"participant_user_id").values_list("participant_user_id", flat=True)
logging.info("duan add,begin sync_face_similar_data_to_redis!")
......@@ -41,19 +44,45 @@ def sync_face_similar_data_to_redis():
for participant_user_id in result_items:
redis_key = redis_key_prefix + str(participant_user_id)
similar_result_items = FaceUserContrastSimilar.objects.filter(is_online=True,is_deleted=False,participant_user_id=participant_user_id,similarity__gt=0.4).order_by("-similarity").limit(100)
similar_result_items = FaceUserContrastSimilar.objects.filter(is_online=True, is_deleted=False,
participant_user_id=participant_user_id,
similarity__gt=0.3).order_by(
"-similarity")
item_list = list()
for item in similar_result_items:
item_list.append(
{
"contrast_user_id":item.contrast_user_id,
"similarity":item.similarity
"contrast_user_id": item.contrast_user_id,
"similarity": item.similarity
}
)
redis_client.set(redis_key,json.dumps(item_list))
redis_client.set(redis_key, json.dumps(item_list))
logging.info("duan add,participant_user_id:%d set data done!" % participant_user_id)
except:
logging.error("catch exception,err_msg:%s" % traceback.format_exc())
@shared_task
def sync_user_similar_score():
    """Mirror every user's similarity scores from the database into redis.

    For each distinct ``user_id`` that has non-deleted ``UserSimilarScore``
    rows, the rows are read in descending ``score`` order and stored under
    ``physical:user_similar_score:user_id:<user_id>`` as a JSON list of
    ``[contrast_user_id, score]`` pairs.

    Any exception is logged with its traceback and swallowed, so the task
    never raises.
    """
    try:
        redis_key_prefix = "physical:user_similar_score:user_id:"
        results_items = UserSimilarScore.objects.filter(is_deleted=False).distinct().values(
            "user_id").values_list("user_id", flat=True)

        logging.info("duan add,begin sync user_similar_score!")

        for user_id in results_items:
            # One query per user: that user's rows, best score first.
            scored_rows = UserSimilarScore.objects.filter(is_deleted=False, user_id=user_id).order_by("-score")
            score_pairs = [[row.contrast_user_id, row.score] for row in scored_rows]
            redis_client.set(redis_key_prefix + str(user_id), json.dumps(score_pairs))
    except:
        logging.error("catch exception,err_msg:%s" % traceback.format_exc())
......@@ -94,7 +94,7 @@ class ESPerform(object):
return False
@classmethod
def put_index_mapping(cls,es_cli,sub_index_name,mapping_type="_doc"):
def put_index_mapping(cls,es_cli,sub_index_name,mapping_type="_doc",force_sync=False):
"""
:remark: put index mapping
:param es_cli:
......@@ -107,10 +107,14 @@ class ESPerform(object):
write_alias_name = cls.get_official_index_name(sub_index_name,"write")
index_exist = es_cli.indices.exists(write_alias_name)
if not index_exist:
if not index_exist and not force_sync:
return False
mapping_dict = cls.__load_mapping(sub_index_name)
logging.info("get write_alias_name:%s"%write_alias_name)
logging.info("get mapping_dict:%s"%mapping_dict)
logging.info("get mapping_type:%s"%mapping_type)
es_cli.indices.put_mapping(index=write_alias_name,body=mapping_dict,doc_type=mapping_type)
return True
......@@ -143,7 +147,7 @@ class ESPerform(object):
try:
assert (es_cli is not None)
official_index_name = cls.get_official_index_name(sub_index_name)
official_index_name = cls.get_official_index_name(sub_index_name, "write")
index_exists = es_cli.indices.exists(official_index_name)
if not index_exists:
if not auto_create_index:
......@@ -208,3 +212,34 @@ class ESPerform(object):
except:
logging.error("catch exception,err_msg:%s" % traceback.format_exc())
return {"total_count":0,"hits":[]}
@classmethod
def if_es_node_load_high(cls, es_cli):
    """Report whether the Elasticsearch cluster looks CPU-saturated.

    The text returned by ``es_cli.cat.nodes()`` is parsed line by line; the
    fourth whitespace-separated column is read as the node's CPU percentage
    (assumes the default ``_cat/nodes`` column order - TODO confirm for the
    deployed ES version).

    :param es_cli: Elasticsearch client exposing ``cat.nodes()``.
    :return: ``True`` when more than 3 nodes report CPU above 60, and also
        ``True`` (fail-safe) when the client is missing or the output cannot
        be parsed; ``False`` otherwise.
    """
    try:
        if es_cli is None:
            logging.error("catch exception,err_msg:es_cli is None")
            return True

        high_num = 0
        es_nodes_list = list()

        es_nodes_ori_info = es_cli.cat.nodes()
        for item in es_nodes_ori_info.split("\n"):
            try:
                # The cat API pads its columns with runs of spaces, so split on
                # any whitespace; splitting on a single " " yields empty fields
                # and shifts the column index.
                item_list = item.split()
                if len(item_list) > 4:
                    cpu_load = int(item_list[3])
                    if cpu_load > 60:
                        high_num += 1
                    es_nodes_list.append(cpu_load)
            except Exception:
                # Unparseable line: treat the cluster as loaded rather than guess.
                logging.error("catch exception,item:%s,err_msg:%s" % (str(item), traceback.format_exc()))
                return True

        if high_num > 3:
            logging.info("check es_nodes_load high,cpu load:%s" % str(es_nodes_list))
            return True
        return False
    except Exception:
        logging.error("catch exception,err_msg:%s" % traceback.format_exc())
        return True
......@@ -5,6 +5,8 @@ from __future__ import unicode_literals, print_function, absolute_import
import six
import random
from django.db import models
import logging
import traceback
class ITableChunk(object):
......@@ -147,36 +149,47 @@ class TableSlicerChunk(ITableChunk):
class TableSlicer(object):
def __init__(self, queryset, chunk_size=None, chunk_count=None, sep_list=None):
assert isinstance(queryset, models.QuerySet)
assert chunk_size is None or isinstance(chunk_size, six.integer_types)
assert chunk_count is None or isinstance(chunk_count, six.integer_types)
assert sep_list is None or isinstance(sep_list, list)
try:
assert isinstance(queryset, models.QuerySet)
assert (chunk_size is not None) + (chunk_count is not None) + (sep_list is not None) == 1
assert chunk_size is None or isinstance(chunk_size, six.integer_types)
if sep_list is not None:
sep_list = list(sep_list)
else:
count = queryset.count()
if chunk_size is None:
chunk_size = count / chunk_count
index_list = list(range(0, count, chunk_size))
sep_list = [
queryset.order_by('pk').values_list('pk', flat=True)[index]
for index in index_list
]
assert chunk_count is None or isinstance(chunk_count, six.integer_types)
self._model = queryset.model
self._query = queryset.query
self._sep_list = [None] + sep_list + [None]
assert sep_list is None or isinstance(sep_list, list)
assert (chunk_size is not None) + (chunk_count is not None) + (sep_list is not None) == 1
if sep_list is not None:
sep_list = list(sep_list)
else:
count = queryset.count()
if chunk_size is None:
chunk_size = count / chunk_count
index_list = list(range(0, count, chunk_size))
sep_list = [
queryset.order_by('pk').values_list('pk', flat=True)[index]
for index in index_list
]
self._model = queryset.model
self._query = queryset.query
self._sep_list = [None] + sep_list + [None]
except:
logging.error("catch exception,err_msg:%s" % traceback.format_exc())
def chunks(self):
reversed_sep_list = list(reversed(self._sep_list))
for i in range(len(self._sep_list) - 1):
pk_start = reversed_sep_list[i+1]
pk_stop = reversed_sep_list[i]
yield TableSlicerChunk(model=self._model, query=self._query, pk_start=pk_start, pk_stop=pk_stop)
try:
reversed_sep_list = list(reversed(self._sep_list))
logging.info("duan add,reversed_sep_list:%d" % (len(self._sep_list) - 1))
for i in range(len(self._sep_list) - 1):
pk_start = reversed_sep_list[i + 1]
pk_stop = reversed_sep_list[i]
yield TableSlicerChunk(model=self._model, query=self._query, pk_start=pk_start, pk_stop=pk_stop)
except:
logging.error("catch exception,err_msg:%s" % traceback.format_exc())
class TableStreamingSlicer(object):
......
from django.contrib import admin
# Register your models here.
from django.db import models
# Create your models here.
from django.test import TestCase
# Create your tests here.
from django.shortcuts import render
# Create your views here.
# -*- coding: UTF-8 -*-
# !/usr/bin/env python
from kafka import KafkaConsumer
from libs.cache import redis_client
import logging
from linucb.views.linucb import LinUCB
import json
from trans2es.models.tag import TopicTag
import traceback
from django.conf import settings
class KafkaManager(object):
    """Holder for a single, lazily created Kafka consumer shared by all callers."""

    # Cached consumer instance; stays None until the first request.
    consumser_obj = None

    @classmethod
    def get_kafka_consumer_ins(cls, topic_name=None):
        """Return the shared consumer, creating it on first use.

        :param topic_name: topic to consume; when falsy,
            ``settings.KAFKA_TOPIC_NAME`` is used. Only honoured by the call
            that actually creates the consumer - later calls return the
            cached instance whatever topic they pass.
        :return: the cached ``KafkaConsumer``.
        """
        if cls.consumser_obj:
            return cls.consumser_obj

        chosen_topic = topic_name if topic_name else settings.KAFKA_TOPIC_NAME
        # The topic is passed to the constructor, so no separate subscribe() call is made.
        cls.consumser_obj = KafkaConsumer(chosen_topic, bootstrap_servers=settings.KAFKA_BROKER_LIST)
        return cls.consumser_obj
class CollectData(object):
    """Feed Kafka click/exposure events into per-device LinUCB tag state in redis.

    A click event applies reward 1 to every tag of the clicked topic; an
    exposure event applies reward 0 to every tag of the exposed topics. After
    each event the device's recommended tag list is recomputed and cached.
    """

    def __init__(self):
        # One redis hash per device: field = tag id, value = that tag's LinUCB matrices.
        self.linucb_matrix_redis_prefix = "physical:linucb:device_id:"
        # One redis string per device: JSON list of recommended tag ids.
        self.linucb_recommend_redis_prefix = "physical:linucb:tag_recommend:device_id:"
        # Default feature vector, used when a caller supplies none.
        self.user_feature = [0, 1]

    def _get_user_linucb_info(self, device_id):
        """Return the device's whole LinUCB hash (tag id -> stored matrices), or {} on error."""
        try:
            redis_key = self.linucb_matrix_redis_prefix + str(device_id)
            return redis_client.hgetall(redis_key)
        except Exception:
            logging.error("catch exception,err_msg:%s" % traceback.format_exc())
            return dict()

    def update_recommend_tag_list(self, device_id, user_feature=None):
        """Recompute and cache the recommended tag list for ``device_id``.

        A device without stored LinUCB state gets the default tag list and has
        its state initialised for those tags; otherwise the tags are ranked by
        ``LinUCB.linucb_recommend_tag``. A non-empty result is stored as JSON
        with a 7-day expiry.

        :param device_id: device whose recommendation is refreshed.
        :param user_feature: feature vector; falls back to ``self.user_feature``.
        :return: ``True`` on success, ``False`` if anything raised.
        """
        try:
            redis_linucb_tag_data_dict = self._get_user_linucb_info(device_id)
            if len(redis_linucb_tag_data_dict) == 0:
                recommend_tag_list = LinUCB.get_default_tag_list()
                LinUCB.init_device_id_linucb_info(redis_client, self.linucb_matrix_redis_prefix, device_id,
                                                  recommend_tag_list)
            else:
                user_feature = user_feature if user_feature else self.user_feature
                recommend_tag_list = LinUCB.linucb_recommend_tag(device_id, redis_linucb_tag_data_dict, user_feature,
                                                                 list(redis_linucb_tag_data_dict.keys()))

            logging.info("duan add,device_id:%s,recommend_tag_list:%s" % (str(device_id), str(recommend_tag_list)))
            if len(recommend_tag_list) > 0:
                tag_recommend_redis_key = self.linucb_recommend_redis_prefix + str(device_id)
                redis_client.set(tag_recommend_redis_key, json.dumps(recommend_tag_list))
                # TODO: set the expiry in the SET call itself; check whether set supports it.
                redis_client.expire(tag_recommend_redis_key, 7 * 24 * 60 * 60)

            return True
        except Exception:
            logging.error("catch exception,err_msg:%s" % traceback.format_exc())
            return False

    def update_user_linucb_tag_info(self, reward, device_id, tag_id, user_feature=None):
        """Apply one reward observation for ``(device_id, tag_id)``.

        :return: whatever ``LinUCB.update_linucb_info`` returns, or ``False``
            if the call raised.
        """
        try:
            user_feature = user_feature if user_feature else self.user_feature
            return LinUCB.update_linucb_info(user_feature, reward, tag_id, device_id,
                                             self.linucb_matrix_redis_prefix, redis_client)
        except Exception:
            # Include the traceback, as every other handler in this class does.
            logging.error("update_user_linucb_tag_info error!err_msg:%s" % traceback.format_exc())
            return False

    def _reward_tags(self, reward, device_id, tag_list, user_feature):
        """Apply ``reward`` to every tag, then refresh the device's recommendation."""
        for tag_id in tag_list:
            self.update_user_linucb_tag_info(reward, device_id, tag_id, user_feature)

        # Refresh only after all per-tag updates are stored.
        self.update_recommend_tag_list(device_id, user_feature)

    def _handle_click_event(self, raw_val_dict, user_feature):
        """Process an ``on_click_feed_topic_card`` message (reward 1)."""
        params = raw_val_dict["params"]
        # Either key may be absent; .get() lets the topic_id fallback actually
        # run instead of raising KeyError on a missing business_id.
        topic_id = params.get("business_id") or params.get("topic_id")
        device_id = raw_val_dict["device"]["device_id"]
        if not topic_id:
            logging.warning("click msg without topic id,device_id:%s" % str(device_id))
            return

        logging.info("consume topic_id:%s,device_id:%s" % (str(topic_id), str(device_id)))

        tag_list = [sql_item.tag_id for sql_item in TopicTag.objects.filter(is_online=True, topic_id=topic_id)]
        logging.info("positive tag_list,device_id:%s,topic_id:%s,tag_list:%s" % (
            str(device_id), str(topic_id), str(tag_list)))

        self._reward_tags(1, device_id, tag_list, user_feature)

    def _handle_exposure_event(self, raw_val_dict, user_feature):
        """Process a ``page_precise_exposure`` message (reward 0)."""
        raw_cards = raw_val_dict["params"]["exposure_cards"]
        if isinstance(raw_cards, str):
            # The card list may arrive as a JSON-encoded string.
            exposure_cards_list = json.loads(raw_cards)
        elif isinstance(raw_cards, list):
            exposure_cards_list = raw_cards
        else:
            exposure_cards_list = list()
        device_id = raw_val_dict["device"]["device_id"]

        exposure_topic_id_list = list()
        for item in exposure_cards_list:
            if "card_id" not in item:
                continue
            exposure_topic_id = item["card_id"]
            logging.info(
                "consume exposure topic_id:%s,device_id:%s" % (str(exposure_topic_id), str(device_id)))
            exposure_topic_id_list.append(exposure_topic_id)

        # topic_tag_id_dict is built for the log line only; tag_list drives the updates.
        topic_tag_id_dict = dict()
        tag_list = list()
        sql_query_results = TopicTag.objects.filter(is_online=True, topic_id__in=exposure_topic_id_list)
        for sql_item in sql_query_results:
            tag_list.append(sql_item.tag_id)
            topic_tag_id_dict.setdefault(sql_item.topic_id, list()).append(sql_item.tag_id)

        logging.info("negative tag_list,device_id:%s,topic_tag_id_dict:%s" % (
            str(device_id), str(topic_tag_id_dict)))

        self._reward_tags(0, device_id, tag_list, user_feature)

    def consume_data_from_kafka(self, topic_name=None):
        """Poll Kafka forever and dispatch each message by its ``type`` field.

        A failure while handling one message is logged and the loop moves on;
        a failure outside message handling (for example in ``poll``) ends the
        loop.

        :param topic_name: forwarded to ``KafkaManager.get_kafka_consumer_ins``.
        :return: ``False`` when the loop is aborted by an exception.
        """
        try:
            user_feature = [1, 1]

            kafka_consumer_obj = KafkaManager.get_kafka_consumer_ins(topic_name)
            while True:
                msg_dict = kafka_consumer_obj.poll(timeout_ms=100)
                for msg_key in msg_dict:
                    for ori_msg in msg_dict[msg_key]:
                        try:
                            logging.info(ori_msg)

                            raw_val_dict = json.loads(ori_msg.value)
                            msg_type = raw_val_dict.get("type")
                            if msg_type == "on_click_feed_topic_card":
                                self._handle_click_event(raw_val_dict, user_feature)
                            elif msg_type == "page_precise_exposure":
                                self._handle_exposure_event(raw_val_dict, user_feature)
                            else:
                                logging.warning("unknown type msg:%s" % raw_val_dict.get("type", "missing type"))
                        except Exception:
                            # One bad message must not stop the consumer.
                            logging.error("catch exception,err_msg:%s" % traceback.format_exc())

            # Not reached: the loop above has no break. Kept from the original.
            return True
        except Exception:
            logging.error("catch exception,err_msg:%s" % traceback.format_exc())
            return False
# -*- coding: UTF-8 -*-
# !/usr/bin/env python
import numpy as np
import redis
from libs.cache import redis_client
from trans2es.models.tag import Tag
import logging
import traceback
import json
import pickle
class LinUCB:
    """LinUCB contextual-bandit helpers for per-device tag recommendation.

    Each (device, tag) pair owns a pickled dict with four matrices: ``Aa``
    (d x d), its inverse ``AaI``, ``ba`` (d x 1) and ``theta`` (d x 1). The
    dicts live in a redis hash keyed by device, one field per tag.
    """

    d = 2          # length of the user feature vector
    alpha = 0.25   # weight of the exploration term in the score
    r1 = 1         # payoff applied for reward == 1
    r0 = -0.5      # payoff applied for reward == 0
    default_tag_list = list()  # process-wide cache of online tag ids

    @classmethod
    def _fresh_state(cls):
        """Return the starting matrices for a tag that has no observations yet."""
        return {
            "Aa": np.identity(cls.d),
            "theta": np.zeros((cls.d, 1)),
            "ba": np.zeros((cls.d, 1)),
            "AaI": np.identity(cls.d)
        }

    @classmethod
    def get_default_tag_list(cls):
        """Return up to 20 ids of online tags, querying the database on first use.

        :return: list of tag ids, or an empty list if the query raised.
        """
        try:
            if len(cls.default_tag_list) == 0:
                # Collect everything before touching the cache, so a failure
                # half-way through the query cannot leave a partial list behind.
                fetched_ids = [item.id for item in Tag.objects.filter(is_online=True)]
                cls.default_tag_list.extend(fetched_ids)

            return cls.default_tag_list[:20]
        except Exception:
            logging.error("catch exception,err_msg:%s" % traceback.format_exc())
            return list()

    @classmethod
    def linucb_recommend_tag(cls, device_id, redis_linucb_tag_data_dict, user_features_list, tag_list):
        """Rank tags by LinUCB score and return the best ten.

        The score of a tag is ``x^T theta + alpha * sqrt(x^T AaI x)``, where
        ``AaI`` is the inverse of the tag's ``Aa`` matrix.

        :param device_id: used for logging only.
        :param redis_linucb_tag_data_dict: tag id -> pickled state dict.
        :param user_features_list: feature vector ``x`` of length ``d``.
        :param tag_list: tag ids to rank; each must be a key of the dict above.
        :return: at most 10 tag ids as ``str``, highest score first; ``[]`` on
            error or when ``tag_list`` is empty.
        """
        try:
            AaI_list = list()
            theta_list = list()
            for tag_id in tag_list:
                # NOTE(security): pickle.loads executes arbitrary code if the
                # redis payload can be written by an untrusted party.
                tag_dict = pickle.loads(redis_linucb_tag_data_dict[tag_id])
                # The confidence term needs the inverse matrix, not Aa itself.
                AaI_list.append(tag_dict["AaI"])
                theta_list.append(tag_dict["theta"])

            if not AaI_list:
                return []

            xaT = np.array([user_features_list])
            xa = np.transpose(xaT)

            AaI_tmp = np.array(AaI_list)
            theta_tmp = np.array(theta_list)
            np_array = np.dot(xaT, theta_tmp) + cls.alpha * np.sqrt(np.dot(np.dot(xaT, AaI_tmp), xa))

            # One score per tag, in tag_list order.
            score_array = np_array.ravel()
            # Stable descending sort: ties keep their tag_list order.
            ranked_index_list = np.argsort(-score_array, kind="mergesort")[:10]

            top_tag_list = list()
            for tag_index in ranked_index_list:
                raw_tag_id = tag_list[tag_index]
                # redis hash fields come back as bytes unless the client decodes them.
                if isinstance(raw_tag_id, bytes):
                    top_tag_list.append(raw_tag_id.decode("utf-8"))
                else:
                    top_tag_list.append(str(raw_tag_id))

            logging.info("duan add,device_id:%s,score_list:%s,top_tag_list:%s" % (
                str(device_id), str(score_array.tolist()), str(top_tag_list)))

            return top_tag_list
        except Exception:
            logging.error("catch exception,err_msg:%s" % traceback.format_exc())
            return []

    @classmethod
    def init_device_id_linucb_info(cls, redis_cli, redis_prefix, device_id, tag_list):
        """Store a fresh state for every tag in ``tag_list`` under the device's hash.

        :return: ``True`` on success, ``False`` if anything raised.
        """
        try:
            redis_key = redis_prefix + str(device_id)
            user_tag_linucb_dict = dict()
            for tag_id in tag_list:
                user_tag_linucb_dict[tag_id] = pickle.dumps(cls._fresh_state())

            redis_cli.hmset(redis_key, user_tag_linucb_dict)
            return True
        except Exception:
            logging.error("catch exception,err_msg:%s" % traceback.format_exc())
            return False

    @classmethod
    def update_linucb_info(cls, user_features, reward, tag_id, device_id, redis_prefix, redis_cli):
        """Fold one observation into the stored state of ``(device_id, tag_id)``.

        ``reward`` 1 maps to payoff ``r1`` and 0 to ``r0``; -1 only logs a
        warning and any other value is ignored. A tag with no stored state
        starts from the fresh state, so its first observation is counted.

        :return: ``True`` unless an exception was raised, then ``False``.
        """
        try:
            if reward == -1:
                logging.warning("reward val error!")
            elif reward == 1 or reward == 0:
                r = cls.r1 if reward == 1 else cls.r0
                xaT = np.array([user_features])
                xa = np.transpose(xaT)

                redis_key = redis_prefix + str(device_id)
                ori_redis_tag_data = redis_cli.hget(redis_key, tag_id)
                if ori_redis_tag_data:
                    # NOTE(security): see the pickle remark in linucb_recommend_tag.
                    ori_redis_tag_dict = pickle.loads(ori_redis_tag_data)
                else:
                    ori_redis_tag_dict = cls._fresh_state()

                new_Aa_matrix = ori_redis_tag_dict["Aa"] + np.dot(xa, xaT)
                # Solving Aa * X = I yields the inverse of Aa.
                new_AaI_matrix = np.linalg.solve(new_Aa_matrix, np.identity(cls.d))
                new_ba_matrix = ori_redis_tag_dict["ba"] + r * xa

                user_tag_dict = {
                    "Aa": new_Aa_matrix,
                    "ba": new_ba_matrix,
                    "AaI": new_AaI_matrix,
                    "theta": np.dot(new_AaI_matrix, new_ba_matrix)
                }
                # Write through the client the caller passed in.
                redis_cli.hset(redis_key, tag_id, pickle.dumps(user_tag_dict))
            return True
        except Exception:
            logging.error("catch exception,err_msg:%s" % traceback.format_exc())
            return False
\ No newline at end of file
# coding=utf-8
from __future__ import unicode_literals, print_function, absolute_import
import itertools
from django.conf import settings
from __future__ import unicode_literals, print_function, absolute_import
import itertools
from django.conf import settings
import logging
class CeleryTaskRouter(object):
queue_task_map = {
"tapir-alpha":[
"tapir-alpha": [
'injection.data_sync.tasks.write_to_es',
]
}
......@@ -30,4 +31,4 @@ class CeleryTaskRouter(object):
logging.info("duan add,task is:%s" % str(task))
queue_name_or_none = self.task_queue_map.get(task)
return queue_name_or_none
\ No newline at end of file
return queue_name_or_none
......@@ -14,188 +14,4 @@ import os
from .log_settings import *
from datetime import timedelta
from celery.schedules import crontab
# Build paths inside the project like this: os.path.join(BASE_DIR, ...)
BASE_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
# Quick-start development settings - unsuitable for production
# See https://docs.djangoproject.com/en/1.10/howto/deployment/checklist/
# SECURITY WARNING: keep the secret key used in production secret!
SECRET_KEY = '^j3sg)sj8rc@du74%fb$c2926tv!!4g(kp-=rx1)c5!1&1(dq='
# SECURITY WARNING: don't run with debug turned on in production!
DEBUG = False
ALLOWED_HOSTS = []
# Application definition
SENTRY_CELERY_ENDPOINT = "http://60b0004c8884420f8067fb32fc3ed244:20f97fc73ffa4aad9735d0e6542a6d78@sentry.igengmei.com/140"
BROKER_URL = "redis://127.0.0.1:6379/8"
# CELERY_SEND_EVENTS = True
# CELERY_SEND_TASK_SENT_EVENT = True
#
# CELERY_DEFAULT_EXCHANGE = 'celery'
# CELERY_DEFAULT_EXCHANGE_TYPE = 'direct'
# CELERY_DEFAULT_ROUTING_KEY = 'celery'
#
# CELERY_QUEUES = {
# 'celery': {
# 'exchange': CELERY_DEFAULT_EXCHANGE,
# 'routing_key': CELERY_DEFAULT_ROUTING_KEY,
# },
# 'order': {
# 'exchange': 'order',
# 'routing_key': 'order',
# },
# }
CELERY_BROKER_URL = "redis://127.0.0.1:6379/8"
CELERY_TIMEZONE = 'Asia/Shanghai'
CELERY_ROUTES = ['physical.celery_task_router.CeleryTaskRouter']
INSTALLED_APPS = (
'django.contrib.admin',
'django.contrib.auth',
'django.contrib.contenttypes',
'django.contrib.sessions',
'django.contrib.messages',
'django.contrib.staticfiles',
'trans2es',
'search',
'injection.data_sync',
)
CELERYBEAT_SCHEDULE = {
'sync_face_similar_data_to_redis': {
'task': 'injection.data_sync.tasks.sync_face_similar_data_to_redis',
'schedule': timedelta(seconds=120),
'args': ()
},
}
"""
MIDDLEWARE = [
'django.middleware.security.SecurityMiddleware',
'django.contrib.sessions.middleware.SessionMiddleware',
'django.middleware.common.CommonMiddleware',
'django.middleware.csrf.CsrfViewMiddleware',
'django.contrib.auth.middleware.AuthenticationMiddleware',
'django.contrib.messages.middleware.MessageMiddleware',
'django.middleware.clickjacking.XFrameOptionsMiddleware',
]
TEMPLATES = [
{
'BACKEND': 'django.template.backends.django.DjangoTemplates',
'DIRS': [],
'APP_DIRS': True,
'OPTIONS': {
'context_processors': [
'django.template.context_processors.debug',
'django.template.context_processors.request',
'django.contrib.auth.context_processors.auth',
'django.contrib.messages.context_processors.messages',
],
},
},
]
AUTH_PASSWORD_VALIDATORS = [
{
'NAME': 'django.contrib.auth.password_validation.UserAttributeSimilarityValidator',
},
{
'NAME': 'django.contrib.auth.password_validation.MinimumLengthValidator',
},
{
'NAME': 'django.contrib.auth.password_validation.CommonPasswordValidator',
},
{
'NAME': 'django.contrib.auth.password_validation.NumericPasswordValidator',
},
]
"""
ROOT_URLCONF = 'physical.urls'
WSGI_APPLICATION = 'physical.wsgi.application'
REDIS_URL = "redis://127.0.0.1:6379"
# Database
# https://docs.djangoproject.com/en/1.10/ref/settings/#databases
DATABASE_ROUTERS = ['physical.DBRouter.DBRouter']
DATABASES = {
'default': {
'ENGINE': 'django.db.backends.mysql',
'NAME': 'alpha',
'USER': 'work',
'PASSWORD': 'Gengmei123',
# 'HOST': 'rm-2ze5k2we69904787l.mysql.rds.aliyuncs.com',
"HOST": 'rm-2zeaut61u9sm21m0bjo.mysql.rds.aliyuncs.com',
#'HOST': 'rm-2zeaut61u9sm21m0b.mysql.rds.aliyuncs.com',
'PORT': '3306',
'OPTIONS': {
"init_command": "SET foreign_key_checks = 0;",
"charset": "utf8mb4",
},
},
'face': {
'ENGINE': 'django.db.backends.mysql',
'NAME': 'face',
'USER': 'work',
'PASSWORD': 'Gengmei123',
# 'HOST': 'rm-2ze5k2we69904787l.mysql.rds.aliyuncs.com',
"HOST": "rm-2zeaut61u9sm21m0bjo.mysql.rds.aliyuncs.com",
# 'HOST': 'rm-2zeaut61u9sm21m0b.mysql.rds.aliyuncs.com',
'PORT': '3306',
# 'CONN_MAX_AGE': None,
'OPTIONS': {
"init_command": "SET foreign_key_checks = 0;",
"charset": "utf8mb4",
},
}
}
ES_INFO_LIST = [
{
"host": "10.29.130.141",
"port": 9200
}
]
ES_INDEX_PREFIX = "gm-dbmw"
MIDDLEWARE_CLASSES = (
'gm_tracer.middleware.TracerMiddleware',
'django.contrib.sessions.middleware.SessionMiddleware',
'django.middleware.common.CommonMiddleware',
'django.middleware.csrf.CsrfViewMiddleware',
'django.contrib.auth.middleware.AuthenticationMiddleware',
'django.contrib.auth.middleware.SessionAuthenticationMiddleware',
'django.contrib.messages.middleware.MessageMiddleware',
'django.middleware.clickjacking.XFrameOptionsMiddleware',
'helios.DjangoL5dMiddleware',
)
# Password validation
# https://docs.djangoproject.com/en/1.10/ref/settings/#auth-password-validators
# Internationalization
# https://docs.djangoproject.com/en/1.10/topics/i18n/
# LANGUAGE_CODE = 'en-us'
#
TIME_ZONE = 'Asia/Shanghai'
#
USE_I18N = True
USE_L10N = True
#
# USE_TZ = True
# Static files (CSS, JavaScript, Images)
# https://docs.djangoproject.com/en/1.10/howto/static-files/
STATIC_URL = '/static/'
from .settings_local import *
# Build paths inside the project like this: os.path.join(BASE_DIR, ...)
BASE_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
# Quick-start development settings - unsuitable for production
# See https://docs.djangoproject.com/en/1.10/howto/deployment/checklist/
# SECURITY WARNING: keep the secret key used in production secret!
SECRET_KEY = '^j3sg)sj8rc@du74%fb$c2926tv!!4g(kp-=rx1)c5!1&1(dq='
# SECURITY WARNING: don't run with debug turned on in production!
DEBUG = False
ALLOWED_HOSTS = []
# Application definition
SENTRY_CELERY_ENDPOINT = "http://60b0004c8884420f8067fb32fc3ed244:20f97fc73ffa4aad9735d0e6542a6d78@sentry.igengmei.com/140"
BROKER_URL = "redis://127.0.0.1:6379/8"
# CELERY_SEND_EVENTS = True
# CELERY_SEND_TASK_SENT_EVENT = True
#
# CELERY_DEFAULT_EXCHANGE = 'celery'
# CELERY_DEFAULT_EXCHANGE_TYPE = 'direct'
# CELERY_DEFAULT_ROUTING_KEY = 'celery'
#
# CELERY_QUEUES = {
# 'celery': {
# 'exchange': CELERY_DEFAULT_EXCHANGE,
# 'routing_key': CELERY_DEFAULT_ROUTING_KEY,
# },
# 'order': {
# 'exchange': 'order',
# 'routing_key': 'order',
# },
# }
CELERY_BROKER_URL = "redis://127.0.0.1:6379/8"
CELERY_TIMEZONE = 'Asia/Shanghai'
CELERY_ROUTES = ['physical.celery_task_router.CeleryTaskRouter']
INSTALLED_APPS = (
'django.contrib.admin',
'django.contrib.auth',
'django.contrib.contenttypes',
'django.contrib.sessions',
'django.contrib.messages',
'django.contrib.staticfiles',
'trans2es',
'search',
'injection.data_sync',
)
CELERYBEAT_SCHEDULE = {
'sync_face_similar_data_to_redis': {
'task': 'injection.data_sync.tasks.sync_face_similar_data_to_redis',
'schedule': 120.0,
'args': ()
},
}
"""
MIDDLEWARE = [
'django.middleware.security.SecurityMiddleware',
'django.contrib.sessions.middleware.SessionMiddleware',
'django.middleware.common.CommonMiddleware',
'django.middleware.csrf.CsrfViewMiddleware',
'django.contrib.auth.middleware.AuthenticationMiddleware',
'django.contrib.messages.middleware.MessageMiddleware',
'django.middleware.clickjacking.XFrameOptionsMiddleware',
]
TEMPLATES = [
{
'BACKEND': 'django.template.backends.django.DjangoTemplates',
'DIRS': [],
'APP_DIRS': True,
'OPTIONS': {
'context_processors': [
'django.template.context_processors.debug',
'django.template.context_processors.request',
'django.contrib.auth.context_processors.auth',
'django.contrib.messages.context_processors.messages',
],
},
},
]
AUTH_PASSWORD_VALIDATORS = [
{
'NAME': 'django.contrib.auth.password_validation.UserAttributeSimilarityValidator',
},
{
'NAME': 'django.contrib.auth.password_validation.MinimumLengthValidator',
},
{
'NAME': 'django.contrib.auth.password_validation.CommonPasswordValidator',
},
{
'NAME': 'django.contrib.auth.password_validation.NumericPasswordValidator',
},
]
"""
ROOT_URLCONF = 'physical.urls'
WSGI_APPLICATION = 'physical.wsgi.application'
REDIS_URL = "redis://127.0.0.1:6379"
# Database
# https://docs.djangoproject.com/en/1.10/ref/settings/#databases
DATABASES = {
'default': {
'ENGINE': 'django.db.backends.mysql',
'NAME': 'alpha',
'USER': 'work',
'PASSWORD': 'Gengmei123',
# 'HOST': 'rm-2ze5k2we69904787l.mysql.rds.aliyuncs.com',
'HOST': 'rm-2zeaut61u9sm21m0b.mysql.rds.aliyuncs.com',
'PORT': '3306',
'OPTIONS': {
"init_command": "SET foreign_key_checks = 0;",
"charset": "utf8mb4",
},
},
'face': {
'ENGINE': 'django.db.backends.mysql',
'NAME': 'face',
'USER': 'work',
'PASSWORD': 'Gengmei123',
# 'HOST': 'rm-2ze5k2we69904787l.mysql.rds.aliyuncs.com',
'HOST': 'rm-2zeaut61u9sm21m0b.mysql.rds.aliyuncs.com',
'PORT': '3306',
# 'CONN_MAX_AGE': None,
'OPTIONS': {
"init_command": "SET foreign_key_checks = 0;",
"charset": "utf8mb4",
},
}
}
ES_INFO_LIST = [
{
"host": "10.29.130.141",
"port": 9200
}
]
ES_INDEX_PREFIX = "gm-dbmw"
MIDDLEWARE_CLASSES = (
'gm_tracer.middleware.TracerMiddleware',
'django.contrib.sessions.middleware.SessionMiddleware',
'django.middleware.common.CommonMiddleware',
'django.middleware.csrf.CsrfViewMiddleware',
'django.contrib.auth.middleware.AuthenticationMiddleware',
'django.contrib.auth.middleware.SessionAuthenticationMiddleware',
'django.contrib.messages.middleware.MessageMiddleware',
'django.middleware.clickjacking.XFrameOptionsMiddleware',
'helios.DjangoL5dMiddleware',
)
# Password validation
# https://docs.djangoproject.com/en/1.10/ref/settings/#auth-password-validators
# Internationalization
# https://docs.djangoproject.com/en/1.10/topics/i18n/
# LANGUAGE_CODE = 'en-us'
#
TIME_ZONE = 'Asia/Shanghai'
#
USE_I18N = True
USE_L10N = True
#
# USE_TZ = True
# Static files (CSS, JavaScript, Images)
# https://docs.djangoproject.com/en/1.10/howto/static-files/
STATIC_URL = '/static/'
......@@ -12,6 +12,8 @@ PyMySQL==0.9.2
gunicorn==19.9.0
gevent==1.3.7
pypinyin==0.34.1
numpy==1.16.2
lz4==2.1.6
git+ssh://git@git.wanmeizhensuo.com/backend/gm-rpcd.git@master
git+ssh://git@git.wanmeizhensuo.com/backend/helios.git@master
......
......@@ -47,7 +47,7 @@ class GroupUtils(object):
return {"total_count":0, "hits":[]}
@classmethod
def get_hot_group_recommend_result_list(cls,offset,size,es_cli_obj=None):
def get_hot_pictorial_recommend_result_list(cls,offset,size,es_cli_obj=None):
try:
if not es_cli_obj:
es_cli_obj = ESPerform.get_cli()
......@@ -68,19 +68,19 @@ class GroupUtils(object):
"includes":["id"]
}
result_dict = ESPerform.get_search_results(es_cli_obj,"group",q,offset,size)
result_dict = ESPerform.get_search_results(es_cli_obj,"pictorial",q,offset,size)
group_ids_list = []
pictorial_ids_list = []
if len(result_dict["hits"]) > 0:
group_ids_list = [item["_source"]["id"] for item in result_dict["hits"]]
pictorial_ids_list = [item["_source"]["id"] for item in result_dict["hits"]]
return group_ids_list
return pictorial_ids_list
except:
logging.error("catch exception,err_msg:%s" % traceback.format_exc())
return []
@classmethod
def get_user_attention_group_list(cls,user_id,offset=0,size=10,es_cli_obj=None):
def get_user_attention_pictorial_list(cls,user_id,offset=0,size=10,es_cli_obj=None):
"""
:remark: 获取用户关注小组列表
:return:
......@@ -100,12 +100,12 @@ class GroupUtils(object):
}
}
q["_source"] = {
"includes":["attention_group_id_list"]
"includes":["attention_pictorial_id_list"]
}
result_dict = ESPerform.get_search_results(es_cli_obj,"user",q,offset,size)
if len(result_dict["hits"])>0:
return result_dict["hits"][0]["_source"]["attention_group_id_list"]
return result_dict["hits"][0]["_source"]["attention_pictorial_id_list"]
else:
return []
except:
......@@ -113,7 +113,7 @@ class GroupUtils(object):
return []
@classmethod
def get_group_ids_by_aggs(cls,group_id_list,es_cli_obj=None):
def get_pictorial_ids_by_aggs(cls,pictorial_ids_list,es_cli_obj=None):
"""
:remark:聚合查询获取小组列表
:param group_id_list:
......@@ -127,13 +127,13 @@ class GroupUtils(object):
q["size"]=0
q["query"] = {
"terms":{
"group_id":group_id_list
"pictorial_id":pictorial_ids_list
}
}
q["aggs"] = {
"group_ids":{
"pictorial_ids":{
"terms":{
"field":"group_id"
"field":"pictorial_id"
},
"aggs":{
"max_date":{
......@@ -146,12 +146,12 @@ class GroupUtils(object):
}
result_dict = ESPerform.get_search_results(es_cli_obj,"topic",q,aggregations_query=True)
buckets_list = result_dict["aggregations"]["group_ids"]["buckets"]
buckets_list = result_dict["aggregations"]["pictorial_ids"]["buckets"]
sorted_buckets_list = sorted(buckets_list,key=lambda item:item["max_date"]["value"],reverse=True)
sorted_group_id_list = [item["key"] for item in sorted_buckets_list]
sorted_pictorial_id_list = [item["key"] for item in sorted_buckets_list]
return sorted_group_id_list
return sorted_pictorial_id_list
except:
logging.error("catch exception,err_msg:%s" % traceback.format_exc())
return []
\ No newline at end of file
......@@ -12,7 +12,6 @@ from .common import TopicDocumentField
from search.utils.common import *
class TopicUtils(object):
@classmethod
......@@ -27,22 +26,23 @@ class TopicUtils(object):
try:
q = dict()
q["query"] = {
"term":{
"term": {
"user_id": user_id
}
}
q["_source"] = ["tag_list","attention_user_id_list", "pick_user_id_list", "same_group_user_id_list"]
q["_source"] = {
"include": ["tag_list", "attention_user_id_list", "pick_user_id_list", "same_pictorial_user_id_list"]
}
result_dict = ESPerform.get_search_results(ESPerform.get_cli(), "user", q, offset, size)
return result_dict
except:
logging.error("catch exception,err_msg:%s" % traceback.format_exc())
return {"total_count":0,"hits":[]}
return {"total_count": 0, "hits": []}
@classmethod
def analyze_related_user_id_list(cls,related_user_id_list):
def analyze_related_user_id_list(cls, related_user_id_list):
"""
:remark:获取指定用户关联的 用户列表
:param related_user_id_list:
......@@ -61,13 +61,13 @@ class TopicUtils(object):
elif item["country_id"] == 2:
korea_user_id_list.append(item["user_id"])
return (chinese_user_id_list,japan_user_id_list,korea_user_id_list)
return (chinese_user_id_list, japan_user_id_list, korea_user_id_list)
except:
logging.error("catch exception,err_msg:%s" % traceback.format_exc())
return ([],[],[])
return ([], [], [])
@classmethod
def refresh_redis_hash_data(cls, redis_cli,redis_key,redis_data_dict):
def refresh_redis_hash_data(cls, redis_cli, redis_key, redis_data_dict):
try:
redis_cli.hmset(redis_key, redis_data_dict)
return True
......@@ -76,13 +76,13 @@ class TopicUtils(object):
return False
@classmethod
def ___get_should_term_list(cls,ori_list,field_name="tag_list"):
def ___get_should_term_list(cls, ori_list, field_name="tag_list"):
try:
should_term_list = list()
for term_id in ori_list:
term_dict = {
"term":{
field_name:{"value":term_id}
"term": {
field_name: {"value": term_id}
}
}
should_term_list.append(term_dict)
......@@ -92,7 +92,38 @@ class TopicUtils(object):
return []
@classmethod
def get_recommend_topic_ids(cls,user_id,offset,size,query=None,query_type=TopicPageType.FIND_PAGE,filter_topic_id_list=[],must_topic_id_list=[]):
def get_topic_tag_info(cls, offset, size, topic_id_list, user_id):
    """
    :remark: Fetch the "edit_tag_list" field of each requested topic document.
    :param offset: paging offset forwarded to the search call
    :param size: page size forwarded to the search call
    :param topic_id_list: topic ids to look up; empty or None skips the search
    :param user_id: accepted for interface compatibility, not used by the lookup
    :return: dict mapping str(topic id) to that document's "edit_tag_list"
             (an empty list when the document has no such field);
             {} for empty input or when the search fails
    """
    # An empty "terms" clause cannot match any document, so answer directly
    # instead of paying for a search round-trip.
    if not topic_id_list:
        return {}

    try:
        q = {
            "query": {
                "terms": {
                    "id": topic_id_list
                }
            },
            "_source": {
                "includes": ["id", "pictorial_id", "offline_score", "user_id", "edit_tag_list"]
            }
        }
        result_dict = ESPerform.get_search_results(ESPerform.get_cli(), sub_index_name="topic", query_body=q,
                                                   offset=offset, size=size)

        topic_id_dict = dict()
        for item in result_dict["hits"]:
            source = item["_source"]
            # Keys are stringified ids; documents without tags map to [].
            topic_id_dict[str(source["id"])] = source.get("edit_tag_list", list())

        return topic_id_dict
    except Exception:
        # Best-effort lookup: log the failure and fall back to an empty mapping.
        # Only Exception is caught so interpreter-exit signals still propagate.
        logging.error("catch exception,err_msg:%s" % traceback.format_exc())
        return {}
@classmethod
def get_recommend_topic_ids(cls, user_id, tag_id, offset, size, single_size, query=None,
query_type=TopicPageType.FIND_PAGE,
filter_topic_id_list=[], test_score=False, must_topic_id_list=[], recommend_tag_list=[],
user_similar_score_list=[]):
"""
:需增加打散逻辑
:remark:获取首页推荐帖子列表
......@@ -105,7 +136,8 @@ class TopicUtils(object):
try:
attention_user_id_list = list()
pick_user_id_list = list()
same_group_id_list = list()
# same_group_id_list = list()
same_pictorial_id_list = list()
user_tag_list = list()
result_dict = TopicUtils.get_related_user_info(user_id, 0, 1)
......@@ -118,84 +150,128 @@ class TopicUtils(object):
pick_user_info_list = result_dict["hits"][0]["_source"]["pick_user_id_list"]
pick_user_id_list = [item["user_id"] for item in pick_user_info_list]
same_group_user_info_list = result_dict["hits"][0]["_source"]["same_group_user_id_list"]
same_group_id_list = [item["user_id"] for item in same_group_user_info_list]
# same_group_user_info_list = result_dict["hits"][0]["_source"]["same_group_user_id_list"]
# same_group_id_list = [item["user_id"] for item in same_group_user_info_list]
# same_group_id_list = same_group_id_list[:100]
same_pictorial_user_info_list = result_dict["hits"][0]["_source"]["same_pictorial_user_id_list"]
user_tag_list = result_dict["hits"][0]["_source"]["tag_list"]
same_pictorial_id_list = [item["user_id"] for item in same_pictorial_user_info_list]
same_pictorial_id_list = same_pictorial_id_list[:100]
# attention_user_id_term_list = cls.___get_should_term_list(attention_user_id_list,field_name="user_id")
# pick_user_id_term_list = cls.___get_should_term_list(pick_user_id_list,field_name="user_id")
# same_group_user_id_term_list = cls.___get_should_term_list(same_group_id_list,field_name="user_id")
user_tag_list = result_dict["hits"][0]["_source"]["tag_list"]
q = dict()
q["query"] = dict()
functions_list = [
{
"gauss": {
"update_time": {
"scale": "1d",
"decay": 0.9
"filter": {
"term": {
"language_type": 1
}
},
"weight": 5
"weight": 3
},
{
"gauss": {
"linear": {
"create_time": {
"scale": "1d",
"decay": 0.9
"decay": 0.99
}
},
"weight": 10
"weight": 500
}
]
if len(attention_user_id_list)>0:
if len(user_similar_score_list) > 0:
for item in user_similar_score_list[:100]:
score_item = 3 * 10 * item[1]
functions_list.append(
{
"filter": {"bool": {
"should": {"term": {"user_id": item[0]}}}},
"weight": score_item,
}
)
if len(attention_user_id_list) > 0:
functions_list.append(
{
"filter": {"bool": {
"should": {"terms":{"user_id":attention_user_id_list}}}},
"should": {"terms": {"user_id": attention_user_id_list}}}},
"weight": 3,
}
)
if len(pick_user_id_list)>0:
if len(pick_user_id_list) > 0:
functions_list.append(
{
"filter": {"bool": {
"should": {"terms":{"user_id":pick_user_id_list}}}},
"should": {"terms": {"user_id": pick_user_id_list}}}},
"weight": 2
}
)
if len(same_group_id_list)>0:
# if len(same_group_id_list)>0:
# functions_list.append(
# {
# "filter": {"bool": {
# "should": {"terms":{"user_id":same_group_id_list}}}},
# "weight": 1
# }
# )
if len(same_pictorial_id_list) > 0:
functions_list.append(
{
"filter": {"bool": {
"should": {"terms":{"user_id":same_group_id_list}}}},
"should": {"terms": {"user_id": same_pictorial_id_list}}}},
"weight": 1
}
)
# query_tag_term_list = cls.___get_should_term_list(user_tag_list)
if len(user_tag_list)>0:
if len(user_tag_list) > 0:
functions_list.append(
{
"filter":{"bool":{
"should":{"terms":{"tag_list":user_tag_list}}}},
"filter": {"bool": {
"should": {"terms": {"tag_list": user_tag_list}}}},
"weight": 1
}
)
if len(recommend_tag_list) > 0:
functions_list.append(
{
"filter": {"bool": {
"should": {"terms": {"edit_tag_list": recommend_tag_list}}}},
"weight": 3
}
)
low_content_level = 4 if query_type==TopicPageType.FIND_PAGE else 3
low_content_level = 4 if query_type == TopicPageType.FIND_PAGE else 3
query_function_score = {
"query": {
"bool": {
"filter": [
{"range": {"content_level": {"gte": low_content_level, "lte": 5}}},
{"term": {"has_image":True}},
{"term": {"has_image": True}},
{"term": {"is_online": True}},
{"term": {"is_deleted": False}}
]
],
"should": [
{
"bool": {
"must": [
{"term": {"has_image": True}},
{"term": {"has_video": False}}
]
}
}, {
"bool": {
"must": {
"term": {"has_video": True}
}
}
}
],
"minimum_should_match": 1
}
},
"score_mode": "sum",
......@@ -204,23 +280,23 @@ class TopicUtils(object):
}
if len(must_topic_id_list) > 0:
query_function_score["query"]["bool"]["must"] = {
"terms":{
"terms": {
"id": must_topic_id_list
}
}
if query_type==TopicPageType.FIND_PAGE and len(filter_topic_id_list)>0:
if len(filter_topic_id_list) > 0:
query_function_score["query"]["bool"]["must_not"] = {
"terms":{
"terms": {
"id": filter_topic_id_list
}
}
if query is not None:#搜索帖子
if query is not None: # 搜索帖子
multi_fields = {
'description': 200,
'content': 300,
'name': 400,
'tag_name_list':300,
'tag_name_list': 300,
}
query_fields = ['^'.join((k, str(v))) for (k, v) in multi_fields.items()]
multi_match = {
......@@ -229,26 +305,31 @@ class TopicUtils(object):
'operator': 'and',
'fields': query_fields,
}
query_function_score["query"]["bool"]["should"] = [
{'multi_match': multi_match}
{'multi_match': multi_match},
{"term": {"tag_list": tag_id}}
]
query_function_score["query"]["bool"]["minimum_should_match"] = 1
q["query"]["function_score"] = query_function_score
q["collapse"] = {
"field": "user_id"
}
q["_source"] = {
"includes":["id","group_id","offline_score"]
"includes": ["id", "pictorial_id", "offline_score", "user_id", "edit_tag_list"]
}
q["sort"] = [
{
"_script":{
"type":"number",
"script":{
"_script": {
"type": "number",
"script": {
"lang": "expression",
"source": "_score*doc['offline_score']"
"source": "_score+doc['offline_score']"
# "lang":"painless",
# "source":"_score+params._source.offline_score"
},
"order":"desc"
"order": "desc"
}
},
"_score"
......@@ -256,17 +337,46 @@ class TopicUtils(object):
result_dict = ESPerform.get_search_results(ESPerform.get_cli(), sub_index_name="topic", query_body=q,
offset=offset, size=size)
topic_id_dict = dict()
for item in result_dict["hits"]:
topic_id_dict[item["_source"]["id"]] = item["_source"]["group_id"]
topic_id_list = list()
same_pictorial_id_set = set()
same_user_id_set = set()
return topic_id_dict
for item in result_dict["hits"]:
topic_id_list.append(item["_source"]["id"])
# for item in result_dict["hits"]:
# if item["_source"]["group_id"]>0 and item["_source"]["group_id"] not in same_group_id_set:
# same_group_id_set.add(item["_source"]["id"])
# topic_id_list.append(item["_source"]["id"])
# else:
# same_group_id_set.add(item["_source"]["id"])
#
# if item["_source"]["user_id"] not in same_user_id_set:
# same_user_id_set.add(item["_source"]["id"])
# topic_id_list.append(item["_source"]["id"])
# else:
# same_user_id_set.add(item["_source"]["id"])
#
# if len(topic_id_list) >= single_size:
# break
#
# if len(topic_id_list) < single_size:
# for topic_id in same_group_id_set:
# topic_id_list.append(topic_id)
# if len(topic_id_list)>=single_size:
# break
# for topic_id in same_user_id_set:
# topic_id_list.append(topic_id)
# if len(topic_id_list)>=single_size:
# break
return topic_id_list
except:
logging.error("catch exception,err_msg:%s" % traceback.format_exc())
return dict()
return list()
@classmethod
def get_topic_detail_recommend_list(cls,user_id,topic_id,topic_tag_list,topic_group_id,topic_user_id,filter_topic_user_id,offset,size,es_cli_obj=None):
def get_topic_detail_recommend_list(cls, user_id, topic_id, topic_tag_list, topic_pictorial_id, topic_user_id,
filter_topic_user_id, have_read_topic_list, offset, size, es_cli_obj=None):
"""
:remark 帖子详情页推荐列表,缺少按时间衰减
:param user_id:
......@@ -291,33 +401,35 @@ class TopicUtils(object):
"weight": 1000
},
{
"gauss": {
"update_time": {
"scale": "1d",
"decay": 0.5
}
}
}
"linear": {
"create_time": {
"scale": "1d",
"decay": 0.5
}
}
}
]
if isinstance(topic_group_id,int) and topic_group_id > 0:
if isinstance(topic_pictorial_id, int) and topic_pictorial_id > 0:
functions_list.append(
{
"filter": {"term": {
"group_id": topic_group_id}},
"pictorial_id": topic_pictorial_id}},
"weight": 1,
}
)
# query_tag_term_list = cls.___get_should_term_list(topic_tag_list)
have_read_topic_list.append(topic_id)
query_function_score = {
"query":{
"bool":{
"query": {
"bool": {
"must": [
{"range": {"content_level": {"gte": 3, "lte": 5}}}
{"range": {"content_level": {"gte": 3, "lte": 5}}},
{"term": {"is_online": True}},
{"term": {"is_deleted": False}}
],
"must_not":{
"term":{
"id":topic_id
"must_not": {
"terms": {
"id": have_read_topic_list
}
}
}
......@@ -329,15 +441,15 @@ class TopicUtils(object):
if filter_topic_user_id:
query_function_score["query"]["bool"]["must"].append({"term": {"user_id": topic_user_id}})
if len(topic_tag_list)>0:
query_function_score["query"]["bool"]["should"]={
"terms":{
"tag_list":topic_tag_list
if len(topic_tag_list) > 0:
query_function_score["query"]["bool"]["should"] = {
"terms": {
"tag_list": topic_tag_list
}
}
q["query"]["function_score"] = query_function_score
q["_source"] = {
"includes":["id","group_id","user_id","_score"]
"includes": ["id", "pictorial_id", "user_id", "_score"]
}
result_dict = ESPerform.get_search_results(es_cli_obj, sub_index_name="topic", query_body=q,
......@@ -349,7 +461,7 @@ class TopicUtils(object):
return []
@classmethod
def get_topic_tag_id_list(cls,topic_id,es_cli_obj=None):
def get_topic_tag_id_list(cls, topic_id, es_cli_obj=None):
"""
:remark 获取帖子标签列表
:param topic_id:
......@@ -361,18 +473,18 @@ class TopicUtils(object):
q = dict()
q["query"] = {
"term":{
"term": {
"id": topic_id
}
}
q["_source"] = {
"includes":[TopicDocumentField.TAG_LIST]
"includes": [TopicDocumentField.TAG_LIST]
}
result_dict = ESPerform.get_search_results(es_cli_obj,sub_index_name="topic",query_body=q,size=1)
result_dict = ESPerform.get_search_results(es_cli_obj, sub_index_name="topic", query_body=q, size=1)
tag_id_list = []
if len(result_dict["hits"])>0:
if len(result_dict["hits"]) > 0:
tag_id_list = result_dict["hits"][0]["_source"][TopicDocumentField.TAG_LIST]
return tag_id_list
......@@ -380,9 +492,8 @@ class TopicUtils(object):
logging.error("catch exception,err_msg:%s" % traceback.format_exc())
return list()
@classmethod
def get_tag_aggregation_topic_id_list(cls,user_id,tag_id,offset,size):
def get_tag_aggregation_topic_id_list(cls, user_id, tag_id, offset, size):
try:
attention_user_id_list = list()
pick_user_id_list = list()
......@@ -399,8 +510,8 @@ class TopicUtils(object):
functions_list = [
{
"gauss": {
"update_time": {
"linear": {
"create_time": {
"scale": "1d",
"decay": 0.5
}
......@@ -408,34 +519,34 @@ class TopicUtils(object):
}
]
if len(attention_user_id_list)>0:
if len(attention_user_id_list) > 0:
functions_list.append(
{
"filter": {"bool": {
"should": {"terms":{"user_id":attention_user_id_list}}}},
"should": {"terms": {"user_id": attention_user_id_list}}}},
"weight": 3,
}
)
if len(pick_user_id_list)>0:
if len(pick_user_id_list) > 0:
functions_list.append(
{
"filter": {"bool": {
"should": {"terms":{"user_id":pick_user_id_list}}}},
"should": {"terms": {"user_id": pick_user_id_list}}}},
"weight": 2
}
)
query_function_score = {
"query":{
"bool":{
"query": {
"bool": {
"must": [
#{"range": {"content_level": {"gte": 3, "lte": 5}}},
# {"range": {"content_level": {"gte": 3, "lte": 5}}},
{"term": {"is_online": True}},
{"term": {"is_deleted": False}},
{"term": {"tag_list":tag_id}}
{"term": {"tag_list": tag_id}}
],
"must_not":[
{"terms": {"content_level": [1,2]}}
"must_not": [
{"terms": {"content_level": [1, 2]}}
]
}
},
......@@ -446,22 +557,22 @@ class TopicUtils(object):
q = dict()
q["query"] = {
"function_score":query_function_score
"function_score": query_function_score
}
q["_source"] = {
"includes":["id","group_id","user_id","_score","offline_score","manual_score"]
"includes": ["id", "pictorial_id", "user_id", "_score", "offline_score", "manual_score"]
}
q["sort"] = [
{
"_script":{
"type":"number",
"script":{
"_script": {
"type": "number",
"script": {
"lang": "expression",
"source": "_score+doc['offline_score']+doc['manual_score']"
# "lang":"painless",
# "source":"_score+params._source.offline_score+params._source.manual_score"
},
"order":"desc"
"order": "desc"
}
}
]
......@@ -475,22 +586,69 @@ class TopicUtils(object):
return list()
@classmethod
def process_filters(cls, filters):
def process_filters(cls, filters, filter_online=True):
"""处理过滤器部分。"""
f = [
{"term": {"is_online": True}},
{"term": {"is_deleted": False}},
]
logging.info("get filters:%s"%filters)
if not filters:
return f
for k, v in filters.items():
if k == "group_id":
if k == "is_complaint":
f.append({
"term": {k: v},
})
if not v:
continue
if k in ["create_time_gte", "create_time_lte"]:
if k == "create_time_gte":
op = "gte"
elif k == "create_time_lte":
op = "lte"
f.append({
"term": {"group_id": v},
"range": {
"create_time_val": {
op: v,
}
}
})
elif k in ["id_gte", "id_lte"]:
if k == "id_gte":
op = "gte"
elif k == "id_lte":
op = "lte"
f.append({
"range": {
"id": {
op: v,
}
}
})
else:
if isinstance(v, list):
f.append({
"terms": {k: v},
})
else:
f.append({
"term": {k: v},
})
if filter_online:
f.append({"term": {"is_online": True}})
return f
@classmethod
......@@ -511,43 +669,144 @@ class TopicUtils(object):
"""处理排序部分。"""
sort_rule = []
if sorts_by == TOPIC_SEARCH_SORT.VOTE_NUM:
sort_rule.append({
"vote_num":{
"order":"desc"
},
"update_time":{
"order":"desc"
},
})
if isinstance(sorts_by, int):
if sorts_by == TOPIC_SEARCH_SORT.VOTE_NUM:
sort_rule.append({
"vote_num": {
"order": "desc"
},
"update_time": {
"order": "desc"
},
})
elif isinstance(sorts_by, list):
for sort_by in sorts_by:
if sort_by == TOPIC_SEARCH_SORT.ID_AEC:
sort_rule.append({
"id": {
"order": "asc"
},
})
elif sort_by == TOPIC_SEARCH_SORT.ID_DESC:
sort_rule.append({
"id": {
"order": "desc"
},
})
elif sort_by == TOPIC_SEARCH_SORT.SCORE_AEC:
sort_rule.append({
"sort_score": {
"order": "asc"
},
})
elif sort_by == TOPIC_SEARCH_SORT.SCORE_DESC:
sort_rule.append({
"sort_score": {
"order": "desc"
},
})
return sort_rule
@classmethod
def list_topic_ids(cls, filters, nfilters, sorts_by, offset=0, size=10):
def list_topic_ids(cls, filters, nfilters, sorts_by, offset=0, size=10, index_name="topic", filter_online=True):
must = cls.process_filters(filters, filter_online=filter_online)
q = {
"query": {
"bool": {
"must": cls.process_filters(filters),
"must": must,
"must_not": cls.process_nfilters(nfilters),
}
},
"_source": {
"includes":["id"]
},
"sort": [],
}
}
if sorts_by:
sorts = cls.process_sort(sorts_by)
q["sort"] = sorts
if sorts:
q["sort"] = sorts
try:
result_dict = ESPerform.get_search_results(ESPerform.get_cli(), sub_index_name="topic",
query_body=q, offset=offset, size=size)
result_dict = ESPerform.get_search_results(
ESPerform.get_cli(), sub_index_name=index_name,
query_body=q, offset=offset, size=size
)
return {
"hits": result_dict["hits"],
"total_count": result_dict["total_count"]
}
except:
logging.error("catch exception,err_msg:%s" % traceback.format_exc())
return {
"hits": [],
"total_count": 0
}
return result_dict["hits"]
@classmethod
def business_topic_ids(cls, filters, nfilters, sorts_by, offset=0, size=10, index_name="topic", filter_online=True):
    """
    :remark: Search an index for the business topic search, optionally
             combining the structured filters with a text match on "content".
    :param filters: dict of filter conditions (may be None or empty); a
                    "content" entry is additionally used as the text query
    :param nfilters: exclusion conditions, converted by process_nfilters
    :param sorts_by: sort selector(s), converted by process_sort
    :param offset: paging offset forwarded to the search call
    :param size: page size forwarded to the search call
    :param index_name: sub index to search, "topic" by default
    :param filter_online: forwarded to process_filters
    :return: {"hits": [...], "total_count": n}; the same shape with an empty
             hit list and a zero count when the search fails
    """
    must = cls.process_filters(filters, filter_online=filter_online)
    must_not = cls.process_nfilters(nfilters)

    # `filters` may legitimately be None or empty (process_filters accepts
    # that too), so read the text query without assuming a populated dict.
    query = filters.get("content", '') if filters else ''

    filter_clause = {
        "bool": {
            "must": must,
            "must_not": must_not,
        }
    }

    q = {}
    if query == '':
        # No text query: the structured filters alone select the documents.
        q["query"] = filter_clause
    else:
        # NOTE(review): inside function_score the filter clause only decides
        # which documents receive the weight; it does not restrict the result
        # set. Kept as-is; confirm that this is the intended behaviour.
        q["query"] = {
            "function_score": {
                "functions": [{
                    "filter": filter_clause,
                    "weight": 1
                }],
                "query": {
                    "multi_match": {
                        "fields": ["content"],
                        "type": "cross_fields",
                        "operator": "and",
                        "query": query
                    }
                }
            }
        }
    logging.info("get query business_topic:%s" % q)

    if sorts_by:
        sorts = cls.process_sort(sorts_by)
        if sorts:
            q["sort"] = sorts

    try:
        result_dict = ESPerform.get_search_results(
            ESPerform.get_cli(), sub_index_name=index_name,
            query_body=q, offset=offset, size=size
        )
        return {
            "hits": result_dict["hits"],
            "total_count": result_dict["total_count"]
        }
    except Exception:
        # Always answer with the dict shape callers index into, never a bare
        # list; only Exception is caught so interpreter-exit signals propagate.
        logging.error("catch exception,err_msg:%s" % traceback.format_exc())
        return {
            "hits": [],
            "total_count": 0
        }
#!/usr/bin/env python
# -*- coding: utf-8 -*-
from __future__ import unicode_literals, absolute_import, print_function
from gm_rpcd.all import bind
import logging
import traceback
import json
from search.utils.topic import TopicUtils
from libs.es import ESPerform
from libs.cache import redis_client
from search.utils.common import *
from libs.es import ESPerform
@bind("physical/search/business/topic")
def business_topic_search(filters, nfilters=None, sorts_by=None, offset=0, size=10):
    """Topic search.

    Delegates to TopicUtils.business_topic_ids on the "topic" index with
    filter_online=False, then reduces each hit to its document id.

    :param filters: filter conditions forwarded to the search helper
    :param nfilters: exclusion conditions forwarded to the search helper
    :param sorts_by: sort selector(s) forwarded to the search helper
    :param offset: paging offset
    :param size: page size
    :return: {"topic_ids": [...], "total_count": n}; an empty id list and a
             zero count when anything goes wrong
    """
    try:
        search_kwargs = dict(
            filters=filters,
            nfilters=nfilters,
            sorts_by=sorts_by,
            offset=offset,
            size=size,
            filter_online=False,
            index_name="topic",
        )
        found = TopicUtils.business_topic_ids(**search_kwargs)
        logging.info("get result_lsit:%s" % found)

        # Keep only the id of every returned document.
        matched_ids = []
        for hit in found["hits"]:
            matched_ids.append(hit["_source"]["id"])

        return {"topic_ids": matched_ids, "total_count": found["total_count"]}
    except:
        logging.error("catch exception, err_msg:%s" % traceback.format_exc())
        return {"topic_ids": [], "total_count": 0}
......@@ -12,8 +12,8 @@ from search.utils.common import GroupSortTypes
from libs.es import ESPerform
@bind("physical/search/query_group")
def query_group(query="",offset=0,size=10):
@bind("physical/search/query_pictorial")
def query_group(query="", offset=0, size=10):
"""
:remark:小组搜索排序策略,缺少排序策略
:param query:
......@@ -22,7 +22,7 @@ def query_group(query="",offset=0,size=10):
:return:
"""
try:
result_dict = GroupUtils.get_group_query_result(query,offset,size)
result_dict = GroupUtils.get_group_query_result(query, offset, size)
group_ids_list = []
if len(result_dict["hits"]) > 0:
......@@ -31,10 +31,11 @@ def query_group(query="",offset=0,size=10):
return {"group_ids": group_ids_list}
except:
logging.error("catch exception,err_msg:%s" % traceback.format_exc())
return {"group_ids":[]}
return {"group_ids": []}
@bind("physical/search/group_sort")
def group_sort(user_id=-1,sort_type=GroupSortTypes.HOT_RECOMMEND,offset=0,size=10):
@bind("physical/search/pictorial_sort")
def pictorial_sort(user_id=-1, sort_type=GroupSortTypes.HOT_RECOMMEND, offset=0, size=10):
"""
:remark 小组排序,缺少:前1天发评论人数*x
:param user_id:
......@@ -44,34 +45,165 @@ def group_sort(user_id=-1,sort_type=GroupSortTypes.HOT_RECOMMEND,offset=0,size=1
:return:
"""
try:
if not isinstance(user_id,int):
if not isinstance(user_id, int):
user_id = -1
#获取es链接对象
# 获取es链接对象
es_cli_obj = ESPerform.get_cli()
if sort_type==GroupSortTypes.HOT_RECOMMEND:
group_ids_list = GroupUtils.get_hot_group_recommend_result_list(offset,size,es_cli_obj)
if sort_type == GroupSortTypes.HOT_RECOMMEND:
pictorial_ids_list = GroupUtils.get_hot_pictorial_recommend_result_list(offset, size, es_cli_obj)
return {"group_recommend_ids":group_ids_list}
return {"pictorial_recommend_ids": pictorial_ids_list}
elif sort_type==GroupSortTypes.ATTENTION_RECOMMEND:
attention_group_list = GroupUtils.get_user_attention_group_list(user_id,offset=0,size=1,es_cli_obj=es_cli_obj)
if len(attention_group_list)==0:
return {"group_recommend_ids": []}
elif sort_type == GroupSortTypes.ATTENTION_RECOMMEND:
attention_pictorial_list = GroupUtils.get_user_attention_pictorial_list(user_id, offset=0, size=1,
es_cli_obj=es_cli_obj)
if len(attention_pictorial_list) == 0:
return {"pictorial_recommend_ids": []}
else:
attention_group_id_list = [item["group_id"] for item in attention_group_list]
sorted_group_ids_list = GroupUtils.get_group_ids_by_aggs(attention_group_id_list,es_cli_obj)
attention_pictorial_id_list = [item["pictorial_id"] for item in attention_pictorial_list]
sorted_pictorial_ids_list = GroupUtils.get_pictorial_ids_by_aggs(attention_pictorial_id_list,
es_cli_obj)
pictorial_recommend_ids_list = sorted_pictorial_ids_list
# if len(group_recommend_ids_list) < size and len(group_recommend_ids_list)<len(attention_group_list):
sorted_attention_pictorial_list = sorted(attention_pictorial_list,
key=lambda item: item["update_time_val"], reverse=True)
for item in sorted_attention_pictorial_list:
if item["pictorial_id"] not in pictorial_recommend_ids_list:
pictorial_recommend_ids_list.append(item["pictorial_id"])
return {"pictorial_recommend_ids": pictorial_recommend_ids_list[offset:(offset + size)]}
except:
logging.error("catch exception,err_msg:%s" % traceback.format_exc())
return {"pictorial_recommend_ids": []}
@bind("physical/search/pictorial_topic")
def pictorial_topic(topic_id=-1, offset=0, size=10):
"""
:remark 入选画报
:param user_id:
:param sort_type:
:param offset:
:param size:
:return:
"""
try:
if not isinstance(topic_id, int):
user_id = -1
# 获取es链接对象
es_cli_obj = ESPerform.get_cli()
q = {}
# 获取帖子从属的画报
q["query"] = {
"term": {
"id": topic_id
}
}
q["_source"] = {
"include": ["id", "pictorial_id", "tag_list"]
}
result_dict = ESPerform.get_search_results(es_cli_obj, "topic", q, offset, size)
logging.info("get result_dict:%s" % result_dict)
pict_pictorial_ids_list = []
topic_tag_list = []
pictorial_id_list = []
if len(result_dict["hits"]) > 0:
for item in result_dict["hits"]:
pict_pictorial_ids_list = item["_source"]["pictorial_id"]
topic_tag_list = item["_source"]["tag_list"]
q["query"] = {
"bool": {
"must": [{
"terms": {
"id": pict_pictorial_ids_list
}
},
{
"term": {
"is_online": True
}
},
{
"term": {
"is_deleted": False
}
},
]
}
}
q["_source"] = {
"include": ["id", "update_time"]
}
q["sort"] = {
'update_time': {
'order': 'desc'
}
}
result_dict = ESPerform.get_search_results(es_cli_obj, "pictorial", q, offset, size)
if len(result_dict["hits"]) > 0:
for item in result_dict["hits"]:
pictorial_id = item["_source"]["id"]
pictorial_id_list.append(pictorial_id)
logging.info("get pictorial_id_list:%s" % pictorial_id_list)
logging.info("get topic_tag_list:%s" % topic_tag_list)
if len(pictorial_id_list) < 10:
num = 10 - len(pictorial_id_list)
q["query"] = {
"bool": {
"must": [{
"terms": {
"tag_id": topic_tag_list
}}, {
"term": {
"is_online": True
}
},{
"term": {
"is_deleted": False
}
}]
}
}
q["_source"] = {
"include": ["id", "tag_id"]}
q["sort"] = {
'update_time': {
'order': 'desc'
}}
q["from"] = 0
q["size"] = 10
result_dict = ESPerform.get_search_results(es_cli_obj, "pictorial", q, offset, size)
if len(result_dict["hits"]) > 0:
for item in result_dict["hits"]:
id = item["_source"]["id"]
if id not in pictorial_id_list:
pictorial_id_list.append(id) #
logging.info("get result_dict tag:%s" % result_dict)
logging.info("get pictorial_id_list tag:%s" % pictorial_id_list)
pictorial_list = pictorial_id_list if len(pictorial_id_list) < 10 else pictorial_id_list[:10]
group_recommend_ids_list = sorted_group_ids_list
#if len(group_recommend_ids_list) < size and len(group_recommend_ids_list)<len(attention_group_list):
sorted_attention_group_list = sorted(attention_group_list,key=lambda item:item["update_time_val"],reverse=True)
for item in sorted_attention_group_list:
if item["group_id"] not in group_recommend_ids_list:
group_recommend_ids_list.append(item["group_id"])
return {"pictorial_ids_list": pictorial_list}
return {"group_recommend_ids": group_recommend_ids_list[offset:(offset+size)]}
else:
return {"pictorial_ids_list": pictorial_id_list}
except:
logging.error("catch exception,err_msg:%s" % traceback.format_exc())
return {"group_recommend_ids":[]}
return {"pictorial_ids_list": []}
......@@ -13,7 +13,7 @@ from search.utils.common import *
from libs.es import ESPerform
def get_discover_page_topic_ids(user_id,device_id,size,query_type=TopicPageType.FIND_PAGE):
def get_discover_page_topic_ids(user_id, device_id, size, query_type=TopicPageType.FIND_PAGE):
try:
if user_id == -1:
redis_key = "physical:discover_page" + ":user_id:" + str(user_id) + ":device_id:" + device_id
......@@ -21,112 +21,138 @@ def get_discover_page_topic_ids(user_id,device_id,size,query_type=TopicPageType.
redis_key = "physical:discover_page" + ":user_id:" + str(user_id)
redis_field_list = [b'have_read_topic_id']
redis_field_val_list = redis_client.hmget(redis_key,redis_field_list)
redis_field_val_list = redis_client.hmget(redis_key, redis_field_list)
have_read_topic_id_list = json.loads(redis_field_val_list[0]) if redis_field_val_list[0] else []
recommend_topic_ids = []
topic_id_dict = TopicUtils.get_recommend_topic_ids(user_id=user_id, offset=0, size=size,query_type=query_type,filter_topic_id_list=have_read_topic_id_list)
recommend_topic_ids = TopicUtils.get_recommend_topic_ids(user_id=user_id, tag_id=0, offset=0, size=size,single_size=size,
query_type=query_type,
filter_topic_id_list=have_read_topic_id_list)
for topic_id in topic_id_dict:
recommend_topic_ids.append(topic_id)
have_read_topic_id_list.extend(recommend_topic_ids)
redis_dict = {
"have_read_topic_id": json.dumps(have_read_topic_id_list)
}
redis_client.hmset(redis_key,redis_dict)
redis_client.hmset(redis_key, redis_dict)
return recommend_topic_ids
except:
logging.error("catch exception,err_msg:%s" % traceback.format_exc())
return []
def get_home_recommend_topic_ids(user_id,device_id,offset,size,query=None,query_type=TopicPageType.HOME_RECOMMEND):
def get_home_recommend_topic_ids(user_id, device_id, tag_id, offset, size, query=None,
query_type=TopicPageType.HOME_RECOMMEND):
try:
if query is None:
if user_id == -1:
redis_key = "physical:home_recommend" + ":user_id:" + str(user_id) + ":device_id:" + device_id + ":query_type:" + str(query_type)
redis_key = "physical:home_recommend" + ":user_id:" + str(
user_id) + ":device_id:" + device_id + ":query_type:" + str(query_type)
else:
redis_key = "physical:home_recommend" + ":user_id:" + str(user_id) + ":query_type:" + str(query_type)
else:
if user_id == -1:
redis_key = "physical:home_query" + ":user_id:" + str(user_id) + ":device_id:" + device_id + ":query:" + str(query) + ":query_type:" + str(query_type)
redis_key = "physical:home_query" + ":user_id:" + str(
user_id) + ":device_id:" + device_id + ":query:" + str(query) + ":query_type:" + str(query_type)
else:
redis_key = "physical:home_query" + ":user_id:" + str(user_id) + ":query:" + str(query) + ":query_type:" + str(query_type)
redis_key = "physical:home_query" + ":user_id:" + str(user_id) + ":query:" + str(
query) + ":query_type:" + str(query_type)
redis_field_list = [b'have_read_topic_list']
redis_field_val_list = redis_client.hmget(redis_key, redis_field_list)
tag_recommend_redis_key = "physical:linucb:tag_recommend:device_id:" + str(device_id)
redis_field_list = [b'last_offset_num', b'unread_topic_id']
for page_id in range(0,offset,size):
redis_field_list.append(str(page_id))
redis_field_val_list = redis_client.hmget(redis_key,redis_field_list)
recommend_tag_list = []
tag_recommend_val = redis_client.get(tag_recommend_redis_key)
if tag_recommend_val:
recommend_tag_list = json.loads(str(tag_recommend_val, encoding="utf-8"))
last_offset_num = int(redis_field_val_list[0]) if redis_field_val_list[0] else -1
recommend_topic_ids = []
have_read_topic_id_list = list()
topic_id_dict = TopicUtils.get_recommend_topic_ids(user_id, offset, size*size,query,query_type=query_type)
if redis_field_val_list[0] and query is None:
have_read_topic_id_list = list(json.loads(redis_field_val_list[0]))
user_similar_score_redis_key = "physical:user_similar_score:user_id:" + str(user_id)
redis_user_similar_score_redis_val = redis_client.get(user_similar_score_redis_key)
user_similar_score_redis_list = json.loads(
redis_user_similar_score_redis_val) if redis_user_similar_score_redis_val else []
topic_id_list = TopicUtils.get_recommend_topic_ids(user_id=user_id, tag_id=tag_id, offset=offset, size=size,
single_size=size,query=query, query_type=query_type,
filter_topic_id_list=have_read_topic_id_list,
recommend_tag_list=recommend_tag_list,
user_similar_score_list=user_similar_score_redis_list)
have_read_group_id_set = set()
have_read_user_id_set = set()
unread_topic_id_dict = dict()
have_read_topic_id_set = set()
if redis_field_val_list[1] and offset>0:
if (user_id>0 and offset==last_offset_num) or user_id==-1:
ori_unread_topic_id_dict = json.loads(redis_field_val_list[1])
if len(ori_unread_topic_id_dict) > 0:
topic_id_dict.update(ori_unread_topic_id_dict)
for have_read_item in redis_field_val_list[2:]:
if have_read_item:
have_read_topic_id_set=have_read_topic_id_set.union(json.loads(have_read_item))
# 当前页小组数量
cur_page_group_num = 0
# 当前页用户数量
cur_page_user_num = 0
for topic_id in topic_id_dict:
if topic_id_dict[topic_id] in have_read_group_id_set:
unread_topic_id_dict[topic_id] = topic_id_dict[topic_id]
else:
if topic_id not in have_read_topic_id_set:
if isinstance(topic_id_dict[topic_id],int) and topic_id_dict[topic_id]>0 and cur_page_group_num<(size*0.9):
have_read_group_id_set.add(topic_id_dict[topic_id])
have_read_topic_id_set.add(topic_id)
cur_page_group_num += 1
recommend_topic_ids.append(topic_id)
elif topic_id_dict[topic_id] and cur_page_user_num<(size*0.1):
cur_page_user_num += 1
recommend_topic_ids.append(topic_id)
else:
unread_topic_id_dict[topic_id] = topic_id_dict[topic_id]
if len(recommend_topic_ids) >= size:
break
offi_unread_topic_id_dict = dict()
if len(recommend_topic_ids) < size and len(unread_topic_id_dict)>0:
for unread_topic_id in unread_topic_id_dict:
if len(recommend_topic_ids)<size:
recommend_topic_ids.append(unread_topic_id)
else:
offi_unread_topic_id_dict[unread_topic_id] = unread_topic_id_dict[unread_topic_id]
# # 当前页小组数量
# cur_page_group_num = 0
# # 当前页用户数量
# cur_page_user_num = 0
#
# for topic_id in topic_id_dict:
# if topic_id_dict[topic_id][0] in have_read_group_id_set or topic_id_dict[topic_id][
# 1] in have_read_user_id_set:
# unread_topic_id_dict[topic_id] = topic_id_dict[topic_id]
# else:
# if isinstance(topic_id_dict[topic_id][0], int) and topic_id_dict[topic_id][
# 0] > 0 and cur_page_group_num < (size * 0.9):
# have_read_group_id_set.add(topic_id_dict[topic_id][0])
# have_read_user_id_set.add(topic_id_dict[topic_id][1])
# have_read_topic_id_list.append(topic_id)
# cur_page_group_num += 1
# recommend_topic_ids.append(topic_id)
# elif topic_id_dict[topic_id] and cur_page_user_num < (size * 0.1):
# have_read_user_id_set.add(topic_id_dict[topic_id][1])
# cur_page_user_num += 1
# recommend_topic_ids.append(topic_id)
# have_read_topic_id_list.append(topic_id)
# else:
# unread_topic_id_dict[topic_id] = topic_id_dict[topic_id]
#
# if len(recommend_topic_ids) >= size:
# break
# if len(recommend_topic_ids) < size and len(unread_topic_id_dict) > 0:
# for unread_topic_id in unread_topic_id_dict:
# if len(recommend_topic_ids) < size:
# recommend_topic_ids.append(unread_topic_id)
# have_read_topic_id_list.append(unread_topic_id)
# else:
# break
have_read_topic_id_list.extend(topic_id_list)
if len(have_read_topic_id_list) > 5000:
cut_len = len(have_read_topic_id_list)-5000
have_read_topic_id_list = have_read_topic_id_list[cut_len:]
redis_dict = {
"unread_topic_id":json.dumps(offi_unread_topic_id_dict),
"last_offset_num":offset+size,
offset: json.dumps(recommend_topic_ids)
"have_read_topic_list": json.dumps(have_read_topic_id_list),
}
redis_client.hmset(redis_key,redis_dict)
redis_client.hmset(redis_key, redis_dict)
# 每个session key保存15分钟
redis_client.expire(redis_key,15*60)
redis_client.expire(redis_key, 60 * 60 * 24 * 3)
return recommend_topic_ids
return topic_id_list
except:
logging.error("catch exception,err_msg:%s" % traceback.format_exc())
return []
@bind("physical/search/query_tag_id_by_topic")
def query_tag_id_by_topic(offset=0, size=10, topic_id_list=None, user_id=-1):
    """
    :remark: RPC endpoint that forwards its arguments to TopicUtils.get_topic_tag_info
    :param offset: passed through unchanged
    :param size: passed through unchanged
    :param topic_id_list: topic ids to look up; an omitted value is treated as an empty list
    :param user_id: passed through unchanged
    :return: whatever TopicUtils.get_topic_tag_info returns, or {} if it raises
    """
    # A list literal in the signature would be one object shared by every call;
    # build a fresh list per call instead.
    if topic_id_list is None:
        topic_id_list = []
    try:
        return TopicUtils.get_topic_tag_info(offset, size, topic_id_list, user_id)
    except Exception:
        # Only ordinary errors are swallowed; SystemExit / KeyboardInterrupt
        # still propagate so the worker can be stopped.
        logging.error("catch exception,err_msg:%s" % traceback.format_exc())
        return {}
@bind("physical/search/home_recommend")
def home_recommend(device_id="",user_id=-1,offset=0,size=10,query_type=TopicPageType.HOME_RECOMMEND):
def home_recommend(device_id="", user_id=-1, offset=0, size=10, query_type=TopicPageType.HOME_RECOMMEND):
"""
:remark:首页推荐,目前只推荐日记
:param session_id:
......@@ -137,20 +163,43 @@ def home_recommend(device_id="",user_id=-1,offset=0,size=10,query_type=TopicPage
"""
try:
if not user_id:
user_id=-1
if not isinstance(device_id,str):
user_id = -1
if not isinstance(device_id, str):
device_id = ""
recommend_topic_ids = get_home_recommend_topic_ids(user_id,device_id,offset,size,query_type=query_type)
recommend_topic_ids = list()
es_node_load_high_flag = False
try:
es_node_load_high_flag = ESPerform.if_es_node_load_high(ESPerform.get_cli())
except:
logging.error("catch exception,err_msg:%s" % traceback.format_exc())
es_node_load_high_flag = True
if es_node_load_high_flag:
temp_downgrading_key = "physical:home_recommend:user_id:241407656:query_type:1"
redis_field_list = [b'have_read_topic_list']
redis_field_val_list = redis_client.hmget(temp_downgrading_key, redis_field_list)
if redis_field_val_list[0]:
have_read_topic_id_list = list(json.loads(redis_field_val_list[0]))
if len(have_read_topic_id_list) > offset:
recommend_topic_ids = have_read_topic_id_list[offset:offset+size]
else:
recommend_topic_ids = have_read_topic_id_list[0:size]
else:
recommend_topic_ids = get_home_recommend_topic_ids(user_id, device_id, tag_id=0, offset=0, size=size,
query_type=query_type)
return {"recommend_topic_ids":recommend_topic_ids}
return {"recommend_topic_ids": recommend_topic_ids}
except:
logging.error("catch exception,err_msg:%s" % traceback.format_exc())
return {"recommend_topic_ids": []}
@bind("physical/search/discover_page")
def discover_page(device_id="",user_id=-1,size=10):
def discover_page(device_id="", user_id=-1, size=10):
"""
:remark:首页推荐,目前只推荐日记
:param session_id:
......@@ -161,20 +210,20 @@ def discover_page(device_id="",user_id=-1,size=10):
"""
try:
if not user_id:
user_id=-1
if not isinstance(device_id,str):
user_id = -1
if not isinstance(device_id, str):
device_id = ""
recommend_topic_ids = get_discover_page_topic_ids(user_id,device_id,size,query_type=TopicPageType.FIND_PAGE)
recommend_topic_ids = get_discover_page_topic_ids(user_id, device_id, size, query_type=TopicPageType.FIND_PAGE)
return {"recommend_topic_ids":recommend_topic_ids}
return {"recommend_topic_ids": recommend_topic_ids}
except:
logging.error("catch exception,err_msg:%s" % traceback.format_exc())
return {"recommend_topic_ids": []}
@bind("physical/search/home_query")
def home_query(device_id="",user_id=-1,query="",offset=0,size=10):
def home_query(device_id="", tag_id=-1, user_id=-1, query="", offset=0, size=10):
"""
:remark:首页搜索,目前只推荐日记
:param session_id:
......@@ -186,19 +235,20 @@ def home_query(device_id="",user_id=-1,query="",offset=0,size=10):
"""
try:
if not user_id:
user_id=-1
if not isinstance(device_id,str):
user_id = -1
if not isinstance(device_id, str):
device_id = ""
recommend_topic_ids = get_home_recommend_topic_ids(user_id,device_id,offset,size,query)
return {"recommend_topic_ids":recommend_topic_ids}
recommend_topic_ids = get_home_recommend_topic_ids(user_id, device_id, tag_id, offset, size, query)
return {"recommend_topic_ids": recommend_topic_ids}
except:
logging.error("catch exception,err_msg:%s" % traceback.format_exc())
return {"recommend_topic_ids": []}
@bind("physical/search/topic_detail_page_recommend")
def topic_detail_page_recommend(user_id=-1,topic_id=-1,topic_group_id=-1,topic_user_id=-1,filter_topic_user_id=False,offset=0,size=10):
def topic_detail_page_recommend(device_id="", user_id=-1, topic_id=-1, topic_pictorial_id=-1, topic_user_id=-1,
filter_topic_user_id=False, offset=0, size=10):
"""
:remark:帖子详情页推荐策略,缺少第一个卡片策略
:param user_id:
......@@ -208,20 +258,33 @@ def topic_detail_page_recommend(user_id=-1,topic_id=-1,topic_group_id=-1,topic_u
:return:
"""
try:
if not isinstance(user_id,int):
if not isinstance(user_id, int):
user_id = -1
#获取es链接对象
redis_key = "physical:topic_detail_page_recommend" + ":user_id:" + str(user_id) + ":device_id:" + str(device_id)
have_read_topic_redis_data = redis_client.get(redis_key)
have_read_topic_list = json.loads(have_read_topic_redis_data) if have_read_topic_redis_data else []
# 获取es链接对象
es_cli_obj = ESPerform.get_cli()
# 获取帖子标签列表
topic_tag_list = TopicUtils.get_topic_tag_id_list(topic_id,es_cli_obj)
topic_tag_list = TopicUtils.get_topic_tag_id_list(topic_id, es_cli_obj)
result_list = TopicUtils.get_topic_detail_recommend_list(user_id,topic_id,topic_tag_list,topic_group_id,topic_user_id,filter_topic_user_id,offset,size,es_cli_obj)
result_list = TopicUtils.get_topic_detail_recommend_list(user_id, topic_id, topic_tag_list, topic_pictorial_id,
topic_user_id, filter_topic_user_id,
have_read_topic_list, offset, size, es_cli_obj)
recommend_topic_ids_list = list()
if len(result_list)>0:
if len(result_list) > 0:
recommend_topic_ids_list = [item["_source"]["id"] for item in result_list]
have_read_topic_list.extend(recommend_topic_ids_list)
have_read_topic_len = len(have_read_topic_list)
if have_read_topic_len > 5000:
have_read_topic_list = have_read_topic_list[(have_read_topic_len - 5000):]
redis_client.set(redis_key, json.dumps(have_read_topic_list))
return {"recommend_topic_ids": recommend_topic_ids_list}
except:
logging.error("catch exception,err_msg:%s" % traceback.format_exc())
......@@ -229,7 +292,7 @@ def topic_detail_page_recommend(user_id=-1,topic_id=-1,topic_group_id=-1,topic_u
@bind("physical/search/query_topic_by_tag_aggregation")
def query_topic_by_tag_aggregation(user_id,tag_id, offset, size):
def query_topic_by_tag_aggregation(user_id, tag_id, offset, size):
"""
:remark 按标签聚合召回帖子
:param tag_id:
......@@ -243,9 +306,9 @@ def query_topic_by_tag_aggregation(user_id,tag_id, offset, size):
if not tag_id:
tag_id = -1
result_list = TopicUtils.get_tag_aggregation_topic_id_list(user_id,tag_id,offset,size)
result_list = TopicUtils.get_tag_aggregation_topic_id_list(user_id, tag_id, offset, size)
recommend_topic_ids_list = list()
if len(result_list)>0:
if len(result_list) > 0:
recommend_topic_ids_list = [item["_source"]["id"] for item in result_list]
return {"recommend_topic_id": recommend_topic_ids_list}
......@@ -261,11 +324,15 @@ def topic_search(filters, nfilters=None, sorts_by=None, offset=0, size=10):
try:
result_list = TopicUtils.list_topic_ids(filters=filters, nfilters=nfilters,
sorts_by=sorts_by, offset=offset, size=size)
topic_ids = [item["_source"]["id"] for item in result_list]
return {"topic_ids": topic_ids}
logging.info("get result_list:%s"%result_list)
topic_ids = [item["_source"]["id"] for item in result_list["hits"]]
return {
"topic_ids": topic_ids,
"total_count": result_list["total_count"]
}
except:
logging.error("catch exception, err_msg:%s" % traceback.format_exc())
return {"topic_ids": []}
return {"topic_ids": [], "total_count": 0}
@bind("physical/search/query_topic_by_user_similarity")
......@@ -279,10 +346,10 @@ def query_topic_by_user_similarity(topic_similarity_score_dict, offset=0, size=1
"""
try:
must_topic_id_list = list(topic_similarity_score_dict.keys())
topic_id_dict = TopicUtils.get_recommend_topic_ids(user_id=-1, offset=offset, size=size,must_topic_id_list=must_topic_id_list)
topic_id_list = TopicUtils.get_recommend_topic_ids(tag_id=0, user_id=-1, offset=offset, size=size,single_size=size,
must_topic_id_list=must_topic_id_list)
recommend_topic_ids = list(topic_id_dict.keys())
return {"recommend_topic_ids":recommend_topic_ids}
return {"recommend_topic_ids": topic_id_list}
except:
logging.error("catch exception,err_msg:%s" % traceback.format_exc())
return {"recommend_topic_id": []}
......@@ -24,8 +24,10 @@ def recommend_user(self_user_id,interesting_user_id,offset=0,size=10):
:return:
"""
try:
if not isinstance(self_user_id,int):
if not isinstance(self_user_id, int):
self_user_id = -1
if not isinstance(interesting_user_id, int):
interesting_user_id = -1
#获取es链接对象
es_cli_obj = ESPerform.get_cli()
......
......@@ -19,6 +19,15 @@ from libs.cache import redis_client
from trans2es.models.face_user_contrast_similar import FaceUserContrastSimilar
import json
from search.utils.topic import TopicUtils
from trans2es.models.pick_topic import PickTopic
from trans2es.models.tag import TopicTag,Tag
from trans2es.models.user_extra import UserExtra
from trans2es.models.group import Group
from trans2es.models.topic import Topic,ActionSumAboutTopic
from search.utils.common import *
from linucb.views.collect_data import CollectData
from injection.data_sync.tasks import sync_user_similar_score
class Job(object):
__es = None
......@@ -40,7 +49,6 @@ class Job(object):
def __call__(self):
type_info = get_type_info_map()[self._type_name]
assert isinstance(type_info, TypeInfo)
result = type_info.insert_table_chunk(
sub_index_name=self._sub_index_name,
table_chunk=self._chunk,
......@@ -97,7 +105,9 @@ class Command(BaseCommand):
make_option('-s', '--pks', dest='pks', help='specify sync pks, comma separated', metavar='PKS', default=''),
make_option('--streaming-slicing', dest='streaming_slicing', action='store_true', default=True),
make_option('--no-streaming-slicing', dest='streaming_slicing', action='store_false', default=True),
make_option('-S', '--sync_type',dest='sync_type', help='sync data to es',metavar='TYPE',default='')
make_option('-S', '--sync_type',dest='sync_type', help='sync data to es',metavar='TYPE',default=''),
make_option('-T', '--test_score', dest='test_score', help='test_score', metavar='TYPE', default='')
)
def __sync_data_by_type(self, type_name):
......@@ -116,6 +126,68 @@ class Command(BaseCommand):
except:
logging.error("catch exception,err_msg:%s" % traceback.format_exc())
def generate_topic_score_detail(self):
# Debug helper: for every topic id returned by TopicUtils.get_recommend_topic_ids
# (called with hard-coded arguments and test_score=True), recompute an offline score
# from the database and log each score component next to topic_id_dict[topic_id][2].
# Returns nothing; any exception is logged and swallowed by the single try/except.
# NOTE(review): leading indentation is not visible in this view, so the nesting of the
# `if` statements below (e.g. whether `if uv_num > 0:` is nested under
# `if exposure_count > 0:`) could not be confirmed -- check against the real file.
try:
topic_id_dict = TopicUtils.get_recommend_topic_ids(241432787,0, 0, 500,query_type=TopicPageType.HOME_RECOMMEND,test_score=True)
for topic_id in topic_id_dict:
offline_score = 0.0
user_is_shadow = False
# NOTE(review): this is an unevaluated QuerySet; every `topic_sql_item[0]` below is a
# separate indexed lookup (Django does not cache index access on an unevaluated
# queryset), and it raises IndexError when the topic row is missing, which the
# enclosing except catches and therefore ends the whole run.
topic_sql_item = Topic.objects.filter(id=topic_id)
user_is_recommend=0.0
# Is the topic's author an officially recommended user?
user_query_results = UserExtra.objects.filter(user_id=topic_sql_item[0].user_id)
if user_query_results.count() > 0:
if user_query_results[0].is_recommend:
offline_score += 2.0
user_is_recommend = 2.0
elif user_query_results[0].is_shadow:
user_is_shadow = True
# Stays 0.0: the group-recommendation bonus below is commented out.
group_is_recommend=0.0
# Is the topic's group officially recommended? (disabled)
# if topic_sql_item[0].group and topic_sql_item[0].group.is_recommend:
# offline_score += 4.0
# group_is_recommend = 4.0
topic_level_score = 0.0
# Topic content level: '5' adds 6.0, '4' adds 5.0, '3' adds 2.0 (compared as strings).
if topic_sql_item[0].content_level == '5':
offline_score += 6.0
topic_level_score = 6.0
elif topic_sql_item[0].content_level == '4':
offline_score += 5.0
topic_level_score = 5.0
elif topic_sql_item[0].content_level == '3':
offline_score += 2.0
topic_level_score = 2.0
# Row counts in ActionSumAboutTopic for data_type 1, 2 and 3 respectively.
exposure_count = ActionSumAboutTopic.objects.filter(topic_id=topic_id, data_type=1).count()
click_count = ActionSumAboutTopic.objects.filter(topic_id=topic_id, data_type=2).count()
uv_num = ActionSumAboutTopic.objects.filter(topic_id=topic_id, data_type=3).count()
exposure_score = 0.0
uv_score = 0.0
# Click/exposure ratio; the guard avoids division by zero.
if exposure_count > 0:
offline_score += click_count / exposure_count
exposure_score = click_count / exposure_count
# (votes + replies) per uv row; the guard avoids division by zero.
if uv_num > 0:
offline_score += (topic_sql_item[0].vote_num / uv_num + topic_sql_item[0].reply_num / uv_num)
uv_score = (topic_sql_item[0].vote_num / uv_num + topic_sql_item[0].reply_num / uv_num)
# The bare string below is a no-op statement used as a note. It reads:
# "1: should shadow (sock-puppet) accounts have their total score down-weighted?"
"""
1:马甲账号是否对总分降权?
"""
# Authors flagged is_shadow get their total halved.
if user_is_shadow:
offline_score = offline_score * 0.5
# One log line per topic with the stored score and every recomputed component.
logging.info("test_score######topic_id:%d,score:%f,offline_score:%f,user_is_recommend:%f,group_is_recommend:%f,topic_level_score:%f,exposure_score:%f,uv_score:%f"
% (topic_id,topic_id_dict[topic_id][2],offline_score,user_is_recommend,group_is_recommend,topic_level_score,exposure_score,uv_score))
except:
logging.error("catch exception,err_msg:%s" % traceback.format_exc())
def handle(self, *args, **options):
try:
type_name_list = get_type_info_map().keys()
......@@ -129,6 +201,14 @@ class Command(BaseCommand):
if len(options["sync_type"]) and options["sync_type"]=="sync_data_to_es":
SyncDataToRedis.sync_face_similar_data_to_redis()
if len(options["test_score"]):
self.generate_topic_score_detail()
if len(options["sync_type"]) and options["sync_type"]=="linucb":
collect_obj = CollectData()
collect_obj.consume_data_from_kafka()
if len(options["sync_type"]) and options["sync_type"]=="similar":
sync_user_similar_score()
except:
logging.error("catch exception,err_msg:%s" % traceback.format_exc())
......@@ -41,11 +41,12 @@ class Command(BaseCommand):
official_index_name = ESPerform.get_official_index_name(type_name)
index_exists = es_cli.indices.exists(official_index_name)
if not index_exists:
logging.info("begin create [%s] index and mapping!" % type_name)
logging.info("begin create [%s] index!" % type_name)
ESPerform.create_index(es_cli, type_name)
ESPerform.put_index_mapping(es_cli, type_name)
else:
logging.warning("index:[%s] has already existing!" % type_name)
logging.info("begin create [%s] mapping!" % type_name)
ESPerform.put_index_mapping(es_cli, type_name, force_sync=True)
if len(options["indices_template"]):
template_file_name = options["indices_template"]
......
{
"index_patterns": ["*"],
"settings":{
"number_of_shards": 9,
"number_of_replicas": 2,
"number_of_shards": 5,
"number_of_replicas": 3,
"index":{
"analysis":{
"filter":{
......
{
"dynamic":"strict",
"properties": {
"id":{"type":"long"},
"is_online":{"type":"boolean"},//上线
"is_deleted":{"type":"boolean"},
"is_recommend":{"type":"boolean"},
"name":{"type":"text","analyzer":"gm_default_index","search_analyzer":"gm_default_index"},
"description":{"type":"text","analyzer":"gm_default_index","search_analyzer":"gm_default_index"},
"topic_num":{"type":"long"},
"creator_id":{"type":"long"},
"icon":{"type":"text"},
"high_quality_topic_num":{"type":"long"},//前一天该小组4&5星帖子数量
"create_time":{"type":"date", "format":"date_time_no_millis"},
"update_time":{"type":"date", "format":"date_time_no_millis"},
"tag_id":{"type":"long"},
"tag_name":{"type":"text","analyzer":"gm_default_index","search_analyzer":"gm_default_index"},
"topic_id_list":{"type":"long"}
}
}
\ No newline at end of file
......@@ -19,6 +19,9 @@
},
"name":{"type":"text","analyzer":"gm_default_index","search_analyzer":"gm_default_index"},
"tag_type":{"type":"long"},
"collection":{"type":"long"},
"is_ai":{"type":"long"},
"is_own":{"type":"long"},
"is_online":{"type":"keyword"},//上线
"is_deleted":{"type":"keyword"},
"near_new_topic_num":{"type":"long","store": true}
......
{
"dynamic":"strict",
"properties": {
"id":{"type":"long"},
"is_online":{"type":"boolean"},//上线
"is_deleted":{"type":"boolean"},
"vote_num":{"type":"long"},
"reply_num":{"type":"long"},
"name":{"type":"text","analyzer":"gm_default_index","search_analyzer":"gm_default_index"},
"description":{"type":"text","analyzer":"gm_default_index","search_analyzer":"gm_default_index"},
"content":{"type":"text","analyzer":"gm_default_index","search_analyzer":"gm_default_index"},
"content_level":{"type":"text"},
"user_id":{"type":"long"},
"group_id":{"type":"long"}, //所在组ID
"tag_list":{"type":"long"},//标签属性
"edit_tag_list":{"type":"long"},//编辑标签
"tag_name_list":{"type":"text","analyzer":"gm_default_index","search_analyzer":"gm_default_index"},
"share_num":{"type":"long"},
"pick_id_list":{"type":"long"},
"offline_score":{"type":"double"},//离线算分
"manual_score":{"type":"double"},//人工赋分
"has_image":{"type":"boolean"},//是否有图
"has_video":{"type":"boolean"},//是否是视频
"create_time":{"type":"date", "format":"date_time_no_millis"},
"update_time":{"type":"date", "format":"date_time_no_millis"},
"create_time_val":{"type":"long"},
"update_time_val":{"type":"long"},
"language_type":{"type":"long"},
"is_shadow": {"type": "boolean"},
"is_recommend": {"type": "boolean"},
"is_complaint": {"type": "boolean"}, // 是否被举报
"virtual_content_level":{"type": "text"},
"like_num_crawl": {"type": "long"}, // 爬取点赞数
"comment_num_crawl": {"type": "long"}, // 爬取评论数
"is_crawl": {"type": "boolean"},
"platform": {"type": "long"},
"platform_id": {"type": "long"},
"drop_score":{"type": "double"}, // 人工降分
"sort_score":{"type": "double"}, // 排序分
"pictorial_id":{"type": "long"}, //所在组ID
"pictorial_name":{ // 所在组名称
"type": "text",
"analyzer": "gm_default_index",
"search_analyzer": "gm_default_index"
}
}
}
......@@ -13,15 +13,41 @@
"user_id":{"type":"long"},
"group_id":{"type":"long"}, //所在组ID
"tag_list":{"type":"long"},//标签属性
"edit_tag_list":{"type":"long"},//编辑标签
"tag_name_list":{"type":"text","analyzer":"gm_default_index","search_analyzer":"gm_default_index"},
"share_num":{"type":"long"},
"pick_id_list":{"type":"long"},
"offline_score":{"type":"double"},//离线算分
"manual_score":{"type":"double"},//人工赋分
"has_image":{"type":"boolean"},//是否有图
"has_video":{"type":"boolean"},//是否是视频
"create_time":{"type":"date", "format":"date_time_no_millis"},
"update_time":{"type":"date", "format":"date_time_no_millis"},
"create_time_val":{"type":"long"},
"update_time_val":{"type":"long"}
"update_time_val":{"type":"long"},
"language_type":{"type":"long"},
"is_shadow": {"type": "boolean"},
"is_recommend": {"type": "boolean"},
"is_complaint": {"type": "boolean"}, // 是否被举报
"virtual_content_level":{"type": "text"},
"like_num_crawl": {"type": "long"}, // 爬取点赞数
"comment_num_crawl": {"type": "long"}, // 爬取评论数
"is_crawl": {"type": "boolean"},
"platform": {"type": "long"},
"platform_id": {"type": "long"},
"drop_score":{"type": "double"}, // 人工降分
"sort_score":{"type": "double"}, // 排序分
"pictorial_id":{"type": "long"}, //所在组ID
"pictorial_name":{ // 所在组名称
"type": "text",
"analyzer": "gm_default_index",
"search_analyzer": "gm_default_index"
}
}
}
\ No newline at end of file
}
......@@ -28,17 +28,31 @@
"country_id":{"type":"text"}
}
},
"same_group_user_id_list":{//同组用户列表
// "same_group_user_id_list":{//同组用户列表
// "type":"nested",
// "properties":{
// "user_id":{"type":"long"},
// "country_id":{"type":"text"}
// }
// },
// "attention_group_id_list":{//关注小组列表
// "type":"nested",
// "properties":{
// "group_id":{"type":"long"},
// "update_time_val":{"type":"long"}
// }
// },
"same_pictorial_user_id_list":{//同画报用户列表
"type":"nested",
"properties":{
"user_id":{"type":"long"},
"country_id":{"type":"text"}
}
},
"attention_group_id_list":{//关注小组列表
"attention_pictorial_id_list":{//关注画报列表
"type":"nested",
"properties":{
"group_id":{"type":"long"},
"pictorial_id":{"type":"long"},
"update_time_val":{"type":"long"}
}
},
......
......@@ -12,4 +12,5 @@ from .tag import AccountUserTag
from .user import User
from .group import Group
from .topic import Topic
from .pictorial import PictorialFollow
from .pictorial import Pictorial
......@@ -30,7 +30,7 @@ class Celebrity(models.Model):
def get_pick_id_list(self):
try:
pick_id_list = list()
query_results = PickCelebrity.objects.filter(celebrity_id=self.id,is_deleted=False)
query_results = PickCelebrity.objects.using(settings.SLAVE_DB_NAME).filter(celebrity_id=self.id,is_deleted=False)
for data_item in query_results:
pick_id_list.append(data_item.pick_id)
......
......@@ -27,3 +27,19 @@ class FaceUserContrastSimilar(models.Model):
create_time = models.DateTimeField(verbose_name=u'创建时间', default=datetime.datetime.fromtimestamp(0))
update_time = models.DateTimeField(verbose_name=u'更新时间', default=datetime.datetime.fromtimestamp(0))
contrast_user_id = models.IntegerField(verbose_name=u'对照者用户ID')
class UserSimilarScore(models.Model):
    """Similarity score between a user and a contrast user (table ``user_similar_score``)."""

    class Meta:
        verbose_name = u"首页推荐用"
        db_table = "user_similar_score"

    # Primary key is declared explicitly as a plain integer column.
    id = models.IntegerField(
        verbose_name=u"主键ID",
        primary_key=True,
    )
    # Soft-delete flag.
    is_deleted = models.BooleanField(
        verbose_name=u"是否删除",
    )
    user_id = models.IntegerField(
        verbose_name=u"用户ID",
    )
    # The user being compared against; indexed.
    contrast_user_id = models.BigIntegerField(
        verbose_name="参数对比的用户id",
        db_index=True,
    )
    # Similarity value, 0 when not set.
    score = models.FloatField(
        verbose_name='相似度',
        default=0,
    )
    # Both timestamps default to the Unix epoch, computed once when the class is defined.
    create_time = models.DateTimeField(
        verbose_name=u'创建时间',
        default=datetime.datetime.fromtimestamp(0),
    )
    update_time = models.DateTimeField(
        verbose_name=u'更新时间',
        default=datetime.datetime.fromtimestamp(0),
    )
......@@ -27,15 +27,15 @@ class Group(models.Model):
create_time = models.DateTimeField(verbose_name=u'创建时间',default=datetime.datetime.fromtimestamp(0))
update_time = models.DateTimeField(verbose_name=u'更新时间', default=datetime.datetime.fromtimestamp(0))
#获取前一天4,5星发帖数
def get_high_quality_topic_num(self):
yesterday = datetime.datetime.now()-datetime.timedelta(days=1)
yesterday_begin_time = "%s-%s-%s 00:00:00" % (yesterday.year, yesterday.month, yesterday.day)
yesterday_end_time = "%s-%s-%s 23:59:59" % (yesterday.year, yesterday.month, yesterday.day)
topic_num = self.group_topics.filter(content_level__in=("4","5"),create_time__gte=yesterday_begin_time,create_time__lte=yesterday_end_time).count()
return topic_num
# #获取前一天4,5星发帖数
# def get_high_quality_topic_num(self):
# yesterday = datetime.datetime.now()-datetime.timedelta(days=1)
# yesterday_begin_time = "%s-%s-%s 00:00:00" % (yesterday.year, yesterday.month, yesterday.day)
# yesterday_end_time = "%s-%s-%s 23:59:59" % (yesterday.year, yesterday.month, yesterday.day)
#
# topic_num = self.group_topics.filter(content_level__in=("4","5"),create_time__gte=yesterday_begin_time,create_time__lte=yesterday_end_time).count()
#
# return topic_num
def detail(self):
result = {
......
from django.db import models
import datetime
import logging
import traceback
from .tag import Tag
from .topic import Topic
class PictorialFollow(models.Model):
    """User-follows-pictorial relation (table ``community_pictorial_follow``)."""

    class Meta:
        verbose_name = u"画报用户关系"
        app_label = "community"
        db_table = "community_pictorial_follow"

    # Explicit integer primary key.
    id = models.IntegerField(
        verbose_name=u'关注ID',
        primary_key=True,
    )
    # Both timestamps default to the Unix epoch, computed once when the class is defined.
    create_time = models.DateTimeField(
        verbose_name=u'创建时间',
        default=datetime.datetime.fromtimestamp(0),
    )
    update_time = models.DateTimeField(
        verbose_name=u'更新时间',
        default=datetime.datetime.fromtimestamp(0),
    )
    # Online / soft-delete flags.
    is_online = models.BooleanField(
        verbose_name=u'是否上线',
    )
    is_deleted = models.BooleanField(
        verbose_name=u'是否删除',
    )
    # The two sides of the relation, stored as plain ids rather than foreign keys.
    pictorial_id = models.BigIntegerField(
        verbose_name=u'画报ID',
    )
    user_id = models.BigIntegerField(
        verbose_name=u'用户ID',
    )
class PictorialTopics(models.Model):
    """Pictorial-to-topic relation (table ``community_pictorial_topic``)."""

    class Meta:
        verbose_name = u'画报帖子关系'
        app_label = 'community'
        db_table = 'community_pictorial_topic'

    # Explicit integer primary key.
    id = models.IntegerField(verbose_name=u'日记ID', primary_key=True)
    # The two sides of the relation, stored as plain ids rather than foreign keys.
    pictorial_id = models.BigIntegerField(verbose_name=u'画报ID')
    topic_id = models.BigIntegerField(verbose_name=u'帖子ID')
    # `is_online` used to be assigned twice in this class body. Python keeps only
    # the last binding, so the earlier declaration (the one with default=True)
    # never reached Django; the effective declaration is kept and the dead one removed.
    is_online = models.BooleanField(verbose_name=u'是否上线')
    is_deleted = models.BooleanField(verbose_name=u'是否删除')
class Pictorial(models.Model):
"""Pictorial model (table community_pictorial) with helpers that collect its topics and tags."""
class Meta:
verbose_name = u"画报"
app_label = "community"
db_table = "community_pictorial"
# Explicit integer primary key.
id = models.IntegerField(verbose_name=u'关注ID', primary_key=True)
# Both timestamps default to the Unix epoch, computed once when the class is defined.
create_time = models.DateTimeField(verbose_name=u'创建时间', default=datetime.datetime.fromtimestamp(0))
update_time = models.DateTimeField(verbose_name=u'更新时间', default=datetime.datetime.fromtimestamp(0))
is_online = models.BooleanField(verbose_name=u'是否上线')
is_deleted = models.BooleanField(verbose_name=u'是否删除')
is_recommend = models.BooleanField(verbose_name=u'推荐')
name = models.CharField(verbose_name=u'画报名称', max_length=100)
description = models.CharField(verbose_name=u'画报描述', max_length=200)
creator_id = models.BigIntegerField(verbose_name=u'画报用户ID')
# NOTE(review): this verbose_name is the same label as the `name` field's -- looks copy-pasted.
icon = models.CharField(verbose_name=u'画报名称', max_length=255)
topic_num = models.IntegerField(verbose_name=u'次数')
# Returns the list of topic ids linked to this pictorial through PictorialTopics
# (no is_online / is_deleted filter is applied), or False if anything raises.
def get_topic_id(self):
try:
topic_id =[]
topic_id_list = PictorialTopics.objects.filter(pictorial_id=self.id).values_list("topic_id", flat=True)
for i in topic_id_list:
topic_id.append(i)
return topic_id
except:
logging.error("catch exception,err_msg:%s" % traceback.format_exc())
return False
# Counts this pictorial's topics with content_level "4" or "5" that were created yesterday.
# Returns the count, or False if anything raises.
def get_high_quality_topic_num(self):
try:
# Yesterday is derived from the naive local clock; the bounds are plain strings
# whose month and day are not zero-padded.
yesterday = datetime.datetime.now() - datetime.timedelta(days=1)
yesterday_begin_time = "%s-%s-%s 00:00:00" % (yesterday.year, yesterday.month, yesterday.day)
yesterday_end_time = "%s-%s-%s 23:59:59" % (yesterday.year, yesterday.month, yesterday.day)
topic_id_list = PictorialTopics.objects.filter(pictorial_id=self.id).values_list("topic_id", flat=True)
topic_num = Topic.objects.filter(content_level__in=["4", "5"], create_time__gte=yesterday_begin_time,
create_time__lte=yesterday_end_time, id__in=topic_id_list).count()
return topic_num
except:
logging.error("catch exception,err_msg:%s" % traceback.format_exc())
return False
# Returns the ids of the online tags attached to this pictorial (via PictorialTag),
# or False if anything raises.
def get_tag_by_id(self):
try:
tag_id_list = []
tags = PictorialTag.objects.filter(pictorial_id=self.id, is_online=True).values_list("tag_id", flat=True)
for i in tags:
tag_id_list.append(i)
return tag_id_list
except:
logging.error("catch exception,err_msg:%s" % traceback.format_exc())
return False
# Returns the names of the online tags whose id is in `tag_id`, or False if anything raises.
# Despite its singular name, `tag_id` is used with `id__in`, so it must be a collection of ids.
def get_tag_by_name(self, tag_id):
try:
tag_name_list = []
tags = Tag.objects.filter(id__in=tag_id, is_online=True).values_list("name", flat=True)
for i in tags:
tag_name_list.append(i)
# NOTE(review): indentation is not visible in this view. If this call sits after the loop
# rather than inside it, `i` is unbound when `tags` is empty; the resulting NameError is
# swallowed by the except below and the method returns False instead of [] -- confirm.
logging.info("get tags name i:%s" % i)
return tag_name_list
except:
logging.error("catch exception,err_msg:%s" % traceback.format_exc())
return False
class PictorialTag(models.Model):
    """Pictorial-to-tag relation (table ``community_pictorial_tag``)."""

    class Meta:
        verbose_name = u"画报标签"
        app_label = "community"
        db_table = "community_pictorial_tag"

    # Explicit integer primary key.
    id = models.IntegerField(verbose_name=u'关注ID', primary_key=True)
    # Both timestamps default to the Unix epoch, computed once when the class is defined.
    create_time = models.DateTimeField(verbose_name=u'创建时间', default=datetime.datetime.fromtimestamp(0))
    update_time = models.DateTimeField(verbose_name=u'更新时间', default=datetime.datetime.fromtimestamp(0))
    # `max_length` has no effect on integer or boolean fields (Django reports
    # fields.W122 for it on integer fields), so it is not passed here.
    pictorial_id = models.BigIntegerField(verbose_name=u'画报ID')
    tag_id = models.BigIntegerField(verbose_name=u'标签ID')
    is_online = models.BooleanField(verbose_name=u'是否上线')
......@@ -45,6 +45,9 @@ class Tag(models.Model):
name = models.CharField(verbose_name=u"标签名称",max_length=128)
description = models.TextField(verbose_name=u"标签描述")
icon_url=models.CharField(verbose_name=u"icon_url",max_length=120)
collection = models.IntegerField(verbose_name=u"是否编辑")
is_ai = models.IntegerField(verbose_name=u"是否ai")
is_own = models.IntegerField(verbose_name=u"是否ins上自带")
create_time = models.DateTimeField(verbose_name=u'创建时间',default=datetime.datetime.fromtimestamp(0))
update_time = models.DateTimeField(verbose_name=u'更新时间', default=datetime.datetime.fromtimestamp(0))
......
#!/usr/bin/env python
# -*- coding: utf-8 -*-
from __future__ import unicode_literals, absolute_import, print_function
from django.conf import settings
from django.core.management.base import BaseCommand, CommandError
......@@ -9,35 +10,37 @@ from libs.es import ESPerform
from django.db import models
import datetime
from alpha_types.venus import GRAP_PLATFORM
from .pick_topic import PickTopic
from .tag import TopicTag,Tag
from .tag import TopicTag, Tag
from .user_extra import UserExtra
from .group import Group
class ActionSumAboutTopic(models.Model):
class Meta:
verbose_name=u"帖子埋点数据汇总"
db_table="action_sum_about_topic"
verbose_name = u"帖子埋点数据汇总"
db_table = "action_sum_about_topic"
partiton_date = models.CharField(verbose_name=u"日期",max_length=20)
device_id = models.CharField(verbose_name=u"用户设备号",max_length=50)
topic_id = models.CharField(verbose_name=u"帖子ID",max_length=50)
user_id = models.CharField(verbose_name=u"用户ID",max_length=50)
partiton_date = models.CharField(verbose_name=u"日期", max_length=20)
device_id = models.CharField(verbose_name=u"用户设备号", max_length=50)
topic_id = models.CharField(verbose_name=u"帖子ID", max_length=50)
user_id = models.CharField(verbose_name=u"用户ID", max_length=50)
data_type = models.IntegerField(verbose_name=u"动作类型")
data_value = models.BigIntegerField(verbose_name=u"值")
class TopicImage(models.Model):
class Meta:
verbose_name = u'日记图片'
db_table = 'topic_image'
id = models.IntegerField(verbose_name='日记图片ID',primary_key=True)
id = models.IntegerField(verbose_name='日记图片ID', primary_key=True)
topic_id = models.IntegerField(verbose_name=u'日记ID')
url = models.CharField(verbose_name=u'图片URL',max_length=300)
url = models.CharField(verbose_name=u'图片URL', max_length=300)
is_online = models.BooleanField(verbose_name='是否上线')
is_deleted = models.BooleanField(verbose_name='是否删除')
create_time = models.DateTimeField(verbose_name=u'创建时间',default=datetime.datetime.fromtimestamp(0))
create_time = models.DateTimeField(verbose_name=u'创建时间', default=datetime.datetime.fromtimestamp(0))
update_time = models.DateTimeField(verbose_name=u'更新时间', default=datetime.datetime.fromtimestamp(0))
......@@ -46,33 +49,67 @@ class Topic(models.Model):
verbose_name = u'日记'
db_table = 'topic'
id = models.IntegerField(verbose_name=u'日记ID',primary_key=True)
name = models.CharField(verbose_name=u'日记名称',max_length=100)
#group_id = models.IntegerField(verbose_name='用户所在组ID',default=-1)
group = models.ForeignKey(
Group, verbose_name=u"关联的小组", related_name=u"group_topics",null=True, blank=True, default=None, on_delete=models.CASCADE)
id = models.IntegerField(verbose_name=u'日记ID', primary_key=True)
name = models.CharField(verbose_name=u'日记名称', max_length=100)
# group_id = models.IntegerField(verbose_name='用户所在组ID',default=-1)
# group = models.ForeignKey(
# Group, verbose_name=u"关联的小组", related_name=u"group_topics", null=True, blank=True, default=None,
# on_delete=models.CASCADE)
user_id = models.IntegerField(verbose_name=u'用户ID')
drop_score = models.IntegerField(verbose_name=u'人工赋分',default=0)
description = models.CharField(verbose_name=u'日记本描述',max_length=200)
content = models.CharField(verbose_name=u'日记本内容',max_length=1000)
has_video = models.BooleanField(verbose_name=u'是否是视频日记')
drop_score = models.IntegerField(verbose_name=u'人工赋分', default=0)
description = models.CharField(verbose_name=u'日记本描述', max_length=200)
content = models.CharField(verbose_name=u'日记本内容', max_length=1000)
share_num = models.IntegerField(verbose_name='')
vote_num = models.IntegerField(verbose_name=u'点赞数')
reply_num = models.IntegerField(verbose_name=u'回复数')
cover = models.CharField(verbose_name='',max_length=200)
cover = models.CharField(verbose_name='', max_length=200)
is_online = models.BooleanField(verbose_name=u'是否上线')
is_deleted = models.BooleanField(verbose_name=u'是否删除')
content_level = models.CharField(verbose_name=u'内容等级',max_length=3)
create_time = models.DateTimeField(verbose_name=u'日记创建时间',default=datetime.datetime.fromtimestamp(0))
content_level = models.CharField(verbose_name=u'内容等级', max_length=3)
language_type = models.IntegerField(verbose_name=u'语种类型')
create_time = models.DateTimeField(verbose_name=u'日记创建时间', default=datetime.datetime.fromtimestamp(0))
update_time = models.DateTimeField(verbose_name=u'日记更新时间', default=datetime.datetime.fromtimestamp(0))
is_crawl = models.BooleanField(verbose_name="是否是爬回的数据", default=False)
has_image = models.BooleanField(verbose_name=u"是否有图片", default=False)
virtual_content_level = models.CharField(verbose_name=u'模拟内容等级', max_length=100, default='')
like_num_crawl = models.IntegerField(verbose_name=u'爬取点赞数', default=0)
comment_num_crawl = models.IntegerField(verbose_name=u'爬取评论数', default=0)
is_recommend = models.IntegerField(verbose_name=u'是否推荐 0 非推荐 1 推荐', default=0)
sort_score = models.FloatField(verbose_name='排序分', default=0)
is_shadow = models.BooleanField(verbose_name="是否是马甲账户", default=False)
platform = models.IntegerField(verbose_name=u'平台来源', choices=GRAP_PLATFORM, default=GRAP_PLATFORM.ALPHA)
platform_id = models.BigIntegerField(verbose_name='用平台ID', null=True)
def get_pictorial_id(self):
    """
    Collect the ids of the pictorials this topic is attached to.

    :return: list of ``pictorial_id`` values from PictorialTopic rows whose
             ``topic_id`` equals this topic's id; an empty list if anything raises
    """
    try:
        linked_ids = PictorialTopic.objects.filter(topic_id=self.id).values_list("pictorial_id", flat=True)
        # Materialise the lazy queryset inside the try so database errors are caught here.
        return list(linked_ids)
    except:
        logging.error("catch exception,err_msg:%s" % traceback.format_exc())
        return []
@property
def is_complaint(self):
    """True when at least one online TopicComplaint row references this topic."""
    open_complaints = TopicComplaint.objects.filter(topic_id=self.id, is_online=True)
    return open_complaints.exists()
def topic_has_image(self):
try:
has_image = False
query_list = TopicImage.objects.filter(topic_id=self.id,is_deleted=False,is_online=True)
if len(query_list)>0:
has_image=True
query_list = TopicImage.objects.using(settings.SLAVE_DB_NAME).filter(topic_id=self.id, is_deleted=False,
is_online=True)
if len(query_list) > 0:
has_image = True
return has_image
except:
......@@ -82,7 +119,7 @@ class Topic(models.Model):
def get_pick_id_info(self):
try:
pick_id_list = list()
query_list = PickTopic.objects.filter(topic_id=self.id,is_deleted=False)
query_list = PickTopic.objects.using(settings.SLAVE_DB_NAME).filter(topic_id=self.id, is_deleted=False)
for item in query_list:
pick_id_list.append(item.pick_id)
......@@ -94,23 +131,33 @@ class Topic(models.Model):
def get_topic_tag_id_list(self):
    """Collect the ids of the usable tags attached to this topic.

    Tag ids are read from ``TopicTag`` and resolved against ``Tag``, both
    through the ``settings.SLAVE_DB_NAME`` database alias.

    Returns:
        tuple: ``(topic_tag_id_list, edit_tag_id_list)``. The first list
        holds the id of every linked tag that is online and not deleted;
        the second holds those whose ``collection`` flag is truthy.
        ``([], [])`` is returned when anything raises (the traceback is
        logged). The caller unpacks exactly two values, so both exits must
        return a 2-tuple.
    """
    try:
        topic_tag_id_list = list()
        edit_tag_id_list = list()

        tag_id_list = TopicTag.objects.using(settings.SLAVE_DB_NAME).filter(topic_id=self.id).values_list(
            "tag_id", flat=True)
        tag_query_results = Tag.objects.using(settings.SLAVE_DB_NAME).filter(id__in=tag_id_list)
        for tag_item in tag_query_results:
            if tag_item.is_online and not tag_item.is_deleted:
                topic_tag_id_list.append(tag_item.id)
                # NOTE(review): the collection check is assumed to be nested under
                # the online/not-deleted check; confirm against the original layout.
                if tag_item.collection:
                    edit_tag_id_list.append(tag_item.id)

        return (topic_tag_id_list, edit_tag_id_list)
    except:
        logging.error("catch exception,err_msg:%s" % traceback.format_exc())
        return ([], [])
def get_tag_name_list(self,tag_id_list):
def get_tag_name_list(self, tag_id_list):
try:
tag_name_list = list()
query_results = Tag.objects.filter(id__in=tag_id_list).values_list("name",flat=True)
for item in query_results:
tag_name_list.append(item)
for i in range(0, len(tag_name_list), 1000):
query_results = Tag.objects.using(settings.SLAVE_DB_NAME).filter(id__in=tag_id_list[i:i + 1000])
for item in query_results:
tag_name_list.append(item)
return tag_name_list
except:
logging.error("catch exception,err_msg:%s" % traceback.format_exc())
......@@ -121,8 +168,8 @@ class Topic(models.Model):
offline_score = 0.0
user_is_shadow = False
#是否官方推荐用户
user_query_results = UserExtra.objects.filter(user_id=self.user_id)
# 是否官方推荐用户
user_query_results = UserExtra.objects.using(settings.SLAVE_DB_NAME).filter(user_id=self.user_id)
if user_query_results.count() > 0:
if user_query_results[0].is_recommend:
offline_score += 2.0
......@@ -130,32 +177,70 @@ class Topic(models.Model):
user_is_shadow = True
# 是否官方推荐小组
if self.group and self.group.is_recommend:
offline_score += 4.0
# if self.group and self.group.is_recommend:
# offline_score += 4.0
#帖子等级
# 帖子等级
if self.content_level == '5':
offline_score += 5.0
offline_score += 6.0
elif self.content_level == '4':
offline_score += 3.0
offline_score += 5.0
elif self.content_level == '3':
offline_score += 2.0
exposure_count = ActionSumAboutTopic.objects.filter(topic_id=self.id,data_type=1).count()
click_count = ActionSumAboutTopic.objects.filter(topic_id=self.id, data_type=2).count()
uv_num = ActionSumAboutTopic.objects.filter(topic_id=self.id,data_type=3).count()
exposure_count = ActionSumAboutTopic.objects.using(settings.SLAVE_DB_NAME).filter(topic_id=self.id,
data_type=1).count()
click_count = ActionSumAboutTopic.objects.using(settings.SLAVE_DB_NAME).filter(topic_id=self.id,
data_type=2).count()
uv_num = ActionSumAboutTopic.objects.using(settings.SLAVE_DB_NAME).filter(topic_id=self.id,
data_type=3).count()
if exposure_count>0:
offline_score += click_count/exposure_count
if uv_num>0:
offline_score += (self.vote_num/uv_num + self.reply_num/uv_num)
if exposure_count > 0:
offline_score += click_count / exposure_count
if uv_num > 0:
offline_score += (self.vote_num / uv_num + self.reply_num / uv_num)
"""
1:马甲账号是否对总分降权?
"""
if user_is_shadow:
offline_score = offline_score*0.5
offline_score = offline_score * 0.5
offline_score -= self.drop_score
return offline_score
except:
logging.error("catch exception,err_msg:%s" % traceback.format_exc())
return 0.0
\ No newline at end of file
return 0.0
class TopicComplaint(models.Model):
    """Complaint (report) filed by a user against a topic."""

    id = models.IntegerField(verbose_name='日记图片ID', primary_key=True)
    # Id of the user who filed the complaint; indexed.
    user_id = models.BigIntegerField(verbose_name=u'用户ID', db_index=True)
    # Topic being complained about; optional, and reachable from Topic
    # through the ``complaints`` reverse accessor.
    topic = models.ForeignKey(
        Topic,
        verbose_name=u"关联的帖子",
        null=True,
        blank=True,
        default=None,
        on_delete=models.CASCADE,
        related_name='complaints',
    )
    is_online = models.BooleanField(verbose_name=u"是否有效", default=True)

    class Meta:
        verbose_name = u'帖子举报'
        app_label = 'community'
        db_table = 'topic_complaint'
class PictorialTopic(models.Model):
    """Link row between a pictorial and a topic."""

    class Meta:
        verbose_name = u'画报帖子关系'
        app_label = 'community'
        db_table = 'community_pictorial_topic'

    id = models.IntegerField(verbose_name=u'日记ID', primary_key=True)
    pictorial_id = models.BigIntegerField(verbose_name=u'画报ID')
    topic_id = models.BigIntegerField(verbose_name=u'帖子ID')
    # ``is_online`` used to be assigned twice in this class body; the second
    # assignment replaced the first, so only that effective definition is kept.
    # NOTE(review): the discarded one carried default=True; confirm which was intended.
    is_online = models.BooleanField(verbose_name=u'是否上线')
    is_deleted = models.BooleanField(verbose_name=u'是否删除')
......@@ -15,14 +15,14 @@ from .group_user_role import GroupUserRole
from .tag import AccountUserTag
from .topic import Topic
from .user_extra import UserExtra
from .pictorial import PictorialFollow
class User(models.Model):
class Meta:
verbose_name = u'用户'
db_table = 'account_user'
id = models.IntegerField(verbose_name="主键ID",primary_key=True)
id = models.IntegerField(verbose_name="主键ID", primary_key=True)
user_id = models.BigIntegerField(verbose_name=u'用户id', unique=True)
nick_name = models.CharField(verbose_name=u'昵称', max_length=255, default='')
profile_pic = models.CharField(verbose_name=u'头像', max_length=300)
......@@ -31,56 +31,58 @@ class User(models.Model):
country_id = models.CharField(verbose_name=u'国家id', max_length=40)
is_online = models.BooleanField(verbose_name="是否上线")
is_deleted = models.BooleanField(verbose_name='是否删除')
create_time = models.DateTimeField(verbose_name=u'创建时间',default=datetime.datetime.fromtimestamp(0))
create_time = models.DateTimeField(verbose_name=u'创建时间', default=datetime.datetime.fromtimestamp(0))
update_time = models.DateTimeField(verbose_name=u'更新时间', default=datetime.datetime.fromtimestamp(0))
def get_is_recommend_flag(self):
    """Look up this user's recommendation and shadow-account flags.

    Reads the online, non-deleted ``UserExtra`` rows for ``self.user_id``
    through the ``settings.SLAVE_DB_NAME`` database alias.

    Returns:
        tuple: ``(is_recommend, is_shadow)``. Both are ``False`` when no row
        matches; if several rows match, the last one iterated wins.
    """
    is_shadow = False
    is_recommend = False
    query_sql = UserExtra.objects.using(settings.SLAVE_DB_NAME).filter(
        user_id=self.user_id, is_deleted=False, is_online=True)
    for record in query_sql:
        is_recommend = record.is_recommend
        is_shadow = record.is_shadow

    return (is_recommend, is_shadow)
def get_latest_topic_time_val(self):
    """Return the timestamp of this user's most recently updated topic.

    Issues a single query through the ``settings.SLAVE_DB_NAME`` alias for
    the newest ``update_time`` among the user's topics.

    Returns:
        int: ``time.mktime`` of the ``tzlc``-converted update time, or ``-1``
        when the query yields nothing truthy.
    """
    latest_topic_time_val = -1

    # Newest update_time among this user's topics.
    topic_records = Topic.objects.using(settings.SLAVE_DB_NAME).filter(user_id=self.user_id).order_by(
        "-update_time").values_list("update_time", flat=True).first()
    if topic_records:
        tzlc_topic_update_time = tzlc(topic_records)
        latest_topic_time_val = int(time.mktime(tzlc_topic_update_time.timetuple()))

    return latest_topic_time_val
def get_follow_user_id_list(self):
    """Return id/country details for the users this user follows.

    Follow ids come from the online rows of ``self.userfollow``. The matching
    ``User`` rows are then read through the ``settings.SLAVE_DB_NAME`` alias
    in slices of 1000 ids, so no single ``IN`` clause grows without bound.

    Returns:
        list: one ``{"user_id": ..., "country_id": ...}`` dict per ``User``
        row found; each row is emitted once.
    """
    follow_user_id_list = list()
    user_follows = self.userfollow.filter(is_online=True)
    for user_follow in user_follows:
        follow_user_id_list.append(user_follow.follow_id)

    follow_user_detail_list = list()
    logging.info("get follow_user_id_list :%s" % follow_user_id_list)
    for i in range(0, len(follow_user_id_list), 1000):
        logging.info("get follow_user_id_list :%s" % follow_user_id_list[i:i + 1000])
        sql_data_list = User.objects.using(settings.SLAVE_DB_NAME).filter(
            user_id__in=follow_user_id_list[i:i + 1000])
        for detail_data in sql_data_list:
            item = {
                "user_id": detail_data.user_id,
                "country_id": detail_data.country_id
            }
            follow_user_detail_list.append(item)

    return follow_user_detail_list
def get_attention_group_id_list(self):
try:
attention_group_id_list = list()
query_results = GroupUserRole.objects.filter(is_online=True,user_id=self.user_id)
query_results = GroupUserRole.objects.using(settings.SLAVE_DB_NAME).filter(is_online=True, user_id=self.user_id)
for item in query_results:
item_dict = {
"group_id": item.group_id,
"update_time_val":time.mktime(tzlc(item.update_time).timetuple())
"update_time_val": time.mktime(tzlc(item.update_time).timetuple())
}
attention_group_id_list.append(item_dict)
......@@ -89,55 +91,87 @@ class User(models.Model):
logging.error("catch exception,err_msg:%s" % traceback.format_exc())
return []
def get_attention_pictorial_id_list(self):
    """List the pictorials this user follows, with each follow's update time.

    Reads the online ``PictorialFollow`` rows for ``self.user_id`` through
    the ``settings.SLAVE_DB_NAME`` alias.

    Returns:
        list: one ``{"pictorial_id": ..., "update_time_val": ...}`` dict per
        row, where ``update_time_val`` is ``time.mktime`` of the
        ``tzlc``-converted ``update_time``. An empty list is returned when
        anything raises (the traceback is logged).
    """
    try:
        followed = PictorialFollow.objects.using(settings.SLAVE_DB_NAME).filter(is_online=True, user_id=self.user_id)
        logging.info("get PictorialFollow:%s"%followed)

        attention_pictorial_id_list = [
            {
                "pictorial_id": follow.pictorial_id,
                "update_time_val": time.mktime(tzlc(follow.update_time).timetuple()),
            }
            for follow in followed
        ]

        logging.info("get user_id:%s" %self.user_id)
        logging.info("get same_pictorial_user_id_list:%s" % attention_pictorial_id_list)
        return attention_pictorial_id_list
    except:
        logging.error("catch exception,err_msg:%s" % traceback.format_exc())
        return []
def get_pick_user_id_list(self):
    """Return id/country details for the authors of topics this user picked.

    Steps, all through the ``settings.SLAVE_DB_NAME`` alias:
    1. collect ``picktopic_id`` from the user's non-deleted picks with ``is_pick`` set;
    2. resolve those topic ids to author ``user_id`` values, 1000 ids per query;
    3. load the matching ``User`` rows, again 1000 ids per query.

    Returns:
        list: one ``{"user_id": ..., "country_id": ...}`` dict per ``User``
        row found.
    """
    pick_topic_id_list = list()
    user_picks = self.user_pick.using(settings.SLAVE_DB_NAME).filter(is_deleted=False, is_pick=True)
    for user_pick in user_picks:
        pick_topic_id_list.append(user_pick.picktopic_id)

    pick_user_id_list = []
    for i in range(0, len(pick_topic_id_list), 1000):
        topic_sql_list = Topic.objects.using(settings.SLAVE_DB_NAME).filter(id__in=pick_topic_id_list[i:i + 1000])
        for topic_data in topic_sql_list:
            pick_user_id_list.append(topic_data.user_id)

    pick_user_id_list = tuple(pick_user_id_list)
    pick_user_detail_list = list()
    for i in range(0, len(pick_user_id_list), 1000):
        sql_data_list = User.objects.using(settings.SLAVE_DB_NAME).filter(user_id__in=pick_user_id_list[i:i + 1000])
        for detail_data in sql_data_list:
            item = {
                "user_id": detail_data.user_id,
                "country_id": detail_data.country_id
            }
            pick_user_detail_list.append(item)

    return pick_user_detail_list

def get_same_group_user_id_list(self):
    """Return id/country details for every member of the groups this user belongs to.

    Kept so existing callers keep working; its statements had become
    interleaved with ``get_pick_user_id_list`` and are separated again here.

    Returns:
        list: one ``{"user_id": ..., "country_id": ...}`` dict per ``User``
        row found.
    """
    same_group_user_id_list = list()
    group_items_list = GroupUserRole.objects.filter(user_id=self.user_id)
    for group_item in group_items_list:
        group_id = group_item.group_id
        user_items_list = GroupUserRole.objects.filter(group_id=group_id)
        for user_item in user_items_list:
            same_group_user_id_list.append(user_item.user_id)

    same_group_detail_list = list()
    sql_data_list = User.objects.filter(user_id__in=same_group_user_id_list)
    for detail_data in sql_data_list:
        item = {
            "user_id": detail_data.user_id,
            "country_id": detail_data.country_id
        }
        same_group_detail_list.append(item)

    return same_group_detail_list
def get_same_pictorial_user_id_list(self):
    """Return id/country details for users who follow the same pictorials.

    Walks the pictorials this user follows and gathers the ids of their
    followers until 100 ids have been collected; duplicates are not removed.
    The matching ``User`` rows are then loaded in fixed-size slices. All
    queries go through the ``settings.SLAVE_DB_NAME`` alias.

    Returns:
        list: one ``{"user_id": ..., "country_id": ...}`` dict per ``User``
        row found.
    """
    # TODO: some co-follower sets are so large that they drive celery CPU usage
    # too high, so the amount collected is capped for now; this data may be
    # dropped entirely later.
    max_user_ids = 100
    # The range step and the slice width must be the same value, otherwise
    # neighbouring slices overlap and rows are fetched more than once.
    chunk_size = 1000

    same_pictorial_user_id_list = list()
    pictorial_items_list = list(PictorialFollow.objects.using(settings.SLAVE_DB_NAME).filter(
        user_id=self.user_id).values_list("pictorial_id", flat=True))
    for pictorial_id in pictorial_items_list:
        if len(same_pictorial_user_id_list) >= max_user_ids:
            break
        user_items_list = list(PictorialFollow.objects.using(settings.SLAVE_DB_NAME).filter(
            pictorial_id=pictorial_id).values_list("user_id", flat=True))
        remaining = max_user_ids - len(same_pictorial_user_id_list)
        same_pictorial_user_id_list.extend(user_items_list[:remaining])

    logging.info("get same user_id:%s"%self.user_id)
    logging.info("get same_pictorial_user_id_list:%s"%same_pictorial_user_id_list)

    same_pictorial_detail_list = list()
    for i in range(0, len(same_pictorial_user_id_list), chunk_size):
        sql_data_list = User.objects.using(settings.SLAVE_DB_NAME).filter(
            user_id__in=same_pictorial_user_id_list[i:i + chunk_size])
        for detail_data in sql_data_list:
            item = {
                "user_id": detail_data.user_id,
                "country_id": detail_data.country_id
            }
            same_pictorial_detail_list.append(item)

    return same_pictorial_detail_list
def get_user_tag_id_list(self):
try:
user_tag_id_list = list()
query_results = AccountUserTag.objects.filter(user=self.user_id,is_deleted=False)
query_results = AccountUserTag.objects.using(settings.SLAVE_DB_NAME).filter(user=self.user_id, is_deleted=False)
for item in query_results:
user_tag_id_list.append(item.tag_id)
......@@ -145,4 +179,3 @@ class User(models.Model):
except:
logging.error("catch exception,err_msg:%s" % traceback.format_exc())
return []
......@@ -12,11 +12,12 @@ import elasticsearch
import elasticsearch.helpers
import sys
from trans2es.models import topic, user, pick_celebrity, group, celebrity, tag, contrast_similar
from trans2es.models import topic, user, pick_celebrity, group, celebrity, tag, contrast_similar,pictorial
from trans2es.utils.user_transfer import UserTransfer
from trans2es.utils.pick_celebrity_transfer import PickCelebrityTransfer
from trans2es.utils.group_transfer import GroupTransfer
from trans2es.utils.topic_transfer import TopicTransfer
from trans2es.utils.pictorial_transfer import PictorialTransfer
from trans2es.utils.celebrity_transfer import CelebrityTransfer
from trans2es.utils.tag_transfer import TagTransfer
from trans2es.utils.contrast_similar_transfer import Contrast_Similar_Transfer
......@@ -172,57 +173,82 @@ class TypeInfo(object):
)
def insert_table_by_pk_list(self, sub_index_name, pk_list, es=None, use_batch_query_set=False):
    """Re-index the rows whose primary keys are listed in ``pk_list``.

    Args:
        sub_index_name: index name passed on to ``elasticsearch_bulk_insert_data``.
        pk_list: primary keys used in the ``pk__in`` filter.
        es: Elasticsearch client passed through unchanged (may be ``None``).
        use_batch_query_set: when true, start from ``self.queryset`` instead
            of ``self.model.objects.all()``.

    Four stages are timed and logged: choosing the queryset, building the
    filter, converting rows with ``bulk_get_data``, and the bulk insert. The
    durations are floats, so they are logged with millisecond precision;
    formatting them with ``%d`` reported every sub-second stage as ``0s``.
    """
    begin = time.time()
    if use_batch_query_set:
        qs = self.queryset
    else:
        qs = self.model.objects.all()
    time0 = time.time() - begin

    begin = time.time()
    # NOTE(review): if qs is a Django QuerySet this filter is lazy, so the rows
    # are presumably fetched inside bulk_get_data (stage 2) -- confirm.
    instance_list = qs.filter(pk__in=pk_list)
    time1 = time.time() - begin

    begin = time.time()
    data_list = self.bulk_get_data(instance_list)
    time2 = time.time() - begin

    # The last timer is started before the two log lines below.
    begin = time.time()
    logging.info("get sub_index_name:%s"%sub_index_name)
    logging.info("get data_list:%s"%data_list)

    self.elasticsearch_bulk_insert_data(
        sub_index_name=sub_index_name,
        data_list=data_list,
        es=es,
    )
    time3 = time.time() - begin
    logging.info("duan add,insert_table_by_pk_list time cost:%.3fs,%.3fs,%.3fs,%.3fs" % (time0, time1, time2, time3))
def insert_table_chunk(self, sub_index_name, table_chunk, es=None):
    """Index one chunk of table rows and return a one-line timing report.

    Args:
        sub_index_name: index name handed to ``ESPerform.es_helpers_bulk``.
        table_chunk: iterable of rows that also provides ``get_pk_start()``
            and ``get_pk_stop()``.
        es: Elasticsearch client passed through as ``es_cli``.

    Returns:
        str: timestamp, index, type name, pk range, row count, the three
        stage durations, the clock duration and the bulk response; ``None``
        when anything raises (the traceback is logged).
    """
    try:
        # time.clock() was removed in Python 3.8. Without this lookup the call
        # raises AttributeError, which the except below would turn into a
        # silent None with nothing indexed. Fall back to time.clock only on
        # interpreters that lack process_time.
        cpu_clock = getattr(time, "process_time", None) or time.clock

        start_clock = cpu_clock()
        start_time = time.time()

        instance_list = list(table_chunk)
        stage_1_time = time.time()

        data_list = self.bulk_get_data(instance_list)
        stage_2_time = time.time()

        es_result = ESPerform.es_helpers_bulk(
            es_cli=es,
            data_list=data_list,
            sub_index_name=sub_index_name,
            auto_create_index=True
        )
        logging.info("es_helpers_bulk,sub_index_name:%s,data_list len:%d" % (sub_index_name, len(data_list)))
        stage_3_time = time.time()
        end_clock = cpu_clock()

        return ('{datetime} {index_prefix} {type_name:10s} {pk_start:>15s} {pk_stop:>15s} {count:5d} '
                '{stage_1_duration:6.3f} {stage_2_duration:6.3f} {stage_3_duration:6.3f} {clock_duration:6.3f} '
                '{response}').format(
            datetime=datetime.datetime.now().strftime('%Y-%m-%dT%H:%M:%S.%f'),
            index_prefix=sub_index_name,
            type_name=self.name,
            pk_start=repr(table_chunk.get_pk_start()),
            pk_stop=repr(table_chunk.get_pk_stop()),
            count=len(instance_list),
            stage_1_duration=stage_1_time - start_time,
            stage_2_duration=stage_2_time - stage_1_time,
            stage_3_duration=stage_3_time - stage_2_time,
            clock_duration=end_clock - start_clock,
            response=es_result,
        )
    except:
        logging.error("catch exception,err_msg:%s" % traceback.format_exc())
        return None
_get_type_info_map_result = None
......@@ -234,6 +260,7 @@ def get_type_info_map():
return _get_type_info_map_result
type_info_list = [
TypeInfo(
name='topic', # 日记
type='topic',
......@@ -244,6 +271,16 @@ def get_type_info_map():
round_insert_chunk_size=5,
round_insert_period=2,
),
TypeInfo(
name='topic-v1', # 日记
type='topic-v1',
model=topic.Topic,
query_deferred=lambda: topic.Topic.objects.all().query,
get_data_func=TopicTransfer.get_topic_data,
bulk_insert_chunk_size=100,
round_insert_chunk_size=5,
round_insert_period=2,
),
TypeInfo(
name="user", # 用户
type="user",
......@@ -308,7 +345,18 @@ def get_type_info_map():
bulk_insert_chunk_size=100,
round_insert_chunk_size=5,
round_insert_period=2
),
TypeInfo(
name="pictorial", # 画报
type="pictorial",
model=pictorial.Pictorial,
query_deferred=lambda: pictorial.Pictorial.objects.all().query,
get_data_func=PictorialTransfer.get_poctorial_data,
bulk_insert_chunk_size=100,
round_insert_chunk_size=5,
round_insert_period=2,
)
]
type_info_map = {
......@@ -318,3 +366,4 @@ def get_type_info_map():
_get_type_info_map_result = type_info_map
return type_info_map
......@@ -33,7 +33,7 @@ class GroupTransfer(object):
update_time = instance.update_time
tzlc_udpate_time = tzlc(update_time)
res["update_time"] = tzlc_udpate_time
res["high_quality_topic_num"] = instance.get_high_quality_topic_num()
# res["high_quality_topic_num"] = instance.get_high_quality_topic_num()
return res
except:
......
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import os
import sys
import logging
import traceback
from libs.tools import tzlc
class PictorialTransfer(object):
    """Builds the document dict for a pictorial instance."""

    def __init__(self):
        pass

    @classmethod
    def get_poctorial_data(cls, instance):
        """Flatten ``instance`` into a plain dict.

        Copies the scalar attributes, converts ``create_time`` and
        ``update_time`` with ``tzlc``, and adds the values returned by the
        instance's ``get_high_quality_topic_num``, ``get_tag_by_id``,
        ``get_tag_by_name`` and ``get_topic_id`` helpers.

        Returns:
            dict: the populated mapping, or an empty dict when any step
            raises (the traceback is logged).
        """
        try:
            doc = {
                "id": instance.id,
                "is_online": instance.is_online,
                "is_deleted": instance.is_deleted,
                "is_recommend": instance.is_recommend,
                "name": instance.name,
                "description": instance.description,
                "topic_num": instance.topic_num,
                "creator_id": instance.creator_id,
                "icon": instance.icon,
                "create_time": tzlc(instance.create_time),
                "update_time": tzlc(instance.update_time),
                "high_quality_topic_num": instance.get_high_quality_topic_num(),
            }

            # The tag ids are fetched once and reused for the name lookup.
            tag_ids = instance.get_tag_by_id()
            doc["tag_id"] = tag_ids
            doc["tag_name"] = instance.get_tag_by_name(tag_ids)
            doc["topic_id_list"] = instance.get_topic_id()
            return doc
        except:
            logging.error("catch exception,err_msg:%s" % traceback.format_exc())
            return dict()
......@@ -10,6 +10,9 @@ from libs.tools import tzlc
from trans2es.models.topic import Topic
from trans2es.models.tag import TopicTag,CommunityTagType,CommunityTagTypeRelation
import datetime
from django.conf import settings
class TagTransfer(object):
......@@ -38,26 +41,34 @@ class TagTransfer(object):
res["is_online"] = instance.is_online
res["is_deleted"] = instance.is_deleted
res["near_new_topic_num"] = 0
topic_num = 0
res["near_new_topic_num"] = topic_num
if instance.is_online==True and instance.is_deleted==False:
topic_id_list = list()
sql_result_list = TopicTag.objects.filter(tag_id=instance.id).values_list("topic_id",flat=True)
for item_id in sql_result_list:
topic_id_list.append(item_id)
sql_result_results = list(TopicTag.objects.using(settings.SLAVE_DB_NAME).filter(
tag_id=instance.id).values_list("topic_id", "is_online"))
for topic_id,is_online in sql_result_results:
if is_online:
topic_id_list.append(topic_id)
time_base_val = datetime.datetime.strftime(datetime.datetime.now()+datetime.timedelta(-7), "%Y-%m-%d")
topic_num = Topic.objects.filter(id__in=topic_id_list,create_time__gte=time_base_val).count()
for topic_begin_index in range(0,len(topic_id_list),100):
cur_topic_num = Topic.objects.using(settings.SLAVE_DB_NAME).filter(id__in=topic_id_list[topic_begin_index:topic_begin_index+100],create_time__gte=time_base_val).count()
topic_num += cur_topic_num
res["near_new_topic_num"] = topic_num
tag_type_sql_list = CommunityTagTypeRelation.objects.filter(tag_id=instance.id).values_list("tag_type_id",flat=True)
tag_type_sql_list = CommunityTagTypeRelation.objects.using(settings.SLAVE_DB_NAME).filter(tag_id=instance.id).values_list("tag_type_id",flat=True)
tag_type_list = list()
for tag_type_id in tag_type_sql_list:
tag_type_list.append(tag_type_id)
res["tag_type"] = tag_type_list
res["collection"] = instance.collection
res["is_ai"] = instance.is_ai
res["is_own"] = instance.is_own
return res
except:
logging.error("catch exception,err_msg:%s" % traceback.format_exc())
......
......@@ -6,6 +6,8 @@ import logging
import traceback
from libs.tools import tzlc
import time
import re
import datetime
class TopicTransfer(object):
......@@ -26,19 +28,60 @@ class TopicTransfer(object):
res["content_level"] = instance.content_level
res["user_id"] = instance.user_id
if instance.group:
res["group_id"] = instance.group.id
else:
res["group_id"] = -1
# if instance.group:
# res["group_id"] = instance.group.id
# else:
# res["group_id"] = -1
res["pictorial_id"] = instance.get_pictorial_id()
res["share_num"] = instance.share_num
begin = time.time()
res["pick_id_list"] = instance.get_pick_id_info()
res["tag_list"] = instance.get_topic_tag_id_list()
end = time.time()
time0 = (end-begin)
begin = time.time()
(topic_tag_id_list, edit_tag_id_list) = instance.get_topic_tag_id_list()
res["tag_list"] = topic_tag_id_list
res["edit_tag_list"] = edit_tag_id_list
end = time.time()
time1 = (end-begin)
begin = time.time()
res["tag_name_list"] = instance.get_tag_name_list(res["tag_list"])
end = time.time()
time2 = (end-begin)
begin = time.time()
res["offline_score"] = instance.get_topic_offline_score()
end = time.time()
time3 = (end-begin)
begin = time.time()
res["manual_score"] = instance.drop_score
res["has_image"] = instance.topic_has_image()
res["has_video"] = instance.has_video
res["language_type"] = instance.language_type
end = time.time()
time4 = (end-begin)
# # 片假名
# re_jp_pian_words = re.compile(u"[\u30a0-\u30ff]+")
# m_pian = re_jp_pian_words.search(instance.content, 0)
#
# # 平假名
# re_jp_ping_words = re.compile(u"[\u3040-\u309f]+")
# m_ping = re_jp_ping_words.search(instance.content, 0)
# if m_pian or m_ping:
# res["language_type"] = 10
# else:
# res["language_type"] = instance.language_type
create_time = instance.create_time
tzlc_create_time = tzlc(create_time)
......@@ -48,9 +91,10 @@ class TopicTransfer(object):
update_time = instance.update_time
tzlc_update_time = tzlc(update_time)
res["update_time"] = tzlc_update_time
# res["update_time"] = tzlc_update_time
res["update_time_val"] = int(time.mktime(tzlc_update_time.timetuple()))
logging.info("test topic transfer time cost,time0:%d,time1:%d,time2:%d,time3:%d,time4:%d" % (time0,time1,time2,time3,time4))
return res
except:
logging.error("catch exception,err_msg:%s" % traceback.format_exc())
......
......@@ -7,30 +7,33 @@ import traceback
import time
from libs.tools import tzlc
from trans2es.models.user import User
from django.conf import settings
class UserTransfer(object):
@classmethod
def get_follow_user_id_list(cls,userInstance):
def get_follow_user_id_list(cls, userInstance):
follow_user_id_list = list()
user_follows = userInstance.userfollow.filter(is_online=True)
for user_follow in user_follows:
follow_user_id_list.append(user_follow.follow_id)
follow_user_detail_list = list()
sql_data_list = User.objects.filter(user_id__in=follow_user_id_list,is_online=True,is_deleted=False)
for detail_data in sql_data_list:
item = {
"user_id":detail_data.user_id,
"country_id":detail_data.country_id
}
follow_user_detail_list.append(item)
for i in range(0, len(follow_user_id_list), 1000):
sql_data_list = User.objects.using(settings.SLAVE_DB_NAME).filter(user_id__in=follow_user_id_list[i:i + 1000], is_online=True,
is_deleted=False)
for detail_data in sql_data_list:
item = {
"user_id": detail_data.user_id,
"country_id": detail_data.country_id
}
follow_user_detail_list.append(item)
return follow_user_detail_list
@classmethod
def get_user_data(cls,instance):
def get_user_data(cls, instance):
try:
res = dict()
......@@ -71,19 +74,28 @@ class UserTransfer(object):
try:
res["tag_list"] = instance.get_user_tag_id_list()
res["attention_user_id_list"] = cls.get_follow_user_id_list(userInstance=instance)
res["attention_group_id_list"] = instance.get_attention_group_id_list()
# res["attention_group_id_list"] = instance.get_attention_group_id_list()
res["pick_user_id_list"] = instance.get_pick_user_id_list()
res["same_group_user_id_list"] = instance.get_same_group_user_id_list()
res["attention_pictorial_id_list"] = instance.get_attention_pictorial_id_list()
# res["same_group_user_id_list"] = instance.get_same_group_user_id_list()
res["same_pictorial_user_id_list"] = instance.get_same_pictorial_user_id_list()
except:
logging.error("catch exception,err_msg:%s" % traceback.format_exc())
res["tag_list"] = []
res["attention_user_id_list"] = []
res["attention_group_id_list"] = []
# res["attention_group_id_list"] = []
res["pick_user_id_list"] = []
res["same_group_user_id_list"] = []
# res["same_group_user_id_list"] = []
res["attention_pictorial_id_list"] = []
res["same_pictorial_user_id_list"] = []
return res
except:
logging.error("catch exception,err_msg:%s" % traceback.format_exc())
return {}
\ No newline at end of file
return {}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment