text miner

890ee5a4 · crazyer · eb61cf61 · 890ee5a4 · 890ee5a4 · 890ee5a4
Commit 890ee5a4 authored Jan 07, 2021 by crazyer
48 changed files
--- a/.idea/.gitignore
+++ b/.idea/.gitignore
+# Default ignored files
+/workspace.xml
--- a/.idea/codeStyles/Project.xml
+++ b/.idea/codeStyles/Project.xml
+<component name="ProjectCodeStyleConfiguration">
+  <code_scheme name="Project" version="173">
+    <DBN-PSQL>
+      <case-options enabled="true">
+        <option name="KEYWORD_CASE" value="lower" />
+        <option name="FUNCTION_CASE" value="lower" />
+        <option name="PARAMETER_CASE" value="lower" />
+        <option name="DATATYPE_CASE" value="lower" />
+        <option name="OBJECT_CASE" value="preserve" />
+      </case-options>
+      <formatting-settings enabled="false" />
+    </DBN-PSQL>
+    <DBN-SQL>
+      <case-options enabled="true">
+        <option name="KEYWORD_CASE" value="lower" />
+        <option name="FUNCTION_CASE" value="lower" />
+        <option name="PARAMETER_CASE" value="lower" />
+        <option name="DATATYPE_CASE" value="lower" />
+        <option name="OBJECT_CASE" value="preserve" />
+      </case-options>
+      <formatting-settings enabled="false">
+        <option name="STATEMENT_SPACING" value="one_line" />
+        <option name="CLAUSE_CHOP_DOWN" value="chop_down_if_statement_long" />
+        <option name="ITERATION_ELEMENTS_WRAPPING" value="chop_down_if_not_single" />
+      </formatting-settings>
+    </DBN-SQL>
+  </code_scheme>
+</component>
\ No newline at end of file
--- a/.idea/dbnavigator.xml
+++ b/.idea/dbnavigator.xml
+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+  <component name="DBNavigator.Project.DataEditorManager">
+    <record-view-column-sorting-type value="BY_INDEX" />
+    <value-preview-text-wrapping value="true" />
+    <value-preview-pinned value="false" />
+  </component>
+  <component name="DBNavigator.Project.DataExportManager">
+    <export-instructions>
+      <create-header value="true" />
+      <quote-values-containing-separator value="true" />
+      <quote-all-values value="false" />
+      <value-separator value="" />
+      <file-name value="" />
+      <file-location value="" />
+      <scope value="GLOBAL" />
+      <destination value="FILE" />
+      <format value="EXCEL" />
+      <charset value="UTF-8" />
+    </export-instructions>
+  </component>
+  <component name="DBNavigator.Project.DatabaseBrowserManager">
+    <autoscroll-to-editor value="false" />
+    <autoscroll-from-editor value="true" />
+    <show-object-properties value="true" />
+    <loaded-nodes />
+  </component>
+  <component name="DBNavigator.Project.DatabaseFileManager">
+    <open-files />
+  </component>
+  <component name="DBNavigator.Project.EditorStateManager">
+    <last-used-providers />
+  </component>
+  <component name="DBNavigator.Project.MethodExecutionManager">
+    <method-browser />
+    <execution-history>
+      <group-entries value="true" />
+      <execution-inputs />
+    </execution-history>
+    <argument-values-cache />
+  </component>
+  <component name="DBNavigator.Project.ObjectDependencyManager">
+    <last-used-dependency-type value="INCOMING" />
+  </component>
+  <component name="DBNavigator.Project.ObjectQuickFilterManager">
+    <last-used-operator value="EQUAL" />
+    <filters />
+  </component>
+  <component name="DBNavigator.Project.ScriptExecutionManager" clear-outputs="true">
+    <recently-used-interfaces />
+  </component>
+  <component name="DBNavigator.Project.Settings">
+    <connections />
+    <browser-settings>
+      <general>
+        <display-mode value="TABBED" />
+        <navigation-history-size value="100" />
+        <show-object-details value="false" />
+      </general>
+      <filters>
+        <object-type-filter>
+          <object-type name="SCHEMA" enabled="true" />
+          <object-type name="USER" enabled="true" />
+          <object-type name="ROLE" enabled="true" />
+          <object-type name="PRIVILEGE" enabled="true" />
+          <object-type name="CHARSET" enabled="true" />
+          <object-type name="TABLE" enabled="true" />
+          <object-type name="VIEW" enabled="true" />
+          <object-type name="MATERIALIZED_VIEW" enabled="true" />
+          <object-type name="NESTED_TABLE" enabled="true" />
+          <object-type name="COLUMN" enabled="true" />
+          <object-type name="INDEX" enabled="true" />
+          <object-type name="CONSTRAINT" enabled="true" />
+          <object-type name="DATASET_TRIGGER" enabled="true" />
+          <object-type name="DATABASE_TRIGGER" enabled="true" />
+          <object-type name="SYNONYM" enabled="true" />
+          <object-type name="SEQUENCE" enabled="true" />
+          <object-type name="PROCEDURE" enabled="true" />
+          <object-type name="FUNCTION" enabled="true" />
+          <object-type name="PACKAGE" enabled="true" />
+          <object-type name="TYPE" enabled="true" />
+          <object-type name="TYPE_ATTRIBUTE" enabled="true" />
+          <object-type name="ARGUMENT" enabled="true" />
+          <object-type name="DIMENSION" enabled="true" />
+          <object-type name="CLUSTER" enabled="true" />
+          <object-type name="DBLINK" enabled="true" />
+        </object-type-filter>
+      </filters>
+      <sorting>
+        <object-type name="COLUMN" sorting-type="NAME" />
+        <object-type name="FUNCTION" sorting-type="NAME" />
+        <object-type name="PROCEDURE" sorting-type="NAME" />
+        <object-type name="ARGUMENT" sorting-type="POSITION" />
+      </sorting>
+      <default-editors>
+        <object-type name="VIEW" editor-type="SELECTION" />
+        <object-type name="PACKAGE" editor-type="SELECTION" />
+        <object-type name="TYPE" editor-type="SELECTION" />
+      </default-editors>
+    </browser-settings>
+    <navigation-settings>
+      <lookup-filters>
+        <lookup-objects>
+          <object-type name="SCHEMA" enabled="true" />
+          <object-type name="USER" enabled="false" />
+          <object-type name="ROLE" enabled="false" />
+          <object-type name="PRIVILEGE" enabled="false" />
+          <object-type name="CHARSET" enabled="false" />
+          <object-type name="TABLE" enabled="true" />
+          <object-type name="VIEW" enabled="true" />
+          <object-type name="MATERIALIZED VIEW" enabled="true" />
+          <object-type name="NESTED TABLE" enabled="false" />
+          <object-type name="COLUMN" enabled="false" />
+          <object-type name="INDEX" enabled="true" />
+          <object-type name="CONSTRAINT" enabled="true" />
+          <object-type name="DATASET TRIGGER" enabled="true" />
+          <object-type name="DATABASE TRIGGER" enabled="true" />
+          <object-type name="SYNONYM" enabled="false" />
+          <object-type name="SEQUENCE" enabled="true" />
+          <object-type name="PROCEDURE" enabled="true" />
+          <object-type name="FUNCTION" enabled="true" />
+          <object-type name="PACKAGE" enabled="true" />
+          <object-type name="TYPE" enabled="true" />
+          <object-type name="TYPE ATTRIBUTE" enabled="false" />
+          <object-type name="ARGUMENT" enabled="false" />
+          <object-type name="DIMENSION" enabled="false" />
+          <object-type name="CLUSTER" enabled="false" />
+          <object-type name="DBLINK" enabled="true" />
+        </lookup-objects>
+        <force-database-load value="false" />
+        <prompt-connection-selection value="true" />
+        <prompt-schema-selection value="true" />
+      </lookup-filters>
+    </navigation-settings>
+    <dataset-grid-settings>
+      <general>
+        <enable-zooming value="true" />
+        <enable-column-tooltip value="true" />
+      </general>
+      <sorting>
+        <nulls-first value="true" />
+        <max-sorting-columns value="4" />
+      </sorting>
+      <tracking-columns>
+        <columnNames value="" />
+        <visible value="true" />
+        <editable value="false" />
+      </tracking-columns>
+    </dataset-grid-settings>
+    <dataset-editor-settings>
+      <text-editor-popup>
+        <active value="false" />
+        <active-if-empty value="false" />
+        <data-length-threshold value="100" />
+        <popup-delay value="1000" />
+      </text-editor-popup>
+      <values-actions-popup>
+        <show-popup-button value="true" />
+        <element-count-threshold value="1000" />
+        <data-length-threshold value="250" />
+      </values-actions-popup>
+      <general>
+        <fetch-block-size value="100" />
+        <fetch-timeout value="30" />
+        <trim-whitespaces value="true" />
+        <convert-empty-strings-to-null value="true" />
+        <select-content-on-cell-edit value="true" />
+        <large-value-preview-active value="true" />
+      </general>
+      <filters>
+        <prompt-filter-dialog value="true" />
+        <default-filter-type value="BASIC" />
+      </filters>
+      <qualified-text-editor text-length-threshold="300">
+        <content-types>
+          <content-type name="Text" enabled="true" />
+          <content-type name="Properties" enabled="true" />
+          <content-type name="XML" enabled="true" />
+          <content-type name="DTD" enabled="true" />
+          <content-type name="HTML" enabled="true" />
+          <content-type name="XHTML" enabled="true" />
+          <content-type name="SQL" enabled="true" />
+          <content-type name="PL/SQL" enabled="true" />
+          <content-type name="JSON" enabled="true" />
+          <content-type name="JSON5" enabled="true" />
+          <content-type name="YAML" enabled="true" />
+        </content-types>
+      </qualified-text-editor>
+      <record-navigation>
+        <navigation-target value="VIEWER" />
+      </record-navigation>
+    </dataset-editor-settings>
+    <code-editor-settings>
+      <general>
+        <show-object-navigation-gutter value="false" />
+        <show-spec-declaration-navigation-gutter value="true" />
+        <enable-spellchecking value="true" />
+        <enable-reference-spellchecking value="false" />
+      </general>
+      <confirmations>
+        <save-changes value="false" />
+        <revert-changes value="true" />
+      </confirmations>
+    </code-editor-settings>
+    <code-completion-settings>
+      <filters>
+        <basic-filter>
+          <filter-element type="RESERVED_WORD" id="keyword" selected="true" />
+          <filter-element type="RESERVED_WORD" id="function" selected="true" />
+          <filter-element type="RESERVED_WORD" id="parameter" selected="true" />
+          <filter-element type="RESERVED_WORD" id="datatype" selected="true" />
+          <filter-element type="RESERVED_WORD" id="exception" selected="true" />
+          <filter-element type="OBJECT" id="schema" selected="true" />
+          <filter-element type="OBJECT" id="role" selected="true" />
+          <filter-element type="OBJECT" id="user" selected="true" />
+          <filter-element type="OBJECT" id="privilege" selected="true" />
+          <user-schema>
+            <filter-element type="OBJECT" id="table" selected="true" />
+            <filter-element type="OBJECT" id="view" selected="true" />
+            <filter-element type="OBJECT" id="materialized view" selected="true" />
+            <filter-element type="OBJECT" id="index" selected="true" />
+            <filter-element type="OBJECT" id="constraint" selected="true" />
+            <filter-element type="OBJECT" id="trigger" selected="true" />
+            <filter-element type="OBJECT" id="synonym" selected="false" />
+            <filter-element type="OBJECT" id="sequence" selected="true" />
+            <filter-element type="OBJECT" id="procedure" selected="true" />
+            <filter-element type="OBJECT" id="function" selected="true" />
+            <filter-element type="OBJECT" id="package" selected="true" />
+            <filter-element type="OBJECT" id="type" selected="true" />
+            <filter-element type="OBJECT" id="dimension" selected="true" />
+            <filter-element type="OBJECT" id="cluster" selected="true" />
+            <filter-element type="OBJECT" id="dblink" selected="true" />
+          </user-schema>
+          <public-schema>
+            <filter-element type="OBJECT" id="table" selected="false" />
+            <filter-element type="OBJECT" id="view" selected="false" />
+            <filter-element type="OBJECT" id="materialized view" selected="false" />
+            <filter-element type="OBJECT" id="index" selected="false" />
+            <filter-element type="OBJECT" id="constraint" selected="false" />
+            <filter-element type="OBJECT" id="trigger" selected="false" />
+            <filter-element type="OBJECT" id="synonym" selected="false" />
+            <filter-element type="OBJECT" id="sequence" selected="false" />
+            <filter-element type="OBJECT" id="procedure" selected="false" />
+            <filter-element type="OBJECT" id="function" selected="false" />
+            <filter-element type="OBJECT" id="package" selected="false" />
+            <filter-element type="OBJECT" id="type" selected="false" />
+            <filter-element type="OBJECT" id="dimension" selected="false" />
+            <filter-element type="OBJECT" id="cluster" selected="false" />
+            <filter-element type="OBJECT" id="dblink" selected="false" />
+          </public-schema>
+          <any-schema>
+            <filter-element type="OBJECT" id="table" selected="true" />
+            <filter-element type="OBJECT" id="view" selected="true" />
+            <filter-element type="OBJECT" id="materialized view" selected="true" />
+            <filter-element type="OBJECT" id="index" selected="true" />
+            <filter-element type="OBJECT" id="constraint" selected="true" />
+            <filter-element type="OBJECT" id="trigger" selected="true" />
+            <filter-element type="OBJECT" id="synonym" selected="true" />
+            <filter-element type="OBJECT" id="sequence" selected="true" />
+            <filter-element type="OBJECT" id="procedure" selected="true" />
+            <filter-element type="OBJECT" id="function" selected="true" />
+            <filter-element type="OBJECT" id="package" selected="true" />
+            <filter-element type="OBJECT" id="type" selected="true" />
+            <filter-element type="OBJECT" id="dimension" selected="true" />
+            <filter-element type="OBJECT" id="cluster" selected="true" />
+            <filter-element type="OBJECT" id="dblink" selected="true" />
+          </any-schema>
+        </basic-filter>
+        <extended-filter>
+          <filter-element type="RESERVED_WORD" id="keyword" selected="true" />
+          <filter-element type="RESERVED_WORD" id="function" selected="true" />
+          <filter-element type="RESERVED_WORD" id="parameter" selected="true" />
+          <filter-element type="RESERVED_WORD" id="datatype" selected="true" />
+          <filter-element type="RESERVED_WORD" id="exception" selected="true" />
+          <filter-element type="OBJECT" id="schema" selected="true" />
+          <filter-element type="OBJECT" id="user" selected="true" />
+          <filter-element type="OBJECT" id="role" selected="true" />
+          <filter-element type="OBJECT" id="privilege" selected="true" />
+          <user-schema>
+            <filter-element type="OBJECT" id="table" selected="true" />
+            <filter-element type="OBJECT" id="view" selected="true" />
+            <filter-element type="OBJECT" id="materialized view" selected="true" />
+            <filter-element type="OBJECT" id="index" selected="true" />
+            <filter-element type="OBJECT" id="constraint" selected="true" />
+            <filter-element type="OBJECT" id="trigger" selected="true" />
+            <filter-element type="OBJECT" id="synonym" selected="true" />
+            <filter-element type="OBJECT" id="sequence" selected="true" />
+            <filter-element type="OBJECT" id="procedure" selected="true" />
+            <filter-element type="OBJECT" id="function" selected="true" />
+            <filter-element type="OBJECT" id="package" selected="true" />
+            <filter-element type="OBJECT" id="type" selected="true" />
+            <filter-element type="OBJECT" id="dimension" selected="true" />
+            <filter-element type="OBJECT" id="cluster" selected="true" />
+            <filter-element type="OBJECT" id="dblink" selected="true" />
+          </user-schema>
+          <public-schema>
+            <filter-element type="OBJECT" id="table" selected="true" />
+            <filter-element type="OBJECT" id="view" selected="true" />
+            <filter-element type="OBJECT" id="materialized view" selected="true" />
+            <filter-element type="OBJECT" id="index" selected="true" />
+            <filter-element type="OBJECT" id="constraint" selected="true" />
+            <filter-element type="OBJECT" id="trigger" selected="true" />
+            <filter-element type="OBJECT" id="synonym" selected="true" />
+            <filter-element type="OBJECT" id="sequence" selected="true" />
+            <filter-element type="OBJECT" id="procedure" selected="true" />
+            <filter-element type="OBJECT" id="function" selected="true" />
+            <filter-element type="OBJECT" id="package" selected="true" />
+            <filter-element type="OBJECT" id="type" selected="true" />
+            <filter-element type="OBJECT" id="dimension" selected="true" />
+            <filter-element type="OBJECT" id="cluster" selected="true" />
+            <filter-element type="OBJECT" id="dblink" selected="true" />
+          </public-schema>
+          <any-schema>
+            <filter-element type="OBJECT" id="table" selected="true" />
+            <filter-element type="OBJECT" id="view" selected="true" />
+            <filter-element type="OBJECT" id="materialized view" selected="true" />
+            <filter-element type="OBJECT" id="index" selected="true" />
+            <filter-element type="OBJECT" id="constraint" selected="true" />
+            <filter-element type="OBJECT" id="trigger" selected="true" />
+            <filter-element type="OBJECT" id="synonym" selected="true" />
+            <filter-element type="OBJECT" id="sequence" selected="true" />
+            <filter-element type="OBJECT" id="procedure" selected="true" />
+            <filter-element type="OBJECT" id="function" selected="true" />
+            <filter-element type="OBJECT" id="package" selected="true" />
+            <filter-element type="OBJECT" id="type" selected="true" />
+            <filter-element type="OBJECT" id="dimension" selected="true" />
+            <filter-element type="OBJECT" id="cluster" selected="true" />
+            <filter-element type="OBJECT" id="dblink" selected="true" />
+          </any-schema>
+        </extended-filter>
+      </filters>
+      <sorting enabled="true">
+        <sorting-element type="RESERVED_WORD" id="keyword" />
+        <sorting-element type="RESERVED_WORD" id="datatype" />
+        <sorting-element type="OBJECT" id="column" />
+        <sorting-element type="OBJECT" id="table" />
+        <sorting-element type="OBJECT" id="view" />
+        <sorting-element type="OBJECT" id="materialized view" />
+        <sorting-element type="OBJECT" id="index" />
+        <sorting-element type="OBJECT" id="constraint" />
+        <sorting-element type="OBJECT" id="trigger" />
+        <sorting-element type="OBJECT" id="synonym" />
+        <sorting-element type="OBJECT" id="sequence" />
+        <sorting-element type="OBJECT" id="procedure" />
+        <sorting-element type="OBJECT" id="function" />
+        <sorting-element type="OBJECT" id="package" />
+        <sorting-element type="OBJECT" id="type" />
+        <sorting-element type="OBJECT" id="dimension" />
+        <sorting-element type="OBJECT" id="cluster" />
+        <sorting-element type="OBJECT" id="dblink" />
+        <sorting-element type="OBJECT" id="schema" />
+        <sorting-element type="OBJECT" id="role" />
+        <sorting-element type="OBJECT" id="user" />
+        <sorting-element type="RESERVED_WORD" id="function" />
+        <sorting-element type="RESERVED_WORD" id="parameter" />
+      </sorting>
+      <format>
+        <enforce-code-style-case value="true" />
+      </format>
+    </code-completion-settings>
+    <execution-engine-settings>
+      <statement-execution>
+        <fetch-block-size value="100" />
+        <execution-timeout value="20" />
+        <debug-execution-timeout value="600" />
+        <focus-result value="false" />
+        <prompt-execution value="false" />
+      </statement-execution>
+      <script-execution>
+        <command-line-interfaces />
+        <execution-timeout value="300" />
+      </script-execution>
+      <method-execution>
+        <execution-timeout value="30" />
+        <debug-execution-timeout value="600" />
+        <parameter-history-size value="10" />
+      </method-execution>
+    </execution-engine-settings>
+    <operation-settings>
+      <transactions>
+        <uncommitted-changes>
+          <on-project-close value="ASK" />
+          <on-disconnect value="ASK" />
+          <on-autocommit-toggle value="ASK" />
+        </uncommitted-changes>
+        <multiple-uncommitted-changes>
+          <on-commit value="ASK" />
+          <on-rollback value="ASK" />
+        </multiple-uncommitted-changes>
+      </transactions>
+      <session-browser>
+        <disconnect-session value="ASK" />
+        <kill-session value="ASK" />
+        <reload-on-filter-change value="false" />
+      </session-browser>
+      <compiler>
+        <compile-type value="KEEP" />
+        <compile-dependencies value="ASK" />
+        <always-show-controls value="false" />
+      </compiler>
+      <debugger>
+        <debugger-type value="JDBC" />
+        <use-generic-runners value="true" />
+      </debugger>
+    </operation-settings>
+    <ddl-file-settings>
+      <extensions>
+        <mapping file-type-id="VIEW" extensions="vw" />
+        <mapping file-type-id="TRIGGER" extensions="trg" />
+        <mapping file-type-id="PROCEDURE" extensions="prc" />
+        <mapping file-type-id="FUNCTION" extensions="fnc" />
+        <mapping file-type-id="PACKAGE" extensions="pkg" />
+        <mapping file-type-id="PACKAGE_SPEC" extensions="pks" />
+        <mapping file-type-id="PACKAGE_BODY" extensions="pkb" />
+        <mapping file-type-id="TYPE" extensions="tpe" />
+        <mapping file-type-id="TYPE_SPEC" extensions="tps" />
+        <mapping file-type-id="TYPE_BODY" extensions="tpb" />
+      </extensions>
+      <general>
+        <lookup-ddl-files value="true" />
+        <create-ddl-files value="false" />
+        <synchronize-ddl-files value="true" />
+        <use-qualified-names value="false" />
+        <make-scripts-rerunnable value="true" />
+      </general>
+    </ddl-file-settings>
+    <general-settings>
+      <regional-settings>
+        <date-format value="MEDIUM" />
+        <number-format value="UNGROUPED" />
+        <locale value="SYSTEM_DEFAULT" />
+        <use-custom-formats value="false" />
+      </regional-settings>
+      <environment>
+        <environment-types>
+          <environment-type id="development" name="Development" description="Development environment" color="-2430209/-12296320" readonly-code="false" readonly-data="false" />
+          <environment-type id="integration" name="Integration" description="Integration environment" color="-2621494/-12163514" readonly-code="true" readonly-data="false" />
+          <environment-type id="production" name="Production" description="Productive environment" color="-11574/-10271420" readonly-code="true" readonly-data="true" />
+          <environment-type id="other" name="Other" description="" color="-1576/-10724543" readonly-code="false" readonly-data="false" />
+        </environment-types>
+        <visibility-settings>
+          <connection-tabs value="true" />
+          <dialog-headers value="true" />
+          <object-editor-tabs value="true" />
+          <script-editor-tabs value="false" />
+          <execution-result-tabs value="true" />
+        </visibility-settings>
+      </environment>
+    </general-settings>
+  </component>
+  <component name="DBNavigator.Project.StatementExecutionManager">
+    <execution-variables />
+  </component>
+</project>
\ No newline at end of file
--- a/.idea/gm-text-miner.iml
+++ b/.idea/gm-text-miner.iml
+<?xml version="1.0" encoding="UTF-8"?>
+<module type="PYTHON_MODULE" version="4">
+  <component name="NewModuleRootManager">
+    <content url="file://$MODULE_DIR$" />
+    <orderEntry type="inheritedJdk" />
+    <orderEntry type="sourceFolder" forTests="false" />
+  </component>
+  <component name="TestRunnerService">
+    <option name="PROJECT_TEST_RUNNER" value="pytest" />
+  </component>
+</module>
\ No newline at end of file
--- a/.idea/inspectionProfiles/profiles_settings.xml
+++ b/.idea/inspectionProfiles/profiles_settings.xml
+<component name="InspectionProjectProfileManager">
+  <settings>
+    <option name="USE_PROJECT_PROFILE" value="false" />
+    <version value="1.0" />
+  </settings>
+</component>
\ No newline at end of file
--- a/.idea/misc.xml
+++ b/.idea/misc.xml
+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+  <component name="ProjectRootManager" version="2" project-jdk-name="Python 3.7" project-jdk-type="Python SDK" />
+</project>
\ No newline at end of file
--- a/.idea/modules.xml
+++ b/.idea/modules.xml
+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+  <component name="ProjectModuleManager">
+    <modules>
+      <module fileurl="file://$PROJECT_DIR$/.idea/gm-text-miner.iml" filepath="$PROJECT_DIR$/.idea/gm-text-miner.iml" />
+    </modules>
+  </component>
+</project>
\ No newline at end of file
--- a/.idea/vcs.xml
+++ b/.idea/vcs.xml
+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+  <component name="VcsDirectoryMappings">
+    <mapping directory="$PROJECT_DIR$" vcs="Git" />
+  </component>
+</project>
\ No newline at end of file
--- a/Jenkinsfile
+++ b/Jenkinsfile
+@Library('gm-pipeline-library') _
+
+// Declarative pipeline: builds a source distribution of the package and
+// uploads it to the "coding-pypi" package index, then sends a notification.
+pipeline {
+  agent  any
+
+  options {
+    // Prefix every console output line with a timestamp.
+    timestamps()
+
+    // Disallow concurrent executions of the Pipeline.
+    disableConcurrentBuilds()
+
+    // On failure, retry the entire Pipeline the specified number of times.
+    retry(1)
+  }
+
+  parameters {
+    // 'cache' selects whether docker build uses its cache: the default ''
+    // uses it, '--no-cache' disables it.
+    // NOTE(review): no stage visible in this file reads params.cache.
+    choice(name: 'cache', choices: ['', '--no-cache'], description: 'docker build 是否使用cache,默认使用,不使用为--no-cache')
+  }
+
+  stages {
+    // Stage "package": build the source distribution with setup.py.
+    stage('打包') {
+      steps {
+        script {
+          sh 'python setup.py sdist'
+        }
+      }
+    }
+    // Stage "publish to PyPI": upload everything under dist/ to the
+    // repository named coding-pypi, skipping files that already exist there.
+    stage('发布Pypi') {
+      steps {
+        script {
+          sh 'twine upload --skip-existing -r coding-pypi dist/* '
+        }
+      }
+    }
+  }
+
+  post {
+      // Runs regardless of the build result and passes that result on.
+      // dingNotify is not defined in this file — presumably it is a step from
+      // the shared library loaded via @Library; confirm.
+      always {
+          dingNotify "after", "${currentBuild.currentResult}"
+      }
+  }
+}
\ No newline at end of file
--- a/MANIFEST.in
+++ b/MANIFEST.in
+include dicts/*
\ No newline at end of file
--- a/algorithm/__init__.py
+++ b/algorithm/__init__.py
+# -*- coding:utf-8 -*-
+# author:gm
+# mail: zhangguodong@igengmei.com
+# datetime:2020/4/24 3:32 下午
+# software: PyCharm
--- a/algorithm/text_classifical/__init__.py
+++ b/algorithm/text_classifical/__init__.py
+# -*- coding:utf-8 -*-
+# author:gm
+# mail: zhangguodong@igengmei.com
+# datetime:2020/4/24 3:32 下午
+# software: PyCharm
--- a/algorithm/text_classifical/base.py
+++ b/algorithm/text_classifical/base.py
+# -*- coding:utf-8 -*-
+# author:gm
+# mail: zhangguodong@igengmei.com
+# datetime:2020/4/24 3:32 下午
+# software: PyCharm
+from preprocesser.processors import token_processor
+from preprocesser.filter import stopwords_filter
+from collections import Counter
+from config import config
+import os
+
+
+class SELECTED_CONTENT_TYPE():
+    """Content categories, each stored as an ``(id, label)`` tuple.
+
+    ``TextClassifical.predict`` returns the integer at index 0 of one of
+    these tuples, or -1 when no dictionary word matched.
+    """
+    BEAUTY_PROJECT = (1, "医美项目")  # medical-aesthetics project
+    BEAUTY_STAR = (2, "明星医美")  # star + medical aesthetics
+    BEAUTY_CELEBRITY = (3, "网红医美")  # influencer + medical aesthetics
+    STAR_GOSSIP = (4, "明星八卦")  # star gossip
+    CELEBRITY_GOSSIP = (5, "网红八卦")  # influencer gossip
+
+
+class TextClassifical(object):
+    """Dictionary-based classifier for short texts.
+
+    Three word lists (influencer names, project names, star names) are loaded
+    from files. A text is tokenized, its tokens are intersected with each
+    list, and the relative weight of the three groups picks one of the
+    ``SELECTED_CONTENT_TYPE`` ids.
+    """
+
+    def __init__(self, network_influencer_path, project_path, star_path):
+        """Load the three dictionaries and bind the tokenizer and filter.
+
+        :param network_influencer_path: file with one influencer name per line.
+        :param project_path: file with one project name per line.
+        :param star_path: file with one star name per line.
+        """
+        self.network_influencer_words = self.build_network_influencer_words(network_influencer_path)
+        self.project_words = self.build_project_words(project_path)
+        self.star_words = self.build_star_words(star_path)
+        self.tokenprocessor = token_processor
+        self.stopwords_filter = stopwords_filter
+
+    @staticmethod
+    def _load_words(word_path):
+        """Return ``{stripped_line: 1}`` for every line of ``word_path``."""
+        ret = {}
+        # The context manager closes the handle; undecodable bytes are
+        # dropped, as before.
+        with open(word_path, "r", errors="ignore") as handle:
+            for line in handle:
+                ret[line.strip()] = 1
+        return ret
+
+    def build_network_influencer_words(self, word_path):
+        """Load the influencer dictionary from ``word_path``."""
+        return self._load_words(word_path)
+
+    def build_project_words(self, project_path):
+        """Load the project dictionary from ``project_path``."""
+        return self._load_words(project_path)
+
+    def build_star_words(self, star_path):
+        """Load the star dictionary from ``star_path``."""
+        return self._load_words(star_path)
+
+    def run(self, content):
+        """Classify ``content`` and report which dictionary words it contains.
+
+        :param content: the raw text to classify.
+        :return: dict with ``content_type`` (a category id or -1) and the
+            lists ``star``, ``celebrity`` and ``projects``; every list item is
+            a single-entry ``{word: share}`` dict.
+        """
+        ret = {
+            "content_type": -1,
+            "star": [],
+            "celebrity": [],
+            "projects": []
+        }
+        words = self.tokenprocessor.lcut(content, cut_all=True)
+        # Use the filter bound in __init__ rather than the module global, so
+        # the instance is consistent with its own attributes.
+        words = self.stopwords_filter.filter(words)
+        unique_words = set(words)
+        netword_influencer_concurrence = unique_words & set(self.network_influencer_words)
+        project_word_concurrence = unique_words & set(self.project_words)
+        star_words_concurrence = unique_words & set(self.star_words)
+        counter = Counter(words)
+        content_type, words_proba = self.predict(counter, netword_influencer_concurrence, project_word_concurrence,
+                                                 star_words_concurrence)
+        ret["content_type"] = content_type
+        # words_proba is ordered [influencer, project, star].
+        ret["star"].extend([{word: words_proba[2].get(word, 0.0)} for word in star_words_concurrence])
+        ret["celebrity"].extend(
+            [{word: words_proba[0].get(word, 0.0)} for word in netword_influencer_concurrence])
+        ret["projects"].extend(
+            [{word: words_proba[1].get(word, 0.0)} for word in project_word_concurrence])
+        return ret
+
+    def score(self, counter, concurrence_words):
+        """Placeholder; not implemented and returns ``None``."""
+        pass
+
+    @staticmethod
+    def _weighted_shares(counter, words, weight):
+        """Return ``({word: share_within_group}, weighted_total)`` for ``words``.
+
+        A non-positive total yields an empty mapping instead of dividing by
+        zero.
+        """
+        total = sum(counter[word] * weight for word in words)
+        if total <= 0:
+            return {}, 0
+        return {word: float(counter[word] * weight) / total for word in words}, total
+
+    def predict(self, counter, netword_influencer_concurrence, project_word_concurrence, star_words_concurrence):
+        """Pick a content type from the matched dictionary words.
+
+        Influencer and star occurrences are weighted by 2, project
+        occurrences by 1.
+
+        :param counter: token -> occurrence count for the text.
+        :param netword_influencer_concurrence: matched influencer words.
+        :param project_word_concurrence: matched project words.
+        :param star_words_concurrence: matched star words.
+        :return: ``(content_type, words_proba)`` where ``words_proba`` is the
+            list ``[influencer, project, star]`` of ``{word: share}`` dicts
+            and ``content_type`` is -1 when nothing matched.
+        """
+        net_influencer_proba, net_influencer_total = self._weighted_shares(
+            counter, netword_influencer_concurrence, 2)
+        project_words_proba, project_words_total = self._weighted_shares(counter, project_word_concurrence, 1)
+        star_words_proba, star_words_total = self._weighted_shares(counter, star_words_concurrence, 2)
+        words_proba = [net_influencer_proba, project_words_proba, star_words_proba]
+
+        total_word = net_influencer_total + project_words_total + star_words_total
+        if total_word <= 0:
+            return -1, words_proba
+
+        influencer_share = float(net_influencer_total) / total_word
+        project_share = float(project_words_total) / total_word
+        star_share = float(star_words_total) / total_word
+
+        if project_share <= 0:
+            # No project word at all: the text is gossip about whichever
+            # group dominates (ties go to stars).
+            if star_share >= influencer_share:
+                return SELECTED_CONTENT_TYPE.STAR_GOSSIP[0], words_proba
+            # Previously ``SELECTED_CONTENT_TYPE[0]``, which raised TypeError.
+            return SELECTED_CONTENT_TYPE.CELEBRITY_GOSSIP[0], words_proba
+        if project_share > 0.75:
+            return SELECTED_CONTENT_TYPE.BEAUTY_PROJECT[0], words_proba
+        if influencer_share > star_share:
+            return SELECTED_CONTENT_TYPE.BEAUTY_CELEBRITY[0], words_proba
+        return SELECTED_CONTENT_TYPE.BEAUTY_STAR[0], words_proba
+
+
+# Project root: three directory levels above this file. Derived with os.path
+# so it does not depend on "/" separators or on __file__ being absolute.
+root_path = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+# Shared module-level classifier built from the dictionaries named in config.
+model = TextClassifical(os.path.join(root_path, config.network_influcer_dic),
+                        os.path.join(root_path, config.projects_dic),
+                        os.path.join(root_path, config.star_dic))
--- a/build/lib/algorithm/__init__.py
+++ b/build/lib/algorithm/__init__.py
+# -*- coding:utf-8 -*-
+# author:gm
+# mail: zhangguodong@igengmei.com
+# datetime:2020/4/24 3:32 下午
+# software: PyCharm
--- a/build/lib/algorithm/text_classifical/__init__.py
+++ b/build/lib/algorithm/text_classifical/__init__.py
+# -*- coding:utf-8 -*-
+# author:gm
+# mail: zhangguodong@igengmei.com
+# datetime:2020/4/24 3:32 下午
+# software: PyCharm
--- a/build/lib/algorithm/text_classifical/base.py
+++ b/build/lib/algorithm/text_classifical/base.py
+# -*- coding:utf-8 -*-
+# author:gm
+# mail: zhangguodong@igengmei.com
+# datetime:2020/4/24 3:32 下午
+# software: PyCharm
+from preprocesser.processors import token_processor
+from preprocesser.filter import stopwords_filter
+from collections import Counter
+from config import config
+import os
+
+
+class SELECTED_CONTENT_TYPE:
+    """Content categories, each stored as an ``(id, display label)`` tuple."""
+    BEAUTY_PROJECT = 1, "医美项目"  # medical-aesthetics project
+    BEAUTY_STAR = 2, "明星医美"  # star + medical aesthetics
+    BEAUTY_CELEBRITY = 3, "网红医美"  # influencer + medical aesthetics
+    STAR_GOSSIP = 4, "明星八卦"  # star gossip
+    CELEBRITY_GOSSIP = 5, "网红八卦"  # influencer gossip
+
+
+class TextClassifical(object):
+    """Dictionary-based classifier for influencer / project / star content."""
+
+    def __init__(self, network_influencer_path, project_path, star_path):
+        """Load the three word dictionaries and bind the shared tokenizer and filter.
+
+        :param network_influencer_path: path of the influencer word list.
+        :param project_path: path of the project word list.
+        :param star_path: path of the star word list.
+        """
+        self.network_influencer_words = self.build_network_influencer_words(network_influencer_path)
+        self.project_words = self.build_project_words(project_path)
+        self.star_words = self.build_star_words(star_path)
+        self.tokenprocessor = token_processor
+        self.stopwords_filter = stopwords_filter
+
+    @staticmethod
+    def _load_words(word_path):
+        """Read one word per line from ``word_path`` into a ``{word: 1}`` dict."""
+        ret = {}
+        # Decode explicitly as UTF-8 instead of the locale default (undecodable
+        # bytes are still ignored) and close the handle deterministically.
+        with open(word_path, "r", encoding="utf-8", errors="ignore") as word_file:
+            for line in word_file:
+                line = line.strip()
+                if line:  # blank padding lines are not words
+                    ret[line] = 1
+        return ret
+
+    def build_network_influencer_words(self, word_path):
+        """Load the influencer dictionary as ``{word: 1}``."""
+        return self._load_words(word_path)
+
+    def build_project_words(self, project_path):
+        """Load the project dictionary as ``{word: 1}``."""
+        return self._load_words(project_path)
+
+    def build_star_words(self, star_path):
+        """Load the star dictionary as ``{word: 1}``."""
+        return self._load_words(star_path)
+
+    def run(self, content):
+        """Tokenize ``content`` and classify it.
+
+        :param content: raw text to classify.
+        :return: dict with ``content_type`` (numeric id, ``-1`` when nothing
+            matched) and ``star`` / ``celebrity`` / ``projects``, each a list of
+            single-entry ``{word: share}`` dicts for the matched words.
+        """
+        ret = {
+            "content_type": -1,
+            "star": [],
+            "celebrity": [],
+            "projects": []
+        }
+        words = self.tokenprocessor.lcut(content, cut_all=True)
+        # Use the instance's filter so an overridden filter is honoured.
+        words = self.stopwords_filter.filter(words)
+        present_words = set(words)
+        netword_influencer_concurrence = present_words & set(self.network_influencer_words)
+        project_word_concurrence = present_words & set(self.project_words)
+        star_words_concurrence = present_words & set(self.star_words)
+        counter = Counter(words)
+        content_type, words_proba = self.predict(counter, netword_influencer_concurrence, project_word_concurrence,
+                                                 star_words_concurrence)
+        ret["content_type"] = content_type
+        # words_proba is ordered [influencer, project, star].
+        ret["star"].extend({word: words_proba[2].get(word, 0.0)} for word in star_words_concurrence)
+        ret["celebrity"].extend({word: words_proba[0].get(word, 0.0)} for word in netword_influencer_concurrence)
+        ret["projects"].extend({word: words_proba[1].get(word, 0.0)} for word in project_word_concurrence)
+        return ret
+
+    def score(self, counter, concurrence_words):
+        """Placeholder scoring hook; not implemented."""
+        pass
+
+    def predict(self, counter, netword_influencer_concurrence, project_word_concurrence, star_words_concurrence):
+        """Pick a content type from the dictionary words found in a text.
+
+        :param counter: mapping of token -> occurrence count for the text.
+        :param netword_influencer_concurrence: influencer-dictionary words present in the text.
+        :param project_word_concurrence: project-dictionary words present in the text.
+        :param star_words_concurrence: star-dictionary words present in the text.
+        :return: ``(content_type, words_proba)``. ``content_type`` is a numeric id
+            from ``SELECTED_CONTENT_TYPE``, or ``-1`` when no dictionary word
+            matched. ``words_proba`` is ``[influencer, project, star]``; each item
+            maps a word to its share of that category's weighted total.
+        """
+        # Influencer and star hits are weighted double; project hits count once.
+        net_influencer_total = sum(counter[word] * 2 for word in netword_influencer_concurrence)
+        net_influencer_proba = {word: float(counter[word] * 2) / net_influencer_total
+                                for word in netword_influencer_concurrence}
+        project_words_total = sum(counter[word] for word in project_word_concurrence)
+        project_words_proba = {word: float(counter[word]) / project_words_total
+                               for word in project_word_concurrence}
+        star_words_total = sum(counter[word] * 2 for word in star_words_concurrence)
+        star_words_proba = {word: float(counter[word] * 2) / star_words_total
+                            for word in star_words_concurrence}
+        words_proba = [net_influencer_proba, project_words_proba, star_words_proba]
+
+        totals = [net_influencer_total, project_words_total, star_words_total]
+        total_word = sum(totals)
+        if total_word <= 0:
+            return -1, words_proba
+        influencer_share, project_share, star_share = [float(item) / total_word for item in totals]
+
+        if project_share <= 0:
+            # No project word at all: gossip about whichever group dominates
+            # (ties go to stars).
+            if star_share >= influencer_share:
+                return SELECTED_CONTENT_TYPE.STAR_GOSSIP[0], words_proba
+            # Must be the named constant: subscripting the class itself
+            # (SELECTED_CONTENT_TYPE[0]) raises TypeError.
+            return SELECTED_CONTENT_TYPE.CELEBRITY_GOSSIP[0], words_proba
+        if project_share > 0.75:
+            return SELECTED_CONTENT_TYPE.BEAUTY_PROJECT[0], words_proba
+        if influencer_share > star_share:
+            return SELECTED_CONTENT_TYPE.BEAUTY_CELEBRITY[0], words_proba
+        return SELECTED_CONTENT_TYPE.BEAUTY_STAR[0], words_proba
+
+
+# Project root: three directory levels above this file. Derived with os.path
+# so it does not depend on "/" separators or on __file__ being absolute.
+root_path = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+# Shared module-level classifier built from the dictionaries named in config.
+model = TextClassifical(os.path.join(root_path, config.network_influcer_dic),
+                        os.path.join(root_path, config.projects_dic),
+                        os.path.join(root_path, config.star_dic))
--- a/build/lib/config/__init__.py
+++ b/build/lib/config/__init__.py
+# -*- coding:utf-8 -*-
+# author:gm
+# mail: zhangguodong@igengmei.com
+# datetime:2020/4/24 3:32 下午
+# software: PyCharm
--- a/build/lib/config/config.py
+++ b/build/lib/config/config.py
+# -*- coding:utf-8 -*-
+# author:gm
+# mail: zhangguodong@igengmei.com
+# datetime:2020/4/24 3:32 下午
+# software: PyCharm
+DEBUG = False
+# NOTE(review): the string literal below still lists recall_topK, sort_topK and
+# min_frequence, but this module defines none of them. In English it reads:
+#   recall_topK: how many candidate words to select
+#   sort_topK: number of words kept after ranking
+#   min_frequence: number of times all words must co-occur
+#   stopwords_path: path of the stop-word list
+#   words_path: path of the word dictionary
+"""
+recall_topK：
+    我们选取多少个候选词
+sort_topK：
+    排序后挑选的词汇数量
+min_frequence:
+    所有词汇必须共现的次数
+stopwords_path：
+    停用词路径
+words_path:
+    词典path
+"""
+# All paths below are relative; the modules that load them join them onto
+# their own computed root_path.
+stopwords_path = "dicts/stopwords.dic"
+words_path = "dicts/words.dic"
+
+# Word lists used by the text classifier (influencers, projects, stars).
+network_influcer_dic = "dicts/network_influcer.dic"
+projects_dic = "dicts/project.dic"
+star_dic = "dicts/star.dic"
--- a/build/lib/dicts/__init__.py
+++ b/build/lib/dicts/__init__.py
+# -*- coding:utf-8 -*-
+# author:gm
+# mail: zhangguodong@igengmei.com
+# datetime:2020/4/24 3:32 下午
+# software: PyCharm
--- a/build/lib/dicts/network_influcer.dic
+++ b/build/lib/dicts/network_influcer.dic
+丁真
+周扬青
+冯提莫
+半藏森林
+艾比
+韩安冉
+南笙
+奶茶妹妹
+宋昕冉
+林小宅
+晚晚
+谢安然
+王柠萌
+Naomi
+于文红
+甜仇
+温精灵
+温婉
+Fiona宋亮
+李蒽熙
+una
+夏夏
+水野亚美
+小小如
+卓亨瑜
+彭王者
+滕雨佳
+腻腻ninii
+李恩童
+花珊珊
+小初
+小饼干
+晚妹
+吃一口甜
+徐清婉
+Jy小语
+张贤静
+施安妮
+周子然Femi
+XIZI杨
+彦崽儿
+潘白雪
+方恰拉
+MAGBOW
+CHU小初
+不求上进的柚砸
+LU一丝
+大佬儿
+姚淇瀚Henry
+周小濛
+半藏
+Abbily
+王嘉辉
+罗小伊
+章泽天
+林晓婷
+晚奶
+柠檬
+康雅馨
+仇琳琳
+温仙女
+张曼如
+雷婉婷
+土豆公主
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
--- a/build/lib/dicts/project.dic
+++ b/build/lib/dicts/project.dic
+眼部整形
+鼻部整形
+面部轮廓
+瘦身美体
+皮肤美容
+胸部塑身
+毛发管理
+私密整形
+口腔齿科
+口唇整形
+注射美容
+自体脂肪
+半永久
+健康管理
+其他
+眼科
+妇产科
+骨整形
+SPA/按摩
+耳部其他
+牙齿治疗
+口腔其他
+颈部
+除皱
+毛发其他
+药物脱毛
+手术面部提升
+自体脂肪面部年轻化
+耳部矫正
+丰耳垂
+妊娠纹
+瘦腿
+面部吸脂
+腰腹抽脂
+鼻头整形
+光子脱毛
+激光洗眉
+祛皱
+下眼睑
+自体脂肪面部填充.
+收缩毛孔
+自体脂肪填充全脸加prp
+美容SPA生活美容
+祛痘生活美容
+瘦身生活美容
+美肤生活美容
+产后塑形生活美容
+胸部塑形
+上睑整形
+下睑整形
+胸型美化
+耳部整形
+双眼皮
+开眼角
+祛眼袋
+祛黑眼圈
+隆鼻
+鼻翼
+鼻形矫正
+颧骨颧弓
+额头
+太阳穴
+酒窝
+祛疤痕痘印
+祛斑祛色素
+改善肤质
+下巴
+美白嫩肤
+牙齿整形
+美容冠
+填充卧蚕
+眼部修复
+鼻部修复
+丰胸（隆胸）
+乳头乳晕整形
+祛副乳
+胸形美化
+胸部修复
+中医减肥
+轮廓修复
+微整修复
+牙齿矫正
+牙齿美白
+洗牙
+补牙
+种植牙
+吸脂
+下颌角
+拔牙
+垫眉弓
+鼻基底
+两颚
+唇形
+抗衰紧致
+近视矫正
+远视矫正
+散光矫正
+斜视矫正
+眼科其他
+女性私密
+男性私密
+祛腋臭
+产后修复
+眼睑
+毛发移植
+超声溶脂
+射频溶脂
+玻尿酸
+清洁补水
+瘦脸除皱
+半永久妆
+自体脂肪面部填充
+牙贴面
+激光脱毛
+冷冻溶脂
+体检
+眼部护理
+胶原蛋白注射
+少女针
+童颜针
+皮肤检测
+鼻部综合
+眼部综合
+轮廓套餐
+光纤溶脂
+自体脂肪身体塑形
+胸部套餐
+注射美肤
+中医理疗
+口腔治疗
+妇科检查
+瘦脸
+其他美体
+吸脂失败修复
+其他种植
+头部养护
+眼科疾病
+预防保健
+妇科疾病
+产科
+手骨整形
+足骨整形
+腿骨整形
+身体护理
+手足护理
+面部护理
+鼻小柱
+人中
+自体脂肪填充失败修复
+鼻综合
+激光脱面部其他毛发
+切开双眼皮
+埋线双眼皮
+定点双眼皮
+开内眼角
+开外眼角
+内切祛眼袋
+外切祛眼袋
+激光祛眼袋
+吸脂祛眼袋
+上眼睑下垂矫正
+玻尿酸填充卧蚕
+自体脂肪填充泪沟
+玻尿酸填充泪沟
+切眉
+激光祛黑眼圈
+自体脂肪填充黑眼圈
+鼻部膨体假体取出
+驼峰鼻矫正
+鹰钩鼻矫正
+歪鼻矫正
+朝天鼻矫正
+缩鼻背
+手术缩鼻头
+自体脂肪隆鼻
+玻尿酸隆鼻
+自体软骨垫鼻尖
+注射瘦脸
+吸脂瘦脸
+自体脂肪丰苹果肌
+玻尿酸填充丰苹果肌
+颧骨内推
+玻尿酸丰下巴
+下巴前推
+自体脂肪丰下巴
+硅胶垫下巴
+膨体垫下巴
+缩下巴
+宽下巴矫正
+激光溶脂祛双下巴
+酒窝成形术
+自体脂肪丰面颊
+自体脂肪丰太阳穴
+玻尿酸丰太阳穴
+硅胶丰太阳穴
+膨体丰太阳穴
+自体脂肪丰额头
+玻尿酸丰额头
+硅胶丰额头
+膨体丰额头
+招风耳矫正
+大耳缩小术
+杯状耳矫正
+唇腭裂修复
+厚唇改薄术
+玻尿酸丰唇
+自体脂肪丰唇
+处女膜修复
+包皮手术
+填充阴唇
+阴唇整形
+注射祛腋臭
+激光祛腋臭
+激光脱发际线
+植眉
+种鬓角
+植睫毛
+黑脸娃娃
+激光脱唇毛
+激光祛红血丝
+光子嫩肤
+激光祛纹身
+玻尿酸除颈纹
+自体脂肪除颈纹
+玻尿酸除法令纹
+自体脂肪除法令纹
+玻尿酸除眉间纹
+胸部失败修复
+乳房再造
+假体隆胸
+吸脂祛副乳
+手术祛副乳
+乳头内陷矫正
+乳头缩小
+乳晕缩小
+乳晕漂染
+乳房缩小
+胸部下垂矫正
+中医减肥其他项目
+吸脂瘦背部
+自体脂肪丰臀
+产后塑形
+腹壁成形术
+膨体隆鼻
+硅胶隆鼻
+自体肋软骨隆鼻
+激光溶脂瘦脸
+注射瘦腿
+吸脂瘦大腿
+吸脂瘦小腿
+吸脂瘦手臂
+自体脂肪隆胸
+玻尿酸除面部细纹
+自体脂肪除面部细纹
+植发际线
+吸脂祛双下巴
+鼻翼缩小
+全瓷牙
+手术祛腋臭
+像素激光
+植胡须
+吸脂塑臀
+点阵激光
+玻尿酸丰面颊
+玻尿酸祛黑眼圈
+胶原蛋白填充丰唇
+胶原蛋白填充祛黑眼圈
+胶原蛋白注射隆鼻
+胶原蛋白丰太阳穴
+胶原蛋白填充丰下巴
+胶原蛋白填充隆胸
+胶原蛋白填充丰臀
+胶原蛋白填充除法令纹
+胶原蛋白填充丰面颊
+胶原蛋白填充泪沟
+下眼睑下至
+泪腺脱垂整形
+自体脂肪填充卧蚕
+真皮填充卧蚕
+埋线隆鼻
+鼻中隔软骨垫鼻头
+注射缩鼻头
+假体垫鼻基底
+玻尿酸垫鼻基底
+自体脂肪垫鼻基底
+鼻中隔软骨隆鼻
+鼻孔矫正
+溶脂针瘦脸
+颧骨提高
+凸嘴矫正
+面部不对称改善
+人工骨垫下巴
+胶原蛋白填充苹果肌
+白瓷娃娃
+热玛吉
+PRP自体血清美肤
+美白针
+手术祛疤
+综合祛疤
+注射祛疤
+激光点痣
+射频溶脂瘦手臂
+小腿神经阻断术
+假体丰臀
+吸脂瘦肩
+注射瘦肩
+点穴减肥
+针灸减肥
+埋线减肥
+错颌锁颌
+隐形矫正
+根管治疗
+M唇成形术
+阴茎延长
+阴茎增粗
+玻尿酸丰耳垂
+轮廓修复术
+胸部假体取出
+超声提升
+线雕
+双眼皮修复
+眶隔脂肪释放
+玻尿酸垫眉弓
+自体脂肪垫眉弓
+硅胶垫眉弓
+膨体垫眉弓
+上眼睑祛脂
+鼻小柱延长
+额头缩小
+人中缩短
+射频溶脂塑臀
+漂唇
+红蓝光祛痘
+化学剥脱祛斑
+冷光美白
+激光祛疤
+拉皮
+黄金微针
+激光除皱
+埋线除皱
+放射状角膜切开术
+准分子激光切削术
+准分子激光原位角膜磨镶术
+透明晶体摘除术
+远视镜片矫正
+激光屈光性角膜切削术
+圆柱镜矫正
+角膜接触镜矫正
+角膜屈光手术
+斜视镜片矫正
+斜视矫正手术
+眼科检查
+玻尿酸填充隆胸
+激光紧缩阴道
+眼袋修复
+鼻形态修复
+鼻部硅胶假体取出
+激光除妊娠纹
+黄金微针除妊娠纹
+冷冻祛腋臭
+半永久纹眉
+水氧活肤
+自体脂肪全面部填充
+镭射净肤
+半永久纹眼线
+水光针
+无针水光
+射频提升
+激光脱腋毛
+激光脱比基尼部位
+激光脱臂毛
+激光脱大腿/小腿毛
+彩光嫩肤
+耳软骨隆鼻
+果酸焕肤
+身体检查
+玻尿酸溶解酶
+眼综合
+开眼角修复
+合金烤瓷牙
+二氧化锆烤瓷牙
+超声波洗牙
+树脂补牙
+拔龋齿
+拔智齿
+嘴角上扬术
+口腔检查
+孕睫术
+玻尿酸精华导入
+双颚手术
+热拉提
+人工骨隆鼻
+自体真皮隆鼻
+鼻小柱缩短
+长鼻矫正
+颧弓降低
+微针祛痘坑
+激光祛斑
+皮秒激光
+小气泡美肤
+微针水光
+玻尿酸丰乳头
+乳头再造
+胶原蛋白注射丰乳晕
+提眉修复
+卧蚕手术修复
+半永久纹发际线
+颧骨填充术
+微晶瓷隆鼻
+激光祛脂肪粒
+少女针注射
+童颜针注射
+假体填充苹果肌
+抗敏修复
+洗眼线
+肤质检测
+阴蒂整形
+菲洛嘉
+HPV检查
+全脸整形套餐
+V脸套餐
+祛斑套餐
+美白套餐
+补水套餐
+皮肤综合管理套餐
+洗牙套餐
+唇部综合
+除皱套餐
+玻尿酸注射套餐
+乳腺检查
+全飞秒
+半飞秒
+ICL晶体植入
+异物取出
+美胸套餐
+激光脱全身毛发
+射频祛眼袋
+O型腿矫正
+X型腿矫正
+G点注射
+牙齿瓷贴面
+自体软骨垫鼻基底
+女性私密检查
+阴唇漂红
+手术紧缩阴道
+性腺激活
+种植牙齿
+下颌角切除术
+玻尿酸丰眼窝
+注射去鼻背纹
+注射去口周纹
+注射去动态纹
+注射治疗多汗
+注射颏肌放松
+美白导入
+埋线祛眼袋
+光纤溶脂祛眼袋
+微针祛黑眼圈
+射频祛黑眼圈
+眼综合修复
+宽鼻矫正
+颧骨颧弓整形术
+下巴截骨
+下巴硅胶假体取出
+下巴膨体假体取出
+下颚前突/地包天
+上颚前突/天包地
+下颌角整形
+取颊脂垫瘦脸
+射频溶脂瘦脸
+光纤溶脂瘦脸
+冷冻溶脂瘦腹部
+冷冻溶脂瘦腰部
+冷冻溶脂瘦手臂
+冷冻溶脂瘦大腿
+冷冻溶脂瘦小腿
+冷冻溶脂瘦肩膀
+冷冻溶脂瘦背部
+冷冻溶脂瘦臀部
+射频溶脂瘦大腿
+射频溶脂瘦小腿
+射频溶脂瘦肩膀
+射频溶脂瘦背部
+射频溶脂瘦腰部
+射频溶脂瘦腹部
+超声溶脂瘦手臂
+超声溶脂瘦腰部
+超声溶脂瘦腹部
+超声溶脂瘦大腿
+超声溶脂瘦小腿
+超声溶脂瘦臀部
+超声溶脂瘦背部
+超声溶脂瘦肩部
+光纤溶脂瘦手臂
+光纤溶脂瘦腰部
+光纤溶脂瘦腹部
+光纤溶脂瘦大腿
+光纤溶脂瘦小腿
+光纤溶脂瘦背部
+光纤溶脂瘦臀部
+光纤溶脂瘦肩部
+吸脂瘦腰部
+吸脂瘦腹部
+吸脂瘦全身
+吸脂失败修复术
+杏仁酸焕肤
+水杨酸焕肤
+清痘针
+冷冻祛痣
+手术祛胎记
+激光祛胎记
+洗眉
+小棕瓶美白
+水光娃娃
+种植面膜
+SMAS除皱手术
+内窥镜手术提升
+小切口手术提升
+干细胞疗法
+复合隆胸
+男性乳房肥大矫正
+头顶加密种植
+美人尖种植
+种植胸毛
+种植私密毛发
+疤痕种植毛发
+防脱理疗
+乌发理疗
+深层清洁
+养发护理
+头部SPA
+头皮护理
+头皮控油
+毛囊检测
+私密超声提升
+私密综合项目男
+私密综合项目女
+私密清洁
+私密护理
+牙齿综合
+颌面正畸
+金属托槽矫正
+自锁托槽矫正
+激光牙龈去色素
+皓齿美白
+激光美白牙
+贵金属烤瓷牙
+牙齿抛光
+喷砂洗牙
+其他材料补牙
+树脂贴面
+儿童口腔预防窝沟封闭
+mrc肌功能矫治
+口腔溃疡
+牙髓炎
+牙龈炎
+显微镜根管治疗
+牙周护理
+玻尿酸全脸填充
+嗨体祛颈纹
+注射祛法令纹
+注射祛鱼尾纹
+注射祛抬头纹
+注射祛川字纹
+注射祛木偶纹
+注射祛露龈笑
+注射祛面部细纹
+胶原蛋白填充额头
+胶原蛋白填充面部细纹
+副耳祛除术
+OK镜近视矫正
+绿飞秒
+后巩膜加固术
+视网膜脱落
+角膜炎
+沙眼
+散光
+结膜炎
+青光眼
+干眼护理
+眼部清洗
+眼镜试戴
+眼部穴位按摩
+宫颈癌疫苗
+基因检测
+中老年体检
+女性体检
+产后乳房护理
+阴道修复
+乳腺疏通
+盆底肌修复
+产后熏蒸
+盆腔炎
+宫颈炎
+卵巢囊肿
+子宫肌瘤
+月经不调
+阴道炎检查
+痛经检查
+备孕检查
+剖腹产手术
+四维彩超
+产检
+分娩
+坐月子
+肘内翻矫正
+肘外翻矫正
+手指巨指矫正
+多指矫正
+手指断指再造
+足内翻矫正
+马蹄足矫正
+长短腿矫正
+膝外翻矫正
+膝内翻矫正
+经络疏通
+艾灸理疗
+刮痧拔罐
+中医药浴
+中医按摩
+全身SPA
+背部护理
+美胸护理
+肩颈护理
+腿部护理
+臀部护理
+手部护理
+足疗
+眼部保养
+半永久睫毛线
+半永久纹唇
+接睫毛
+半永久美瞳线
+拔罐减肥
+内眦赘皮矫正
+大脚骨整形
+综合祛眼袋
+自体脂肪私密紧致
+私密脱毛
+脱脚毛
+脱背部
+脱络腮胡
+拔乳牙
+脂肪填充失败修复
+脂肪填充
\ No newline at end of file
--- a/build/lib/dicts/star.dic
+++ b/build/lib/dicts/star.dic
+肖战
+杨紫
+赵丽颖
+杨幂
+倪妮
+迪丽热巴
+范冰冰
+鞠婧祎
+刘诗诗
+Lisa
+吴宣仪
+赵露思
+杨超越
+Angelababy
+高圆圆
+章子怡
+乔欣
+张雨绮
+孙怡
+江疏影
+毛晓彤
+张馨予
+王祖贤
+张子枫
+陈小纭
+舒淇
+石原里美
+关之琳
+权志龙
+陈数
+程潇
+李小璐
+景甜
+奚梦瑶
+戚薇
+萧亚轩
+车晓
+沈梦辰
+陈妍希
+张予曦
+陈坤
+林珍娜
+宋慧乔
+孟佳
+张靓颖
+郭采洁
+白冰
+林允
+吉娜
+姚晨
+昆凌
+白百何
+沈月
+邓文迪
+王心凌
+杨雪
+朴敏英
+水原希子
+甘薇
+秀智
+高允真
+苟芸慧
+新垣结衣
+徐贞姬
+孙胜完
+郑采妍
+战战
+紫妹
+小猴子
+赵姐
+颖宝
+大幂幂
+妮妮
+喵总
+热巴
+范爷
+冰冰
+四千年
+老鞠
+诗爷
+人间芭比
+小选
+肉丝
+超越妹妹
+杨颖
+baby
+国际章
+乔妹
+绮绮子
+怡宝
+张燕
+妹妹
+十元
+GD
+嫂子
+金莲
+大甜甜
+小明
+戚哥
+鲜肉菩萨
+小笼包
+乔妹
+费霞
+天王嫂
+国民初恋
+GAKKI
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
--- a/build/lib/dicts/stopwords.dic
+++ b/build/lib/dicts/stopwords.dic
+“
+，
+“
+。
+”
+(
+)
+：
+
+⇙
+▼
+!
+"
+#
+$
+%
+&
+'
+(
+)
+*
+
+,
+-
+--
+.
+..
+...
+......
+...................
+./
+.一
+.数
+.日
+/
+//
+0
+1
+2
+3
+4
+5
+6
+7
+8
+9
+:
+://
+::
+;
+<
+=
+>
+>>
+?
+@
+A
+Lex
+[
+\
+]
+^
+_
+`
+exp
+sub
+sup
+|
+}
+~
+~~~
+·
+×
+×××
+Δ
+Ψ
+γ
+μ
+φ
+φ．
+В
+—
+——
+———
+‘
+’
+’‘
+“
+”
+”，
+…
+……
+…………………………………………………③
+′∈
+′｜
+℃
+Ⅲ
+↑
+→
+∈［
+∪φ∈
+≈
+①
+②
+的
+我
+你
+了
+呢
+②ｃ
+③
+③］
+④
+⑤
+⑥
+⑦
+⑧
+⑨
+⑩
+──
+■
+▲
+　
+、
+。
+〈
+〉
+《
+》
+》），
+」
+『
+』
+【
+】
+〔
+〕
+〕〔
+㈧
+︿
+！
+＃
+＄
+％
+＆
+＇
+（
+）
+）÷（１－
+）、
+＊
+＋
+＋ξ
+＋＋
+，
+，也
+－
+－β
+－－
+－［＊］－
+．
+／
+０
+０：２
+１
+１．
+１２％
+２
+２．３％
+３
+４
+５
+５：０
+６
+７
+８
+９
+：
+；
+＜
+＜±
+＜Δ
+＜λ
+＜φ
+＜＜
+＝
+＝″
+＝☆
+＝（
+＝－
+＝［
+＝｛
+＞
+＞λ
+？
+＠
+Ａ
+ＬＩ
+Ｒ．Ｌ．
+ＺＸＦＩＴＬ
+［
+［①①］
+［①②］
+［①③］
+［①④］
+［①⑤］
+［①⑥］
+［①⑦］
+［①⑧］
+［①⑨］
+［①Ａ］
+［①Ｂ］
+［①Ｃ］
+［①Ｄ］
+［①Ｅ］
+［①］
+［①ａ］
+［①ｃ］
+［①ｄ］
+［①ｅ］
+［①ｆ］
+［①ｇ］
+［①ｈ］
+［①ｉ］
+［①ｏ］
+［②
+［②①］
+［②②］
+［②③］
+［②④
+［②⑤］
+［②⑥］
+［②⑦］
+［②⑧］
+［②⑩］
+［②Ｂ］
+［②Ｇ］
+［②］
+［②ａ］
+［②ｂ］
+［②ｃ］
+［②ｄ］
+［②ｅ］
+［②ｆ］
+［②ｇ］
+［②ｈ］
+［②ｉ］
+［②ｊ］
+［③①］
+［③⑩］
+［③Ｆ］
+［③］
+［③ａ］
+［③ｂ］
+［③ｃ］
+［③ｄ］
+［③ｅ］
+［③ｇ］
+［③ｈ］
+［④］
+［④ａ］
+［④ｂ］
+［④ｃ］
+［④ｄ］
+［④ｅ］
+［⑤］
+［⑤］］
+［⑤ａ］
+［⑤ｂ］
+［⑤ｄ］
+［⑤ｅ］
+［⑤ｆ］
+［⑥］
+［⑦］
+［⑧］
+［⑨］
+［⑩］
+［＊］
+［－
+［］
+］
+］∧′＝［
+］［
+＿
+ａ］
+ｂ］
+ｃ］
+ｅ］
+ｆ］
+ｎｇ昉
+｛
+｛－
+｜
+｝
+｝＞
+～
+～±
+～＋
+￥
--- a/build/lib/dicts/words.dic
+++ b/build/lib/dicts/words.dic
--- a/build/lib/preprocesser/__init__.py
+++ b/build/lib/preprocesser/__init__.py
+# -*- coding:utf-8 -*-
+# author:gm
+# mail: zhangguodong@igengmei.com
+# datetime:2020/4/24 3:32 下午
+# software: PyCharm
--- a/build/lib/preprocesser/filter.py
+++ b/build/lib/preprocesser/filter.py
+# -*- coding:utf-8 -*-
+# author:gm
+# mail: zhangguodong@igengmei.com
+# datetime:2020/4/24 3:32 下午
+# software: PyCharm
+import re, os
+from config import config
+
+
+class Filter(object):
+    """Base class for token filters backed by a word-list file."""
+
+    def __init__(self, file_path, encoding="utf-8"):
+        """
+        :param file_path: path of the word-list file used by the filter.
+        :param encoding: text encoding of that file (the previous default,
+            "utf-*", is not a valid codec name).
+        """
+        self.file_path = file_path
+        self.encoding = encoding
+        self.stopwords = set()
+
+    def filter(self, token_list=None):
+        """Filter ``token_list``; subclasses must override.
+
+        :raises NotImplementedError: always, in this base class.
+        """
+        raise NotImplementedError
+
+
+class StopwordsFilter(Filter):
+    """Strips irregular characters from tokens and drops stop words."""
+
+    def __init__(self, file_path, encoding="utf-8"):
+        """Create the filter and immediately load the stop-word file."""
+        super(StopwordsFilter, self).__init__(file_path, encoding)
+        self.init()
+
+    def remove_irregular_chars(self, corpus: str):
+        """Return ``corpus`` without any character outside U+4E00-U+9FA5, 0-9, A-Z and a-z."""
+        return re.sub(u"([^\u4e00-\u9fa5\u0030-\u0039\u0041-\u005a\u0061-\u007a])", "", corpus)
+
+    def init(self):
+        """Load stop words, one per line, from ``self.file_path`` into ``self.stopwords``."""
+        # The empty string and a bare newline always count as stop words, even
+        # when the file is empty; tokens that clean down to "" are then dropped.
+        self.stopwords.update(("", "\n"))
+        with open(self.file_path, "r", encoding=self.encoding) as stopwords_file:
+            for line in stopwords_file:
+                self.stopwords.add(line.strip())
+
+    def filter(self, token_list):
+        """Return the cleaned tokens of ``token_list`` that are not stop words.
+
+        Each token is cleaned with ``remove_irregular_chars`` (which also removes
+        all whitespace) and the cleaned form is the one checked against the
+        stop-word set.
+        """
+        cleaned_tokens = (self.remove_irregular_chars(item) for item in token_list)
+        return [token for token in cleaned_tokens if token not in self.stopwords]
+
+
+# Project root: two directory levels above this file. Derived with os.path so
+# it does not depend on "/" separators or on __file__ being absolute.
+root_path = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
+# Shared module-level filter loaded from the configured stop-word list.
+stopwords_filter = StopwordsFilter(os.path.join(root_path, config.stopwords_path))
--- a/build/lib/preprocesser/pipeline.py
+++ b/build/lib/preprocesser/pipeline.py
+# -*- coding:utf-8 -*-
+# author:gm
+# mail: zhangguodong@igengmei.com
+# datetime:2020/4/24 3:32 下午
+# software: PyCharm
+from tqdm import tqdm
+
+
+class Pipeline:
+    """
+    Describes the data-processing flow; meant to turn the words in a file
+    into per-sentence token lists.
+    """
+
+    def __init__(self):
+        # Starts empty; nothing in this class adds to it yet.
+        self.pipelines = list()
\ No newline at end of file
--- a/build/lib/preprocesser/processors.py
+++ b/build/lib/preprocesser/processors.py
+# -*- coding:utf-8 -*-
+# author:gm
+# mail: zhangguodong@igengmei.com
+# datetime:2020/4/24 3:32 下午
+# software: PyCharm
+from abc import ABC
+
+from jieba import Tokenizer
+import re, os
+from config import config
+
+
+class SentenceSegmenter(object):
+    """Splits text into fragments on a punctuation regex."""
+
+    def __init__(self, split_pun=None):
+        """
+        :param split_pun: regex pattern string, or an iterable of pattern pieces
+            that are concatenated as-is (not escaped, not wrapped in a character
+            class). When empty or None, a character class of common CJK/ASCII
+            sentence punctuation is used.
+        """
+        if not split_pun:
+            self.split_pun = r'[;；.。，,！\n!?？]'
+        else:
+            self.split_pun = "".join(split_pun)
+
+    def split(self, sentences):
+        """Yield the non-empty fragments of a string, or of every string in an iterable.
+
+        Empty fragments (adjacent or trailing delimiters) are skipped for both
+        input kinds.
+        """
+        if isinstance(sentences, str):
+            sentences = [sentences]
+        for sentence in sentences:
+            for item in re.split(self.split_pun, sentence):
+                if item:
+                    yield item
+
+
+class Processor:
+    """Base class for named processing steps."""
+
+    def __init__(self, name):
+        # Kept on a private attribute.
+        self._name = name
+
+    def run(self):
+        """Execute the step; concrete processors must override."""
+        raise NotImplementedError()
+
+
+class StandardProcessor(Processor):
+    """Processor whose ``lcut`` space-joins the items of its input."""
+
+    def __init__(self, name=""):
+        super().__init__(name)
+
+    def lcut(self, line):
+        """Return the items of ``line`` joined by single spaces (its characters, if ``line`` is a str)."""
+        separator = " "
+        return separator.join(line)
+
+
+class TokenizerProcessor(Processor, ABC):
+    """Processor that tokenizes text with a jieba ``Tokenizer`` built from a dictionary file."""
+
+    def __init__(self, file_path, name=""):
+        """
+        :param file_path: dictionary file handed to the jieba tokenizer.
+        :param name: processor name stored by ``Processor.__init__``.
+        """
+        # Initialise the base class so ``_name`` exists on every processor.
+        super(TokenizerProcessor, self).__init__(name)
+        self.file_path = file_path
+        self.tokenizer = None
+        self.init(self.file_path)
+
+    def init(self, dict_path=None):
+        """Build and eagerly initialise the tokenizer from ``dict_path``."""
+        tokenizer = Tokenizer(dictionary=dict_path)
+        tokenizer.initialize()
+        self.tokenizer = tokenizer
+
+    def lcut(self, line, cut_all=False):
+        """Tokenize ``line`` into a list of tokens.
+
+        The call always enables HMM and forwards ``cut_all`` to the tokenizer.
+        NOTE(review): the original note said only the "smart" segmentation mode
+        is supported, yet ``cut_all`` is passed through - confirm the intent.
+
+        :param line: text to tokenize.
+        :param cut_all: forwarded to the tokenizer's ``lcut``.
+        :return: whatever the tokenizer's ``lcut`` returns.
+        """
+        return self.tokenizer.lcut(line, HMM=True, cut_all=cut_all)
+
+
+# Project root: two directory levels above this file. Derived with os.path so
+# it does not depend on "/" separators or on __file__ being absolute.
+root_path = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
+# Shared module-level tokenizer built from the configured word dictionary.
+token_processor = TokenizerProcessor(os.path.join(root_path, config.words_path))
--- a/config/__init__.py
+++ b/config/__init__.py
+# -*- coding:utf-8 -*-
+# author:gm
+# mail: zhangguodong@igengmei.com
+# datetime:2020/4/24 3:32 下午
+# software: PyCharm
--- a/config/config.py
+++ b/config/config.py
+# -*- coding:utf-8 -*-
+# author:gm
+# mail: zhangguodong@igengmei.com
+# datetime:2020/4/24 3:32 下午
+# software: PyCharm
+DEBUG = False
+# NOTE(review): the string literal below still lists recall_topK, sort_topK and
+# min_frequence, but this module defines none of them. In English it reads:
+#   recall_topK: how many candidate words to select
+#   sort_topK: number of words kept after ranking
+#   min_frequence: number of times all words must co-occur
+#   stopwords_path: path of the stop-word list
+#   words_path: path of the word dictionary
+"""
+recall_topK：
+    我们选取多少个候选词
+sort_topK：
+    排序后挑选的词汇数量
+min_frequence:
+    所有词汇必须共现的次数
+stopwords_path：
+    停用词路径
+words_path:
+    词典path
+"""
+# All paths below are relative; the modules that load them join them onto
+# their own computed root_path.
+stopwords_path = "dicts/stopwords.dic"
+words_path = "dicts/words.dic"
+
+# Word lists used by the text classifier (influencers, projects, stars).
+network_influcer_dic = "dicts/network_influcer.dic"
+projects_dic = "dicts/project.dic"
+star_dic = "dicts/star.dic"
--- a/dicts/__init__.py
+++ b/dicts/__init__.py
+# -*- coding:utf-8 -*-
+# author:gm
+# mail: zhangguodong@igengmei.com
+# datetime:2020/4/24 3:32 下午
+# software: PyCharm
--- a/dicts/network_influcer.dic
+++ b/dicts/network_influcer.dic
+丁真
+周扬青
+冯提莫
+半藏森林
+艾比
+韩安冉
+南笙
+奶茶妹妹
+宋昕冉
+林小宅
+晚晚
+谢安然
+王柠萌
+Naomi
+于文红
+甜仇
+温精灵
+温婉
+Fiona宋亮
+李蒽熙
+una
+夏夏
+水野亚美
+小小如
+卓亨瑜
+彭王者
+滕雨佳
+腻腻ninii
+李恩童
+花珊珊
+小初
+小饼干
+晚妹
+吃一口甜
+徐清婉
+Jy小语
+张贤静
+施安妮
+周子然Femi
+XIZI杨
+彦崽儿
+潘白雪
+方恰拉
+MAGBOW
+CHU小初
+不求上进的柚砸
+LU一丝
+大佬儿
+姚淇瀚Henry
+周小濛
+半藏
+Abbily
+王嘉辉
+罗小伊
+章泽天
+林晓婷
+晚奶
+柠檬
+康雅馨
+仇琳琳
+温仙女
+张曼如
+雷婉婷
+土豆公主
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
--- a/dicts/project.dic
+++ b/dicts/project.dic
+眼部整形
+鼻部整形
+面部轮廓
+瘦身美体
+皮肤美容
+胸部塑身
+毛发管理
+私密整形
+口腔齿科
+口唇整形
+注射美容
+自体脂肪
+半永久
+健康管理
+其他
+眼科
+妇产科
+骨整形
+SPA/按摩
+耳部其他
+牙齿治疗
+口腔其他
+颈部
+除皱
+毛发其他
+药物脱毛
+手术面部提升
+自体脂肪面部年轻化
+耳部矫正
+丰耳垂
+妊娠纹
+瘦腿
+面部吸脂
+腰腹抽脂
+鼻头整形
+光子脱毛
+激光洗眉
+祛皱
+下眼睑
+自体脂肪面部填充.
+收缩毛孔
+自体脂肪填充全脸加prp
+美容SPA生活美容
+祛痘生活美容
+瘦身生活美容
+美肤生活美容
+产后塑形生活美容
+胸部塑形
+上睑整形
+下睑整形
+胸型美化
+耳部整形
+双眼皮
+开眼角
+祛眼袋
+祛黑眼圈
+隆鼻
+鼻翼
+鼻形矫正
+颧骨颧弓
+额头
+太阳穴
+酒窝
+祛疤痕痘印
+祛斑祛色素
+改善肤质
+下巴
+美白嫩肤
+牙齿整形
+美容冠
+填充卧蚕
+眼部修复
+鼻部修复
+丰胸（隆胸）
+乳头乳晕整形
+祛副乳
+胸形美化
+胸部修复
+中医减肥
+轮廓修复
+微整修复
+牙齿矫正
+牙齿美白
+洗牙
+补牙
+种植牙
+吸脂
+下颌角
+拔牙
+垫眉弓
+鼻基底
+两颚
+唇形
+抗衰紧致
+近视矫正
+远视矫正
+散光矫正
+斜视矫正
+眼科其他
+女性私密
+男性私密
+祛腋臭
+产后修复
+眼睑
+毛发移植
+超声溶脂
+射频溶脂
+玻尿酸
+清洁补水
+瘦脸除皱
+半永久妆
+自体脂肪面部填充
+牙贴面
+激光脱毛
+冷冻溶脂
+体检
+眼部护理
+胶原蛋白注射
+少女针
+童颜针
+皮肤检测
+鼻部综合
+眼部综合
+轮廓套餐
+光纤溶脂
+自体脂肪身体塑形
+胸部套餐
+注射美肤
+中医理疗
+口腔治疗
+妇科检查
+瘦脸
+其他美体
+吸脂失败修复
+其他种植
+头部养护
+眼科疾病
+预防保健
+妇科疾病
+产科
+手骨整形
+足骨整形
+腿骨整形
+身体护理
+手足护理
+面部护理
+鼻小柱
+人中
+自体脂肪填充失败修复
+鼻综合
+激光脱面部其他毛发
+切开双眼皮
+埋线双眼皮
+定点双眼皮
+开内眼角
+开外眼角
+内切祛眼袋
+外切祛眼袋
+激光祛眼袋
+吸脂祛眼袋
+上眼睑下垂矫正
+玻尿酸填充卧蚕
+自体脂肪填充泪沟
+玻尿酸填充泪沟
+切眉
+激光祛黑眼圈
+自体脂肪填充黑眼圈
+鼻部膨体假体取出
+驼峰鼻矫正
+鹰钩鼻矫正
+歪鼻矫正
+朝天鼻矫正
+缩鼻背
+手术缩鼻头
+自体脂肪隆鼻
+玻尿酸隆鼻
+自体软骨垫鼻尖
+注射瘦脸
+吸脂瘦脸
+自体脂肪丰苹果肌
+玻尿酸填充丰苹果肌
+颧骨内推
+玻尿酸丰下巴
+下巴前推
+自体脂肪丰下巴
+硅胶垫下巴
+膨体垫下巴
+缩下巴
+宽下巴矫正
+激光溶脂祛双下巴
+酒窝成形术
+自体脂肪丰面颊
+自体脂肪丰太阳穴
+玻尿酸丰太阳穴
+硅胶丰太阳穴
+膨体丰太阳穴
+自体脂肪丰额头
+玻尿酸丰额头
+硅胶丰额头
+膨体丰额头
+招风耳矫正
+大耳缩小术
+杯状耳矫正
+唇腭裂修复
+厚唇改薄术
+玻尿酸丰唇
+自体脂肪丰唇
+处女膜修复
+包皮手术
+填充阴唇
+阴唇整形
+注射祛腋臭
+激光祛腋臭
+激光脱发际线
+植眉
+种鬓角
+植睫毛
+黑脸娃娃
+激光脱唇毛
+激光祛红血丝
+光子嫩肤
+激光祛纹身
+玻尿酸除颈纹
+自体脂肪除颈纹
+玻尿酸除法令纹
+自体脂肪除法令纹
+玻尿酸除眉间纹
+胸部失败修复
+乳房再造
+假体隆胸
+吸脂祛副乳
+手术祛副乳
+乳头内陷矫正
+乳头缩小
+乳晕缩小
+乳晕漂染
+乳房缩小
+胸部下垂矫正
+中医减肥其他项目
+吸脂瘦背部
+自体脂肪丰臀
+产后塑形
+腹壁成形术
+膨体隆鼻
+硅胶隆鼻
+自体肋软骨隆鼻
+激光溶脂瘦脸
+注射瘦腿
+吸脂瘦大腿
+吸脂瘦小腿
+吸脂瘦手臂
+自体脂肪隆胸
+玻尿酸除面部细纹
+自体脂肪除面部细纹
+植发际线
+吸脂祛双下巴
+鼻翼缩小
+全瓷牙
+手术祛腋臭
+像素激光
+植胡须
+吸脂塑臀
+点阵激光
+玻尿酸丰面颊
+玻尿酸祛黑眼圈
+胶原蛋白填充丰唇
+胶原蛋白填充祛黑眼圈
+胶原蛋白注射隆鼻
+胶原蛋白丰太阳穴
+胶原蛋白填充丰下巴
+胶原蛋白填充隆胸
+胶原蛋白填充丰臀
+胶原蛋白填充除法令纹
+胶原蛋白填充丰面颊
+胶原蛋白填充泪沟
+下眼睑下至
+泪腺脱垂整形
+自体脂肪填充卧蚕
+真皮填充卧蚕
+埋线隆鼻
+鼻中隔软骨垫鼻头
+注射缩鼻头
+假体垫鼻基底
+玻尿酸垫鼻基底
+自体脂肪垫鼻基底
+鼻中隔软骨隆鼻
+鼻孔矫正
+溶脂针瘦脸
+颧骨提高
+凸嘴矫正
+面部不对称改善
+人工骨垫下巴
+胶原蛋白填充苹果肌
+白瓷娃娃
+热玛吉
+PRP自体血清美肤
+美白针
+手术祛疤
+综合祛疤
+注射祛疤
+激光点痣
+射频溶脂瘦手臂
+小腿神经阻断术
+假体丰臀
+吸脂瘦肩
+注射瘦肩
+点穴减肥
+针灸减肥
+埋线减肥
+错颌锁颌
+隐形矫正
+根管治疗
+M唇成形术
+阴茎延长
+阴茎增粗
+玻尿酸丰耳垂
+轮廓修复术
+胸部假体取出
+超声提升
+线雕
+双眼皮修复
+眶隔脂肪释放
+玻尿酸垫眉弓
+自体脂肪垫眉弓
+硅胶垫眉弓
+膨体垫眉弓
+上眼睑祛脂
+鼻小柱延长
+额头缩小
+人中缩短
+射频溶脂塑臀
+漂唇
+红蓝光祛痘
+化学剥脱祛斑
+冷光美白
+激光祛疤
+拉皮
+黄金微针
+激光除皱
+埋线除皱
+放射状角膜切开术
+准分子激光切削术
+准分子激光原位角膜磨镶术
+透明晶体摘除术
+远视镜片矫正
+激光屈光性角膜切削术
+圆柱镜矫正
+角膜接触镜矫正
+角膜屈光手术
+斜视镜片矫正
+斜视矫正手术
+眼科检查
+玻尿酸填充隆胸
+激光紧缩阴道
+眼袋修复
+鼻形态修复
+鼻部硅胶假体取出
+激光除妊娠纹
+黄金微针除妊娠纹
+冷冻祛腋臭
+半永久纹眉
+水氧活肤
+自体脂肪全面部填充
+镭射净肤
+半永久纹眼线
+水光针
+无针水光
+射频提升
+激光脱腋毛
+激光脱比基尼部位
+激光脱臂毛
+激光脱大腿/小腿毛
+彩光嫩肤
+耳软骨隆鼻
+果酸焕肤
+身体检查
+玻尿酸溶解酶
+眼综合
+开眼角修复
+合金烤瓷牙
+二氧化锆烤瓷牙
+超声波洗牙
+树脂补牙
+拔龋齿
+拔智齿
+嘴角上扬术
+口腔检查
+孕睫术
+玻尿酸精华导入
+双颚手术
+热拉提
+人工骨隆鼻
+自体真皮隆鼻
+鼻小柱缩短
+长鼻矫正
+颧弓降低
+微针祛痘坑
+激光祛斑
+皮秒激光
+小气泡美肤
+微针水光
+玻尿酸丰乳头
+乳头再造
+胶原蛋白注射丰乳晕
+提眉修复
+卧蚕手术修复
+半永久纹发际线
+颧骨填充术
+微晶瓷隆鼻
+激光祛脂肪粒
+少女针注射
+童颜针注射
+假体填充苹果肌
+抗敏修复
+洗眼线
+肤质检测
+阴蒂整形
+菲洛嘉
+HPV检查
+全脸整形套餐
+V脸套餐
+祛斑套餐
+美白套餐
+补水套餐
+皮肤综合管理套餐
+洗牙套餐
+唇部综合
+除皱套餐
+玻尿酸注射套餐
+乳腺检查
+全飞秒
+半飞秒
+ICL晶体植入
+异物取出
+美胸套餐
+激光脱全身毛发
+射频祛眼袋
+O型腿矫正
+X型腿矫正
+G点注射
+牙齿瓷贴面
+自体软骨垫鼻基底
+女性私密检查
+阴唇漂红
+手术紧缩阴道
+性腺激活
+种植牙齿
+下颌角切除术
+玻尿酸丰眼窝
+注射去鼻背纹
+注射去口周纹
+注射去动态纹
+注射治疗多汗
+注射颏肌放松
+美白导入
+埋线祛眼袋
+光纤溶脂祛眼袋
+微针祛黑眼圈
+射频祛黑眼圈
+眼综合修复
+宽鼻矫正
+颧骨颧弓整形术
+下巴截骨
+下巴硅胶假体取出
+下巴膨体假体取出
+下颚前突/地包天
+上颚前突/天包地
+下颌角整形
+取颊脂垫瘦脸
+射频溶脂瘦脸
+光纤溶脂瘦脸
+冷冻溶脂瘦腹部
+冷冻溶脂瘦腰部
+冷冻溶脂瘦手臂
+冷冻溶脂瘦大腿
+冷冻溶脂瘦小腿
+冷冻溶脂瘦肩膀
+冷冻溶脂瘦背部
+冷冻溶脂瘦臀部
+射频溶脂瘦大腿
+射频溶脂瘦小腿
+射频溶脂瘦肩膀
+射频溶脂瘦背部
+射频溶脂瘦腰部
+射频溶脂瘦腹部
+超声溶脂瘦手臂
+超声溶脂瘦腰部
+超声溶脂瘦腹部
+超声溶脂瘦大腿
+超声溶脂瘦小腿
+超声溶脂瘦臀部
+超声溶脂瘦背部
+超声溶脂瘦肩部
+光纤溶脂瘦手臂
+光纤溶脂瘦腰部
+光纤溶脂瘦腹部
+光纤溶脂瘦大腿
+光纤溶脂瘦小腿
+光纤溶脂瘦背部
+光纤溶脂瘦臀部
+光纤溶脂瘦肩部
+吸脂瘦腰部
+吸脂瘦腹部
+吸脂瘦全身
+吸脂失败修复术
+杏仁酸焕肤
+水杨酸焕肤
+清痘针
+冷冻祛痣
+手术祛胎记
+激光祛胎记
+洗眉
+小棕瓶美白
+水光娃娃
+种植面膜
+SMAS除皱手术
+内窥镜手术提升
+小切口手术提升
+干细胞疗法
+复合隆胸
+男性乳房肥大矫正
+头顶加密种植
+美人尖种植
+种植胸毛
+种植私密毛发
+疤痕种植毛发
+防脱理疗
+乌发理疗
+深层清洁
+养发护理
+头部SPA
+头皮护理
+头皮控油
+毛囊检测
+私密超声提升
+私密综合项目男
+私密综合项目女
+私密清洁
+私密护理
+牙齿综合
+颌面正畸
+金属托槽矫正
+自锁托槽矫正
+激光牙龈去色素
+皓齿美白
+激光美白牙
+贵金属烤瓷牙
+牙齿抛光
+喷砂洗牙
+其他材料补牙
+树脂贴面
+儿童口腔预防窝沟封闭
+mrc肌功能矫治
+口腔溃疡
+牙髓炎
+牙龈炎
+显微镜根管治疗
+牙周护理
+玻尿酸全脸填充
+嗨体祛颈纹
+注射祛法令纹
+注射祛鱼尾纹
+注射祛抬头纹
+注射祛川字纹
+注射祛木偶纹
+注射祛露龈笑
+注射祛面部细纹
+胶原蛋白填充额头
+胶原蛋白填充面部细纹
+副耳祛除术
+OK镜近视矫正
+绿飞秒
+后巩膜加固术
+视网膜脱落
+角膜炎
+沙眼
+散光
+结膜炎
+青光眼
+干眼护理
+眼部清洗
+眼镜试戴
+眼部穴位按摩
+宫颈癌疫苗
+基因检测
+中老年体检
+女性体检
+产后乳房护理
+阴道修复
+乳腺疏通
+盆底肌修复
+产后熏蒸
+盆腔炎
+宫颈炎
+卵巢囊肿
+子宫肌瘤
+月经不调
+阴道炎检查
+痛经检查
+备孕检查
+剖腹产手术
+四维彩超
+产检
+分娩
+坐月子
+肘内翻矫正
+肘外翻矫正
+手指巨指矫正
+多指矫正
+手指断指再造
+足内翻矫正
+马蹄足矫正
+长短腿矫正
+膝外翻矫正
+膝内翻矫正
+经络疏通
+艾灸理疗
+刮痧拔罐
+中医药浴
+中医按摩
+全身SPA
+背部护理
+美胸护理
+肩颈护理
+腿部护理
+臀部护理
+手部护理
+足疗
+眼部保养
+半永久睫毛线
+半永久纹唇
+接睫毛
+半永久美瞳线
+拔罐减肥
+内眦赘皮矫正
+大脚骨整形
+综合祛眼袋
+自体脂肪私密紧致
+私密脱毛
+脱脚毛
+脱背部
+脱络腮胡
+拔乳牙
+脂肪填充失败修复
+脂肪填充
\ No newline at end of file
--- a/dicts/star.dic
+++ b/dicts/star.dic
+肖战
+杨紫
+赵丽颖
+杨幂
+倪妮
+迪丽热巴
+范冰冰
+鞠婧祎
+刘诗诗
+Lisa
+吴宣仪
+赵露思
+杨超越
+Angelababy
+高圆圆
+章子怡
+乔欣
+张雨绮
+孙怡
+江疏影
+毛晓彤
+张馨予
+王祖贤
+张子枫
+陈小纭
+舒淇
+石原里美
+关之琳
+权志龙
+陈数
+程潇
+李小璐
+景甜
+奚梦瑶
+戚薇
+萧亚轩
+车晓
+沈梦辰
+陈妍希
+张予曦
+陈坤
+林珍娜
+宋慧乔
+孟佳
+张靓颖
+郭采洁
+白冰
+林允
+吉娜
+姚晨
+昆凌
+白百何
+沈月
+邓文迪
+王心凌
+杨雪
+朴敏英
+水原希子
+甘薇
+秀智
+高允真
+苟芸慧
+新垣结衣
+徐贞姬
+孙胜完
+郑采妍
+战战
+紫妹
+小猴子
+赵姐
+颖宝
+大幂幂
+妮妮
+喵总
+热巴
+范爷
+冰冰
+四千年
+老鞠
+诗爷
+人间芭比
+小选
+肉丝
+超越妹妹
+杨颖
+baby
+国际章
+乔妹
+绮绮子
+怡宝
+张燕
+妹妹
+十元
+GD
+嫂子
+金莲
+大甜甜
+小明
+戚哥
+鲜肉菩萨
+小笼包
+乔妹
+费霞
+天王嫂
+国民初恋
+GAKKI
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
--- a/dicts/stopwords.dic
+++ b/dicts/stopwords.dic
+“
+，
+“
+。
+”
+(
+)
+：
+
+⇙
+▼
+!
+"
+#
+$
+%
+&
+'
+(
+)
+*
+
+,
+-
+--
+.
+..
+...
+......
+...................
+./
+.一
+.数
+.日
+/
+//
+0
+1
+2
+3
+4
+5
+6
+7
+8
+9
+:
+://
+::
+;
+<
+=
+>
+>>
+?
+@
+A
+Lex
+[
+\
+]
+^
+_
+`
+exp
+sub
+sup
+|
+}
+~
+~~~
+·
+×
+×××
+Δ
+Ψ
+γ
+μ
+φ
+φ．
+В
+—
+——
+———
+‘
+’
+’‘
+“
+”
+”，
+…
+……
+…………………………………………………③
+′∈
+′｜
+℃
+Ⅲ
+↑
+→
+∈［
+∪φ∈
+≈
+①
+②
+的
+我
+你
+了
+呢
+②ｃ
+③
+③］
+④
+⑤
+⑥
+⑦
+⑧
+⑨
+⑩
+──
+■
+▲
+　
+、
+。
+〈
+〉
+《
+》
+》），
+」
+『
+』
+【
+】
+〔
+〕
+〕〔
+㈧
+︿
+！
+＃
+＄
+％
+＆
+＇
+（
+）
+）÷（１－
+）、
+＊
+＋
+＋ξ
+＋＋
+，
+，也
+－
+－β
+－－
+－［＊］－
+．
+／
+０
+０：２
+１
+１．
+１２％
+２
+２．３％
+３
+４
+５
+５：０
+６
+７
+８
+９
+：
+；
+＜
+＜±
+＜Δ
+＜λ
+＜φ
+＜＜
+＝
+＝″
+＝☆
+＝（
+＝－
+＝［
+＝｛
+＞
+＞λ
+？
+＠
+Ａ
+ＬＩ
+Ｒ．Ｌ．
+ＺＸＦＩＴＬ
+［
+［①①］
+［①②］
+［①③］
+［①④］
+［①⑤］
+［①⑥］
+［①⑦］
+［①⑧］
+［①⑨］
+［①Ａ］
+［①Ｂ］
+［①Ｃ］
+［①Ｄ］
+［①Ｅ］
+［①］
+［①ａ］
+［①ｃ］
+［①ｄ］
+［①ｅ］
+［①ｆ］
+［①ｇ］
+［①ｈ］
+［①ｉ］
+［①ｏ］
+［②
+［②①］
+［②②］
+［②③］
+［②④
+［②⑤］
+［②⑥］
+［②⑦］
+［②⑧］
+［②⑩］
+［②Ｂ］
+［②Ｇ］
+［②］
+［②ａ］
+［②ｂ］
+［②ｃ］
+［②ｄ］
+［②ｅ］
+［②ｆ］
+［②ｇ］
+［②ｈ］
+［②ｉ］
+［②ｊ］
+［③①］
+［③⑩］
+［③Ｆ］
+［③］
+［③ａ］
+［③ｂ］
+［③ｃ］
+［③ｄ］
+［③ｅ］
+［③ｇ］
+［③ｈ］
+［④］
+［④ａ］
+［④ｂ］
+［④ｃ］
+［④ｄ］
+［④ｅ］
+［⑤］
+［⑤］］
+［⑤ａ］
+［⑤ｂ］
+［⑤ｄ］
+［⑤ｅ］
+［⑤ｆ］
+［⑥］
+［⑦］
+［⑧］
+［⑨］
+［⑩］
+［＊］
+［－
+［］
+］
+］∧′＝［
+］［
+＿
+ａ］
+ｂ］
+ｃ］
+ｅ］
+ｆ］
+ｎｇ昉
+｛
+｛－
+｜
+｝
+｝＞
+～
+～±
+～＋
+￥
--- a/dicts/words.dic
+++ b/dicts/words.dic
--- a/dist/gm_text_miner-1.0.0-py3.7.egg
+++ b/dist/gm_text_miner-1.0.0-py3.7.egg
--- a/dist/gm_text_miner-1.0.0.tar.gz
+++ b/dist/gm_text_miner-1.0.0.tar.gz
--- a/gm_text_miner.egg-info/PKG-INFO
+++ b/gm_text_miner.egg-info/PKG-INFO
+Metadata-Version: 1.1
+Name: gm-text-miner
+Version: 1.0.0
+Summary: classifical base word dict
+Home-page: UNKNOWN
+Author: crazyer
+Author-email: zhangguodong@igengmei.com
+License: UNKNOWN
+Description: UNKNOWN
+Platform: UNKNOWN
+Classifier: Programming Language :: Python :: 3
--- a/gm_text_miner.egg-info/SOURCES.txt
+++ b/gm_text_miner.egg-info/SOURCES.txt
+MANIFEST.in
+setup.py
+algorithm/__init__.py
+algorithm/text_classifical/__init__.py
+algorithm/text_classifical/base.py
+config/__init__.py
+config/config.py
+dicts/__init__.py
+dicts/network_influcer.dic
+dicts/project.dic
+dicts/star.dic
+dicts/stopwords.dic
+dicts/words.dic
+gm_text_miner.egg-info/PKG-INFO
+gm_text_miner.egg-info/SOURCES.txt
+gm_text_miner.egg-info/dependency_links.txt
+gm_text_miner.egg-info/top_level.txt
+preprocesser/__init__.py
+preprocesser/filter.py
+preprocesser/pipeline.py
+preprocesser/processors.py
\ No newline at end of file
--- a/gm_text_miner.egg-info/dependency_links.txt
+++ b/gm_text_miner.egg-info/dependency_links.txt
+
--- a/gm_text_miner.egg-info/top_level.txt
+++ b/gm_text_miner.egg-info/top_level.txt
+algorithm
+config
+dicts
+preprocesser
--- a/main.py
+++ b/main.py
+# -*- coding:utf-8 -*-
+# author:gm
+# mail: zhangguodong@igengmei.com
+# datetime:2020/4/24 3:32 下午
+# software: PyCharm
+
+from algorithm.text_classifical.base import model
+# Short sample input; immediately superseded by the longer sample assigned below.
+content = "双眼皮"
+content = """
+    🥰🥰38+看起来像20+ 我的驻颜秘密大公开
+ 
+😳😳前两天看网上有人说关之琳在30岁才开始大红，我们现在看到的作品很多都是她30以后的电影。女人很少能在这个年龄才开始出头，现在女星你三十都没有出名，后面像再靠颜值火起来根本是没有可能的。关美女能火那是因为她有别人不可比拟的骨相和皮相美。
+-
+👇🏻👇🏻如何让自己能在三十多看起来还像少女一样稚嫩呢？下嘛看看我们案例姑娘的保养秘诀
+-
+🌿🌿术前情况：随着年龄的增加，胶原蛋白的流失皮相一路向下。正常情况下三十岁左右的女人，应该是稍微丰腴一点会显得比较和蔼，有福相看起来会比较年轻。我们案例姑娘在这个年龄确
+-
+✍🏻️✍🏻️整形方案：面部填充。重点填充部位太阳穴和面颊部位，额捏角。
+-
+🎉🎉术后效果：如果你细看那些女明星，三十多还风采依旧的基本上都是做过填充的姑娘，典型的我们填充模板，王子文。填充后那颜值一路高歌。我们案例姑娘一样，填充后，面部线条柔和，没有突兀感所以颜值也是猛然提升。
+-
+🌈🌈术后七天：面部肿胀在三四天的时候达到高峰，七天左右开始消肿。
+-
+🥰🥰术后一个月：这个时候基本已经消肿完毕，但是术后效果并不是稳定期，术后效果也不是最好的。如果还有肿胀也是正常的，建议耐心等待。
+-
+☑️☑️术后三个月：术后效果稳定期，面部填充效果最好的时期无疑是三个月恢复期过后。这个时候填充脂肪细胞基本稳定，面部表情也会变得自然。
+    """
+# Classify the sample text with the shared model and print the result dict.
+print(model.run(content))
--- a/preprocesser/__init__.py
+++ b/preprocesser/__init__.py
+# -*- coding:utf-8 -*-
+# author:gm
+# mail: zhangguodong@igengmei.com
+# datetime:2020/4/24 3:32 下午
+# software: PyCharm
--- a/preprocesser/filter.py
+++ b/preprocesser/filter.py
+# -*- coding:utf-8 -*-
+# author:gm
+# mail: zhangguodong@igengmei.com
+# datetime:2020/4/24 3:32 下午
+# software: PyCharm
+import re, os
+from config import config
+
+
+class Filter(object):
+    """Base class for token filters backed by a word-list file."""
+
+    def __init__(self, file_path, encoding="utf-8"):
+        """
+        :param file_path: path of the word-list file used by the filter.
+        :param encoding: text encoding of that file (the previous default,
+            "utf-*", is not a valid codec name).
+        """
+        self.file_path = file_path
+        self.encoding = encoding
+        self.stopwords = set()
+
+    def filter(self, token_list=None):
+        """Filter ``token_list``; subclasses must override.
+
+        :raises NotImplementedError: always, in this base class.
+        """
+        raise NotImplementedError
+
+
+class StopwordsFilter(Filter):
+    """Strips irregular characters from tokens and drops stop words."""
+
+    def __init__(self, file_path, encoding="utf-8"):
+        """Create the filter and immediately load the stop-word file."""
+        super(StopwordsFilter, self).__init__(file_path, encoding)
+        self.init()
+
+    def remove_irregular_chars(self, corpus: str):
+        """Return ``corpus`` without any character outside U+4E00-U+9FA5, 0-9, A-Z and a-z."""
+        return re.sub(u"([^\u4e00-\u9fa5\u0030-\u0039\u0041-\u005a\u0061-\u007a])", "", corpus)
+
+    def init(self):
+        """Load stop words, one per line, from ``self.file_path`` into ``self.stopwords``."""
+        # The empty string and a bare newline always count as stop words, even
+        # when the file is empty; tokens that clean down to "" are then dropped.
+        self.stopwords.update(("", "\n"))
+        with open(self.file_path, "r", encoding=self.encoding) as stopwords_file:
+            for line in stopwords_file:
+                self.stopwords.add(line.strip())
+
+    def filter(self, token_list):
+        """Return the cleaned tokens of ``token_list`` that are not stop words.
+
+        Each token is cleaned with ``remove_irregular_chars`` (which also removes
+        all whitespace) and the cleaned form is the one checked against the
+        stop-word set.
+        """
+        cleaned_tokens = (self.remove_irregular_chars(item) for item in token_list)
+        return [token for token in cleaned_tokens if token not in self.stopwords]
+
+
+# Project root: two directory levels above this file. Derived with os.path so
+# it does not depend on "/" separators or on __file__ being absolute.
+root_path = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
+# Shared module-level filter loaded from the configured stop-word list.
+stopwords_filter = StopwordsFilter(os.path.join(root_path, config.stopwords_path))
--- a/preprocesser/pipeline.py
+++ b/preprocesser/pipeline.py
+# -*- coding:utf-8 -*-
+# author:gm
+# mail: zhangguodong@igengmei.com
+# datetime:2020/4/24 3:32 下午
+# software: PyCharm
+from tqdm import tqdm
+
+
+class Pipeline:
+    """
+    Describes the data-processing flow; meant to turn the words in a file
+    into per-sentence token lists.
+    """
+
+    def __init__(self):
+        # Starts empty; nothing in this class adds to it yet.
+        self.pipelines = list()
\ No newline at end of file
--- a/preprocesser/processors.py
+++ b/preprocesser/processors.py
+# -*- coding:utf-8 -*-
+# author:gm
+# mail: zhangguodong@igengmei.com
+# datetime:2020/4/24 3:32 下午
+# software: PyCharm
+from abc import ABC
+
+from jieba import Tokenizer
+import re, os
+from config import config
+
+
+class SentenceSegmenter(object):
+    """Splits text into fragments on a punctuation regex."""
+
+    def __init__(self, split_pun=None):
+        """
+        :param split_pun: regex pattern string, or an iterable of pattern pieces
+            that are concatenated as-is (not escaped, not wrapped in a character
+            class). When empty or None, a character class of common CJK/ASCII
+            sentence punctuation is used.
+        """
+        if not split_pun:
+            self.split_pun = r'[;；.。，,！\n!?？]'
+        else:
+            self.split_pun = "".join(split_pun)
+
+    def split(self, sentences):
+        """Yield the non-empty fragments of a string, or of every string in an iterable.
+
+        Empty fragments (adjacent or trailing delimiters) are skipped for both
+        input kinds.
+        """
+        if isinstance(sentences, str):
+            sentences = [sentences]
+        for sentence in sentences:
+            for item in re.split(self.split_pun, sentence):
+                if item:
+                    yield item
+
+
+class Processor:
+    """Base class for named processing steps."""
+
+    def __init__(self, name):
+        # Kept on a private attribute.
+        self._name = name
+
+    def run(self):
+        """Execute the step; concrete processors must override."""
+        raise NotImplementedError()
+
+
+class StandardProcessor(Processor):
+    """Processor whose ``lcut`` space-joins the items of its input."""
+
+    def __init__(self, name=""):
+        super().__init__(name)
+
+    def lcut(self, line):
+        """Return the items of ``line`` joined by single spaces (its characters, if ``line`` is a str)."""
+        separator = " "
+        return separator.join(line)
+
+
+class TokenizerProcessor(Processor, ABC):
+    """Processor that tokenizes text with a jieba ``Tokenizer`` built from a dictionary file."""
+
+    def __init__(self, file_path, name=""):
+        """
+        :param file_path: dictionary file handed to the jieba tokenizer.
+        :param name: processor name stored by ``Processor.__init__``.
+        """
+        # Initialise the base class so ``_name`` exists on every processor.
+        super(TokenizerProcessor, self).__init__(name)
+        self.file_path = file_path
+        self.tokenizer = None
+        self.init(self.file_path)
+
+    def init(self, dict_path=None):
+        """Build and eagerly initialise the tokenizer from ``dict_path``."""
+        tokenizer = Tokenizer(dictionary=dict_path)
+        tokenizer.initialize()
+        self.tokenizer = tokenizer
+
+    def lcut(self, line, cut_all=False):
+        """Tokenize ``line`` into a list of tokens.
+
+        The call always enables HMM and forwards ``cut_all`` to the tokenizer.
+        NOTE(review): the original note said only the "smart" segmentation mode
+        is supported, yet ``cut_all`` is passed through - confirm the intent.
+
+        :param line: text to tokenize.
+        :param cut_all: forwarded to the tokenizer's ``lcut``.
+        :return: whatever the tokenizer's ``lcut`` returns.
+        """
+        return self.tokenizer.lcut(line, HMM=True, cut_all=cut_all)
+
+
+# Project root: two directory levels above this file. Derived with os.path so
+# it does not depend on "/" separators or on __file__ being absolute.
+root_path = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
+# Shared module-level tokenizer built from the configured word dictionary.
+token_processor = TokenizerProcessor(os.path.join(root_path, config.words_path))
--- a/setup.py
+++ b/setup.py
+# -*- coding:utf-8 -*-
+# author:gm
+# mail: zhangguodong@igengmei.com
+# datetime:2020/4/24 3:32 下午
+# software: PyCharm
+
+import setuptools
+
+# Runtime dependencies: jieba is imported by preprocesser/processors.py and
+# tqdm by preprocesser/pipeline.py, so both must be installed with the package.
+requires = [
+    "jieba",
+    "tqdm",
+]
+
+# Extra packages for development installs (``pip install gm-text-miner[dev]``).
+dev_requires = [
+]
+
+setuptools.setup(
+    name='gm-text-miner',
+    version="1.0.0",
+    author="crazyer",
+    author_email="zhangguodong@igengmei.com",
+    description="classifical base word dict",
+    install_requires=requires,
+    url="",
+    packages=setuptools.find_packages(),
+    # The setuptools keyword is ``extras_require``; the previous spelling
+    # ``extra_requires`` is not a recognized option, so the extras were dropped.
+    extras_require={
+        'dev': dev_requires,
+    },
+    classifiers=[
+        "Programming Language :: Python :: 3",
+    ],
+    include_package_data=True
+)