update

5f8dd1e7 · litaolemo · 5666e58f · 5f8dd1e7
Commit 5f8dd1e7 authored Sep 10, 2020 by litaolemo
Hide whitespace changes
Inline Side-by-side

Showing with 25 additions and 24 deletions

portary_div_exposure.py utils/portary_div_exposure.py +25 -24

No files found.
--- a/utils/portary_div_exposure.py
+++ b/utils/portary_div_exposure.py
@@ -6,7 +6,6 @@
 import datetime
 import json
 import traceback
-
 import redis
 import pymysql
 from elasticsearch import Elasticsearch
@@ -61,9 +60,9 @@ def user_portrait_scan_info():
                    for data_type in res_dic:
                        for tag in res_dic[data_type]:
                            if return_dict.get(tag):
-                                return_dict[tag] = (data_type,return_dict[tag][1] + 1)
+                                return_dict[tag] = (data_type, return_dict[tag][1] + 1)
                            else:
-                                return_dict[tag] = (data_type,1)
+                                return_dict[tag] = (data_type, 1)
                except:
                    continue
                # for data_list in res_dic:
@@ -162,16 +161,16 @@ def get_device_num_from_es(word):
        timeout='10s',
        size=0,
        body={"aggs": {
-                "NAME": {
-                    "nested": {"path": "projects"}, "aggs": {
-                        "NAME1": {
-                            "terms": {"field": "projects.name", "size": 10000}
-                        }
+            "NAME": {
+                "nested": {"path": "projects"}, "aggs": {
+                    "NAME1": {
+                        "terms": {"field": "projects.name", "size": 10000}
                    }
-
                }
+
            }
        }
+        }
    )
    tractate_content_num = results["hits"]["total"]
    return tractate_content_num
@@ -232,7 +231,7 @@ def get_es_article_num(tag_dict):
                )
                answer_content_num = results["hits"]["total"]
            except:
-                print("answer has no %s" %tag_type)
+                print("answer has no %s" % tag_type)
                answer_content_num = 0

            body = {
@@ -330,9 +329,9 @@ def from_id_get_tag(card_id_dict):
    index = ""
    doc_type = ""
    query_count = {
-        "diary":{},
-        "answer":{},
-        "tractate":{}
+        "diary": {},
+        "answer": {},
+        "tractate": {}
    }
    for card_type in card_id_dict:
        if card_type == "diary":
@@ -345,26 +344,27 @@ def from_id_get_tag(card_id_dict):
            index = 'gm-dbmw-tractate-read'
            doc_type = 'tractate'
        for card_id in card_id_dict[card_type]:
-            res = es.get_source(index,doc_type,card_id)
+            res = es.get_source(index, doc_type, card_id)
            # print(res)
            first_demands = res.get("first_demands") if res.get("first_demands") else []
            second_demands = res.get("second_demands") if res.get("second_demands") else []
            first_solutions = res.get("first_solutions") if res.get("first_solutions") else []
            second_solutions = res.get("second_solutions") if res.get("second_solutions") else []
-            first_positions = res.get("first_positions") if res.get("first_positions") else []
+            first_positions = res.get("first_positions") if res.get("positions") else []
            second_positions = res.get("second_positions") if res.get("second_positions") else []
-            projects = res.get("projects") if res.get("projects") else []
+            projects = res.get("projects") if res.get("tags_v3") else []
            word_count_list = first_demands + second_demands + first_solutions + second_solutions + first_positions + second_positions + projects
            for word in word_count_list:
                if word in query_count[doc_type]:
-                    query_count[doc_type][word] = (doc_type,query_count[doc_type][word][1] + 1)
+                    query_count[doc_type][word] = (doc_type, query_count[doc_type][word][1] + 1)
                else:
-                    query_count[doc_type][word] = (doc_type,1)
+                    query_count[doc_type][word] = (doc_type, 1)
    return query_count


-def save_data_to_csv(user_portrait_dict,word_count_exposure):
-    all_data = [("user_portrait","tag_type","user_portrait_count","diary_exposure","answer_exposure","tractate_exposure")]
+def save_data_to_csv(user_portrait_dict, word_count_exposure):
+    all_data = [
+        ("user_portrait", "tag_type", "user_portrait_count", "diary_exposure", "answer_exposure", "tractate_exposure")]
    for tag in user_portrait_dict:
        data_type = ""
        data_count = ""
@@ -380,9 +380,8 @@ def save_data_to_csv(user_portrait_dict,word_count_exposure):
            answer_exposure = word_count_exposure["answer"].get(tag)
        if word_count_exposure["tractate"].get(tag):
            tractate_exposure = word_count_exposure["tractate"].get(tag)
-        all_data.append((data_type,data_count,diary_exposure[1],answer_exposure[1],tractate_exposure[1]))
-        print(tag,all_data[-1])
-
+        all_data.append((tag, data_type, data_count, diary_exposure[1], answer_exposure[1], tractate_exposure[1]))
+        print(tag, all_data[-1])

    data = pd.DataFrame(all_data)
    s = datetime.datetime.now()
@@ -391,6 +390,7 @@ def save_data_to_csv(user_portrait_dict,word_count_exposure):
                # columns=columns
                )

+
 def parse_data():
    demands_num = {}
    # 获取画像数
@@ -407,7 +407,8 @@ def parse_data():
    # 获取曝光id对应的标签
    word_count_exposure = from_id_get_tag(card_id_dict)
    print(word_count_exposure)
-    save_data_to_csv(user_portrait_dict,word_count_exposure)
+    save_data_to_csv(user_portrait_dict, word_count_exposure)
+

 if __name__ == "__main__":
    parse_data()