Commit 5f8dd1e7 authored by litaolemo

update

parent 5666e58f
......@@ -6,7 +6,6 @@
import datetime
import json
import traceback
import redis
import pymysql
from elasticsearch import Elasticsearch
......@@ -61,9 +60,9 @@ def user_portrait_scan_info():
for data_type in res_dic:
for tag in res_dic[data_type]:
if return_dict.get(tag):
return_dict[tag] = (data_type,return_dict[tag][1] + 1)
return_dict[tag] = (data_type, return_dict[tag][1] + 1)
else:
return_dict[tag] = (data_type,1)
return_dict[tag] = (data_type, 1)
except:
continue
# for data_list in res_dic:
......@@ -162,16 +161,16 @@ def get_device_num_from_es(word):
timeout='10s',
size=0,
body={"aggs": {
"NAME": {
"nested": {"path": "projects"}, "aggs": {
"NAME1": {
"terms": {"field": "projects.name", "size": 10000}
}
"NAME": {
"nested": {"path": "projects"}, "aggs": {
"NAME1": {
"terms": {"field": "projects.name", "size": 10000}
}
}
}
}
}
)
tractate_content_num = results["hits"]["total"]
return tractate_content_num
......@@ -232,7 +231,7 @@ def get_es_article_num(tag_dict):
)
answer_content_num = results["hits"]["total"]
except:
print("answer has no %s" %tag_type)
print("answer has no %s" % tag_type)
answer_content_num = 0
body = {
......@@ -330,9 +329,9 @@ def from_id_get_tag(card_id_dict):
index = ""
doc_type = ""
query_count = {
"diary":{},
"answer":{},
"tractate":{}
"diary": {},
"answer": {},
"tractate": {}
}
for card_type in card_id_dict:
if card_type == "diary":
......@@ -345,26 +344,27 @@ def from_id_get_tag(card_id_dict):
index = 'gm-dbmw-tractate-read'
doc_type = 'tractate'
for card_id in card_id_dict[card_type]:
res = es.get_source(index,doc_type,card_id)
res = es.get_source(index, doc_type, card_id)
# print(res)
first_demands = res.get("first_demands") if res.get("first_demands") else []
second_demands = res.get("second_demands") if res.get("second_demands") else []
first_solutions = res.get("first_solutions") if res.get("first_solutions") else []
second_solutions = res.get("second_solutions") if res.get("second_solutions") else []
first_positions = res.get("first_positions") if res.get("first_positions") else []
first_positions = res.get("first_positions") if res.get("positions") else []
second_positions = res.get("second_positions") if res.get("second_positions") else []
projects = res.get("projects") if res.get("projects") else []
projects = res.get("projects") if res.get("tags_v3") else []
word_count_list = first_demands + second_demands + first_solutions + second_solutions + first_positions + second_positions + projects
for word in word_count_list:
if word in query_count[doc_type]:
query_count[doc_type][word] = (doc_type,query_count[doc_type][word][1] + 1)
query_count[doc_type][word] = (doc_type, query_count[doc_type][word][1] + 1)
else:
query_count[doc_type][word] = (doc_type,1)
query_count[doc_type][word] = (doc_type, 1)
return query_count
def save_data_to_csv(user_portrait_dict,word_count_exposure):
all_data = [("user_portrait","tag_type","user_portrait_count","diary_exposure","answer_exposure","tractate_exposure")]
def save_data_to_csv(user_portrait_dict, word_count_exposure):
all_data = [
("user_portrait", "tag_type", "user_portrait_count", "diary_exposure", "answer_exposure", "tractate_exposure")]
for tag in user_portrait_dict:
data_type = ""
data_count = ""
......@@ -380,9 +380,8 @@ def save_data_to_csv(user_portrait_dict,word_count_exposure):
answer_exposure = word_count_exposure["answer"].get(tag)
if word_count_exposure["tractate"].get(tag):
tractate_exposure = word_count_exposure["tractate"].get(tag)
all_data.append((data_type,data_count,diary_exposure[1],answer_exposure[1],tractate_exposure[1]))
print(tag,all_data[-1])
all_data.append((tag, data_type, data_count, diary_exposure[1], answer_exposure[1], tractate_exposure[1]))
print(tag, all_data[-1])
data = pd.DataFrame(all_data)
s = datetime.datetime.now()
......@@ -391,6 +390,7 @@ def save_data_to_csv(user_portrait_dict,word_count_exposure):
# columns=columns
)
def parse_data():
demands_num = {}
# 获取画像数
......@@ -407,7 +407,8 @@ def parse_data():
# 获取曝光id对应的标签
word_count_exposure = from_id_get_tag(card_id_dict)
print(word_count_exposure)
save_data_to_csv(user_portrait_dict,word_count_exposure)
save_data_to_csv(user_portrait_dict, word_count_exposure)
if __name__ == "__main__":
parse_data()
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment