Commit 5f8dd1e7 authored by litaolemo

update

parent 5666e58f
...@@ -6,7 +6,6 @@ ...@@ -6,7 +6,6 @@
import datetime import datetime
import json import json
import traceback import traceback
import redis import redis
import pymysql import pymysql
from elasticsearch import Elasticsearch from elasticsearch import Elasticsearch
...@@ -61,9 +60,9 @@ def user_portrait_scan_info(): ...@@ -61,9 +60,9 @@ def user_portrait_scan_info():
for data_type in res_dic: for data_type in res_dic:
for tag in res_dic[data_type]: for tag in res_dic[data_type]:
if return_dict.get(tag): if return_dict.get(tag):
return_dict[tag] = (data_type,return_dict[tag][1] + 1) return_dict[tag] = (data_type, return_dict[tag][1] + 1)
else: else:
return_dict[tag] = (data_type,1) return_dict[tag] = (data_type, 1)
except: except:
continue continue
# for data_list in res_dic: # for data_list in res_dic:
...@@ -162,16 +161,16 @@ def get_device_num_from_es(word): ...@@ -162,16 +161,16 @@ def get_device_num_from_es(word):
timeout='10s', timeout='10s',
size=0, size=0,
body={"aggs": { body={"aggs": {
"NAME": { "NAME": {
"nested": {"path": "projects"}, "aggs": { "nested": {"path": "projects"}, "aggs": {
"NAME1": { "NAME1": {
"terms": {"field": "projects.name", "size": 10000} "terms": {"field": "projects.name", "size": 10000}
}
} }
} }
} }
} }
}
) )
tractate_content_num = results["hits"]["total"] tractate_content_num = results["hits"]["total"]
return tractate_content_num return tractate_content_num
...@@ -232,7 +231,7 @@ def get_es_article_num(tag_dict): ...@@ -232,7 +231,7 @@ def get_es_article_num(tag_dict):
) )
answer_content_num = results["hits"]["total"] answer_content_num = results["hits"]["total"]
except: except:
print("answer has no %s" %tag_type) print("answer has no %s" % tag_type)
answer_content_num = 0 answer_content_num = 0
body = { body = {
...@@ -330,9 +329,9 @@ def from_id_get_tag(card_id_dict): ...@@ -330,9 +329,9 @@ def from_id_get_tag(card_id_dict):
index = "" index = ""
doc_type = "" doc_type = ""
query_count = { query_count = {
"diary":{}, "diary": {},
"answer":{}, "answer": {},
"tractate":{} "tractate": {}
} }
for card_type in card_id_dict: for card_type in card_id_dict:
if card_type == "diary": if card_type == "diary":
...@@ -345,26 +344,27 @@ def from_id_get_tag(card_id_dict): ...@@ -345,26 +344,27 @@ def from_id_get_tag(card_id_dict):
index = 'gm-dbmw-tractate-read' index = 'gm-dbmw-tractate-read'
doc_type = 'tractate' doc_type = 'tractate'
for card_id in card_id_dict[card_type]: for card_id in card_id_dict[card_type]:
res = es.get_source(index,doc_type,card_id) res = es.get_source(index, doc_type, card_id)
# print(res) # print(res)
first_demands = res.get("first_demands") if res.get("first_demands") else [] first_demands = res.get("first_demands") if res.get("first_demands") else []
second_demands = res.get("second_demands") if res.get("second_demands") else [] second_demands = res.get("second_demands") if res.get("second_demands") else []
first_solutions = res.get("first_solutions") if res.get("first_solutions") else [] first_solutions = res.get("first_solutions") if res.get("first_solutions") else []
second_solutions = res.get("second_solutions") if res.get("second_solutions") else [] second_solutions = res.get("second_solutions") if res.get("second_solutions") else []
first_positions = res.get("first_positions") if res.get("first_positions") else [] first_positions = res.get("first_positions") if res.get("positions") else []
second_positions = res.get("second_positions") if res.get("second_positions") else [] second_positions = res.get("second_positions") if res.get("second_positions") else []
projects = res.get("projects") if res.get("projects") else [] projects = res.get("projects") if res.get("tags_v3") else []
word_count_list = first_demands + second_demands + first_solutions + second_solutions + first_positions + second_positions + projects word_count_list = first_demands + second_demands + first_solutions + second_solutions + first_positions + second_positions + projects
for word in word_count_list: for word in word_count_list:
if word in query_count[doc_type]: if word in query_count[doc_type]:
query_count[doc_type][word] = (doc_type,query_count[doc_type][word][1] + 1) query_count[doc_type][word] = (doc_type, query_count[doc_type][word][1] + 1)
else: else:
query_count[doc_type][word] = (doc_type,1) query_count[doc_type][word] = (doc_type, 1)
return query_count return query_count
def save_data_to_csv(user_portrait_dict,word_count_exposure): def save_data_to_csv(user_portrait_dict, word_count_exposure):
all_data = [("user_portrait","tag_type","user_portrait_count","diary_exposure","answer_exposure","tractate_exposure")] all_data = [
("user_portrait", "tag_type", "user_portrait_count", "diary_exposure", "answer_exposure", "tractate_exposure")]
for tag in user_portrait_dict: for tag in user_portrait_dict:
data_type = "" data_type = ""
data_count = "" data_count = ""
...@@ -380,9 +380,8 @@ def save_data_to_csv(user_portrait_dict,word_count_exposure): ...@@ -380,9 +380,8 @@ def save_data_to_csv(user_portrait_dict,word_count_exposure):
answer_exposure = word_count_exposure["answer"].get(tag) answer_exposure = word_count_exposure["answer"].get(tag)
if word_count_exposure["tractate"].get(tag): if word_count_exposure["tractate"].get(tag):
tractate_exposure = word_count_exposure["tractate"].get(tag) tractate_exposure = word_count_exposure["tractate"].get(tag)
all_data.append((data_type,data_count,diary_exposure[1],answer_exposure[1],tractate_exposure[1])) all_data.append((tag, data_type, data_count, diary_exposure[1], answer_exposure[1], tractate_exposure[1]))
print(tag,all_data[-1]) print(tag, all_data[-1])
data = pd.DataFrame(all_data) data = pd.DataFrame(all_data)
s = datetime.datetime.now() s = datetime.datetime.now()
...@@ -391,6 +390,7 @@ def save_data_to_csv(user_portrait_dict,word_count_exposure): ...@@ -391,6 +390,7 @@ def save_data_to_csv(user_portrait_dict,word_count_exposure):
# columns=columns # columns=columns
) )
def parse_data(): def parse_data():
demands_num = {} demands_num = {}
# 获取画像数 # 获取画像数
...@@ -407,7 +407,8 @@ def parse_data(): ...@@ -407,7 +407,8 @@ def parse_data():
# 获取曝光id对应的标签 # 获取曝光id对应的标签
word_count_exposure = from_id_get_tag(card_id_dict) word_count_exposure = from_id_get_tag(card_id_dict)
print(word_count_exposure) print(word_count_exposure)
save_data_to_csv(user_portrait_dict,word_count_exposure) save_data_to_csv(user_portrait_dict, word_count_exposure)
if __name__ == "__main__": if __name__ == "__main__":
parse_data() parse_data()
Markdown is supported
0% Try again or attach a new file
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment