Commit 187154b5 authored by litaolemo

update

parent c06279c6
...@@ -29,7 +29,7 @@ es = Elasticsearch([ ...@@ -29,7 +29,7 @@ es = Elasticsearch([
def user_portrait_scan_info(): def user_portrait_scan_info():
res_dict = {} return_dict = {}
try: try:
round = 0 round = 0
all_count = 0 all_count = 0
...@@ -45,7 +45,7 @@ def user_portrait_scan_info(): ...@@ -45,7 +45,7 @@ def user_portrait_scan_info():
key = key key = key
device_id = key.split(":")[-1] device_id = key.split(":")[-1]
all_count += 1 all_count += 1
print(key) # print(key)
# if user_portrait_is_empty(device_id): # if user_portrait_is_empty(device_id):
# print(device_id) # print(device_id)
# empty_count += 1 # empty_count += 1
...@@ -55,17 +55,24 @@ def user_portrait_scan_info(): ...@@ -55,17 +55,24 @@ def user_portrait_scan_info():
# just_projects_count += 1 # just_projects_count += 1
# user_portrait_get_empty_candidates(device_id) # user_portrait_get_empty_candidates(device_id)
res_dic = get_user_portrait_tag3_from_redis(device_id) try:
print(res_dic) res_dic = get_user_portrait_tag3_from_redis(device_id)
print(res_dic)
for data_type in res_dic:
for tag in res_dic[data_type]:
if return_dict.get(tag):
return_dict[tag] = (data_type,return_dict[tag][1] + 1)
else:
return_dict[tag] = (data_type,1)
except:
continue
# for data_list in res_dic: # for data_list in res_dic:
# for data in data_list: # for data in data_list:
return return_dict
print("all count: " + str(all_count))
print("empty portrait: " + str(empty_count))
print("just projects portrait: " + str(just_projects_count))
except Exception as e: except Exception as e:
print(e) print(e)
return {}
def get_user_portrait_tag3_redis_key(device_id): def get_user_portrait_tag3_redis_key(device_id):
...@@ -441,7 +448,11 @@ def get_data_by_mysql(host, port, user, passwd, db, sql): ...@@ -441,7 +448,11 @@ def get_data_by_mysql(host, port, user, passwd, db, sql):
def from_id_get_tag(card_id_dict): def from_id_get_tag(card_id_dict):
index = "" index = ""
doc_type = "" doc_type = ""
query_count = {} query_count = {
"diary":{},
"answer":{},
"tractate":{}
}
for card_type in card_id_dict: for card_type in card_id_dict:
if card_type == "diary": if card_type == "diary":
index = 'gm-dbmw-diary-read' index = 'gm-dbmw-diary-read'
...@@ -464,27 +475,46 @@ def from_id_get_tag(card_id_dict): ...@@ -464,27 +475,46 @@ def from_id_get_tag(card_id_dict):
projects = res.get("projects") if res.get("projects") else [] projects = res.get("projects") if res.get("projects") else []
word_count_list = first_demands + second_demands + first_solutions + second_solutions + first_positions + second_positions + projects word_count_list = first_demands + second_demands + first_solutions + second_solutions + first_positions + second_positions + projects
for word in word_count_list: for word in word_count_list:
if word in query_count: if word in query_count[doc_type]:
query_count[word] += 1 query_count[doc_type][word] = (doc_type,query_count[doc_type][word][1] + 1)
else: else:
query_count[word] = 0 query_count[doc_type][word] = (doc_type,1)
return query_count return query_count
def save_data_to_csv(all_tags, user_portrait_dict, word_count_exposure):
    """Build and print one summary row per tag.

    Each row is the tuple
    ``(data_type, data_count, diary_exposure, answer_exposure,
    tractate_exposure)``.

    Args:
        all_tags: Iterable of tag names to report on.
        user_portrait_dict: Mapping of tag -> ``(data_type, count)``. Tags
            that are missing (or map to a falsy value) get ``""`` for both
            fields.
        word_count_exposure: Mapping with the keys ``"diary"``,
            ``"answer"`` and ``"tractate"``, each holding a mapping of
            tag -> exposure entry. Missing or falsy entries are reported
            as ``0``.

    Returns:
        None. Rows are printed as they are built; the CSV export below is
        still commented out.
    """
    all_data = []

    for tag in all_tags:
        data_type = ""
        data_count = ""
        user_portrait = user_portrait_dict.get(tag)
        if user_portrait:
            data_type, data_count = user_portrait

        # Look the exposure up by the current tag. The previous code used
        # the string literal "tag" as the key, so every row reported the
        # exposure of a tag literally named "tag".
        # NOTE(review): the stored entry is passed through unchanged;
        # from_id_get_tag appears to store (doc_type, count) tuples -
        # confirm whether only the count should be reported here.
        diary_exposure = word_count_exposure["diary"].get(tag) or 0
        answer_exposure = word_count_exposure["answer"].get(tag) or 0
        tractate_exposure = word_count_exposure["tractate"].get(tag) or 0

        all_data.append(
            (data_type, data_count, diary_exposure, answer_exposure, tractate_exposure)
        )
        print(all_data[-1])

    # CSV export is not enabled yet; the draft below refers to names
    # (d, monthly_doc_type_name) that are not defined in this function.
    # data = pd.DataFrame(all_data)
    # s = datetime.datetime.now()
    # ss = str(s)[0:19].replace(' ', '-').replace(':', '-')
    # data.to_csv('%s%sall_s2.csv' % (d, monthly_doc_type_name), encoding='gb18030',
    #             # columns=columns
    #             )
def parse_data(): def parse_data():
demands_num = {} demands_num = {}
# 获取画像数 # 获取画像数
user_portrait_scan_info() user_portrait_dict = user_portrait_scan_info()
# 获取全部标签 # 获取全部标签
all_tags = get_channel_tags_info() all_tags = get_channel_tags_info()
print(all_tags) print(all_tags)
...@@ -496,7 +526,7 @@ def parse_data(): ...@@ -496,7 +526,7 @@ def parse_data():
# 获取曝光id对应的标签 # 获取曝光id对应的标签
word_count_exposure = from_id_get_tag(card_id_dict) word_count_exposure = from_id_get_tag(card_id_dict)
print(word_count_exposure) print(word_count_exposure)
# save_data_to_csv(all_tags, word_count_exposure) save_data_to_csv(all_tags,user_portrait_dict,word_count_exposure)
if __name__ == "__main__": if __name__ == "__main__":
parse_data() parse_data()
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment