Commit 187154b5 authored by litaolemo's avatar litaolemo

update

parent c06279c6
......@@ -29,7 +29,7 @@ es = Elasticsearch([
def user_portrait_scan_info():
res_dict = {}
return_dict = {}
try:
round = 0
all_count = 0
......@@ -45,7 +45,7 @@ def user_portrait_scan_info():
key = key
device_id = key.split(":")[-1]
all_count += 1
print(key)
# print(key)
# if user_portrait_is_empty(device_id):
# print(device_id)
# empty_count += 1
......@@ -55,17 +55,24 @@ def user_portrait_scan_info():
# just_projects_count += 1
# user_portrait_get_empty_candidates(device_id)
try:
res_dic = get_user_portrait_tag3_from_redis(device_id)
print(res_dic)
for data_type in res_dic:
for tag in res_dic[data_type]:
if return_dict.get(tag):
return_dict[tag] = (data_type,return_dict[tag][1] + 1)
else:
return_dict[tag] = (data_type,1)
except:
continue
# for data_list in res_dic:
# for data in data_list:
print("all count: " + str(all_count))
print("empty portrait: " + str(empty_count))
print("just projects portrait: " + str(just_projects_count))
return return_dict
except Exception as e:
print(e)
return {}
def get_user_portrait_tag3_redis_key(device_id):
......@@ -441,7 +448,11 @@ def get_data_by_mysql(host, port, user, passwd, db, sql):
def from_id_get_tag(card_id_dict):
index = ""
doc_type = ""
query_count = {}
query_count = {
"diary":{},
"answer":{},
"tractate":{}
}
for card_type in card_id_dict:
if card_type == "diary":
index = 'gm-dbmw-diary-read'
......@@ -464,27 +475,46 @@ def from_id_get_tag(card_id_dict):
projects = res.get("projects") if res.get("projects") else []
word_count_list = first_demands + second_demands + first_solutions + second_solutions + first_positions + second_positions + projects
for word in word_count_list:
if word in query_count:
query_count[word] += 1
if word in query_count[doc_type]:
query_count[doc_type][word] = (doc_type,query_count[doc_type][word][1] + 1)
else:
query_count[word] = 0
query_count[doc_type][word] = (doc_type,1)
return query_count
# def save_data_to_csv(all_tags, word_count_exposure):
# all_data = []
#
# data = pd.DataFrame(all_dic)
# s = datetime.datetime.now()
# ss = str(s)[0:19].replace(' ', '-').replace(':', '-')
# data.to_csv('%s%sall_s2.csv' % (d, monthly_doc_type_name), encoding='gb18030',
# # columns=columns
# )
def save_data_to_csv(all_tags, user_portrait_dict, word_count_exposure):
    """Assemble one summary row per tag from portrait and exposure counts.

    For every tag in ``all_tags`` a tuple
    ``(data_type, data_count, diary_exposure, answer_exposure,
    tractate_exposure)`` is built, printed, and collected.

    Despite the name, nothing is written to disk yet: the pandas/CSV
    export below is still commented out.

    Args:
        all_tags: Iterable of tag names to report on.
        user_portrait_dict: Mapping of tag -> ``(data_type, count)``.
            Tags that are missing (or map to a falsy value) produce ``""``
            for both ``data_type`` and ``data_count``.
        word_count_exposure: Mapping with the keys ``"diary"``,
            ``"answer"`` and ``"tractate"``, each holding a mapping of
            tag -> exposure value. Missing or falsy values are reported
            as ``0``; stored values are passed through unchanged.

    Returns:
        The list of row tuples, in the same order as ``all_tags``.

    Raises:
        KeyError: If ``word_count_exposure`` lacks one of the three
            content-type keys.
    """
    all_data = []
    for tag in all_tags:
        data_type = ""
        data_count = ""
        user_portrait = user_portrait_dict.get(tag)
        if user_portrait:
            data_type, data_count = user_portrait

        # Look up the current tag itself. The previous code queried the
        # literal key "tag", so every exposure column was always 0 unless
        # a tag happened to be named "tag".
        diary_exposure = word_count_exposure["diary"].get(tag) or 0
        answer_exposure = word_count_exposure["answer"].get(tag) or 0
        tractate_exposure = word_count_exposure["tractate"].get(tag) or 0

        # NOTE(review): the row does not carry the tag name itself, so rows
        # can only be matched to tags by position -- confirm this is intended
        # before enabling the CSV export.
        row = (data_type, data_count, diary_exposure, answer_exposure,
               tractate_exposure)
        all_data.append(row)
        print(row)

    # data = pd.DataFrame(all_data)
    # s = datetime.datetime.now()
    # ss = str(s)[0:19].replace(' ', '-').replace(':', '-')
    # data.to_csv('%s%sall_s2.csv' % (d, monthly_doc_type_name), encoding='gb18030',
    #             # columns=columns
    #             )
    return all_data
def parse_data():
demands_num = {}
# 获取画像数
user_portrait_scan_info()
user_portrait_dict = user_portrait_scan_info()
# 获取全部标签
all_tags = get_channel_tags_info()
print(all_tags)
......@@ -496,7 +526,7 @@ def parse_data():
# 获取曝光id对应的标签
word_count_exposure = from_id_get_tag(card_id_dict)
print(word_count_exposure)
# save_data_to_csv(all_tags, word_count_exposure)
save_data_to_csv(all_tags,user_portrait_dict,word_count_exposure)
if __name__ == "__main__":
parse_data()
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment