# main.py 4.11 KB
# -*- coding: UTF-8 -*-
from utils import *
from config import DIRECTORY_PATH
from getCidRate import *
from getClkCidUidRate import *
from getTopFeatures import *



def main():
	"""Collect the feature statistics and write them to a result file.

	Every ratio feature and every Top-100 feature is computed for the three
	platform keys "all", "ios" and "android", in that order, so each
	``*_result`` list below is ordered [all, ios, android].

	The collected values are written to
	``DIRECTORY_PATH + "result_<date>.txt"``, where ``<date>`` is whatever
	``get_yesterday_date()`` returns. Each feature is written as a label
	line followed by ``str()`` of its value and a blank line.

	Progress messages are printed to stdout. Returns None.
	"""
	# (platform key, label) pairs. The label is the second argument the
	# original code passed to the getters (all / Apple / Android);
	# presumably a display name -- confirm in the getter modules.
	platforms = [("all", "所有"), ("ios", "苹果"), ("android", "安卓")]

	print("开始获取特征数据...")
	# 1. Ratio features
	# 1.1 answer exposure share (= answer impressions / total cid impressions)
	answer_imp_rate_result = [
		CidRate(key, "answer").get_cid_imp_rate(label)
		for key, label in platforms
	]
	print("已获取answer曝光占比")

	# 1.2 diary exposure share (= diary impressions / total cid impressions)
	diary_imp_rate_result = [
		CidRate(key, "diary").get_cid_imp_rate(label)
		for key, label in platforms
	]
	print("已获取diary曝光占比")

	# 1.3 active-user click-through rate
	#     (= active-user clicks / active-user impressions)
	activate_uid_ctr_result = [
		get_activate_uid_ctr(key) for key, _ in platforms
	]
	print("已获取活跃用户点击率")

	# 1.4 share of users who clicked an answer
	#     (= users who clicked an answer / users shown an answer)
	click_answer_result = [
		ClkCidUidRate(key, "answer").get_clk_cid_uid_rate(label)
		for key, label in platforms
	]
	print("已获取点击answer用户占比")

	# 1.5 share of users who clicked a diary
	#     (= users who clicked a diary / users shown a diary)
	click_diary_result = [
		ClkCidUidRate(key, "diary").get_clk_cid_uid_rate(label)
		for key, label in platforms
	]
	print("已获取点击diary用户占比")

	# 1.6 share of users with any click
	#     (= users with a click / users with an impression)
	click_everything_result = [
		ClkCidUidRate(key, "everything").get_clk_cid_uid_rate(label)
		for key, label in platforms
	]
	print("已获取有点击用户占比")

	# 2. Top features
	# 2.1 distribution of per-user click counts
	#     (column 1: clicks per user; column 2: number of distinct users)
	df = get_click_times_to_count_uid_df()
	print("已获取用户点击次数分布")

	# 2.2 Top 100 diary (sorted by ctr)
	# NOTE(review): the numeric argument (4 here, 2 below) is opaque from
	# this file; confirm its meaning in TopFeatures.get_result.
	top_diary_result = [
		TopFeatures(key, "diary", 100).get_result(label, 4, "ctr")
		for key, label in platforms
	]
	print("已获取 Top diary 特征")

	# 2.3 Top 100 answer (sorted by ctr)
	top_answer_result = [
		TopFeatures(key, "answer", 100).get_result(label, 2, "ctr")
		for key, label in platforms
	]
	print("已获取 Top answer 特征")

	# 2.4 Top 100 question
	# NOTE(review): the original comment said "sorted by click times" but
	# the code passes "ctr"; the code is kept as-is -- confirm which is meant.
	top_question_result = [
		TopFeatures(key, "question", 100).get_result(label, 2, "ctr")
		for key, label in platforms
	]
	print("已获取 Top question 特征")
	print("done")

	# Everything gathered above, in computation order. ASCII labels keep
	# the section headers independent of the output encoding.
	report = [
		("answer_imp_rate", answer_imp_rate_result),
		("diary_imp_rate", diary_imp_rate_result),
		("activate_uid_ctr", activate_uid_ctr_result),
		("click_answer_uid_rate", click_answer_result),
		("click_diary_uid_rate", click_diary_result),
		("click_everything_uid_rate", click_everything_result),
		("click_times_to_count_uid", df),
		("top_diary", top_diary_result),
		("top_answer", top_answer_result),
		("top_question", top_question_result),
	]

	output_path = DIRECTORY_PATH + "result_{}.txt".format(get_yesterday_date())
	# NOTE(review): the file is opened with the platform default encoding,
	# as in the original call; pass an explicit encoding if the values can
	# contain non-ASCII text and the interpreter is Python 3.
	with open(output_path, 'w') as f:
		for name, value in report:
			f.write("{}:\n{}\n\n".format(name, value))





# Run the feature collection only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
	main()