# -*- coding: UTF-8 -*-
"""Collect the daily ratio/top feature statistics and dump them to a text file.

The feature getters (CidRate, ClkCidUidRate, TopFeatures, get_activate_uid_ctr,
get_click_times_to_count_uid_df, get_yesterday_date) are provided by the
wildcard imports below.
"""
from utils import *
from config import DIRECTORY_PATH
from getCidRate import *
from getClkCidUidRate import *
from getTopFeatures import *

# (platform key, display label) pairs; each feature is computed once per pair,
# always in this order: all platforms, iOS, Android.
_PLATFORMS = (("all", "所有"), ("ios", "苹果"), ("android", "安卓"))


def _write_sections(f, sections):
    """Write every (title, values) pair as a title line, one line per value,
    and a trailing blank line."""
    for title, values in sections:
        f.write("{}\n".format(title))
        for value in values:
            f.write("{}\n".format(value))
        f.write("\n")


def main():
    """Compute all features and write them to ``result_<yesterday>.txt``.

    The output file is created under ``DIRECTORY_PATH``; its name embeds the
    value returned by ``get_yesterday_date()``. Progress messages are printed
    after each feature group is collected.
    """
    print("开始获取特征数据...")

    # 1. Ratio features
    # 1.1 answer impression share (= answer impressions / total cid impressions)
    answer_imp_rate_result = [
        CidRate(platform, "answer").get_cid_imp_rate(label)
        for platform, label in _PLATFORMS
    ]
    print("已获取answer曝光占比")

    # 1.2 diary impression share (= diary impressions / total cid impressions)
    diary_imp_rate_result = [
        CidRate(platform, "diary").get_cid_imp_rate(label)
        for platform, label in _PLATFORMS
    ]
    print("已获取diary曝光占比")

    # 1.3 active-user CTR (= active-user clicks / active-user impressions)
    activate_uid_ctr_result = [
        get_activate_uid_ctr(platform) for platform, _ in _PLATFORMS
    ]
    print("已获取活跃用户点击率")

    # 1.4 share of users clicking answers
    #     (= users who clicked an answer / users shown an answer)
    click_answer_result = [
        ClkCidUidRate(platform, "answer").get_clk_cid_uid_rate(label)
        for platform, label in _PLATFORMS
    ]
    print("已获取点击answer用户占比")

    # 1.5 share of users clicking diaries
    #     (= users who clicked a diary / users shown a diary)
    click_diary_result = [
        ClkCidUidRate(platform, "diary").get_clk_cid_uid_rate(label)
        for platform, label in _PLATFORMS
    ]
    print("已获取点击diary用户占比")

    # 1.6 share of users with any click
    #     (= users with a click / users with an impression)
    click_everything_result = [
        ClkCidUidRate(platform, "everything").get_clk_cid_uid_rate(label)
        for platform, label in _PLATFORMS
    ]
    print("已获取有点击用户占比")

    # 2. Top features
    # 2.1 click-count distribution
    #     (column 1: clicks per user; column 2: number of distinct users)
    df = get_click_times_to_count_uid_df()
    print("已获取用户点击次数分布")

    # 2.2 Top 100 diary (sorted by ctr)
    top_diary_result = [
        TopFeatures(platform, "diary", 100).get_result(label, 4, "ctr")
        for platform, label in _PLATFORMS
    ]
    print("已获取 Top diary 特征")

    # 2.3 Top 100 answer (sorted by ctr)
    top_answer_result = [
        TopFeatures(platform, "answer", 100).get_result(label, 2, "ctr")
        for platform, label in _PLATFORMS
    ]
    print("已获取 Top answer 特征")

    # 2.4 Top 100 question
    # NOTE(review): the original comment said "sorted by click times", but the
    # sort key passed here is "ctr" -- confirm which one is intended.
    top_question_result = [
        TopFeatures(platform, "question", 100).get_result(label, 2, "ctr")
        for platform, label in _PLATFORMS
    ]
    print("已获取 Top question 特征")

    print("done")

    output_path = DIRECTORY_PATH + "result_{}.txt".format(get_yesterday_date())

    # NOTE(review): the original `with` statement had no body, so nothing was
    # ever written. The layout below (title line, then one line per value via
    # str formatting) is a reconstruction -- confirm the expected file format,
    # in particular how `df` and the Top results should be rendered.
    sections = [
        ("answer曝光占比", answer_imp_rate_result),
        ("diary曝光占比", diary_imp_rate_result),
        ("活跃用户点击率", activate_uid_ctr_result),
        ("点击answer用户占比", click_answer_result),
        ("点击diary用户占比", click_diary_result),
        ("有点击用户占比", click_everything_result),
        ("用户点击次数分布", [df]),
        ("Top 100 diary", top_diary_result),
        ("Top 100 answer", top_answer_result),
        ("Top 100 question", top_question_result),
    ]
    with open(output_path, 'w') as f:
        _write_sections(f, sections)


if __name__ == '__main__':
    main()