Commit 7ca95fde authored by 高雅喆's avatar 高雅喆

finished test/main.py

parent 64fa9ca5
......@@ -56,10 +56,10 @@ class CidRate(object):
def main():
    """Query the answer impression rate for all platforms, iOS and Android.

    The superseded ``answer_rate_*`` assignments duplicated every call below
    under older names, so each CidRate query ran twice; only the renamed
    ``answer_imp_rate_*`` set is kept.

    rtype : list  # results for all / ios / android, in that order
    """
    answer_imp_rate_all = CidRate("all", "answer").get_cid_imp_rate("所有")
    answer_imp_rate_ios = CidRate("ios", "answer").get_cid_imp_rate("苹果")
    answer_imp_rate_android = CidRate("android", "answer").get_cid_imp_rate("安卓")
    answer_imp_rate_result = [
        answer_imp_rate_all,
        answer_imp_rate_ios,
        answer_imp_rate_android,
    ]
    # Returned so the collected values are not silently discarded.
    return answer_imp_rate_result
if __name__ == '__main__':
......
# -*- coding: UTF-8 -*-
from utils import con_sql,tuple2dict,get_yesterday_date
from utils import *
from config import DIRECTORY_PATH
from getCidRate import *
from getClkCidUidRate import *
from getTopFeatures import *
#获取各个平台下的活跃用户点击率
def get_activate_uid_ctr(platform, ndays=1):
    """
    Click-through rate of active users (devices that clicked on the target day).

    ndays : 1;2;3;4.. #The number of days from the current time
    platform : 'all';'ios';'android' (any other value is treated as 'all')
    rtype : list  # [platform label, click count, exposure count, click rate]

    The click rate is rounded to 4 decimals and is 0.0 when there are no
    matching exposures (previously this raised ZeroDivisionError).
    """
    if platform == "ios":
        label = "苹果"
        exposure_cond = "='App Store'"
    elif platform == "android":
        label = "安卓"
        exposure_cond = "!='App Store'"
    else:
        label = "所有"
        exposure_cond = " is not null"

    # The original stripped *every* space from the condition used against
    # data_feed_click, which turned " is not null" into "isnotnull" and
    # produced the invalid predicate "device_typeisnotnull". Only the space
    # inside the store name is removed now.
    # NOTE(review): presumably data_feed_click stores the value as 'AppStore'
    # while data_feed_exposure stores 'App Store' - confirm against the tables.
    click_cond = exposure_cond.replace("App Store", "AppStore")

    # Rows whose unix `time` falls on the day `ndays` before the current date.
    day_filter = (
        "from_unixtime(time,'%Y-%m-%d')=date_add(curdate(), interval -{0} day)"
    ).format(ndays)

    sql_clk = (
        "select count(device_id) from data_feed_click "
        "where {0} and device_type{1}"
    ).format(day_filter, click_cond)
    clk_count = con_sql(sql_clk)[0][0]

    # Exposures are restricted to devices that clicked on the same day.
    sql_imp = (
        "select count(device_id) from data_feed_exposure "
        "where {0} and device_id in "
        "(select device_id from data_feed_click where {0} and device_type{1}) "
        "and device_type{2}"
    ).format(day_filter, click_cond, exposure_cond)
    imp_count = con_sql(sql_imp)[0][0]

    clk_rate = round(clk_count / imp_count, 4) if imp_count else 0.0
    return [label, clk_count, imp_count, clk_rate]
#获取 {点击次数 : 独立用户数}
def get_click_times_to_count_uid_df():
    """
    Distribution of click counts: for every distinct per-device click count,
    how many devices reached it.

    The query reads data_feed_click rows dated one day before the current
    date and counts them per device_id; the result is then grouped by that
    count in pandas.

    rtype : pandas.DataFrame  # columns: click_times, uid (uid = number of devices)
    """
    sql = "select device_id,count(cid_type) click_times from data_feed_click where from_unixtime(time,'%Y-%m-%d')=date_add(curdate(), interval -1 day) group by device_id order by click_times desc"
    rows = con_sql(sql)
    # Each row is (device_id, click_times); split it into two named columns.
    per_device = pd.DataFrame({
        "uid": [row[0] for row in rows],
        "click_times": [row[1] for row in rows],
    })
    return per_device.groupby(by="click_times", as_index=False).count()
def main():
    """Fetch every feature group once, printing a progress line after each.

    A second ``def main():`` header previously split this function in two, so
    the later definition shadowed the first and step 1.1 (plus the start
    message) never ran. Both halves are merged here, and the collected
    results are returned instead of being discarded.

    rtype : dict  # feature name -> fetched result(s)
    """
    # (platform key, label handed to the feature getters), in call order.
    platforms = (("all", "所有"), ("ios", "苹果"), ("android", "安卓"))
    results = {}

    print("开始获取特征数据...")
    # 1. Ratio features
    # 1.1 Answer exposure share (= answer exposures / total cid exposures)
    results["answer_imp_rate"] = [
        CidRate(key, "answer").get_cid_imp_rate(label)
        for key, label in platforms
    ]
    print("已获取answer曝光占比")

    # 1.2 Active-user CTR (= active-user clicks / active-user exposures)
    results["activate_uid_ctr"] = [
        get_activate_uid_ctr(key) for key, _ in platforms
    ]
    print("已获取活跃用户点击率")

    # 1.3 Share of users clicking answers (= users clicking / users exposed)
    results["click_answer"] = [
        ClkCidUidRate(key, "answer").get_clk_cid_uid_rate(label)
        for key, label in platforms
    ]
    print("已获取点击answer用户占比")

    # 1.4 Share of users clicking diaries (= users clicking / users exposed)
    results["click_diary"] = [
        ClkCidUidRate(key, "diary").get_clk_cid_uid_rate(label)
        for key, label in platforms
    ]
    print("已获取点击diary用户占比")

    # 1.5 Share of users with any click (= users with clicks / users with exposures)
    results["click_everything"] = [
        ClkCidUidRate(key, "everything").get_clk_cid_uid_rate(label)
        for key, label in platforms
    ]
    print("已获取有点击用户占比")

    # 2. Top features
    # 2.1 Click-count distribution (column 1: clicks per user; column 2: distinct users)
    results["click_times_distribution"] = get_click_times_to_count_uid_df()
    print("已获取用户点击次数分布")

    # 2.2 Top 100 diary (sorted by ctr)
    results["top_diary"] = [
        TopFeatures(key, "diary", 100).get_result(label, 4, "ctr")
        for key, label in platforms
    ]
    print("已获取 Top diary 特征")

    # 2.3 Top 100 answer (sorted by ctr)
    results["top_answer"] = [
        TopFeatures(key, "answer", 100).get_result(label, 2, "ctr")
        for key, label in platforms
    ]
    print("已获取 Top answer 特征")

    # 2.4 Top 100 question
    # NOTE(review): the original comment said "sorted by click times" but the
    # calls pass "ctr" - confirm which sort key is intended.
    results["top_question"] = [
        TopFeatures(key, "question", 100).get_result(label, 2, "ctr")
        for key, label in platforms
    ]
    print("已获取 Top question 特征")

    print("done")
    return results


if __name__ == '__main__':
    main()
# -*- coding: UTF-8 -*-
import pymysql
import datetime
import pandas as pd
def con_sql(sql):
#从数据库的表里获取数据
......@@ -35,3 +36,51 @@ def get_yesterday_date():
yesterday = today - datetime.timedelta(days=1)
yesterday = yesterday.strftime("%Y%m%d")
return yesterday
#获取各个平台下的活跃用户点击率
def get_activate_uid_ctr(platform, ndays=1):
    """
    Click-through rate of active users (devices that clicked on the target day).

    ndays : 1;2;3;4.. #The number of days from the current time
    platform : 'all';'ios';'android' (any other value is treated as 'all')
    rtype : list  # [platform label, click count, exposure count, click rate]

    The click rate is rounded to 4 decimals and is 0.0 when there are no
    matching exposures (previously this raised ZeroDivisionError).
    """
    if platform == "ios":
        label = "苹果"
        exposure_cond = "='App Store'"
    elif platform == "android":
        label = "安卓"
        exposure_cond = "!='App Store'"
    else:
        label = "所有"
        exposure_cond = " is not null"

    # The original stripped *every* space from the condition used against
    # data_feed_click, which turned " is not null" into "isnotnull" and
    # produced the invalid predicate "device_typeisnotnull". Only the space
    # inside the store name is removed now.
    # NOTE(review): presumably data_feed_click stores the value as 'AppStore'
    # while data_feed_exposure stores 'App Store' - confirm against the tables.
    click_cond = exposure_cond.replace("App Store", "AppStore")

    # Rows whose unix `time` falls on the day `ndays` before the current date.
    day_filter = (
        "from_unixtime(time,'%Y-%m-%d')=date_add(curdate(), interval -{0} day)"
    ).format(ndays)

    sql_clk = (
        "select count(device_id) from data_feed_click "
        "where {0} and device_type{1}"
    ).format(day_filter, click_cond)
    clk_count = con_sql(sql_clk)[0][0]

    # Exposures are restricted to devices that clicked on the same day.
    sql_imp = (
        "select count(device_id) from data_feed_exposure "
        "where {0} and device_id in "
        "(select device_id from data_feed_click where {0} and device_type{1}) "
        "and device_type{2}"
    ).format(day_filter, click_cond, exposure_cond)
    imp_count = con_sql(sql_imp)[0][0]

    clk_rate = round(clk_count / imp_count, 4) if imp_count else 0.0
    return [label, clk_count, imp_count, clk_rate]
#获取 {点击次数 : 独立用户数}
def get_click_times_to_count_uid_df():
    """
    Distribution of click counts: for every distinct per-device click count,
    how many devices reached it.

    The query reads data_feed_click rows dated one day before the current
    date and counts them per device_id; the result is then grouped by that
    count in pandas.

    rtype : pandas.DataFrame  # columns: click_times, uid (uid = number of devices)
    """
    sql = "select device_id,count(cid_type) click_times from data_feed_click where from_unixtime(time,'%Y-%m-%d')=date_add(curdate(), interval -1 day) group by device_id order by click_times desc"
    rows = con_sql(sql)
    # Each row is (device_id, click_times); split it into two named columns.
    per_device = pd.DataFrame({
        "uid": [row[0] for row in rows],
        "click_times": [row[1] for row in rows],
    })
    return per_device.groupby(by="click_times", as_index=False).count()
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment