Commit 85cd8cd7 authored by 高雅喆

Change the iOS device_type literal from 'AppStore' to 'App Store' and add test/main.py

parent 6a061403
...@@ -4,7 +4,7 @@ from utils import con_sql ...@@ -4,7 +4,7 @@ from utils import con_sql
class CidRate(object): class CidRate(object):
def __init__(self, ndays, platform, cid_type): def __init__(self, platform, cid_type, ndays=1):
""" """
ndays : 1;2;3;4.. #The number of days from the current time ndays : 1;2;3;4.. #The number of days from the current time
platform : 'all';'ios';'android' platform : 'all';'ios';'android'
...@@ -12,9 +12,9 @@ class CidRate(object): ...@@ -12,9 +12,9 @@ class CidRate(object):
""" """
self.ndays = ndays self.ndays = ndays
if platform == "ios": if platform == "ios":
self.platform = "='AppStore'" self.platform = "='App Store'"
elif platform == "android": elif platform == "android":
self.platform = "!='AppStore'" self.platform = "!='App Store'"
else: else:
self.platform = " is not null" self.platform = " is not null"
self.cid_type = cid_type self.cid_type = cid_type
...@@ -24,15 +24,14 @@ class CidRate(object): ...@@ -24,15 +24,14 @@ class CidRate(object):
platform : "所有";"苹果","安卓" #方便显示 platform : "所有";"苹果","安卓" #方便显示
rtype : list rtype : list
""" """
if self.platform[-2] == 'e':
self.platform = self.platform.replace(' ','')
sql_cid = "select count(cid) from data_feed_click \ sql_cid = "select count(cid) from data_feed_click \
where from_unixtime(time,'%Y-%m-%d')=date_add(curdate(), interval -{0} day) \ where from_unixtime(time,'%Y-%m-%d')=date_add(curdate(), interval -{0} day) \
and device_type{1} and cid_type='{2}'".format(self.ndays,self.platform,self.cid_type) and device_type{1} \
and cid_type='{2}'".format(self.ndays,self.platform.replace(' ',''),self.cid_type)
cid_clk_count = con_sql(sql_cid)[0][0] cid_clk_count = con_sql(sql_cid)[0][0]
sql_all = "select count(cid) from data_feed_click \ sql_all = "select count(cid) from data_feed_click \
where from_unixtime(time,'%Y-%m-%d')=date_add(curdate(), interval -{0} day) \ where from_unixtime(time,'%Y-%m-%d')=date_add(curdate(), interval -{0} day) \
and device_type{1}".format(self.ndays, self.platform) and device_type{1}".format(self.ndays, self.platform.replace(' ',''))
all_clk_count = con_sql(sql_all)[0][0] all_clk_count = con_sql(sql_all)[0][0]
cid_clk_rate = round(cid_clk_count/all_clk_count,4) cid_clk_rate = round(cid_clk_count/all_clk_count,4)
return [platform,cid_clk_count,all_clk_count,cid_clk_rate] return [platform,cid_clk_count,all_clk_count,cid_clk_rate]
...@@ -43,8 +42,6 @@ class CidRate(object): ...@@ -43,8 +42,6 @@ class CidRate(object):
platform : "所有";"苹果","安卓" #方便显示 platform : "所有";"苹果","安卓" #方便显示
rtype : list rtype : list
""" """
if self.platform[-2] == 'e':#注意:曝光表中AppStore有空格
self.platform = self.platform[:-6] + ' ' + self.platform[-6:]
sql_cid = "select count(cid) from data_feed_exposure \ sql_cid = "select count(cid) from data_feed_exposure \
where from_unixtime(time,'%Y-%m-%d')=date_add(curdate(), interval -{0} day) \ where from_unixtime(time,'%Y-%m-%d')=date_add(curdate(), interval -{0} day) \
and device_type{1} and cid_type='{2}'".format(self.ndays,self.platform,self.cid_type) and device_type{1} and cid_type='{2}'".format(self.ndays,self.platform,self.cid_type)
...@@ -59,9 +56,9 @@ class CidRate(object): ...@@ -59,9 +56,9 @@ class CidRate(object):
def main(): def main():
answer_rate_all = CidRate(1,"all","answer").get_cid_imp_rate("所有") answer_rate_all = CidRate("all","answer").get_cid_imp_rate("所有")
answer_rate_ios = CidRate(1,"ios","answer").get_cid_imp_rate("苹果") answer_rate_ios = CidRate("ios","answer").get_cid_imp_rate("苹果")
answer_rate_android = CidRate(1,"android","answer").get_cid_imp_rate("安卓") answer_rate_android = CidRate("android","answer").get_cid_imp_rate("安卓")
answer_rate_result = [answer_rate_all,answer_rate_ios,answer_rate_android] answer_rate_result = [answer_rate_all,answer_rate_ios,answer_rate_android]
......
...@@ -3,7 +3,7 @@ from utils import con_sql ...@@ -3,7 +3,7 @@ from utils import con_sql
class ClkCidUidRate(object): class ClkCidUidRate(object):
def __init__(self, ndays, platform, cid_type): def __init__(self, platform, cid_type, ndays=1):
""" """
ndays : 1;2;3;4.. #The number of days from the current time ndays : 1;2;3;4.. #The number of days from the current time
platform : 'all';'ios';'android' platform : 'all';'ios';'android'
...@@ -11,9 +11,9 @@ class ClkCidUidRate(object): ...@@ -11,9 +11,9 @@ class ClkCidUidRate(object):
""" """
self.ndays = ndays self.ndays = ndays
if platform == "ios": if platform == "ios":
self.platform = "='AppStore'" self.platform = "='App Store'"
elif platform == "android": elif platform == "android":
self.platform = "!='AppStore'" self.platform = "!='App Store'"
else: else:
self.platform = " is not null" self.platform = " is not null"
if cid_type == "everything": if cid_type == "everything":
...@@ -26,12 +26,10 @@ class ClkCidUidRate(object): ...@@ -26,12 +26,10 @@ class ClkCidUidRate(object):
platform : "所有";"苹果","安卓" #方便显示 platform : "所有";"苹果","安卓" #方便显示
rtype : list rtype : list
""" """
if self.platform[-2] == 'e':
self.platform = self.platform.replace(' ','')
sql_clk = "select count(distinct(device_id)) from data_feed_click \ sql_clk = "select count(distinct(device_id)) from data_feed_click \
where from_unixtime(time,'%Y-%m-%d')=date_add(curdate(), interval -{0} day) \ where from_unixtime(time,'%Y-%m-%d')=date_add(curdate(), interval -{0} day) \
and device_type{1} \ and device_type{1} \
and cid_type{2}".format(self.ndays,self.platform,self.cid_type) and cid_type{2}".format(self.ndays,self.platform.replace(' ',''),self.cid_type)
clk_count = con_sql(sql_clk)[0][0] clk_count = con_sql(sql_clk)[0][0]
if self.platform[-2] == 'e':#注意:曝光表中AppStore有空格 if self.platform[-2] == 'e':#注意:曝光表中AppStore有空格
...@@ -53,15 +51,15 @@ class ClkCidUidRate(object): ...@@ -53,15 +51,15 @@ class ClkCidUidRate(object):
def main(): def main():
#1.点击diary用户占比 #1.点击diary用户占比
click_diary_all = ClkCidUidRate(1,"all","diary").get_clk_cid_uid_rate("所有") click_diary_all = ClkCidUidRate("all","diary").get_clk_cid_uid_rate("所有")
click_diary_ios = ClkCidUidRate(1,"ios","diary").get_clk_cid_uid_rate("苹果") click_diary_ios = ClkCidUidRate("ios","diary").get_clk_cid_uid_rate("苹果")
click_diary_android = ClkCidUidRate(1,"android","diary").get_clk_cid_uid_rate("安卓") click_diary_android = ClkCidUidRate("android","diary").get_clk_cid_uid_rate("安卓")
click_diary_result = [click_diary_all,click_diary_ios,click_diary_android] click_diary_result = [click_diary_all,click_diary_ios,click_diary_android]
print("已获取点击diary用户占比") print("已获取点击diary用户占比")
#2.点击answer用户占比 #2.点击answer用户占比
click_answer_all = ClkCidUidRate(1,"all","answer").get_clk_cid_uid_rate("所有") click_answer_all = ClkCidUidRate("all","answer").get_clk_cid_uid_rate("所有")
click_answer_ios = ClkCidUidRate(1,"ios","answer").get_clk_cid_uid_rate("苹果") click_answer_ios = ClkCidUidRate("ios","answer").get_clk_cid_uid_rate("苹果")
click_answer_android = ClkCidUidRate(1,"android","answer").get_clk_cid_uid_rate("安卓") click_answer_android = ClkCidUidRate("android","answer").get_clk_cid_uid_rate("安卓")
click_answer_result = [click_answer_all,click_answer_ios,click_answer_android] click_answer_result = [click_answer_all,click_answer_ios,click_answer_android]
print("已获取点击answer用户占比") print("已获取点击answer用户占比")
#3.点击question用户占比(曝光表里cid类型没有question,因此下面的曝光数为0,0不能作分母) #3.点击question用户占比(曝光表里cid类型没有question,因此下面的曝光数为0,0不能作分母)
...@@ -71,9 +69,9 @@ def main(): ...@@ -71,9 +69,9 @@ def main():
#click_question_result = [click_question_all,click_question_ios,click_question_android] #click_question_result = [click_question_all,click_question_ios,click_question_android]
#print("已获取点击question用户占比") #print("已获取点击question用户占比")
#4.有点击用户占比 #4.有点击用户占比
click_everything_all = ClkCidUidRate(1,"all","everything").get_clk_cid_uid_rate("所有") click_everything_all = ClkCidUidRate("all","everything").get_clk_cid_uid_rate("所有")
click_everything_ios = ClkCidUidRate(1,"ios","everything").get_clk_cid_uid_rate("苹果") click_everything_ios = ClkCidUidRate("ios","everything").get_clk_cid_uid_rate("苹果")
click_everything_android = ClkCidUidRate(1,"android","everything").get_clk_cid_uid_rate("安卓") click_everything_android = ClkCidUidRate("android","everything").get_clk_cid_uid_rate("安卓")
click_everything_result = [click_everything_all,click_everything_ios,click_everything_android] click_everything_result = [click_everything_all,click_everything_ios,click_everything_android]
print("已获取有点击用户占比") print("已获取有点击用户占比")
......
...@@ -4,7 +4,7 @@ from config import DIRECTORY_PATH ...@@ -4,7 +4,7 @@ from config import DIRECTORY_PATH
class TopFeatures(object): class TopFeatures(object):
def __init__(self, ndays, platform, cid_type, top_n=-1): def __init__(self, platform, cid_type, top_n=-1, ndays=1):
""" """
ndays : 1;2;3;4.. #The number of days from the current time ndays : 1;2;3;4.. #The number of days from the current time
platform : 'all';'ios';'android' platform : 'all';'ios';'android'
...@@ -13,9 +13,9 @@ class TopFeatures(object): ...@@ -13,9 +13,9 @@ class TopFeatures(object):
""" """
self.ndays = ndays self.ndays = ndays
if platform == "ios": if platform == "ios":
self.platform = "='AppStore'" self.platform = "='App Store'"
elif platform == "android": elif platform == "android":
self.platform = "!='AppStore'" self.platform = "!='App Store'"
else: else:
self.platform = " is not null" self.platform = " is not null"
self.cid_type = cid_type self.cid_type = cid_type
...@@ -24,20 +24,17 @@ class TopFeatures(object): ...@@ -24,20 +24,17 @@ class TopFeatures(object):
def get_click_times(self): def get_click_times(self):
# rtype : dict # rtype : dict
if self.platform[-2] == 'e':
self.platform = self.platform.replace(' ','')
sql = "select cid,count(cid) from data_feed_click \ sql = "select cid,count(cid) from data_feed_click \
where from_unixtime(time,'%Y-%m-%d')=date_add(curdate(), interval -{0} day) \ where from_unixtime(time,'%Y-%m-%d')=date_add(curdate(), interval -{0} day) \
and device_type{1} and cid_type='{2}' \ and device_type{1} and cid_type='{2}' \
group by cid order by count(cid) desc".format(self.ndays, self.platform, self.cid_type) group by cid \
order by count(cid) desc".format(self.ndays, self.platform.replace(' ',''), self.cid_type)
clk_times = tuple2dict(con_sql(sql)) clk_times = tuple2dict(con_sql(sql))
return clk_times return clk_times
def get_impression_times(self): def get_impression_times(self):
# rtype : dict # rtype : dict
if self.platform[-2] == 'e':#注意:曝光表中AppStore有空格
self.platform = self.platform[:-6] + ' ' + self.platform[-6:]
sql = "select cid,count(cid) from data_feed_exposure \ sql = "select cid,count(cid) from data_feed_exposure \
where from_unixtime(time,'%Y-%m-%d')=date_add(curdate(), interval -{0} day) \ where from_unixtime(time,'%Y-%m-%d')=date_add(curdate(), interval -{0} day) \
and device_type{1} and cid_type='{2}' \ and device_type{1} and cid_type='{2}' \
...@@ -117,17 +114,17 @@ class TopFeatures(object): ...@@ -117,17 +114,17 @@ class TopFeatures(object):
def main(): def main():
#1. Top diary #1. Top diary
top_diary_all = TopFeatures(1, "all", "diary", 100) top_diary_all = TopFeatures("all", "diary", 100)
clk_diary_times_all = top_diary_all.get_click_times() clk_diary_times_all = top_diary_all.get_click_times()
imp_diary_times_all = top_diary_all.get_impression_times() imp_diary_times_all = top_diary_all.get_impression_times()
clk_diary_ctr_all = top_diary_all.get_result("所有",clk_diary_times_all, imp_diary_times_all, 4, "ctr") clk_diary_ctr_all = top_diary_all.get_result("所有",clk_diary_times_all, imp_diary_times_all, 4, "ctr")
top_diary_ios = TopFeatures(1, "ios", "diary", 100) top_diary_ios = TopFeatures("ios", "diary", 100)
clk_diary_times_ios = top_diary_ios.get_click_times() clk_diary_times_ios = top_diary_ios.get_click_times()
imp_diary_times_ios = top_diary_ios.get_impression_times() imp_diary_times_ios = top_diary_ios.get_impression_times()
clk_diary_ctr_ios = top_diary_ios.get_result("苹果",clk_diary_times_ios, imp_diary_times_ios, 4, "ctr") clk_diary_ctr_ios = top_diary_ios.get_result("苹果",clk_diary_times_ios, imp_diary_times_ios, 4, "ctr")
top_diary_android = TopFeatures(1, "android", "diary", 100) top_diary_android = TopFeatures("android", "diary", 100)
clk_diary_times_android = top_diary_android.get_click_times() clk_diary_times_android = top_diary_android.get_click_times()
imp_diary_times_android = top_diary_android.get_impression_times() imp_diary_times_android = top_diary_android.get_impression_times()
clk_diary_ctr_android = top_diary_android.get_result("安卓",clk_diary_times_android, imp_diary_times_android, 4, "ctr") clk_diary_ctr_android = top_diary_android.get_result("安卓",clk_diary_times_android, imp_diary_times_android, 4, "ctr")
...@@ -138,17 +135,17 @@ def main(): ...@@ -138,17 +135,17 @@ def main():
print("已获取 Top diary 特征") print("已获取 Top diary 特征")
#2. Top answer #2. Top answer
top_answer_all = TopFeatures(1, "all", "answer", 100) top_answer_all = TopFeatures("all", "answer", 100)
clk_answer_times_all = top_answer_all.get_click_times() clk_answer_times_all = top_answer_all.get_click_times()
imp_answer_times_all = top_answer_all.get_impression_times() imp_answer_times_all = top_answer_all.get_impression_times()
clk_answer_ctr_all = top_answer_all.get_result("所有",clk_answer_times_all, imp_answer_times_all, 2, "ctr") clk_answer_ctr_all = top_answer_all.get_result("所有",clk_answer_times_all, imp_answer_times_all, 2, "ctr")
top_answer_ios = TopFeatures(1, "ios", "answer", 100) top_answer_ios = TopFeatures("ios", "answer", 100)
clk_answer_times_ios = top_answer_ios.get_click_times() clk_answer_times_ios = top_answer_ios.get_click_times()
imp_answer_times_ios = top_answer_ios.get_impression_times() imp_answer_times_ios = top_answer_ios.get_impression_times()
clk_answer_ctr_ios = top_answer_ios.get_result("苹果",clk_answer_times_ios, imp_answer_times_ios, 2, "ctr") clk_answer_ctr_ios = top_answer_ios.get_result("苹果",clk_answer_times_ios, imp_answer_times_ios, 2, "ctr")
top_answer_android = TopFeatures(1, "android", "answer", 100) top_answer_android = TopFeatures("android", "answer", 100)
clk_answer_times_android = top_answer_android.get_click_times() clk_answer_times_android = top_answer_android.get_click_times()
imp_answer_times_android = top_answer_android.get_impression_times() imp_answer_times_android = top_answer_android.get_impression_times()
clk_answer_ctr_android = top_answer_android.get_result("安卓",clk_answer_times_android, imp_answer_times_android, 2, "ctr") clk_answer_ctr_android = top_answer_android.get_result("安卓",clk_answer_times_android, imp_answer_times_android, 2, "ctr")
...@@ -160,17 +157,17 @@ def main(): ...@@ -160,17 +157,17 @@ def main():
#3. Top question #3. Top question
top_question_all = TopFeatures(1, "all", "question", 100) top_question_all = TopFeatures("all", "question", 100)
clk_question_times_all = top_question_all.get_click_times() clk_question_times_all = top_question_all.get_click_times()
imp_question_times_all = top_question_all.get_impression_times() imp_question_times_all = top_question_all.get_impression_times()
clk_question_ctr_all = top_question_all.get_result("所有",clk_question_times_all, imp_question_times_all, 2, "ctr") clk_question_ctr_all = top_question_all.get_result("所有",clk_question_times_all, imp_question_times_all, 2, "ctr")
top_question_ios = TopFeatures(1, "ios", "question", 100) top_question_ios = TopFeatures("ios", "question", 100)
clk_question_times_ios = top_question_ios.get_click_times() clk_question_times_ios = top_question_ios.get_click_times()
imp_question_times_ios = top_question_ios.get_impression_times() imp_question_times_ios = top_question_ios.get_impression_times()
clk_question_ctr_ios = top_question_ios.get_result("苹果",clk_question_times_ios, imp_question_times_ios, 2, "ctr") clk_question_ctr_ios = top_question_ios.get_result("苹果",clk_question_times_ios, imp_question_times_ios, 2, "ctr")
top_question_android = TopFeatures(1, "android", "question", 100) top_question_android = TopFeatures("android", "question", 100)
clk_question_times_android = top_question_android.get_click_times() clk_question_times_android = top_question_android.get_click_times()
imp_question_times_android = top_question_android.get_impression_times() imp_question_times_android = top_question_android.get_impression_times()
clk_question_ctr_android = top_question_android.get_result("安卓",clk_question_times_android, imp_question_times_android, 2, "ctr") clk_question_ctr_android = top_question_android.get_result("安卓",clk_question_times_android, imp_question_times_android, 2, "ctr")
......
# -*- coding: UTF-8 -*-
from utils import con_sql,tuple2dict,get_yesterday_date
from config import DIRECTORY_PATH
#获取各个平台下的活跃用户点击率
def get_activate_uid_ctr(platform, ndays=1, run_query=None):
    """Return the click-through rate of active users on one platform.

    Counts the rows in ``data_feed_click`` for the target day, then counts the
    rows in ``data_feed_exposure`` for the same day restricted to devices that
    also appear in that day's clicks, and divides the first by the second.

    Args:
        platform: 'ios' or 'android'; any other value (e.g. 'all') selects
            every row whose device_type is not null.
        ndays: Number of days before the current date to report on
            (1 is yesterday). Coerced with ``int()`` before it is placed in
            the SQL text.
        run_query: Optional callable that takes an SQL string and returns the
            result rows. Defaults to the module's ``con_sql``.

    Returns:
        list: ``[label, clk_count, imp_count, clk_rate]`` where ``label`` is
        the display name of the platform and ``clk_rate`` is rounded to four
        decimals (0.0 when the exposure count is zero).

    Raises:
        ValueError, TypeError: if ``ndays`` cannot be converted to an int.
    """
    if run_query is None:
        run_query = con_sql

    # The click table spells the iOS device type 'AppStore' while the exposure
    # table spells it 'App Store', so each table gets its own predicate.
    # Stripping spaces from one shared predicate would also mangle
    # " is not null" into "isnotnull" and produce invalid SQL.
    if platform == "ios":
        label, clk_filter, imp_filter = "苹果", "='AppStore'", "='App Store'"
    elif platform == "android":
        label, clk_filter, imp_filter = "安卓", "!='AppStore'", "!='App Store'"
    else:
        label, clk_filter, imp_filter = "所有", " is not null", " is not null"

    # int() keeps anything but a plain number out of the query text.
    day_filter = (
        "from_unixtime(time,'%Y-%m-%d')="
        "date_add(curdate(), interval -{0} day)".format(int(ndays))
    )

    sql_clk = (
        "select count(device_id) from data_feed_click "
        "where {0} and device_type{1}".format(day_filter, clk_filter)
    )
    clk_count = run_query(sql_clk)[0][0]

    sql_imp = (
        "select count(device_id) from data_feed_exposure "
        "where {0} and device_id in "
        "(select device_id from data_feed_click "
        "where {0} and device_type{1}) "
        "and device_type{2}".format(day_filter, clk_filter, imp_filter)
    )
    imp_count = run_query(sql_imp)[0][0]

    # A day without exposures must not crash the whole report.
    clk_rate = round(clk_count / imp_count, 4) if imp_count else 0.0
    return [label, clk_count, imp_count, clk_rate]
#获取 {点击次数 : 独立用户数}
def get_click_times_to_count_uid_df(ndays=1, run_query=None):
    """Build a histogram of clicks per device for one day.

    Queries ``data_feed_click`` for the number of clicks each device made on
    the target day, then counts how many devices share each click total.

    Args:
        ndays: Number of days before the current date to report on
            (1 is yesterday). Coerced with ``int()`` before it is placed in
            the SQL text.
        run_query: Optional callable that takes an SQL string and returns the
            result rows as ``(device_id, click_times)`` pairs. Defaults to the
            module's ``con_sql``.

    Returns:
        pandas.DataFrame: one row per distinct ``click_times`` value, with the
        column ``uid`` holding the number of devices that clicked that often.

    Raises:
        ValueError, TypeError: if ``ndays`` cannot be converted to an int.
    """
    # Imported locally: the module's import block does not bring in pandas,
    # so a bare ``pd`` would raise NameError.
    import pandas as pd

    if run_query is None:
        run_query = con_sql

    sql = (
        "select device_id,count(cid_type) click_times from data_feed_click "
        "where from_unixtime(time,'%Y-%m-%d')="
        "date_add(curdate(), interval -{0} day) "
        "group by device_id order by click_times desc".format(int(ndays))
    )
    rows = run_query(sql)

    per_device = pd.DataFrame({
        "uid": [row[0] for row in rows],
        "click_times": [row[1] for row in rows],
    })
    # Counting the uid column per click total gives "devices with N clicks".
    return per_device.groupby(by="click_times", as_index=False).count()
def main():
    """Run every report query once.

    Fetches the active-user click-through rate for the 'all', 'ios' and
    'android' platforms, in that order, and then the clicks-per-device
    histogram. The results are only held in local variables.
    """
    activate_uid_ctr_by_platform = [
        get_activate_uid_ctr(name) for name in ("all", "ios", "android")
    ]
    click_times_df = get_click_times_to_count_uid_df()
# Only run the report when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment