Commit 4758923b authored by 张彦钊's avatar 张彦钊

add new file

parents 11eee554 c8a1b61c
...@@ -19,7 +19,11 @@ my_date5 = datetime.date.today() - datetime.timedelta(days=90) ...@@ -19,7 +19,11 @@ my_date5 = datetime.date.today() - datetime.timedelta(days=90)
my_tm5 = int(my_date5.strftime("%s")) my_tm5 = int(my_date5.strftime("%s"))
def get_rate_detail(platform): def get_click_zero_uid_count(platform):
"""
platform : "ios","android","all"
rtype : dict
"""
if platform == "ios": if platform == "ios":
platform = "='App Store'" platform = "='App Store'"
elif platform == "android": elif platform == "android":
...@@ -126,26 +130,16 @@ def get_rate_detail(platform): ...@@ -126,26 +130,16 @@ def get_rate_detail(platform):
cursor.execute(sql) cursor.execute(sql)
result = cursor.fetchall() result = cursor.fetchall()
db.close() db.close()
return result
def result2dict(result):
"""
result : tuple2
rtype : dict
"""
dct = {} dct = {}
sum_count = 0
for i in result: for i in result:
sum_count += i[1] dct[i[0]] = i[1]
for i in result:
dct[i[0]] = "{}--{}%".format(i[1],round(i[1]/sum_count*100,2))
print("sum:{}".format(sum_count))
return dct return dct
if __name__ == '__main__': if __name__ == '__main__':
no_click_uid_detail_all = result2dict(get_rate_detail("all")) no_click_uid_detail_all = get_click_zero_uid_count("all")
no_click_uid_detail_ios = result2dict(get_rate_detail("ios")) no_click_uid_detail_ios = get_click_zero_uid_count("ios")
no_click_uid_detail_android = result2dict(get_rate_detail("android")) no_click_uid_detail_android = get_click_zero_uid_count("android")
...@@ -27,7 +27,7 @@ def result2file(fpath): ...@@ -27,7 +27,7 @@ def result2file(fpath):
1.5 点击answer用户占比(=点击answer用户数/曝光answer用户数) 1.5 点击answer用户占比(=点击answer用户数/曝光answer用户数)
1.6 点击diary用户占比(=点击diary用户数/曝光diary用户数) 1.6 点击diary用户占比(=点击diary用户数/曝光diary用户数)
1.7 无点击用户占比(=无点击用户数/有曝光用户数) 1.7 无点击用户占比(=无点击用户数/有曝光用户数)
1.8 无点击用户数分布(根据激活日期和平台来分) #注意:(]里面的数字指的是距离当前时间的天数 1.8 无点击用户数分布(=无点击用户∩激活用户数 / 激活用户数) #注意:(]里面的数字指的是距离当前时间的天数
2.Top特征 2.Top特征
2.1 用户点击次数分布(第一列:用户点击次数;第二列:独立用户数量) 2.1 用户点击次数分布(第一列:用户点击次数;第二列:独立用户数量)
2.2 Top 100 diary (sorted by ctr) 2.2 Top 100 diary (sorted by ctr)
......
...@@ -19,7 +19,10 @@ my_date5 = datetime.date.today() - datetime.timedelta(days=90) ...@@ -19,7 +19,10 @@ my_date5 = datetime.date.today() - datetime.timedelta(days=90)
my_tm5 = int(my_date5.strftime("%s")) my_tm5 = int(my_date5.strftime("%s"))
def get_rate_detail(): def get_register_uid_count():
"""
rtype : dict
"""
db = pymysql.connect(host='10.66.157.22', port=4000, user='root', passwd='3SYz54LS9#^9sBvC', db='jerry_test') db = pymysql.connect(host='10.66.157.22', port=4000, user='root', passwd='3SYz54LS9#^9sBvC', db='jerry_test')
cursor = db.cursor() cursor = db.cursor()
sql = "select '0-7' as label,count(distinct(device_id)) \ sql = "select '0-7' as label,count(distinct(device_id)) \
...@@ -72,26 +75,14 @@ def get_rate_detail(): ...@@ -72,26 +75,14 @@ def get_rate_detail():
cursor.execute(sql) cursor.execute(sql)
result = cursor.fetchall() result = cursor.fetchall()
db.close() db.close()
return result
def result2dict(result):
"""
result : tuple2
rtype : dict
"""
dct = {} dct = {}
sum_count = 0
for i in result: for i in result:
sum_count += i[1] dct[i[0]] = i[1]
for i in result:
dct[i[0]] = "{}--{}%".format(i[1],round(i[1]/sum_count*100,2))
print("sum:{}".format(sum_count))
return dct return dct
if __name__ == '__main__': if __name__ == '__main__':
register_uid_detail_all = result2dict(get_rate_detail()) register_uid_detail = get_register_uid_count()
register_uid_detail_ios = result2dict(get_rate_detail())
register_uid_detail_android = result2dict(get_rate_detail())
from utils import con_sql
from getClickZeroUidDetail import get_click_zero_uid_count
from getRegisterUidDetail import get_register_uid_count
# Click-through rate of active users, per platform.
def get_activate_uid_ctr(platform, ndays=1):
    """Compute the click-through rate of active devices for one platform.

    "Active" here means devices that have at least one row in
    ``data_feed_click`` on the target day; only exposures of those devices
    are counted in the denominator.

    Args:
        platform: "ios" or "android"; any other value (e.g. "all") matches
            every row whose ``device_type`` is not null.
        ndays: how many days before the current date the target day lies
            (1 means yesterday).

    Returns:
        list: ``[platform label, click count, exposure count, click rate]``
        where the label is "苹果", "安卓" or "所有" and the rate is rounded
        to 4 decimal places. The rate is 0.0 when the exposure count is zero.
    """
    # The click table is filtered with the space removed from 'App Store'
    # (as the original code did) -- presumably data_feed_click stores
    # 'AppStore'; TODO confirm against the table contents.
    if platform == "ios":
        exposure_clause, click_clause, label = "='App Store'", "='AppStore'", "苹果"
    elif platform == "android":
        exposure_clause, click_clause, label = "!='App Store'", "!='AppStore'", "安卓"
    else:
        exposure_clause = click_clause = " is not null"
        label = "所有"

    day_filter = ("from_unixtime(time,'%Y-%m-%d')"
                  "=date_add(curdate(), interval -{0} day)").format(ndays)

    sql_clk = ("select count(device_id) from data_feed_click "
               "where {day} "
               "and device_type{clk}").format(day=day_filter, clk=click_clause)
    clk_count = con_sql(sql_clk)[0][0]

    sql_imp = ("select count(device_id) from data_feed_exposure "
               "where {day} "
               "and device_id in "
               "(select device_id from data_feed_click "
               "where {day} "
               "and device_type{clk}) "
               "and device_type{exp}").format(day=day_filter,
                                              clk=click_clause,
                                              exp=exposure_clause)
    imp_count = con_sql(sql_imp)[0][0]

    # Guard against days/platforms with no matching exposure rows, which
    # previously raised ZeroDivisionError.
    clk_rate = round(clk_count / imp_count, 4) if imp_count else 0.0
    return [label, clk_count, imp_count, clk_rate]
# Average number of exposures per active user on a given day.
def get_activate_uid_imp_times(city, ndays=1):
    """Compute the average exposure count per active device for a city.

    "Active" devices are those with at least one ``data_feed_click`` row on
    the target day; the exposure count covers only those devices.

    Args:
        city: "beijing" restricts both tables to ``city_id='beijing'``; any
            other value (e.g. "all") matches every row whose ``city_id`` is
            not null.
        ndays: how many days before the current date the target day lies
            (1 means yesterday).

    Returns:
        list: ``[city label, active device count, exposure count, average]``
        where the label is "北京" or "所有" and the average is rounded to 2
        decimal places. The average is 0.0 when there are no active devices.
    """
    # Keep the display label separate from the SQL clause. The original
    # overwrote ``city`` with the clause and then compared it to "beijing",
    # so the label was always "所有".
    if city == "beijing":
        city_clause = "='beijing'"
        label = "北京"
    else:
        city_clause = " is not null"
        label = "所有"

    day_filter = ("from_unixtime(time,'%Y-%m-%d')"
                  "=date_add(curdate(), interval -{0} day)").format(ndays)

    sql_uid = ("select count(distinct(device_id)) from data_feed_click "
               "where {day} "
               "and city_id{city}").format(day=day_filter, city=city_clause)
    uid_count = con_sql(sql_uid)[0][0]

    sql_imp = ("select count(device_id) from data_feed_exposure "
               "where device_id in "
               "(select device_id from data_feed_click "
               "where {day} "
               "and city_id{city}) "
               "and {day} "
               "and city_id{city}").format(day=day_filter, city=city_clause)
    imp_times = con_sql(sql_imp)[0][0]

    # Avoid ZeroDivisionError on days without any active device.
    average = round(imp_times / uid_count, 2) if uid_count else 0.0
    return [label, uid_count, imp_times, average]
# Share of no-click devices within each activation-age bucket
# (= no-click devices in the bucket / all devices in the bucket), per platform.
def get_click_zero_uid_rate_detail(platform):
    """Divide the no-click device count of each bucket by the bucket's size.

    Args:
        platform: "ios", "android" or "all"; forwarded unchanged to
            ``get_click_zero_uid_count``.

    Returns:
        dict: bucket label -> ratio. A bucket whose denominator is missing
        or zero maps to 0.0 instead of raising.
    """
    # NOTE(review): the numerator is filtered by platform but
    # get_register_uid_count() takes no platform argument, so for "ios" /
    # "android" the denominator appears to cover all platforms -- confirm
    # this is the intended definition.
    no_click_counts = get_click_zero_uid_count(platform)
    register_counts = get_register_uid_count()
    result = {}
    for label, no_click in no_click_counts.items():
        total = register_counts.get(label)
        # Empty or absent buckets previously raised ZeroDivisionError / KeyError.
        result[label] = no_click / total if total else 0.0
    return result
# Distribution of (clicks per device -> number of devices) for yesterday.
def get_click_times_to_count_uid():
    """Return yesterday's click-count histogram.

    Each device's rows in ``data_feed_click`` for yesterday are counted,
    then devices are grouped by that count, ordered ascending.

    Returns:
        Whatever ``con_sql`` yields for the query; each row is
        (click count, number of devices). The original docstring declares
        the type as tuple.
    """
    query = (
        "select times,count(device_id) "
        "from (select device_id,count(cid_type) as times "
        "from data_feed_click "
        "where from_unixtime(time,'%Y-%m-%d')=date_add(curdate(), interval -1 day) "
        "group by device_id) as t "
        "group by times order by times"
    )
    return con_sql(query)
\ No newline at end of file
import datetime
import pymysql
def _midnight_timestamp(day):
    """Return the Unix timestamp of local midnight at the start of *day*."""
    # date.strftime("%s") relies on a glibc extension that Python does not
    # document and that is unavailable on some platforms (e.g. Windows).
    # Combining the date with 00:00 and calling timestamp() gives the same
    # local-time epoch value portably.
    return int(datetime.datetime.combine(day, datetime.time.min).timestamp())


# Timestamp of one week ago (7 days)
my_date1 = datetime.date.today() - datetime.timedelta(days=7)
my_tm1 = _midnight_timestamp(my_date1)
# Timestamp of two weeks ago (14 days)
my_date2 = datetime.date.today() - datetime.timedelta(days=14)
my_tm2 = _midnight_timestamp(my_date2)
# Timestamp of one month ago (30 days)
my_date3 = datetime.date.today() - datetime.timedelta(days=30)
my_tm3 = _midnight_timestamp(my_date3)
# Timestamp of two months ago (60 days)
my_date4 = datetime.date.today() - datetime.timedelta(days=60)
my_tm4 = _midnight_timestamp(my_date4)
# Timestamp of three months ago (90 days)
my_date5 = datetime.date.today() - datetime.timedelta(days=90)
my_tm5 = _midnight_timestamp(my_date5)
def get_click_zero_uid_count(platform):
    """Count yesterday's exposed-but-not-clicked devices per device-age bucket.

    A device is counted when it has a ``data_feed_exposure`` row dated
    yesterday for the requested platform and no ``data_feed_click`` row
    dated yesterday. Buckets are derived from the device's exposure history
    using the module-level cut-offs ``my_tm1`` .. ``my_tm5`` (7, 14, 30, 60
    and 90 days ago): e.g. '0-7' holds devices with no exposure row older
    than 7 days, '90+' holds devices with at least one row older than 90
    days.

    Args:
        platform: "ios" or "android"; any other value (e.g. "all") matches
            every row whose ``device_type`` is not null.

    Returns:
        dict: bucket label ('0-7', '7-14', '14-30', '30-60', '60-90', '90+')
        -> distinct device count.
    """
    # The click table is filtered with the space removed from 'App Store'
    # (as the original code did) -- presumably data_feed_click stores
    # 'AppStore'; TODO confirm against the table contents.
    if platform == "ios":
        exposure_clause, click_clause = "='App Store'", "='AppStore'"
    elif platform == "android":
        exposure_clause, click_clause = "!='App Store'", "!='AppStore'"
    else:
        exposure_clause = click_clause = " is not null"

    yesterday = "from_unixtime(time,'%Y-%m-%d')=date_add(curdate(), interval -1 day)"
    seen_before = ("(select distinct(device_id) from data_feed_exposure "
                   "where time < {0})")
    # (label, cut-off the device must NOT have rows before,
    #         cut-off the device MUST have rows before); None = no constraint.
    buckets = [
        ("0-7", my_tm1, None),
        ("7-14", my_tm2, my_tm1),
        ("14-30", my_tm3, my_tm2),
        ("30-60", my_tm4, my_tm3),
        ("60-90", my_tm5, my_tm4),
        ("90+", None, my_tm5),
    ]

    selects = []
    for label, absent_before, present_before in buckets:
        age_conditions = []
        if absent_before is not None:
            age_conditions.append(
                "device_id not in " + seen_before.format(absent_before))
        if present_before is not None:
            age_conditions.append(
                "device_id in " + seen_before.format(present_before))
        selects.append(
            ("select '{label}' as label,count(distinct(device_id)) "
             "from data_feed_exposure "
             "where device_type{exp} "
             "and {day} "
             "and device_id not in "
             "(select distinct(device_id) from data_feed_click "
             "where {day} "
             "and device_type{clk}) "
             "and device_id in "
             "(select distinct(device_id) "
             "from data_feed_exposure "
             "where {age})").format(label=label,
                                    exp=exposure_clause,
                                    clk=click_clause,
                                    day=yesterday,
                                    age=" and ".join(age_conditions)))
    sql = " union all ".join(selects)

    # NOTE(review): connection credentials are hard-coded in source; consider
    # loading them from configuration or the environment.
    db = pymysql.connect(host='10.66.157.22', port=4000, user='root', passwd='3SYz54LS9#^9sBvC', db='jerry_test')
    try:
        cursor = db.cursor()
        cursor.execute(sql)
        result = cursor.fetchall()
    finally:
        # Always release the connection, even when the query fails.
        db.close()
    return {row[0]: row[1] for row in result}
if __name__ == "__main__":
    # Manual run: query each platform in turn ("all", "ios", "android") and
    # bind the resulting dicts to module-level names; nothing is printed.
    (no_click_uid_detail_all,
     no_click_uid_detail_ios,
     no_click_uid_detail_android) = [
        get_click_zero_uid_count(name) for name in ("all", "ios", "android")
    ]
import datetime
import pymysql
def _midnight_timestamp(day):
    """Return the Unix timestamp of local midnight at the start of *day*."""
    # date.strftime("%s") relies on a glibc extension that Python does not
    # document and that is unavailable on some platforms (e.g. Windows).
    # Combining the date with 00:00 and calling timestamp() gives the same
    # local-time epoch value portably.
    return int(datetime.datetime.combine(day, datetime.time.min).timestamp())


# Timestamp of one week ago (7 days)
my_date1 = datetime.date.today() - datetime.timedelta(days=7)
my_tm1 = _midnight_timestamp(my_date1)
# Timestamp of two weeks ago (14 days)
my_date2 = datetime.date.today() - datetime.timedelta(days=14)
my_tm2 = _midnight_timestamp(my_date2)
# Timestamp of one month ago (30 days)
my_date3 = datetime.date.today() - datetime.timedelta(days=30)
my_tm3 = _midnight_timestamp(my_date3)
# Timestamp of two months ago (60 days)
my_date4 = datetime.date.today() - datetime.timedelta(days=60)
my_tm4 = _midnight_timestamp(my_date4)
# Timestamp of three months ago (90 days)
my_date5 = datetime.date.today() - datetime.timedelta(days=90)
my_tm5 = _midnight_timestamp(my_date5)
def get_register_uid_count():
    """Count distinct devices per device-age bucket.

    Buckets are derived from each device's history in
    ``data_feed_exposure`` using the module-level cut-offs ``my_tm1`` ..
    ``my_tm5`` (7, 14, 30, 60 and 90 days ago): e.g. '0-7' holds devices
    with no exposure row older than 7 days, '7-14' holds devices with a row
    older than 7 days but none older than 14 days, and '90+' holds devices
    with at least one row older than 90 days.

    Returns:
        dict: bucket label ('0-7', '7-14', '14-30', '30-60', '60-90', '90+')
        -> distinct device count.
    """
    seen_before = ("(select distinct(device_id) from data_feed_exposure "
                   "where time < {0})")
    # (label, cut-off the device must NOT have rows before,
    #         cut-off the device MUST have rows before); None = no constraint.
    buckets = [
        ("0-7", my_tm1, None),
        ("7-14", my_tm2, my_tm1),
        ("14-30", my_tm3, my_tm2),
        ("30-60", my_tm4, my_tm3),
        ("60-90", my_tm5, my_tm4),
        ("90+", None, my_tm5),
    ]

    selects = []
    for label, absent_before, present_before in buckets:
        age_conditions = []
        if absent_before is not None:
            age_conditions.append(
                "device_id not in " + seen_before.format(absent_before))
        if present_before is not None:
            age_conditions.append(
                "device_id in " + seen_before.format(present_before))
        selects.append(
            ("select '{label}' as label,count(distinct(device_id)) "
             "from data_feed_exposure "
             "where {age}").format(label=label,
                                   age=" and ".join(age_conditions)))
    sql = " union all ".join(selects)

    # NOTE(review): connection credentials are hard-coded in source; consider
    # loading them from configuration or the environment.
    db = pymysql.connect(host='10.66.157.22', port=4000, user='root', passwd='3SYz54LS9#^9sBvC', db='jerry_test')
    try:
        cursor = db.cursor()
        cursor.execute(sql)
        result = cursor.fetchall()
    finally:
        # Always release the connection, even when the query fails.
        db.close()
    return {row[0]: row[1] for row in result}
if __name__ == "__main__":
    # Manual run: execute the bucket query once and keep the resulting dict
    # in a module-level name; nothing is printed.
    register_uid_detail = get_register_uid_count()
This diff is collapsed.
...@@ -35,52 +35,3 @@ def get_yesterday_date(): ...@@ -35,52 +35,3 @@ def get_yesterday_date():
yesterday = today - datetime.timedelta(days=1) yesterday = today - datetime.timedelta(days=1)
yesterday = yesterday.strftime("%Y%m%d") yesterday = yesterday.strftime("%Y%m%d")
return yesterday return yesterday
# Click-through rate of active users, per platform.
def get_activate_uid_ctr(platform, ndays=1):
    """Compute the click-through rate of active devices for one platform.

    "Active" here means devices that have at least one row in
    ``data_feed_click`` on the target day; only exposures of those devices
    are counted in the denominator.

    Args:
        platform: "ios" or "android"; any other value (e.g. "all") matches
            every row whose ``device_type`` is not null.
        ndays: how many days before the current date the target day lies
            (1 means yesterday).

    Returns:
        list: ``[platform label, click count, exposure count, click rate]``
        where the label is "苹果", "安卓" or "所有" and the rate is rounded
        to 4 decimal places. The rate is 0.0 when the exposure count is zero.
    """
    # The click table is filtered with the space removed from 'App Store'
    # (as the original code did) -- presumably data_feed_click stores
    # 'AppStore'; TODO confirm against the table contents.
    if platform == "ios":
        exposure_clause, click_clause, label = "='App Store'", "='AppStore'", "苹果"
    elif platform == "android":
        exposure_clause, click_clause, label = "!='App Store'", "!='AppStore'", "安卓"
    else:
        exposure_clause = click_clause = " is not null"
        label = "所有"

    day_filter = ("from_unixtime(time,'%Y-%m-%d')"
                  "=date_add(curdate(), interval -{0} day)").format(ndays)

    sql_clk = ("select count(device_id) from data_feed_click "
               "where {day} "
               "and device_type{clk}").format(day=day_filter, clk=click_clause)
    clk_count = con_sql(sql_clk)[0][0]

    sql_imp = ("select count(device_id) from data_feed_exposure "
               "where {day} "
               "and device_id in "
               "(select device_id from data_feed_click "
               "where {day} "
               "and device_type{clk}) "
               "and device_type{exp}").format(day=day_filter,
                                              clk=click_clause,
                                              exp=exposure_clause)
    imp_count = con_sql(sql_imp)[0][0]

    # Guard against days/platforms with no matching exposure rows, which
    # previously raised ZeroDivisionError.
    clk_rate = round(clk_count / imp_count, 4) if imp_count else 0.0
    return [label, clk_count, imp_count, clk_rate]
# Distribution of (click count -> number of devices) for yesterday.
def get_click_times_to_count_uid_df():
    """Return yesterday's click-count histogram.

    Each device's rows in ``data_feed_click`` for yesterday are counted,
    then devices are grouped by that count, ordered ascending.

    Returns:
        Whatever ``con_sql`` yields for the query; each row is
        (click count, number of devices). The original docstring declares
        the type as tuple.
    """
    query = (
        "select times,count(device_id) "
        "from (select device_id,count(cid_type) as times "
        "from data_feed_click "
        "where from_unixtime(time,'%Y-%m-%d')=date_add(curdate(), interval -1 day) "
        "group by device_id) as t "
        "group by times order by times"
    )
    return con_sql(query)
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment