Commit beb3bd7f authored by 张彦钊

Merge branch 'master' of git.wanmeizhensuo.com:ML/ffm-baseline

delete prints
parents 85284446 947c354b
import datetime
import pymysql
def _midnight_timestamp(day):
    """Return the Unix timestamp (whole seconds) of local midnight on *day*.

    Replaces ``day.strftime("%s")``: the ``%s`` directive is a C-library
    extension that Python does not document and that is unavailable on some
    platforms, whereas ``datetime.timestamp()`` is portable.
    """
    return int(datetime.datetime.combine(day, datetime.time.min).timestamp())


# Read the clock once so every cutoff is measured from the same calendar day,
# even if the script happens to start right around midnight.
_today = datetime.date.today()

# Timestamp of one week ago (7 days)
my_date1 = _today - datetime.timedelta(days=7)
my_tm1 = _midnight_timestamp(my_date1)
# Timestamp of two weeks ago (14 days)
my_date2 = _today - datetime.timedelta(days=14)
my_tm2 = _midnight_timestamp(my_date2)
# Timestamp of one month ago (30 days)
my_date3 = _today - datetime.timedelta(days=30)
my_tm3 = _midnight_timestamp(my_date3)
# Timestamp of two months ago (60 days)
my_date4 = _today - datetime.timedelta(days=60)
my_tm4 = _midnight_timestamp(my_date4)
# Timestamp of three months ago (90 days)
my_date5 = _today - datetime.timedelta(days=90)
my_tm5 = _midnight_timestamp(my_date5)
def get_rate_detail():
    """Count distinct devices in ``data_feed_exposure`` per age bucket.

    Runs one ``union all`` query with six branches. Each branch counts the
    distinct ``device_id`` values that have (or do not have) an exposure row
    older than the module-level cutoffs ``my_tm1`` .. ``my_tm5``:

    * ``'0-7'``   -- no row with ``time`` before the 7-day cutoff
    * ``'7-14'``  -- a row before the 7-day cutoff, none before the 14-day one
    * ``'14-30'`` -- a row before the 14-day cutoff, none before the 30-day one
    * ``'30-60'`` -- a row before the 30-day cutoff, none before the 60-day one
    * ``'60-90'`` -- a row before the 60-day cutoff, none before the 90-day one
    * ``'90+'``   -- a row before the 90-day cutoff

    Returns:
        Whatever ``cursor.fetchall()`` yields for the query; each row is
        ``(label, count)``.

    Raises:
        Any exception raised by pymysql while connecting or querying. The
        connection is closed before the exception propagates.
    """
    # The cutoffs are integers computed by this module, not external input,
    # so interpolating them with str.format() is not an injection risk here.
    sql = (
        "select '0-7' as label,count(distinct(device_id)) "
        "from data_feed_exposure "
        "where device_id not in "
        "(select distinct(device_id) from data_feed_exposure "
        "where time < {0}) "
        "union all "
        "select '7-14' as label,count(distinct(device_id)) "
        "from data_feed_exposure "
        "where device_id not in "
        "(select distinct(device_id) from data_feed_exposure "
        "where time < {1}) "
        "and device_id in "
        "(select distinct(device_id) from data_feed_exposure "
        "where time < {0}) "
        "union all "
        "select '14-30' as label,count(distinct(device_id)) "
        "from data_feed_exposure "
        "where device_id not in "
        "(select distinct(device_id) from data_feed_exposure "
        "where time < {2}) "
        "and device_id in "
        "(select distinct(device_id) from data_feed_exposure "
        "where time < {1}) "
        "union all "
        "select '30-60' as label,count(distinct(device_id)) "
        "from data_feed_exposure "
        "where device_id not in "
        "(select distinct(device_id) from data_feed_exposure "
        "where time < {3}) "
        "and device_id in "
        "(select distinct(device_id) from data_feed_exposure "
        "where time < {2}) "
        "union all "
        "select '60-90' as label,count(distinct(device_id)) "
        "from data_feed_exposure "
        "where device_id not in "
        "(select distinct(device_id) from data_feed_exposure "
        "where time < {4}) "
        "and device_id in "
        "(select distinct(device_id) from data_feed_exposure "
        "where time < {3}) "
        "union all "
        "select '90+' as label,count(distinct(device_id)) "
        "from data_feed_exposure "
        "where device_id in "
        "(select distinct(device_id) from data_feed_exposure "
        "where time < {4})"
    ).format(my_tm1, my_tm2, my_tm3, my_tm4, my_tm5)

    # NOTE(review): credentials are hard-coded in source control; they should
    # be moved to configuration/environment and the password rotated.
    db = pymysql.connect(host='10.66.157.22', port=4000, user='root', passwd='3SYz54LS9#^9sBvC', db='jerry_test')
    try:
        cursor = db.cursor()
        cursor.execute(sql)
        return cursor.fetchall()
    finally:
        # Always release the connection, including when the query fails.
        db.close()
def result2dict(result):
    """Turn ``(label, count)`` rows into a ``{label: "count--percent%"}`` dict.

    The percentage is each count's share of the sum of all counts, rounded to
    two decimals. The total is also printed as ``sum:<total>``.

    Args:
        result: Iterable of two-item rows ``(label, count)``; it is read
            twice, so it must be re-iterable (e.g. a tuple or list).

    Returns:
        dict mapping each label to a string such as ``"3--75.0%"``. When the
        total is 0 every share is reported as ``0.0`` instead of raising
        ``ZeroDivisionError``.
    """
    total = sum(row[1] for row in result)

    summary = {}
    for label, count in ((row[0], row[1]) for row in result):
        # Guard the division: rows whose counts are all zero give total == 0.
        share = round(count / total * 100, 2) if total else 0.0
        summary[label] = "{}--{}%".format(count, share)

    print("sum:{}".format(total))
    return summary
if __name__ == '__main__':
    # Build the three summaries in order; each one issues its own query.
    # NOTE(review): get_rate_detail() takes no platform argument, so the
    # "all", "ios" and "android" summaries all come from the same unfiltered
    # query -- confirm whether per-platform filtering was intended.
    register_uid_detail_all, register_uid_detail_ios, register_uid_detail_android = [
        result2dict(get_rate_detail()) for _ in range(3)
    ]
# -*- coding: UTF-8 -*- # -*- coding: UTF-8 -*-
import pymysql import pymysql
import datetime import datetime
import pandas as pd
def con_sql(sql): def con_sql(sql):
#从数据库的表里获取数据 #从数据库的表里获取数据
...@@ -72,15 +71,16 @@ def get_activate_uid_ctr(platform, ndays=1): ...@@ -72,15 +71,16 @@ def get_activate_uid_ctr(platform, ndays=1):
return [platform, clk_count, imp_count, clk_rate] return [platform, clk_count, imp_count, clk_rate]
#获取 {点击次数 : 独立用户数} #获取 (点击次数 : 独立用户数)
def get_click_times_to_count_uid_df(): def get_click_times_to_count_uid_df():
""" """
rtype : pandas.DataFrame rtype : tuple
""" """
sql = "select device_id,count(cid_type) click_times from data_feed_click where from_unixtime(time,'%Y-%m-%d')=date_add(curdate(), interval -1 day) group by device_id order by click_times desc" sql = "select times,count(device_id) \
uid_click_times = con_sql(sql) from (select device_id,count(cid_type) as times \
uid_lst = [i[0] for i in uid_click_times] from data_feed_click \
click_times_lst = [i[1] for i in uid_click_times] where from_unixtime(time,'%Y-%m-%d')=date_add(curdate(), interval -1 day) \
uid_click_times_df = pd.DataFrame({"uid":uid_lst,"click_times":click_times_lst}) group by device_id) as t \
df = uid_click_times_df.groupby(by="click_times",as_index=False).count() group by times order by times"
return df result = con_sql(sql)
return result
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment