Commit e4bdd8b7 authored by 张彦钊's avatar 张彦钊

Merge branch 'master' of git.wanmeizhensuo.com:ML/ffm-baseline

修改ctr文件中的数据库
parents bf1ce5a9 227d1696
# -*- coding: UTF-8 -*-
from utils import con_sql,get_yesterday_date
from utils import con_sql,get_yesterday_date,get_between_day
import time
OUTPUT_PATH = "/data2/models/eda/gray_stat/"
......@@ -22,7 +22,7 @@ class GrayStat(object):
self.ndays = ndays
def get_uid_count(self):
sql = "select count(distinct(device_id)) from data_feed_click2 \
sql = "select count(distinct(device_id)) from data_feed_click \
where stat_date='{0}' \
and cid_type='{3}' \
and device_id regexp '[{1}]$' \
......@@ -32,7 +32,7 @@ class GrayStat(object):
return uid_count
def get_uid_clk_times(self):
sql = "select count(device_id) from data_feed_click2 \
sql = "select count(device_id) from data_feed_click \
where stat_date='{0}' \
and cid_type='{3}' \
and device_id regexp '[{1}]$' \
......@@ -42,12 +42,12 @@ class GrayStat(object):
return uid_clk_times
def get_uid_imp_times(self):
sql = "select count(device_id) from data_feed_exposure2 \
sql = "select count(device_id) from data_feed_exposure \
where stat_date='{0}' \
and cid_type='{4}' \
and device_id regexp '[{1}]$' \
and device_type{2} \
and device_id in (select device_id from data_feed_click2 \
and device_id in (select device_id from data_feed_click \
where stat_date='{0}' \
and cid_type='{4}' \
and device_id regexp '[{1}]$' \
......@@ -60,6 +60,11 @@ class GrayStat(object):
if __name__ == '__main__':
# date_list = get_between_day('2018-08-20','2018-08-29')
# for my_date in date_list:
print("开始获取ffm中的灰度非灰度比例")
start = time.time()
#1.ffm中的灰度非灰度(ios和安卓一样):
......@@ -75,14 +80,14 @@ if __name__ == '__main__':
uid_count = g_class.get_uid_count()
uid_clk_times = g_class.get_uid_clk_times()
uid_imp_times = g_class.get_uid_imp_times()
uid_clk_rate = round(uid_clk_times/uid_imp_times,4)
uid_clk_rate = round(uid_clk_times/uid_imp_times,4) if uid_imp_times != 0 else 0
result1.append([g_class.ndays,g_class.cid_type,platform,gray,uid_count,\
uid_clk_times,uid_imp_times,uid_clk_rate])
result1.append([g_class.ndays,g_class.cid_type,platform,'all',\
result1[-1][4]+result1[-2][4],\
result1[-1][5]+result1[-2][5],\
result1[-1][6]+result1[-2][6],\
round((result1[-1][5]+result1[-2][5])/(result1[-1][6]+result1[-2][6]),4)])
round((result1[-1][5]+result1[-2][5])/(result1[-1][6]+result1[-2][6]),4)]) if (result1[-1][6]+result1[-2][6]) != 0 else 0
#1.2获取所有平台的数据
labels = ['6','8','all']
for i in range(3):
......@@ -90,7 +95,7 @@ if __name__ == '__main__':
result1[i][4]+result1[i+3][4],\
result1[i][5]+result1[i+3][5],\
result1[i][6]+result1[i+3][6],\
round((result1[i][5]+result1[i+3][5])/(result1[i][6]+result1[i+3][6]),4)])
round((result1[i][5]+result1[i+3][5])/(result1[i][6]+result1[i+3][6]),4)]) if (result1[i][6]+result1[i+3][6]) !=0 else 0
#1.3把一天所有的数据存入文件
output1 = OUTPUT_PATH + "gray_ffm.csv"
with open(output1,'a+') as f:
......@@ -121,14 +126,14 @@ if __name__ == '__main__':
uid_count = g_class.get_uid_count()
uid_clk_times = g_class.get_uid_clk_times()
uid_imp_times = g_class.get_uid_imp_times()
uid_clk_rate = round(uid_clk_times/uid_imp_times,4)
uid_clk_rate = round(uid_clk_times/uid_imp_times,4) if uid_imp_times != 0 else 0
result2.append([g_class.ndays,g_class.cid_type,platform,gray,uid_count,\
uid_clk_times,uid_imp_times,uid_clk_rate])
result2.append([g_class.ndays,g_class.cid_type,platform,'all',\
result2[-1][4]+result2[-2][4],\
result2[-1][5]+result2[-2][5],\
result2[-1][6]+result2[-2][6],\
round((result2[-1][5]+result2[-2][5])/(result2[-1][6]+result2[-2][6]),4)])
round((result2[-1][5]+result2[-2][5])/(result2[-1][6]+result2[-2][6]),4)]) if (result2[-1][6]+result2[-2][6]) != 0 else 0
#2.2获取所有平台的数据
labels = ['gray','not gray','all']
for i in range(3):
......@@ -136,7 +141,7 @@ if __name__ == '__main__':
result2[i][4]+result2[i+3][4],\
result2[i][5]+result2[i+3][5],\
result2[i][6]+result2[i+3][6],\
round((result2[i][5]+result2[i+3][5])/(result2[i][6]+result2[i+3][6]),4)])
round((result2[i][5]+result2[i+3][5])/(result2[i][6]+result2[i+3][6]),4)]) if (result2[i][6]+result2[i+3][6]) !=0 else 0
#2.3把一天所有的数据写入文件
output2 = OUTPUT_PATH + "gray_all.csv"
with open(output2,'a+') as f:
......@@ -144,7 +149,7 @@ if __name__ == '__main__':
line = [str(i) for i in line]
str_line = ','.join(line) + '\n'
f.write(str_line)
end = time.time()
print("程序执行时间:{}s".format(end-start))
......
......@@ -35,3 +35,19 @@ def get_yesterday_date():
yesterday = today - datetime.timedelta(days=1)
yesterday = yesterday.strftime("%Y-%m-%d")
return yesterday
def get_between_day(begin_date,end_date):
    """Return every date in the given range as a list of strings.

    Both endpoints are included. If begin_date is later than end_date
    the result is an empty list.

    type begin_date : str  eg:'2018-08-29'
    type end_date : str  eg:'2018-09-01'
    rtype : list  eg:['2018-08-29','2018-08-30','2018-08-31','2018-09-01']
    """
    day_format = "%Y-%m-%d"
    one_day = datetime.timedelta(days=1)
    # Parse the start first, then the end, so a malformed begin_date is
    # reported before a malformed end_date.
    cursor = datetime.datetime.strptime(begin_date, day_format)
    last_day = datetime.datetime.strptime(end_date, day_format)
    days = []
    # Walk forward one day at a time until the cursor passes the end date.
    while not cursor > last_day:
        days.append(cursor.strftime(day_format))
        cursor = cursor + one_day
    return days
\ No newline at end of file
......@@ -22,12 +22,12 @@ class CidRate(object):
platform : "所有";"苹果","安卓" #方便显示
rtype : list
"""
sql_cid = "select count(cid) from data_feed_click2 \
sql_cid = "select count(cid) from data_feed_click \
where stat_date = '{0}' \
and device_type{1} \
and cid_type='{2}'".format(get_yesterday_date(),self.platform.replace(' ','') if self.platform[-2]=='e' else self.platform,self.cid_type)
cid_clk_count = con_sql(sql_cid)[0][0]
sql_all = "select count(cid) from data_feed_click2 \
sql_all = "select count(cid) from data_feed_click \
where stat_date = '{0}' \
and device_type{1}".format(get_yesterday_date(), self.platform.replace(' ','') if self.platform[-2]=='e' else self.platform)
all_clk_count = con_sql(sql_all)[0][0]
......@@ -40,11 +40,11 @@ class CidRate(object):
platform : "所有";"苹果","安卓" #方便显示
rtype : list
"""
sql_cid = "select count(cid) from data_feed_exposure2 \
sql_cid = "select count(cid) from data_feed_exposure \
where stat_date = '{0}' \
and device_type{1} and cid_type='{2}'".format(get_yesterday_date(),self.platform,self.cid_type)
cid_imp_count = con_sql(sql_cid)[0][0]
sql_all = "select count(cid) from data_feed_exposure2 \
sql_all = "select count(cid) from data_feed_exposure \
where stat_date = '{0}' \
and device_type{1}".format(get_yesterday_date(), self.platform)
all_imp_count = con_sql(sql_all)[0][0]
......
......@@ -24,13 +24,13 @@ class ClkCidUidRate(object):
platform : "所有";"苹果","安卓" #方便显示
rtype : list
"""
sql_clk = "select count(distinct(device_id)) from data_feed_click2 \
sql_clk = "select count(distinct(device_id)) from data_feed_click \
where stat_date = '{0}' \
and device_type{1} \
and cid_type{2}".format(get_yesterday_date(),self.platform.replace(' ','') if self.platform[-2]=='e' else self.platform,self.cid_type)
clk_count = con_sql(sql_clk)[0][0]
sql_imp = "select count(distinct(device_id)) from data_feed_exposure2 \
sql_imp = "select count(distinct(device_id)) from data_feed_exposure \
where stat_date = '{0}' \
and device_type{1} \
and cid_type{2}".format(get_yesterday_date(),self.platform,self.cid_type)
......
......@@ -16,14 +16,14 @@ def get_activate_uid_ctr(platform):
platform = "!='App Store'"
else:
platform = " is not null"
sql_clk = "select count(device_id) from data_feed_click2 \
sql_clk = "select count(device_id) from data_feed_click \
where stat_date = '{0}' \
and device_type{1}".format(get_yesterday_date(), platform.replace(' ','') if platform[-2]=='e' else platform)
clk_count = con_sql(sql_clk)[0][0]
sql_imp = "select count(device_id) from data_feed_exposure2 \
sql_imp = "select count(device_id) from data_feed_exposure \
where stat_date = '{0}' \
and device_id in \
(select device_id from data_feed_click2 \
(select device_id from data_feed_click \
where stat_date = '{0}' \
and device_type{1}) \
and device_type{2}".format(get_yesterday_date(), platform.replace(' ','') if platform[-2]=='e' else platform, platform)
......@@ -47,13 +47,13 @@ def get_activate_uid_imp_times(city):
city = "='beijing'"
else:
city = " is not null"
sql_uid = "select count(distinct(device_id)) from data_feed_click2 \
sql_uid = "select count(distinct(device_id)) from data_feed_click \
where stat_date = '{0}' \
and city_id{1}".format(get_yesterday_date(),city)
sql_uid_count = con_sql(sql_uid)[0][0]
sql_imp = "select count(device_id) from data_feed_exposure2 \
sql_imp = "select count(device_id) from data_feed_exposure \
where device_id in \
(select device_id from data_feed_click2 \
(select device_id from data_feed_click \
where stat_date = '{0}' \
and city_id{1}) \
and stat_date = '{0}' \
......@@ -87,7 +87,7 @@ def get_click_times_to_count_uid():
"""
sql = "select times,count(device_id) \
from (select device_id,count(cid_type) as times \
from data_feed_click2 \
from data_feed_click \
where from_unixtime(time,'%Y-%m-%d')=date_add(curdate(), interval -1 day) \
group by device_id) as t \
group by times order by times"
......
......@@ -29,100 +29,100 @@ def get_click_zero_uid_count(platform):
platform = " is not null"
db = pymysql.connect(host='10.66.157.22', port=4000, user='root', passwd='3SYz54LS9#^9sBvC', db='jerry_test')
cursor = db.cursor()
sql = "select '0-7' as label,count(distinct(device_id)) from data_feed_exposure2 \
sql = "select '0-7' as label,count(distinct(device_id)) from data_feed_exposure \
where device_type{0} \
and stat_date = '{7}' \
and device_id not in \
(select distinct(device_id) from data_feed_click2 \
(select distinct(device_id) from data_feed_click \
where stat_date = '{7}' \
and device_type{1}) \
and device_id in \
(select distinct(device_id) \
from data_feed_exposure2 \
from data_feed_exposure \
where device_id not in \
(select distinct(device_id) from data_feed_exposure2 \
(select distinct(device_id) from data_feed_exposure \
where stat_date < {2})) \
union all \
select '7-14' as label,count(distinct(device_id)) from data_feed_exposure2 \
select '7-14' as label,count(distinct(device_id)) from data_feed_exposure \
where device_type{0} \
and stat_date = '{7}' \
and device_id not in \
(select distinct(device_id) from data_feed_click2 \
(select distinct(device_id) from data_feed_click \
where stat_date = '{7}' \
and device_type{1}) \
and device_id in \
(select distinct(device_id) \
from data_feed_exposure2 \
from data_feed_exposure \
where device_id not in \
(select distinct(device_id) from data_feed_exposure2 \
(select distinct(device_id) from data_feed_exposure \
where stat_date < {3}) \
and device_id in \
(select distinct(device_id) from data_feed_exposure2 \
(select distinct(device_id) from data_feed_exposure \
where stat_date < {2})) \
union all \
select '14-30' as label,count(distinct(device_id)) from data_feed_exposure2 \
select '14-30' as label,count(distinct(device_id)) from data_feed_exposure \
where device_type{0} \
and stat_date = '{7}' \
and device_id not in \
(select distinct(device_id) from data_feed_click2 \
(select distinct(device_id) from data_feed_click \
where stat_date = '{7}' \
and device_type{1}) \
and device_id in \
(select distinct(device_id) \
from data_feed_exposure2 \
from data_feed_exposure \
where device_id not in \
(select distinct(device_id) from data_feed_exposure2 \
(select distinct(device_id) from data_feed_exposure \
where stat_date < {4}) \
and device_id in \
(select distinct(device_id) from data_feed_exposure2 \
(select distinct(device_id) from data_feed_exposure \
where stat_date < {3})) \
union all \
select '30-60' as label,count(distinct(device_id)) from data_feed_exposure2 \
select '30-60' as label,count(distinct(device_id)) from data_feed_exposure \
where device_type{0} \
and stat_date = '{7}' \
and device_id not in \
(select distinct(device_id) from data_feed_click2 \
(select distinct(device_id) from data_feed_click \
where stat_date = '{7}' \
and device_type{1}) \
and device_id in \
(select distinct(device_id) \
from data_feed_exposure2 \
from data_feed_exposure \
where device_id not in \
(select distinct(device_id) from data_feed_exposure2 \
(select distinct(device_id) from data_feed_exposure \
where stat_date < {5}) \
and device_id in \
(select distinct(device_id) from data_feed_exposure2 \
(select distinct(device_id) from data_feed_exposure \
where stat_date < {4})) \
union all \
select '60-90' as label,count(distinct(device_id)) from data_feed_exposure2 \
select '60-90' as label,count(distinct(device_id)) from data_feed_exposure \
where device_type{0} \
and stat_date = '{7}' \
and device_id not in \
(select distinct(device_id) from data_feed_click2 \
(select distinct(device_id) from data_feed_click \
where stat_date = '{7}' \
and device_type{1}) \
and device_id in \
(select distinct(device_id) \
from data_feed_exposure2 \
from data_feed_exposure \
where device_id not in \
(select distinct(device_id) from data_feed_exposure2 \
(select distinct(device_id) from data_feed_exposure \
where stat_date < {6}) \
and device_id in \
(select distinct(device_id) from data_feed_exposure2 \
(select distinct(device_id) from data_feed_exposure \
where stat_date < {5})) \
union all \
select '90+' as label,count(distinct(device_id)) from data_feed_exposure2 \
select '90+' as label,count(distinct(device_id)) from data_feed_exposure \
where device_type{0} \
and stat_date = '{7}' \
and device_id not in \
(select distinct(device_id) from data_feed_click2 \
(select distinct(device_id) from data_feed_click \
where stat_date = '{7}' \
and device_type{1}) \
and device_id in \
(select distinct(device_id) \
from data_feed_exposure2 \
from data_feed_exposure \
where device_id in \
(select distinct(device_id) from data_feed_exposure2 \
(select distinct(device_id) from data_feed_exposure \
where stat_date < {6}))".format(platform,platform.replace(' ','') if platform[-2]=='e' else platform,my_date1,my_date2,my_date3,my_date4,my_date5,get_yesterday_date())
cursor.execute(sql)
result = cursor.fetchall()
......
......@@ -21,51 +21,51 @@ def get_register_uid_count():
db = pymysql.connect(host='10.66.157.22', port=4000, user='root', passwd='3SYz54LS9#^9sBvC', db='jerry_test')
cursor = db.cursor()
sql = "select '0-7' as label,count(distinct(device_id)) \
from data_feed_exposure2 \
from data_feed_exposure \
where device_id not in \
(select distinct(device_id) from data_feed_exposure2 \
(select distinct(device_id) from data_feed_exposure \
where stat_date < {0}) \
union all \
select '7-14' as label,count(distinct(device_id)) \
from data_feed_exposure2 \
from data_feed_exposure \
where device_id not in \
(select distinct(device_id) from data_feed_exposure2 \
(select distinct(device_id) from data_feed_exposure \
where stat_date < {1}) \
and device_id in \
(select distinct(device_id) from data_feed_exposure2 \
(select distinct(device_id) from data_feed_exposure \
where stat_date < {0}) \
union all \
select '14-30' as label,count(distinct(device_id)) \
from data_feed_exposure2 \
from data_feed_exposure \
where device_id not in \
(select distinct(device_id) from data_feed_exposure2 \
(select distinct(device_id) from data_feed_exposure \
where stat_date < {2}) \
and device_id in \
(select distinct(device_id) from data_feed_exposure2 \
(select distinct(device_id) from data_feed_exposure \
where stat_date < {1}) \
union all \
select '30-60' as label,count(distinct(device_id)) \
from data_feed_exposure2 \
from data_feed_exposure \
where device_id not in \
(select distinct(device_id) from data_feed_exposure2 \
(select distinct(device_id) from data_feed_exposure \
where stat_date < {3}) \
and device_id in \
(select distinct(device_id) from data_feed_exposure2 \
(select distinct(device_id) from data_feed_exposure \
where stat_date < {2}) \
union all \
select '60-90' as label,count(distinct(device_id)) \
from data_feed_exposure2 \
from data_feed_exposure \
where device_id not in \
(select distinct(device_id) from data_feed_exposure2 \
(select distinct(device_id) from data_feed_exposure \
where stat_date < {4}) \
and device_id in \
(select distinct(device_id) from data_feed_exposure2 \
(select distinct(device_id) from data_feed_exposure \
where stat_date < {3}) \
union all \
select '90+' as label,count(distinct(device_id)) \
from data_feed_exposure2 \
from data_feed_exposure \
where device_id in \
(select distinct(device_id) from data_feed_exposure2 \
(select distinct(device_id) from data_feed_exposure \
where stat_date < {4})".format(my_date1,my_date2,my_date3,my_date4,my_date5)
cursor.execute(sql)
result = cursor.fetchall()
......
......@@ -22,7 +22,7 @@ class TopFeatures(object):
def get_click_times(self):
# rtype : dict
sql = "select cid,count(cid) from data_feed_click2 \
sql = "select cid,count(cid) from data_feed_click \
where stat_date = '{0}' \
and device_type{1} and cid_type='{2}' \
group by cid \
......@@ -33,7 +33,7 @@ class TopFeatures(object):
def get_impression_times(self):
# rtype : dict
sql = "select cid,count(cid) from data_feed_exposure2 \
sql = "select cid,count(cid) from data_feed_exposure \
where stat_date = '{0}' \
and device_type{1} and cid_type='{2}' \
group by cid order by count(cid) desc".format(get_yesterday_date(), self.platform, self.cid_type)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment