Commit c576970e authored by 高雅喆

use new table with index

parent 1d246b32
...@@ -24,12 +24,12 @@ class CidRate(object): ...@@ -24,12 +24,12 @@ class CidRate(object):
platform : "所有";"苹果","安卓" #方便显示 platform : "所有";"苹果","安卓" #方便显示
rtype : list rtype : list
""" """
sql_cid = "select count(cid) from data_feed_click \ sql_cid = "select count(cid) from data_feed_click2 \
where from_unixtime(time,'%Y-%m-%d')=date_add(curdate(), interval -{0} day) \ where from_unixtime(time,'%Y-%m-%d')=date_add(curdate(), interval -{0} day) \
and device_type{1} \ and device_type{1} \
and cid_type='{2}'".format(self.ndays,self.platform.replace(' ','') if self.platform[-2]=='e' else self.platform,self.cid_type) and cid_type='{2}'".format(self.ndays,self.platform.replace(' ','') if self.platform[-2]=='e' else self.platform,self.cid_type)
cid_clk_count = con_sql(sql_cid)[0][0] cid_clk_count = con_sql(sql_cid)[0][0]
sql_all = "select count(cid) from data_feed_click \ sql_all = "select count(cid) from data_feed_click2 \
where from_unixtime(time,'%Y-%m-%d')=date_add(curdate(), interval -{0} day) \ where from_unixtime(time,'%Y-%m-%d')=date_add(curdate(), interval -{0} day) \
and device_type{1}".format(self.ndays, self.platform.replace(' ','') if self.platform[-2]=='e' else self.platform) and device_type{1}".format(self.ndays, self.platform.replace(' ','') if self.platform[-2]=='e' else self.platform)
all_clk_count = con_sql(sql_all)[0][0] all_clk_count = con_sql(sql_all)[0][0]
...@@ -42,11 +42,11 @@ class CidRate(object): ...@@ -42,11 +42,11 @@ class CidRate(object):
platform : "所有";"苹果","安卓" #方便显示 platform : "所有";"苹果","安卓" #方便显示
rtype : list rtype : list
""" """
sql_cid = "select count(cid) from data_feed_exposure \ sql_cid = "select count(cid) from data_feed_exposure2 \
where from_unixtime(time,'%Y-%m-%d')=date_add(curdate(), interval -{0} day) \ where from_unixtime(time,'%Y-%m-%d')=date_add(curdate(), interval -{0} day) \
and device_type{1} and cid_type='{2}'".format(self.ndays,self.platform,self.cid_type) and device_type{1} and cid_type='{2}'".format(self.ndays,self.platform,self.cid_type)
cid_imp_count = con_sql(sql_cid)[0][0] cid_imp_count = con_sql(sql_cid)[0][0]
sql_all = "select count(cid) from data_feed_exposure \ sql_all = "select count(cid) from data_feed_exposure2 \
where from_unixtime(time,'%Y-%m-%d')=date_add(curdate(), interval -{0} day) \ where from_unixtime(time,'%Y-%m-%d')=date_add(curdate(), interval -{0} day) \
and device_type{1}".format(self.ndays, self.platform) and device_type{1}".format(self.ndays, self.platform)
all_imp_count = con_sql(sql_all)[0][0] all_imp_count = con_sql(sql_all)[0][0]
......
...@@ -26,13 +26,13 @@ class ClkCidUidRate(object): ...@@ -26,13 +26,13 @@ class ClkCidUidRate(object):
platform : "所有";"苹果","安卓" #方便显示 platform : "所有";"苹果","安卓" #方便显示
rtype : list rtype : list
""" """
sql_clk = "select count(distinct(device_id)) from data_feed_click \ sql_clk = "select count(distinct(device_id)) from data_feed_click2 \
where from_unixtime(time,'%Y-%m-%d')=date_add(curdate(), interval -{0} day) \ where from_unixtime(time,'%Y-%m-%d')=date_add(curdate(), interval -{0} day) \
and device_type{1} \ and device_type{1} \
and cid_type{2}".format(self.ndays,self.platform.replace(' ','') if self.platform[-2]=='e' else self.platform,self.cid_type) and cid_type{2}".format(self.ndays,self.platform.replace(' ','') if self.platform[-2]=='e' else self.platform,self.cid_type)
clk_count = con_sql(sql_clk)[0][0] clk_count = con_sql(sql_clk)[0][0]
sql_imp = "select count(distinct(device_id)) from data_feed_exposure \ sql_imp = "select count(distinct(device_id)) from data_feed_exposure2 \
where from_unixtime(time,'%Y-%m-%d')=date_add(curdate(), interval -{0} day) \ where from_unixtime(time,'%Y-%m-%d')=date_add(curdate(), interval -{0} day) \
and device_type{1} \ and device_type{1} \
and cid_type{2}".format(self.ndays,self.platform,self.cid_type) and cid_type{2}".format(self.ndays,self.platform,self.cid_type)
......
...@@ -17,14 +17,14 @@ def get_activate_uid_ctr(platform, ndays=1): ...@@ -17,14 +17,14 @@ def get_activate_uid_ctr(platform, ndays=1):
platform = "!='App Store'" platform = "!='App Store'"
else: else:
platform = " is not null" platform = " is not null"
sql_clk = "select count(device_id) from data_feed_click \ sql_clk = "select count(device_id) from data_feed_click2 \
where from_unixtime(time,'%Y-%m-%d')=date_add(curdate(), interval -{0} day) \ where from_unixtime(time,'%Y-%m-%d')=date_add(curdate(), interval -{0} day) \
and device_type{1}".format(ndays, platform.replace(' ','') if platform[-2]=='e' else platform) and device_type{1}".format(ndays, platform.replace(' ','') if platform[-2]=='e' else platform)
clk_count = con_sql(sql_clk)[0][0] clk_count = con_sql(sql_clk)[0][0]
sql_imp = "select count(device_id) from data_feed_exposure \ sql_imp = "select count(device_id) from data_feed_exposure2 \
where from_unixtime(time,'%Y-%m-%d')=date_add(curdate(), interval -{0} day) \ where from_unixtime(time,'%Y-%m-%d')=date_add(curdate(), interval -{0} day) \
and device_id in \ and device_id in \
(select device_id from data_feed_click \ (select device_id from data_feed_click2 \
where from_unixtime(time,'%Y-%m-%d')=date_add(curdate(), interval -{1} day) \ where from_unixtime(time,'%Y-%m-%d')=date_add(curdate(), interval -{1} day) \
and device_type{2}) \ and device_type{2}) \
and device_type{3}".format(ndays, ndays, platform.replace(' ','') if platform[-2]=='e' else platform, platform) and device_type{3}".format(ndays, ndays, platform.replace(' ','') if platform[-2]=='e' else platform, platform)
...@@ -49,13 +49,13 @@ def get_activate_uid_imp_times(city,ndays=1): ...@@ -49,13 +49,13 @@ def get_activate_uid_imp_times(city,ndays=1):
city = "='beijing'" city = "='beijing'"
else: else:
city = " is not null" city = " is not null"
sql_uid = "select count(distinct(device_id)) from data_feed_click \ sql_uid = "select count(distinct(device_id)) from data_feed_click2 \
where from_unixtime(time,'%Y-%m-%d')=date_add(curdate(), interval -{0} day) \ where from_unixtime(time,'%Y-%m-%d')=date_add(curdate(), interval -{0} day) \
and city_id{1}".format(ndays,city) and city_id{1}".format(ndays,city)
sql_uid_count = con_sql(sql_uid)[0][0] sql_uid_count = con_sql(sql_uid)[0][0]
sql_imp = "select count(device_id) from data_feed_exposure \ sql_imp = "select count(device_id) from data_feed_exposure2 \
where device_id in \ where device_id in \
(select device_id from data_feed_click \ (select device_id from data_feed_click2 \
where from_unixtime(time,'%Y-%m-%d')=date_add(curdate(), interval -{0} day) \ where from_unixtime(time,'%Y-%m-%d')=date_add(curdate(), interval -{0} day) \
and city_id{1}) \ and city_id{1}) \
and from_unixtime(time,'%Y-%m-%d')=date_add(curdate(), interval -{0} day) \ and from_unixtime(time,'%Y-%m-%d')=date_add(curdate(), interval -{0} day) \
...@@ -89,7 +89,7 @@ def get_click_times_to_count_uid(): ...@@ -89,7 +89,7 @@ def get_click_times_to_count_uid():
""" """
sql = "select times,count(device_id) \ sql = "select times,count(device_id) \
from (select device_id,count(cid_type) as times \ from (select device_id,count(cid_type) as times \
from data_feed_click \ from data_feed_click2 \
where from_unixtime(time,'%Y-%m-%d')=date_add(curdate(), interval -1 day) \ where from_unixtime(time,'%Y-%m-%d')=date_add(curdate(), interval -1 day) \
group by device_id) as t \ group by device_id) as t \
group by times order by times" group by times order by times"
......
...@@ -32,100 +32,100 @@ def get_click_zero_uid_count(platform): ...@@ -32,100 +32,100 @@ def get_click_zero_uid_count(platform):
platform = " is not null" platform = " is not null"
db = pymysql.connect(host='10.66.157.22', port=4000, user='root', passwd='3SYz54LS9#^9sBvC', db='jerry_test') db = pymysql.connect(host='10.66.157.22', port=4000, user='root', passwd='3SYz54LS9#^9sBvC', db='jerry_test')
cursor = db.cursor() cursor = db.cursor()
sql = "select '0-7' as label,count(distinct(device_id)) from data_feed_exposure \ sql = "select '0-7' as label,count(distinct(device_id)) from data_feed_exposure2 \
where device_type{0} \ where device_type{0} \
and from_unixtime(time,'%Y-%m-%d')=date_add(curdate(), interval -1 day) \ and from_unixtime(time,'%Y-%m-%d')=date_add(curdate(), interval -1 day) \
and device_id not in \ and device_id not in \
(select distinct(device_id) from data_feed_click \ (select distinct(device_id) from data_feed_click2 \
where from_unixtime(time,'%Y-%m-%d')=date_add(curdate(), interval -1 day) \ where from_unixtime(time,'%Y-%m-%d')=date_add(curdate(), interval -1 day) \
and device_type{1}) \ and device_type{1}) \
and device_id in \ and device_id in \
(select distinct(device_id) \ (select distinct(device_id) \
from data_feed_exposure \ from data_feed_exposure2 \
where device_id not in \ where device_id not in \
(select distinct(device_id) from data_feed_exposure \ (select distinct(device_id) from data_feed_exposure2 \
where time < {2})) \ where time < {2})) \
union all \ union all \
select '7-14' as label,count(distinct(device_id)) from data_feed_exposure \ select '7-14' as label,count(distinct(device_id)) from data_feed_exposure2 \
where device_type{0} \ where device_type{0} \
and from_unixtime(time,'%Y-%m-%d')=date_add(curdate(), interval -1 day) \ and from_unixtime(time,'%Y-%m-%d')=date_add(curdate(), interval -1 day) \
and device_id not in \ and device_id not in \
(select distinct(device_id) from data_feed_click \ (select distinct(device_id) from data_feed_click2 \
where from_unixtime(time,'%Y-%m-%d')=date_add(curdate(), interval -1 day) \ where from_unixtime(time,'%Y-%m-%d')=date_add(curdate(), interval -1 day) \
and device_type{1}) \ and device_type{1}) \
and device_id in \ and device_id in \
(select distinct(device_id) \ (select distinct(device_id) \
from data_feed_exposure \ from data_feed_exposure2 \
where device_id not in \ where device_id not in \
(select distinct(device_id) from data_feed_exposure \ (select distinct(device_id) from data_feed_exposure2 \
where time < {3}) \ where time < {3}) \
and device_id in \ and device_id in \
(select distinct(device_id) from data_feed_exposure \ (select distinct(device_id) from data_feed_exposure2 \
where time < {2})) \ where time < {2})) \
union all \ union all \
select '14-30' as label,count(distinct(device_id)) from data_feed_exposure \ select '14-30' as label,count(distinct(device_id)) from data_feed_exposure2 \
where device_type{0} \ where device_type{0} \
and from_unixtime(time,'%Y-%m-%d')=date_add(curdate(), interval -1 day) \ and from_unixtime(time,'%Y-%m-%d')=date_add(curdate(), interval -1 day) \
and device_id not in \ and device_id not in \
(select distinct(device_id) from data_feed_click \ (select distinct(device_id) from data_feed_click2 \
where from_unixtime(time,'%Y-%m-%d')=date_add(curdate(), interval -1 day) \ where from_unixtime(time,'%Y-%m-%d')=date_add(curdate(), interval -1 day) \
and device_type{1}) \ and device_type{1}) \
and device_id in \ and device_id in \
(select distinct(device_id) \ (select distinct(device_id) \
from data_feed_exposure \ from data_feed_exposure2 \
where device_id not in \ where device_id not in \
(select distinct(device_id) from data_feed_exposure \ (select distinct(device_id) from data_feed_exposure2 \
where time < {4}) \ where time < {4}) \
and device_id in \ and device_id in \
(select distinct(device_id) from data_feed_exposure \ (select distinct(device_id) from data_feed_exposure2 \
where time < {3})) \ where time < {3})) \
union all \ union all \
select '30-60' as label,count(distinct(device_id)) from data_feed_exposure \ select '30-60' as label,count(distinct(device_id)) from data_feed_exposure2 \
where device_type{0} \ where device_type{0} \
and from_unixtime(time,'%Y-%m-%d')=date_add(curdate(), interval -1 day) \ and from_unixtime(time,'%Y-%m-%d')=date_add(curdate(), interval -1 day) \
and device_id not in \ and device_id not in \
(select distinct(device_id) from data_feed_click \ (select distinct(device_id) from data_feed_click2 \
where from_unixtime(time,'%Y-%m-%d')=date_add(curdate(), interval -1 day) \ where from_unixtime(time,'%Y-%m-%d')=date_add(curdate(), interval -1 day) \
and device_type{1}) \ and device_type{1}) \
and device_id in \ and device_id in \
(select distinct(device_id) \ (select distinct(device_id) \
from data_feed_exposure \ from data_feed_exposure2 \
where device_id not in \ where device_id not in \
(select distinct(device_id) from data_feed_exposure \ (select distinct(device_id) from data_feed_exposure2 \
where time < {5}) \ where time < {5}) \
and device_id in \ and device_id in \
(select distinct(device_id) from data_feed_exposure \ (select distinct(device_id) from data_feed_exposure2 \
where time < {4})) \ where time < {4})) \
union all \ union all \
select '60-90' as label,count(distinct(device_id)) from data_feed_exposure \ select '60-90' as label,count(distinct(device_id)) from data_feed_exposure2 \
where device_type{0} \ where device_type{0} \
and from_unixtime(time,'%Y-%m-%d')=date_add(curdate(), interval -1 day) \ and from_unixtime(time,'%Y-%m-%d')=date_add(curdate(), interval -1 day) \
and device_id not in \ and device_id not in \
(select distinct(device_id) from data_feed_click \ (select distinct(device_id) from data_feed_click2 \
where from_unixtime(time,'%Y-%m-%d')=date_add(curdate(), interval -1 day) \ where from_unixtime(time,'%Y-%m-%d')=date_add(curdate(), interval -1 day) \
and device_type{1}) \ and device_type{1}) \
and device_id in \ and device_id in \
(select distinct(device_id) \ (select distinct(device_id) \
from data_feed_exposure \ from data_feed_exposure2 \
where device_id not in \ where device_id not in \
(select distinct(device_id) from data_feed_exposure \ (select distinct(device_id) from data_feed_exposure2 \
where time < {6}) \ where time < {6}) \
and device_id in \ and device_id in \
(select distinct(device_id) from data_feed_exposure \ (select distinct(device_id) from data_feed_exposure2 \
where time < {5})) \ where time < {5})) \
union all \ union all \
select '90+' as label,count(distinct(device_id)) from data_feed_exposure \ select '90+' as label,count(distinct(device_id)) from data_feed_exposure2 \
where device_type{0} \ where device_type{0} \
and from_unixtime(time,'%Y-%m-%d')=date_add(curdate(), interval -1 day) \ and from_unixtime(time,'%Y-%m-%d')=date_add(curdate(), interval -1 day) \
and device_id not in \ and device_id not in \
(select distinct(device_id) from data_feed_click \ (select distinct(device_id) from data_feed_click2 \
where from_unixtime(time,'%Y-%m-%d')=date_add(curdate(), interval -1 day) \ where from_unixtime(time,'%Y-%m-%d')=date_add(curdate(), interval -1 day) \
and device_type{1}) \ and device_type{1}) \
and device_id in \ and device_id in \
(select distinct(device_id) \ (select distinct(device_id) \
from data_feed_exposure \ from data_feed_exposure2 \
where device_id in \ where device_id in \
(select distinct(device_id) from data_feed_exposure \ (select distinct(device_id) from data_feed_exposure2 \
where time < {6}))".format(platform,platform.replace(' ','') if platform[-2]=='e' else platform,my_tm1,my_tm2,my_tm3,my_tm4,my_tm5) where time < {6}))".format(platform,platform.replace(' ','') if platform[-2]=='e' else platform,my_tm1,my_tm2,my_tm3,my_tm4,my_tm5)
cursor.execute(sql) cursor.execute(sql)
result = cursor.fetchall() result = cursor.fetchall()
......
...@@ -26,51 +26,51 @@ def get_register_uid_count(): ...@@ -26,51 +26,51 @@ def get_register_uid_count():
db = pymysql.connect(host='10.66.157.22', port=4000, user='root', passwd='3SYz54LS9#^9sBvC', db='jerry_test') db = pymysql.connect(host='10.66.157.22', port=4000, user='root', passwd='3SYz54LS9#^9sBvC', db='jerry_test')
cursor = db.cursor() cursor = db.cursor()
sql = "select '0-7' as label,count(distinct(device_id)) \ sql = "select '0-7' as label,count(distinct(device_id)) \
from data_feed_exposure \ from data_feed_exposure2 \
where device_id not in \ where device_id not in \
(select distinct(device_id) from data_feed_exposure \ (select distinct(device_id) from data_feed_exposure2 \
where time < {0}) \ where time < {0}) \
union all \ union all \
select '7-14' as label,count(distinct(device_id)) \ select '7-14' as label,count(distinct(device_id)) \
from data_feed_exposure \ from data_feed_exposure2 \
where device_id not in \ where device_id not in \
(select distinct(device_id) from data_feed_exposure \ (select distinct(device_id) from data_feed_exposure2 \
where time < {1}) \ where time < {1}) \
and device_id in \ and device_id in \
(select distinct(device_id) from data_feed_exposure \ (select distinct(device_id) from data_feed_exposure2 \
where time < {0}) \ where time < {0}) \
union all \ union all \
select '14-30' as label,count(distinct(device_id)) \ select '14-30' as label,count(distinct(device_id)) \
from data_feed_exposure \ from data_feed_exposure2 \
where device_id not in \ where device_id not in \
(select distinct(device_id) from data_feed_exposure \ (select distinct(device_id) from data_feed_exposure2 \
where time < {2}) \ where time < {2}) \
and device_id in \ and device_id in \
(select distinct(device_id) from data_feed_exposure \ (select distinct(device_id) from data_feed_exposure2 \
where time < {1}) \ where time < {1}) \
union all \ union all \
select '30-60' as label,count(distinct(device_id)) \ select '30-60' as label,count(distinct(device_id)) \
from data_feed_exposure \ from data_feed_exposure2 \
where device_id not in \ where device_id not in \
(select distinct(device_id) from data_feed_exposure \ (select distinct(device_id) from data_feed_exposure2 \
where time < {3}) \ where time < {3}) \
and device_id in \ and device_id in \
(select distinct(device_id) from data_feed_exposure \ (select distinct(device_id) from data_feed_exposure2 \
where time < {2}) \ where time < {2}) \
union all \ union all \
select '60-90' as label,count(distinct(device_id)) \ select '60-90' as label,count(distinct(device_id)) \
from data_feed_exposure \ from data_feed_exposure2 \
where device_id not in \ where device_id not in \
(select distinct(device_id) from data_feed_exposure \ (select distinct(device_id) from data_feed_exposure2 \
where time < {4}) \ where time < {4}) \
and device_id in \ and device_id in \
(select distinct(device_id) from data_feed_exposure \ (select distinct(device_id) from data_feed_exposure2 \
where time < {3}) \ where time < {3}) \
union all \ union all \
select '90+' as label,count(distinct(device_id)) \ select '90+' as label,count(distinct(device_id)) \
from data_feed_exposure \ from data_feed_exposure2 \
where device_id in \ where device_id in \
(select distinct(device_id) from data_feed_exposure \ (select distinct(device_id) from data_feed_exposure2 \
where time < {4})".format(my_tm1,my_tm2,my_tm3,my_tm4,my_tm5) where time < {4})".format(my_tm1,my_tm2,my_tm3,my_tm4,my_tm5)
cursor.execute(sql) cursor.execute(sql)
result = cursor.fetchall() result = cursor.fetchall()
......
...@@ -24,7 +24,7 @@ class TopFeatures(object): ...@@ -24,7 +24,7 @@ class TopFeatures(object):
def get_click_times(self): def get_click_times(self):
# rtype : dict # rtype : dict
sql = "select cid,count(cid) from data_feed_click \ sql = "select cid,count(cid) from data_feed_click2 \
where from_unixtime(time,'%Y-%m-%d')=date_add(curdate(), interval -{0} day) \ where from_unixtime(time,'%Y-%m-%d')=date_add(curdate(), interval -{0} day) \
and device_type{1} and cid_type='{2}' \ and device_type{1} and cid_type='{2}' \
group by cid \ group by cid \
...@@ -35,7 +35,7 @@ class TopFeatures(object): ...@@ -35,7 +35,7 @@ class TopFeatures(object):
def get_impression_times(self): def get_impression_times(self):
# rtype : dict # rtype : dict
sql = "select cid,count(cid) from data_feed_exposure \ sql = "select cid,count(cid) from data_feed_exposure2 \
where from_unixtime(time,'%Y-%m-%d')=date_add(curdate(), interval -{0} day) \ where from_unixtime(time,'%Y-%m-%d')=date_add(curdate(), interval -{0} day) \
and device_type{1} and cid_type='{2}' \ and device_type{1} and cid_type='{2}' \
group by cid order by count(cid) desc".format(self.ndays, self.platform, self.cid_type) group by cid order by count(cid) desc".format(self.ndays, self.platform, self.cid_type)
...@@ -155,7 +155,7 @@ def main(): ...@@ -155,7 +155,7 @@ def main():
clk_question_ctr_android = top_question_android.get_result("安卓", 2, "ctr") clk_question_ctr_android = top_question_android.get_result("安卓", 2, "ctr")
result_lst = [clk_question_ctr_all, clk_question_ctr_ios, clk_question_ctr_android] result_lst = [clk_question_ctr_all, clk_question_ctr_ios, clk_question_ctr_android]
output_path = DIRECTORY_PATH + "top100_ctr_question_{}.txt".format(get_yesterday_date()) output_path = DIRECTORY_PATH + "top100_ctr_question_{}.txt".format(get_yesterday_date().replace('-',''))
top_question_all.result2file(result_lst, output_path) top_question_all.result2file(result_lst, output_path)
print("已获取 Top question 特征") print("已获取 Top question 特征")
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment