Commit 3cf289c5 authored by 张彦钊

Merge branch 'master' of git.wanmeizhensuo.com:ML/ffm-baseline

change con_sql argument
parents 749a0c2d 8612156b
......@@ -21,82 +21,109 @@ my_tm5 = int(my_date5.strftime("%s"))
def get_rate_detail(platform):
if platform == "ios":
self.platform = "='App Store'"
platform = "='App Store'"
elif platform == "android":
self.platform = "!='App Store'"
platform = "!='App Store'"
else:
self.platform = " is not null"
platform = " is not null"
db = pymysql.connect(host='10.66.157.22', port=4000, user='root', passwd='3SYz54LS9#^9sBvC', db='jerry_test')
cursor = db.cursor()
sql_tmp = "create temporary table if not exists tmp_table as \
(select distinct(device_id) from data_feed_exposure \
sql = "select count(distinct(device_id)) from data_feed_exposure \
where device_type{0} \
and from_unixtime(time,'%Y-%m-%d')=date_add(curdate(), interval -1 day) \
and device_id not in \
(select distinct(device_id) from data_feed_click \
where from_unixtime(time,'%Y-%m-%d')=date_add(curdate(), interval -1 day) \
and device_type{1}))".format(platform,platform.replace(' ','') if platform[-2]=='e' else platform)
sql_last = "select count(distinct(device_id)) from tmp_table\
where device_id in \
and device_type{1}) \
and device_id in \
(select distinct(device_id) \
from data_feed_exposure \
where device_id not in \
(select distinct(device_id) from data_feed_exposure \
where time < {0})) \
where time < {2})) \
union \
select count(distinct(device_id)) from tmp_table\
where device_id in \
select count(distinct(device_id)) from data_feed_exposure \
where device_type{0} \
and from_unixtime(time,'%Y-%m-%d')=date_add(curdate(), interval -1 day) \
and device_id not in \
(select distinct(device_id) from data_feed_click \
where from_unixtime(time,'%Y-%m-%d')=date_add(curdate(), interval -1 day) \
and device_type{1}) \
and device_id in \
(select distinct(device_id) \
from data_feed_exposure \
where device_id not in \
(select distinct(device_id) from data_feed_exposure \
where time < {1}) \
where time < {3}) \
and device_id in \
(select distinct(device_id) from data_feed_exposure \
where time < {0})) \
where time < {2})) \
union \
select count(distinct(device_id)) from tmp_table\
where device_id in \
select count(distinct(device_id)) from data_feed_exposure \
where device_type{0} \
and from_unixtime(time,'%Y-%m-%d')=date_add(curdate(), interval -1 day) \
and device_id not in \
(select distinct(device_id) from data_feed_click \
where from_unixtime(time,'%Y-%m-%d')=date_add(curdate(), interval -1 day) \
and device_type{1}) \
and device_id in \
(select distinct(device_id) \
from data_feed_exposure \
where device_id not in \
(select distinct(device_id) from data_feed_exposure \
where time < {2}) \
where time < {4}) \
and device_id in \
(select distinct(device_id) from data_feed_exposure \
where time < {1})) \
where time < {3})) \
union \
select count(distinct(device_id)) from tmp_table\
where device_id in \
select count(distinct(device_id)) from data_feed_exposure \
where device_type{0} \
and from_unixtime(time,'%Y-%m-%d')=date_add(curdate(), interval -1 day) \
and device_id not in \
(select distinct(device_id) from data_feed_click \
where from_unixtime(time,'%Y-%m-%d')=date_add(curdate(), interval -1 day) \
and device_type{1}) \
and device_id in \
(select distinct(device_id) \
from data_feed_exposure \
where device_id not in \
(select distinct(device_id) from data_feed_exposure \
where time < {3}) \
where time < {5}) \
and device_id in \
(select distinct(device_id) from data_feed_exposure \
where time < {2})) \
where time < {4})) \
union \
select count(distinct(device_id)) from tmp_table\
where device_id in \
select count(distinct(device_id)) from data_feed_exposure \
where device_type{0} \
and from_unixtime(time,'%Y-%m-%d')=date_add(curdate(), interval -1 day) \
and device_id not in \
(select distinct(device_id) from data_feed_click \
where from_unixtime(time,'%Y-%m-%d')=date_add(curdate(), interval -1 day) \
and device_type{1}) \
and device_id in \
(select distinct(device_id) \
from data_feed_exposure \
where device_id not in \
(select distinct(device_id) from data_feed_exposure \
where time < {4}) \
where time < {6}) \
and device_id in \
(select distinct(device_id) from data_feed_exposure \
where time < {3})) \
where time < {5})) \
union \
select count(distinct(device_id)) from tmp_table\
where device_id in \
select count(distinct(device_id)) from data_feed_exposure \
where device_type{0} \
and from_unixtime(time,'%Y-%m-%d')=date_add(curdate(), interval -1 day) \
and device_id not in \
(select distinct(device_id) from data_feed_click \
where from_unixtime(time,'%Y-%m-%d')=date_add(curdate(), interval -1 day) \
and device_type{1}) \
and device_id in \
(select distinct(device_id) \
from data_feed_exposure \
where device_id in \
(select distinct(device_id) from data_feed_exposure \
where time < {4}))".format(my_tm1,my_tm2,my_tm3,my_tm4,my_tm5)
cursor.execute(sql_tmp)
cursor.execute(sql_last)
where time < {6}))".format(platform,platform.replace(' ','') if platform[-2]=='e' else platform,my_tm1,my_tm2,my_tm3,my_tm4,my_tm5)
cursor.execute(sql)
result = cursor.fetchall()
db.close()
return result
......
# -*- coding: UTF-8 -*-
import pymysql
def con_sql(sql):
    """Run one SQL statement against the ``jerry_test`` database and return its rows.

    Args:
        sql: Complete SQL text to execute; it is handed to the cursor unchanged.

    Returns:
        The result of ``cursor.fetchall()`` for the executed statement.

    Raises:
        Any exception raised by ``pymysql`` while connecting, executing or
        fetching. Once a connection has been opened it is always closed
        before the exception propagates.
    """
    # NOTE(review): connection credentials are hard-coded; consider loading them from configuration.
    db = pymysql.connect(host='10.66.157.22', port=4000, user='root', passwd='3SYz54LS9#^9sBvC', db='jerry_test')
    try:
        cursor = db.cursor()
        cursor.execute(sql)
        return cursor.fetchall()
    finally:
        # Close on the failure path too, so a bad query does not leak the connection.
        db.close()
# 1. Diary exposure share across all platforms
def get_all_diary_imp_rate():
    """Compute the diary share of feed exposures for the previous day, all platforms.

    Issues two ``count(cid)`` queries against ``data_feed_exposure`` through
    ``con_sql``: one restricted to ``cid_type='diary'`` and one over every
    row, both limited to rows dated one day before the database's current date.

    Returns:
        list: platform label, diary exposure count, total exposure count, and
        the ratio of the two rounded to 4 decimal places. The ratio is 0.0
        when the total exposure count is zero.
    """
    sql = "select count(cid) from data_feed_exposure where cid_type='diary' and from_unixtime(time,'%Y-%m-%d')=date_add(curdate(), interval -1 day)"
    diary_imp_count = con_sql(sql)[0][0]
    sql = "select count(cid) from data_feed_exposure where from_unixtime(time,'%Y-%m-%d')=date_add(curdate(), interval -1 day)"
    all_imp_count = con_sql(sql)[0][0]
    # A day without any exposure rows would otherwise raise ZeroDivisionError.
    all_diary_imp_rate = diary_imp_count / all_imp_count if all_imp_count else 0.0
    return ["所有", diary_imp_count, all_imp_count, round(all_diary_imp_rate, 4)]
# 2. Diary exposure share on the iOS platform
def get_ios_diary_imp_rate():
    """Compute the diary share of feed exposures for the previous day on iOS.

    Issues two ``count(cid)`` queries against ``data_feed_exposure`` through
    ``con_sql``, both filtered to ``device_type='App Store'`` and to rows dated
    one day before the database's current date; the first is additionally
    restricted to ``cid_type='diary'``.

    Returns:
        list: platform label, diary exposure count, total exposure count, and
        the ratio of the two rounded to 4 decimal places. The ratio is 0.0
        when the total exposure count is zero.
    """
    sql = "select count(cid) from data_feed_exposure where cid_type='diary' and device_type='App Store' and from_unixtime(time,'%Y-%m-%d')=date_add(curdate(), interval -1 day)"
    diary_imp_count = con_sql(sql)[0][0]
    sql = "select count(cid) from data_feed_exposure where device_type='App Store' and from_unixtime(time,'%Y-%m-%d')=date_add(curdate(), interval -1 day)"
    all_imp_count = con_sql(sql)[0][0]
    # A day without any iOS exposure rows would otherwise raise ZeroDivisionError.
    ios_diary_imp_rate = diary_imp_count / all_imp_count if all_imp_count else 0.0
    return ["苹果", diary_imp_count, all_imp_count, round(ios_diary_imp_rate, 4)]
# 3. Diary exposure share on the Android platform
def get_android_diary_imp_rate():
    """Compute the diary share of feed exposures for the previous day on Android.

    Issues two ``count(cid)`` queries against ``data_feed_exposure`` through
    ``con_sql``, both filtered to ``device_type!='App Store'`` and to rows dated
    one day before the database's current date; the first is additionally
    restricted to ``cid_type='diary'``.

    Returns:
        list: platform label, diary exposure count, total exposure count, and
        the ratio of the two rounded to 4 decimal places. The ratio is 0.0
        when the total exposure count is zero.
    """
    sql = "select count(cid) from data_feed_exposure where cid_type='diary' and device_type!='App Store' and from_unixtime(time,'%Y-%m-%d')=date_add(curdate(), interval -1 day)"
    diary_imp_count = con_sql(sql)[0][0]
    sql = "select count(cid) from data_feed_exposure where device_type!='App Store' and from_unixtime(time,'%Y-%m-%d')=date_add(curdate(), interval -1 day)"
    all_imp_count = con_sql(sql)[0][0]
    # A day without any non-iOS exposure rows would otherwise raise ZeroDivisionError.
    android_diary_imp_rate = diary_imp_count / all_imp_count if all_imp_count else 0.0
    return ["安卓", diary_imp_count, all_imp_count, round(android_diary_imp_rate, 4)]
if __name__ == "__main__":
    # Script entry point: evaluate the three diary exposure reports in order
    # (all platforms, iOS, Android) and bind each result to its own name.
    all_diary_imp_rate, ios_diary_imp_rate, android_diary_imp_rate = (
        get_all_diary_imp_rate(),
        get_ios_diary_imp_rate(),
        get_android_diary_imp_rate(),
    )
# -*- coding: UTF-8 -*-
import datetime
from getAnswerImpRate import get_all_answer_imp_rate,get_ios_answer_imp_rate,get_android_answer_imp_rate
from getDiaryImpRate import get_all_diary_imp_rate,get_ios_diary_imp_rate,get_android_diary_imp_rate
from getActivateUidCtr import get_all_click_one_rate,get_ios_click_one_rate,get_android_click_one_rate
from getClickAnswerUidRate import get_all_click_answer_rate,get_ios_click_answer_rate,get_android_click_answer_rate
from getClickDiaryUidRate import get_all_click_diary_rate,get_ios_click_diary_rate,get_android_click_diary_rate
......@@ -20,10 +21,11 @@ def result2file(fpath):
内容概览:以下所有数据都是首页的
1. 比例特征
1.1 answer曝光占比(=answer被曝光数/总cid被曝光数)
1.2 活跃用户点击率(=有点击用户点击次数/有点击用户曝光次数)
1.3 点击answer用户占比(=点击answer用户数/曝光answer用户数)
1.4 点击diary用户占比(=点击diary用户数/曝光diary用户数)
1.5 无点击用户占比(=无点击用户数/有曝光用户数)
1.2 diary曝光占比(=diary被曝光数/总cid被曝光数)
1.3 活跃用户点击率(=有点击用户点击次数/有点击用户曝光次数)
1.4 点击answer用户占比(=点击answer用户数/曝光answer用户数)
1.5 点击diary用户占比(=点击diary用户数/曝光diary用户数)
1.6 无点击用户占比(=无点击用户数/有曝光用户数)
2.Top特征
2.1 用户点击次数分布(第一列:用户点击次数;第二列:独立用户数量)
2.2 Top 100 diary (sorted by ctr)
......@@ -49,7 +51,19 @@ def result2file(fpath):
f.write('\n')
print("1.1已将answer曝光占比存入文件")
f.write("#1.2活跃用户点击率(=有点击用户点击次数/有点击用户曝光次数)\n")
f.write("#1.2diary曝光占比(=diary被曝光数/总cid被曝光数)\n")
f.write(tplt.format("平台","diary被曝光数","总cid被曝光数","diary被曝光数占比"))
all_diary_imp_rate = get_all_diary_imp_rate()
ios_diary_imp_rate = get_ios_diary_imp_rate()
android_diary_imp_rate = get_android_diary_imp_rate()
lst = [all_diary_imp_rate,ios_diary_imp_rate,android_diary_imp_rate]
for i in lst:
line = tplt.format(i[0],i[1],i[2],i[3])
f.write(line)
f.write('\n')
print("1.2已将diary曝光占比存入文件")
f.write("#1.3活跃用户点击率(=有点击用户点击次数/有点击用户曝光次数)\n")
f.write(tplt.format("平台","active用户点击次数","active用户曝光次数","active用户点击率"))
all_click_one_rate = get_all_click_one_rate()
ios_click_one_rate = get_ios_click_one_rate()
......@@ -59,9 +73,9 @@ def result2file(fpath):
line = tplt.format(i[0],i[1],i[2],i[3])
f.write(line)
f.write('\n')
print("1.2已将活跃用户点击率存入文件")
print("1.3已将活跃用户点击率存入文件")
f.write("#1.3点击answer用户占比(=点击answer用户数/曝光answer用户数)\n")
f.write("#1.4点击answer用户占比(=点击answer用户数/曝光answer用户数)\n")
f.write(tplt.format("平台","点击answer用户数","曝光answer用户数","击answer用户占比"))
all_click_answer_rate = get_all_click_answer_rate()
ios_click_answer_rate = get_ios_click_answer_rate()
......@@ -71,9 +85,9 @@ def result2file(fpath):
line = tplt.format(i[0],i[1],i[2],i[3])
f.write(line)
f.write('\n')
print("1.3已将点击answer用户占比存入文件")
print("1.4已将点击answer用户占比存入文件")
f.write("#1.4点击diary用户占比(=点击diary用户数/曝光diary用户数)\n")
f.write("#1.5点击diary用户占比(=点击diary用户数/曝光diary用户数)\n")
f.write(tplt.format("平台","点击diary用户数","曝光diary用户数","击diary用户占比"))
all_click_diary_rate = get_all_click_diary_rate()
ios_click_diary_rate = get_ios_click_diary_rate()
......@@ -83,9 +97,9 @@ def result2file(fpath):
line = tplt.format(i[0],i[1],i[2],i[3])
f.write(line)
f.write('\n')
print("1.4已将点击diary用户占比存入文件")
print("1.5已将点击diary用户占比存入文件")
f.write("#1.5无点击用户占比(=无点击用户数/有曝光用户数)\n")
f.write("#1.6无点击用户占比(=无点击用户数/有曝光用户数)\n")
f.write(tplt.format("平台","no点击用户数","have曝光用户数","no点击用户占比"))
all_click_zero_rate = get_all_click_zero_rate()
ios_click_zero_rate = get_ios_click_zero_rate()
......@@ -95,7 +109,7 @@ def result2file(fpath):
line = tplt.format(i[0],i[1],i[2],i[3])
f.write(line)
f.write('\n')
print("1.5已将无点击用户占比存入文件")
print("1.6已将无点击用户占比存入文件")
def main():
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment