Commit 01b129f2 authored by 张彦钊

Merge branch 'master' of git.wanmeizhensuo.com:ML/ffm-baseline

change main function for test
parents ee6555f3 6a061403
......@@ -12,7 +12,6 @@ def con_sql(sql):
return result
#1 获取所有平台的0点击用户占比
def get_all_click_zero_rate():
sql = "select count(distinct(device_id)) from data_feed_click where from_unixtime(time,'%Y-%m-%d')=date_add(curdate(), interval -1 day)"
......
# -*- coding: UTF-8 -*-
from utils import con_sql
class CidRate(object):
    """Share of feed clicks / exposures that belong to one content type.

    Every query looks at a single calendar day, ``ndays`` days before the
    database's current date, and can be restricted to one device platform.
    """

    CLICK_TABLE = "data_feed_click"
    EXPOSURE_TABLE = "data_feed_exposure"

    def __init__(self, ndays, platform, cid_type):
        """
        ndays : 1;2;3;4.. #The number of days from the current time
        platform : 'all';'ios';'android'
        cid_type : 'diary';'answer';'question'...

        NOTE: the values are interpolated directly into SQL text, so they
        must come from trusted code, never from user input.
        """
        self.ndays = ndays
        # Stored as a ready-made SQL comparison that is appended to
        # "device_type" when a query is built.
        if platform == "ios":
            self.platform = "='AppStore'"
        elif platform == "android":
            self.platform = "!='AppStore'"
        else:
            self.platform = " is not null"
        self.cid_type = cid_type

    def _device_filter(self, table):
        """Return the device_type comparison to use against ``table``.

        Per the original author's note, the exposure table spells the store
        name with a space ('App Store') while the click table uses
        'AppStore'.  The filter is derived on every call instead of being
        written back to ``self.platform``, so repeated or interleaved calls
        on one instance always produce the same SQL.
        """
        condition = self.platform
        if not condition.endswith("Store'"):
            # " is not null" carries no store name, nothing to adjust.
            return condition
        condition = condition.replace(' ', '')
        if table == self.EXPOSURE_TABLE:
            condition = condition[:-6] + ' ' + condition[-6:]
        return condition

    def _count_sql(self, table, by_cid_type):
        """Build the ``count(cid)`` query for ``table``.

        by_cid_type : when true, only rows of ``self.cid_type`` are counted
        rtype : str
        """
        sql = (
            "select count(cid) from {0} "
            "where from_unixtime(time,'%Y-%m-%d')"
            "=date_add(curdate(), interval -{1} day) "
            "and device_type{2}"
        ).format(table, self.ndays, self._device_filter(table))
        if by_cid_type:
            sql += " and cid_type='{0}'".format(self.cid_type)
        return sql

    @staticmethod
    def _rate(part, total):
        """Return part/total rounded to 4 places, or 0.0 when total is 0."""
        if not total:
            # No rows at all for that day/platform: avoid ZeroDivisionError.
            return 0.0
        return round(part / total, 4)

    def _rate_row(self, table, platform):
        """Run both counts against ``table`` and assemble the result row."""
        # con_sql results are indexed as rows of columns; the count is the
        # first column of the first row.
        cid_count = con_sql(self._count_sql(table, True))[0][0]
        all_count = con_sql(self._count_sql(table, False))[0][0]
        return [platform, cid_count, all_count, self._rate(cid_count, all_count)]

    def get_cid_clk_rate(self, platform):
        """Return the share of clicks that went to ``self.cid_type``.

        platform : display label for the row, e.g. "所有";"苹果";"安卓"
        rtype : list -> [platform, cid clicks, all clicks, rate]
        """
        return self._rate_row(self.CLICK_TABLE, platform)

    def get_cid_imp_rate(self, platform):
        """Return the share of exposures that went to ``self.cid_type``.

        platform : display label for the row, e.g. "所有";"苹果";"安卓"
        rtype : list -> [platform, cid exposures, all exposures, rate]
        """
        return self._rate_row(self.EXPOSURE_TABLE, platform)
def main():
    """Query the 'answer' exposure share (ndays=1) for every platform."""
    # (platform selector passed to CidRate, label shown in the result row)
    targets = (("all", "所有"), ("ios", "苹果"), ("android", "安卓"))
    # The rows are only collected here; nothing consumes them yet.
    answer_rate_result = [
        CidRate(1, selector, "answer").get_cid_imp_rate(label)
        for selector, label in targets
    ]


if __name__ == '__main__':
    main()
# -*- coding: UTF-8 -*-
from utils import con_sql
class ClkCidUidRate(object):
    """Ratio of devices that clicked a content type to devices exposed to it.

    Both counts are distinct ``device_id`` counts for a single calendar day,
    ``ndays`` days before the database's current date.
    """

    CLICK_TABLE = "data_feed_click"
    EXPOSURE_TABLE = "data_feed_exposure"

    def __init__(self, ndays, platform, cid_type):
        """
        ndays : 1;2;3;4.. #The number of days from the current time
        platform : 'all';'ios';'android'
        cid_type : 'diary';'answer';'question';"everything"...

        NOTE: the values are interpolated directly into SQL text, so they
        must come from trusted code, never from user input.
        """
        self.ndays = ndays
        # Both attributes hold ready-made SQL comparisons that are appended
        # to the column name when a query is built.
        if platform == "ios":
            self.platform = "='AppStore'"
        elif platform == "android":
            self.platform = "!='AppStore'"
        else:
            self.platform = " is not null"
        if cid_type == "everything":
            # "everything" means any row that has a cid_type at all.
            self.cid_type = " is not null"
        else:
            self.cid_type = "='" + cid_type + "'"

    def _device_filter(self, table):
        """Return the device_type comparison to use against ``table``.

        Per the original author's note, the exposure table spells the store
        name with a space ('App Store') while the click table uses
        'AppStore'.  The filter is computed per call and ``self.platform``
        is left untouched.
        """
        condition = self.platform
        if not condition.endswith("Store'"):
            # " is not null" carries no store name, nothing to adjust.
            return condition
        condition = condition.replace(' ', '')
        if table == self.EXPOSURE_TABLE:
            condition = condition[:-6] + ' ' + condition[-6:]
        return condition

    def _count_sql(self, table):
        """Build the distinct-device count query for ``table``.

        rtype : str
        """
        return (
            "select count(distinct(device_id)) from {0} "
            "where from_unixtime(time,'%Y-%m-%d')"
            "=date_add(curdate(), interval -{1} day) "
            "and device_type{2} "
            "and cid_type{3}"
        ).format(table, self.ndays, self._device_filter(table), self.cid_type)

    @staticmethod
    def _rate(part, total):
        """Return part/total rounded to 4 places, or 0.0 when total is 0."""
        if not total:
            # E.g. a cid_type that never appears in the exposure table:
            # report 0.0 instead of raising ZeroDivisionError.
            return 0.0
        return round(part / total, 4)

    def get_clk_cid_uid_rate(self, platform):
        """Return clicking devices, exposed devices and their ratio.

        platform : display label for the row, e.g. "所有";"苹果";"安卓"
        rtype : list -> [platform, clicking devices, exposed devices, rate]
        """
        # con_sql results are indexed as rows of columns; the count is the
        # first column of the first row.
        clk_count = con_sql(self._count_sql(self.CLICK_TABLE))[0][0]
        imp_count = con_sql(self._count_sql(self.EXPOSURE_TABLE))[0][0]
        return [platform, clk_count, imp_count, self._rate(clk_count, imp_count)]

    def result2file(self, result_lst, fpath):
        """Placeholder: not implemented yet, does nothing and returns None."""
        pass
def main():
    """Query the clicking-device ratio (ndays=1) per content type and platform."""
    # (platform selector passed to ClkCidUidRate, label shown in the row)
    platforms = (("all", "所有"), ("ios", "苹果"), ("android", "安卓"))
    # (cid_type, progress message printed once its three rows are fetched)
    # "question" is intentionally left out: according to the original note,
    # the exposure table has no "question" cid type, so the exposure count
    # would be 0 and cannot be used as a denominator.
    reports = (
        ("diary", "已获取点击diary用户占比"),
        ("answer", "已获取点击answer用户占比"),
        ("everything", "已获取有点击用户占比"),
    )
    for cid_type, done_message in reports:
        # The rows are only collected here; nothing consumes them yet.
        click_result = [
            ClkCidUidRate(1, selector, cid_type).get_clk_cid_uid_rate(label)
            for selector, label in platforms
        ]
        print(done_message)


if __name__ == '__main__':
    main()
......@@ -6,10 +6,10 @@ from config import DIRECTORY_PATH
class TopFeatures(object):
def __init__(self, ndays, platform, cid_type, top_n=-1):
"""
ndays : 1;2;3;4..
ndays : 1;2;3;4.. #The number of days from the current time
platform : 'all';'ios';'android'
cid_type : 'diary';'answer';'question'...
top_n : the rows of the result
top_n : the top rows of the result
"""
self.ndays = ndays
if platform == "ios":
......@@ -36,7 +36,7 @@ class TopFeatures(object):
def get_impression_times(self):
# rtype : dict
if self.platform[-2] == 'e':
if self.platform[-2] == 'e':#注意:曝光表中AppStore有空格
self.platform = self.platform[:-6] + ' ' + self.platform[-6:]
sql = "select cid,count(cid) from data_feed_exposure \
where from_unixtime(time,'%Y-%m-%d')=date_add(curdate(), interval -{0} day) \
......@@ -45,12 +45,14 @@ class TopFeatures(object):
imp_times = tuple2dict(con_sql(sql))
return imp_times
def get_result(self, clk={}, imp={}, clk_n=2, result_types="ctr"):
def get_result(self, platform, clk={}, imp={}, clk_n=2, result_types="ctr"):
"""
platform : "所有";"苹果","安卓" #方便显示
cid_type : 'diary';'answer';'question';"everything"... #方便显示
clk : dict
imp : dict
clk_n : 获取topN点击率时,过滤的点击数
result_types : "clk";"imp";"ctr"
result_types : sorted by ["clk","imp","ctr"]
rtype : list
"""
topn = []
......@@ -61,7 +63,7 @@ class TopFeatures(object):
url = "http://m.igengmei.com/diary_book/" + i[i.index('|')+1:] + '/'
else:
url = "http://m.igengmei.com/{0}/".format(self.cid_type) + i[i.index('|')+1:] + '/'
topn.append((self.platform.strip(),i,clk[i],0,0,url))
topn.append((platform,i,clk[i],0,0,url))
topn.sort(key=lambda x:x[2],reverse=True)
return topn[:int(self.top_n)]
#获取topN的曝光
......@@ -71,7 +73,7 @@ class TopFeatures(object):
url = "http://m.igengmei.com/diary_book/" + i[i.index('|')+1:] + '/'
else:
url = "http://m.igengmei.com/{0}/".format(self.cid_type) + i[i.index('|')+1:] + '/'
topn.append((self.platform.strip(),i,0,imp[i],0,url))
topn.append((platform,i,0,imp[i],0,url))
topn.sort(key=lambda x:x[3],reverse=True)
return topn[:int(self.top_n)]
#获取topN的ctr
......@@ -82,12 +84,13 @@ class TopFeatures(object):
url = "http://m.igengmei.com/diary_book/" + i[i.index('|')+1:] + '/'
else:
url = "http://m.igengmei.com/{0}/".format(self.cid_type) + i[i.index('|')+1:] + '/'
topn.append((self.platform.strip(),i,clk[i],imp[i],round(clk[i]/imp[i],4),url))
topn.append((platform,i,clk[i],imp[i],round(clk[i]/imp[i],4),url))
topn.sort(key=lambda x:x[4],reverse=True)
return topn[:int(self.top_n)]
def result2file(self, result_lst, fpath):
"""
cid_type : 'diary';'answer';'question';"everything"... #方便显示
result_lst : [all_result,ios_result,android_result]
fpath : output filename
rtype : none
......@@ -117,62 +120,65 @@ def main():
top_diary_all = TopFeatures(1, "all", "diary", 100)
clk_diary_times_all = top_diary_all.get_click_times()
imp_diary_times_all = top_diary_all.get_impression_times()
clk_diary_ctr_all = top_diary_all.get_result(clk_diary_times_all, imp_diary_times_all, 2, "ctr")
clk_diary_ctr_all = top_diary_all.get_result("所有",clk_diary_times_all, imp_diary_times_all, 4, "ctr")
top_diary_ios = TopFeatures(1, "ios", "diary", 100)
clk_diary_times_ios = top_diary_ios.get_click_times()
imp_diary_times_ios = top_diary_ios.get_impression_times()
clk_diary_ctr_ios = top_diary_ios.get_result(clk_diary_times_ios, imp_diary_times_ios, 2, "ctr")
clk_diary_ctr_ios = top_diary_ios.get_result("苹果",clk_diary_times_ios, imp_diary_times_ios, 4, "ctr")
top_diary_android = TopFeatures(1, "android", "diary", 100)
clk_diary_times_android = top_diary_android.get_click_times()
imp_diary_times_android = top_diary_android.get_impression_times()
clk_diary_ctr_android = top_diary_android.get_result(clk_diary_times_android, imp_diary_times_android, 2, "ctr")
clk_diary_ctr_android = top_diary_android.get_result("安卓",clk_diary_times_android, imp_diary_times_android, 4, "ctr")
result_lst = [clk_diary_ctr_all, clk_diary_ctr_ios, clk_diary_ctr_android]
output_path = DIRECTORY_PATH + "top100_ctr_diary_{}.txt".format(get_yesterday_date())
top_diary_all.result2file(result_lst, output_path)
print("已获取 Top diary 特征")
#2. Top answer
top_answer_all = TopFeatures(1, "all", "answer", 100)
clk_answer_times_all = top_answer_all.get_click_times()
imp_answer_times_all = top_answer_all.get_impression_times()
clk_answer_ctr_all = top_answer_all.get_result(clk_answer_times_all, imp_answer_times_all, 2, "ctr")
clk_answer_ctr_all = top_answer_all.get_result("所有",clk_answer_times_all, imp_answer_times_all, 2, "ctr")
top_answer_ios = TopFeatures(1, "ios", "answer", 100)
clk_answer_times_ios = top_answer_ios.get_click_times()
imp_answer_times_ios = top_answer_ios.get_impression_times()
clk_answer_ctr_ios = top_answer_ios.get_result(clk_answer_times_ios, imp_answer_times_ios, 2, "ctr")
clk_answer_ctr_ios = top_answer_ios.get_result("苹果",clk_answer_times_ios, imp_answer_times_ios, 2, "ctr")
top_answer_android = TopFeatures(1, "android", "answer", 100)
clk_answer_times_android = top_answer_android.get_click_times()
imp_answer_times_android = top_answer_android.get_impression_times()
clk_answer_ctr_android = top_answer_android.get_result(clk_answer_times_android, imp_answer_times_android, 2, "ctr")
clk_answer_ctr_android = top_answer_android.get_result("安卓",clk_answer_times_android, imp_answer_times_android, 2, "ctr")
result_lst = [clk_answer_ctr_all, clk_answer_ctr_ios, clk_answer_ctr_android]
output_path = DIRECTORY_PATH + "top100_ctr_answer_{}.txt".format(get_yesterday_date())
top_answer_all.result2file(result_lst, output_path)
print("已获取 Top answer 特征")
#3. Top question
top_question_all = TopFeatures(1, "all", "question", 100)
clk_question_times_all = top_question_all.get_click_times()
imp_question_times_all = top_question_all.get_impression_times()
clk_question_ctr_all = top_question_all.get_result(clk_question_times_all, imp_question_times_all, 2, "ctr")
clk_question_ctr_all = top_question_all.get_result("所有",clk_question_times_all, imp_question_times_all, 2, "ctr")
top_question_ios = TopFeatures(1, "ios", "question", 100)
clk_question_times_ios = top_question_ios.get_click_times()
imp_question_times_ios = top_question_ios.get_impression_times()
clk_question_ctr_ios = top_question_ios.get_result(clk_question_times_ios, imp_question_times_ios, 2, "ctr")
clk_question_ctr_ios = top_question_ios.get_result("苹果",clk_question_times_ios, imp_question_times_ios, 2, "ctr")
top_question_android = TopFeatures(1, "android", "question", 100)
clk_question_times_android = top_question_android.get_click_times()
imp_question_times_android = top_question_android.get_impression_times()
clk_question_ctr_android = top_question_android.get_result(clk_question_times_android, imp_question_times_android, 2, "ctr")
clk_question_ctr_android = top_question_android.get_result("安卓",clk_question_times_android, imp_question_times_android, 2, "ctr")
result_lst = [clk_question_ctr_all, clk_question_ctr_ios, clk_question_ctr_android]
output_path = DIRECTORY_PATH + "top100_ctr_question_{}.txt".format(get_yesterday_date())
top_question_all.result2file(result_lst, output_path)
print("已获取 Top question 特征")
if __name__ == '__main__':
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment