Commit 62b8bc34 authored by 高雅喆's avatar 高雅喆

Add a comment and a new class ClkCidUidRate

parent b49bb083
# -*- coding: UTF-8 -*-
from utils import con_sql,tuple2dict,get_yesterday_date
from config import DIRECTORY_PATH
class ClkCidUidRate(object):
    """Rate of users who clicked a given content type, split by platform.

    Only the constructor does real work so far: ``get_clk_cid_uid_rate`` and
    ``result2file`` are placeholders that currently return ``None``.
    """

    def __init__(self, ndays, platform, cid_type):
        """Store the query parameters.

        ndays    : 1, 2, 3, 4, ... -- the number of days back from the
                   current time.
        platform : 'all', 'ios' or 'android'. It is translated into a
                   comparison fragment stored on ``self.platform``; any value
                   other than 'ios' / 'android' is treated like 'all'.
        cid_type : 'diary', 'answer', 'question', 'everything', 'nothing', ...
        """
        self.ndays = ndays
        # NOTE(review): these fragments are presumably appended to a column
        # name inside a SQL WHERE clause -- confirm once the query exists.
        if platform == "ios":
            self.platform = "='AppStore'"
        elif platform == "android":
            self.platform = "!='AppStore'"
        else:
            self.platform = " is not null"
        self.cid_type = cid_type

    def get_clk_cid_uid_rate(self, platform):
        """Placeholder for the click-user rate query.

        platform : human-readable platform label, passed for display purposes
                   (callers pass the words for "all", "Apple" and "Android").
        rtype    : list (intended); the method is not implemented yet and
                   currently returns None.
        """
        pass

    def result2file(self, result_lst, fpath):
        """Placeholder for writing ``result_lst`` to ``fpath``.

        Not implemented yet: nothing is written and None is returned.
        """
        pass
def main():
    """Collect the click-user rate for every content type on every platform.

    For each content type, in order:
      1. users who clicked a diary
      2. users who clicked an answer
      3. users who clicked a question
      4. users with at least one click ("everything")
      5. users with no click ("nothing")
    a ``ClkCidUidRate`` for yesterday (``ndays=1``) is built for the
    platforms all / ios / android, in that order, and queried with the
    matching display label.
    """
    # (constructor platform key, display label) pairs, in reporting order.
    platforms = (("all", "所有"), ("ios", "苹果"), ("android", "安卓"))
    cid_types = ("diary", "answer", "question", "everything", "nothing")

    # cid_type -> [all, ios, android] results, mirroring the former
    # click_<cid_type>_result lists.
    # NOTE(review): the results are collected but not written anywhere yet.
    click_results = {}
    for cid_type in cid_types:
        per_platform = []
        for platform_key, label in platforms:
            rate = ClkCidUidRate(1, platform_key, cid_type)
            per_platform.append(rate.get_clk_cid_uid_rate(label))
        click_results[cid_type] = per_platform
......@@ -6,10 +6,10 @@ from config import DIRECTORY_PATH
class TopFeatures(object):
def __init__(self, ndays, platform, cid_type, top_n=-1):
"""
ndays : 1;2;3;4..
ndays : 1;2;3;4.. #The number of days from the current time
platform : 'all';'ios';'android'
cid_type : 'diary';'answer';'question'...
top_n : the rows of the result
top_n : the top rows of the result
"""
self.ndays = ndays
if platform == "ios":
......@@ -36,7 +36,7 @@ class TopFeatures(object):
def get_impression_times(self):
# rtype : dict
if self.platform[-2] == 'e':
if self.platform[-2] == 'e':#注意:曝光表中AppStore有空格
self.platform = self.platform[:-6] + ' ' + self.platform[-6:]
sql = "select cid,count(cid) from data_feed_exposure \
where from_unixtime(time,'%Y-%m-%d')=date_add(curdate(), interval -{0} day) \
......@@ -45,12 +45,13 @@ class TopFeatures(object):
imp_times = tuple2dict(con_sql(sql))
return imp_times
def get_result(self, clk={}, imp={}, clk_n=2, result_types="ctr"):
def get_result(self, platform, clk={}, imp={}, clk_n=2, result_types="ctr"):
"""
platform : "所有";"苹果","安卓" #方便显示
clk : dict
imp : dict
clk_n : 获取topN点击率时,过滤的点击数
result_types : "clk";"imp";"ctr"
result_types : sorted by ["clk","imp","ctr"]
rtype : list
"""
topn = []
......@@ -61,7 +62,7 @@ class TopFeatures(object):
url = "http://m.igengmei.com/diary_book/" + i[i.index('|')+1:] + '/'
else:
url = "http://m.igengmei.com/{0}/".format(self.cid_type) + i[i.index('|')+1:] + '/'
topn.append((self.platform.strip(),i,clk[i],0,0,url))
topn.append((platform,i,clk[i],0,0,url))
topn.sort(key=lambda x:x[2],reverse=True)
return topn[:int(self.top_n)]
#获取topN的曝光
......@@ -71,7 +72,7 @@ class TopFeatures(object):
url = "http://m.igengmei.com/diary_book/" + i[i.index('|')+1:] + '/'
else:
url = "http://m.igengmei.com/{0}/".format(self.cid_type) + i[i.index('|')+1:] + '/'
topn.append((self.platform.strip(),i,0,imp[i],0,url))
topn.append((platform,i,0,imp[i],0,url))
topn.sort(key=lambda x:x[3],reverse=True)
return topn[:int(self.top_n)]
#获取topN的ctr
......@@ -82,7 +83,7 @@ class TopFeatures(object):
url = "http://m.igengmei.com/diary_book/" + i[i.index('|')+1:] + '/'
else:
url = "http://m.igengmei.com/{0}/".format(self.cid_type) + i[i.index('|')+1:] + '/'
topn.append((self.platform.strip(),i,clk[i],imp[i],round(clk[i]/imp[i],4),url))
topn.append((platform,i,clk[i],imp[i],round(clk[i]/imp[i],4),url))
topn.sort(key=lambda x:x[4],reverse=True)
return topn[:int(self.top_n)]
......@@ -117,58 +118,61 @@ def main():
top_diary_all = TopFeatures(1, "all", "diary", 100)
clk_diary_times_all = top_diary_all.get_click_times()
imp_diary_times_all = top_diary_all.get_impression_times()
clk_diary_ctr_all = top_diary_all.get_result(clk_diary_times_all, imp_diary_times_all, 2, "ctr")
clk_diary_ctr_all = top_diary_all.get_result("所有",clk_diary_times_all, imp_diary_times_all, 4, "ctr")
top_diary_ios = TopFeatures(1, "ios", "diary", 100)
clk_diary_times_ios = top_diary_ios.get_click_times()
imp_diary_times_ios = top_diary_ios.get_impression_times()
clk_diary_ctr_ios = top_diary_ios.get_result(clk_diary_times_ios, imp_diary_times_ios, 2, "ctr")
clk_diary_ctr_ios = top_diary_ios.get_result("苹果",clk_diary_times_ios, imp_diary_times_ios, 4, "ctr")
top_diary_android = TopFeatures(1, "android", "diary", 100)
clk_diary_times_android = top_diary_android.get_click_times()
imp_diary_times_android = top_diary_android.get_impression_times()
clk_diary_ctr_android = top_diary_android.get_result(clk_diary_times_android, imp_diary_times_android, 2, "ctr")
clk_diary_ctr_android = top_diary_android.get_result("安卓",clk_diary_times_android, imp_diary_times_android, 4, "ctr")
result_lst = [clk_diary_ctr_all, clk_diary_ctr_ios, clk_diary_ctr_android]
output_path = DIRECTORY_PATH + "top100_ctr_diary_{}.txt".format(get_yesterday_date())
top_diary_all.result2file(result_lst, output_path)
print("已获取 Top diary 特征")
#2. Top answer
top_answer_all = TopFeatures(1, "all", "answer", 100)
clk_answer_times_all = top_answer_all.get_click_times()
imp_answer_times_all = top_answer_all.get_impression_times()
clk_answer_ctr_all = top_answer_all.get_result(clk_answer_times_all, imp_answer_times_all, 2, "ctr")
clk_answer_ctr_all = top_answer_all.get_result("所有",clk_answer_times_all, imp_answer_times_all, 2, "ctr")
top_answer_ios = TopFeatures(1, "ios", "answer", 100)
clk_answer_times_ios = top_answer_ios.get_click_times()
imp_answer_times_ios = top_answer_ios.get_impression_times()
clk_answer_ctr_ios = top_answer_ios.get_result(clk_answer_times_ios, imp_answer_times_ios, 2, "ctr")
clk_answer_ctr_ios = top_answer_ios.get_result("苹果",clk_answer_times_ios, imp_answer_times_ios, 2, "ctr")
top_answer_android = TopFeatures(1, "android", "answer", 100)
clk_answer_times_android = top_answer_android.get_click_times()
imp_answer_times_android = top_answer_android.get_impression_times()
clk_answer_ctr_android = top_answer_android.get_result(clk_answer_times_android, imp_answer_times_android, 2, "ctr")
clk_answer_ctr_android = top_answer_android.get_result("安卓",clk_answer_times_android, imp_answer_times_android, 2, "ctr")
result_lst = [clk_answer_ctr_all, clk_answer_ctr_ios, clk_answer_ctr_android]
output_path = DIRECTORY_PATH + "top100_ctr_answer_{}.txt".format(get_yesterday_date())
top_answer_all.result2file(result_lst, output_path)
print("已获取 Top answer 特征")
#3. Top question
top_question_all = TopFeatures(1, "all", "question", 100)
clk_question_times_all = top_question_all.get_click_times()
imp_question_times_all = top_question_all.get_impression_times()
clk_question_ctr_all = top_question_all.get_result(clk_question_times_all, imp_question_times_all, 2, "ctr")
clk_question_ctr_all = top_question_all.get_result("所有",clk_question_times_all, imp_question_times_all, 2, "ctr")
top_question_ios = TopFeatures(1, "ios", "question", 100)
clk_question_times_ios = top_question_ios.get_click_times()
imp_question_times_ios = top_question_ios.get_impression_times()
clk_question_ctr_ios = top_question_ios.get_result(clk_question_times_ios, imp_question_times_ios, 2, "ctr")
clk_question_ctr_ios = top_question_ios.get_result("苹果",clk_question_times_ios, imp_question_times_ios, 2, "ctr")
top_question_android = TopFeatures(1, "android", "question", 100)
clk_question_times_android = top_question_android.get_click_times()
imp_question_times_android = top_question_android.get_impression_times()
clk_question_ctr_android = top_question_android.get_result(clk_question_times_android, imp_question_times_android, 2, "ctr")
clk_question_ctr_android = top_question_android.get_result("安卓",clk_question_times_android, imp_question_times_android, 2, "ctr")
print("已获取 Top question 特征")
result_lst = [clk_question_ctr_all, clk_question_ctr_ios, clk_question_ctr_android]
output_path = DIRECTORY_PATH + "top100_ctr_question_{}.txt".format(get_yesterday_date())
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment