Commit 413961fe authored by 张彦钊

Merge branch 'master' of git.wanmeizhensuo.com:ML/ffm-baseline

change main
parents 8cebc1d9 b49bb083
......@@ -26,9 +26,9 @@ def result2file(fpath):
1.5 无点击用户占比(=无点击用户数/有曝光用户数)
2.Top特征
2.1 用户点击次数分布(第一列:用户点击次数;第二列:独立用户数量)
2.2 Top 100 diary(sorted by ctr)
2.3 Top 100 Answer(sorted by ctr)
2.4 Top 100 Question(sorted by click times)
2.2 Top 100 diary (sorted by ctr)
2.3 Top 100 Answer (sorted by ctr)
2.4 Top 100 Question (sorted by click times)
......
DIRECTORY_PATH="/data2/models/eda/recommended_indexs/"
\ No newline at end of file
DIRECTORY_PATH="/data2/models/eda/test/"
\ No newline at end of file
......@@ -24,8 +24,8 @@ class TopFeatures(object):
def get_click_times(self):
# rtype : dict
if self.cid_type[-2] == 'e':
self.cid_type = self.cid_type.replace(' ','')
if self.platform[-2] == 'e':
self.platform = self.platform.replace(' ','')
sql = "select cid,count(cid) from data_feed_click \
where from_unixtime(time,'%Y-%m-%d')=date_add(curdate(), interval -{0} day) \
and device_type{1} and cid_type='{2}' \
......@@ -36,8 +36,8 @@ class TopFeatures(object):
def get_impression_times(self):
# rtype : dict
if self.cid_type[-2] == 'e':
self.cid_type = self.cid_type[:-6] + ' ' + self.cid_type[:-6:]
if self.platform[-2] == 'e':
self.platform = self.platform[:-6] + ' ' + self.platform[-6:]
sql = "select cid,count(cid) from data_feed_exposure \
where from_unixtime(time,'%Y-%m-%d')=date_add(curdate(), interval -{0} day) \
and device_type{1} and cid_type='{2}' \
......@@ -97,7 +97,7 @@ class TopFeatures(object):
tplt = "{0:\u3000<4}\t{1:\u3000<12}\t{2:\u3000^6}\t{3:\u3000^6}\t{4:\u3000<8}\t{5:\u3000^15}\n"
f.write("Top {0} {1}\n".format(self.top_n,self.cid_type))
sep = "=================================================================\n"
header = tplt.format("平台","{}_id".format(self.cid_type),"点击数","曝光数","点击率","{}链接".format(self.cid_type))
header = tplt.format("平台","{0}_id".format(self.cid_type),"点击数","曝光数","点击率","{1}链接".format(self.cid_type,self.cid_type))
f.write(sep)
f.write(header)
for i in result_lst:
......@@ -112,26 +112,68 @@ class TopFeatures(object):
def main():
    """Write one top-100 CTR report file per content type.

    For each content type ("diary", "answer", "question") a ``TopFeatures``
    object is created for every platform ("all", "ios", "android"). Its
    click counts and impression counts are fetched and passed to
    ``get_result``. The three per-platform results are collected in the
    order all / ios / android and written, through the ``result2file``
    method of the "all"-platform object, to::

        DIRECTORY_PATH + "top100_ctr_<content type>_<date>.txt"

    where ``<date>`` is whatever ``get_yesterday_date()`` returns.

    Every content type uses the same ``get_result`` arguments
    (``2, "ctr"``); each result is computed exactly once.
    """
    # NOTE(review): the first and last TopFeatures arguments (1 and 100) are
    # presumably "days back" and "top n" -- confirm against the constructor.
    platforms = ("all", "ios", "android")
    cid_types = ("diary", "answer", "question")

    for cid_type in cid_types:
        features = []
        result_lst = []
        for platform in platforms:
            top = TopFeatures(1, platform, cid_type, 100)
            clk_times = top.get_click_times()
            imp_times = top.get_impression_times()
            result_lst.append(top.get_result(clk_times, imp_times, 2, "ctr"))
            features.append(top)

        output_path = DIRECTORY_PATH + "top100_ctr_{0}_{1}.txt".format(
            cid_type, get_yesterday_date())
        # The report is written by the "all"-platform object, which is the
        # first entry because "all" leads the platforms tuple.
        features[0].result2file(result_lst, output_path)


if __name__ == '__main__':
    main()
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment