1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
# -*- coding: UTF-8 -*-
import datetime
from getAnswerImpRate import get_all_answer_imp_rate,get_ios_answer_imp_rate,get_android_answer_imp_rate
from getDiaryImpRate import get_all_diary_imp_rate,get_ios_diary_imp_rate,get_android_diary_imp_rate
from getActivateUidCtr import get_all_click_one_rate,get_ios_click_one_rate,get_android_click_one_rate
from getClickAnswerUidRate import get_all_click_answer_rate,get_ios_click_answer_rate,get_android_click_answer_rate
from getClickDiaryUidRate import get_all_click_diary_rate,get_ios_click_diary_rate,get_android_click_diary_rate
from getClickZeroUidRate import get_all_click_zero_rate,get_ios_click_zero_rate,get_android_click_zero_rate
def get_yesterday_date(today: "datetime.date | None" = None) -> str:
    """Return the day before *today* formatted as ``YYYYMMDD``.

    Args:
        today: Reference date. When omitted (the original call style), the
            local system date from ``datetime.date.today()`` is used.
            Passing an explicit date allows a report to be regenerated for
            a past day.

    Returns:
        The previous calendar day as an 8-character string such as
        ``"20240229"``.
    """
    if today is None:
        today = datetime.date.today()
    previous_day = today - datetime.timedelta(days=1)
    return previous_day.strftime("%Y%m%d")
# Row layout shared by header and data lines: four left-aligned columns
# (widths 6/15/15/15) padded with U+3000 (ideographic space) and separated by
# tabs. The full-width fill is presumably there so CJK labels line up.
_ROW_TEMPLATE = "{0:\u3000<6}\t{1:\u3000<15}\t{2:\u3000<15}\t{3:\u3000<15}\n"

# Preamble written at the top of the report; ``{}`` receives the data date.
_REPORT_INTRO = """数据日期:{}
内容概览:以下所有数据都是首页的
1. 比例特征
1.1 answer曝光占比(=answer被曝光数/总cid被曝光数)
1.2 diary曝光占比(=diary被曝光数/总cid被曝光数)
1.3 活跃用户点击率(=有点击用户点击次数/有点击用户曝光次数)
1.4 点击answer用户占比(=点击answer用户数/曝光answer用户数)
1.5 点击diary用户占比(=点击diary用户数/曝光diary用户数)
1.6 无点击用户占比(=无点击用户数/有曝光用户数)
2.Top特征
2.1 用户点击次数分布(第一列:用户点击次数;第二列:独立用户数量)
2.2 Top 100 diary (sorted by ctr)
2.3 Top 100 Answer (sorted by ctr)
2.4 Top 100 Question (sorted by ctr)
具体内容:以下所有数据都是首页的
"""


def _rate_sections():
    """Return the six ratio sections in the order they appear in the report.

    Each entry is ``(title_line, column_headers, fetchers, done_message)``
    where ``fetchers`` are the all / iOS / Android getter functions for that
    metric. The table is built inside a function so the getters are looked
    up only when a report is actually generated.
    """
    return (
        (
            "#1.1answer曝光占比(=answer被曝光数/总cid被曝光数)\n",
            ("平台", "answer被曝光数", "总cid被曝光数", "answer被曝光数占比"),
            (get_all_answer_imp_rate, get_ios_answer_imp_rate, get_android_answer_imp_rate),
            "1.1已将answer曝光占比存入文件",
        ),
        (
            "#1.2diary曝光占比(=diary被曝光数/总cid被曝光数)\n",
            ("平台", "diary被曝光数", "总cid被曝光数", "diary被曝光数占比"),
            (get_all_diary_imp_rate, get_ios_diary_imp_rate, get_android_diary_imp_rate),
            "1.2已将diary曝光占比存入文件",
        ),
        (
            "#1.3活跃用户点击率(=有点击用户点击次数/有点击用户曝光次数)\n",
            ("平台", "active用户点击次数", "active用户曝光次数", "active用户点击率"),
            (get_all_click_one_rate, get_ios_click_one_rate, get_android_click_one_rate),
            "1.3已将活跃用户点击率存入文件",
        ),
        (
            "#1.4点击answer用户占比(=点击answer用户数/曝光answer用户数)\n",
            # Last label previously read "击answer用户占比" (leading "点" dropped).
            ("平台", "点击answer用户数", "曝光answer用户数", "点击answer用户占比"),
            (get_all_click_answer_rate, get_ios_click_answer_rate, get_android_click_answer_rate),
            "1.4已将点击answer用户占比存入文件",
        ),
        (
            "#1.5点击diary用户占比(=点击diary用户数/曝光diary用户数)\n",
            # Last label previously read "击diary用户占比" (leading "点" dropped).
            ("平台", "点击diary用户数", "曝光diary用户数", "点击diary用户占比"),
            (get_all_click_diary_rate, get_ios_click_diary_rate, get_android_click_diary_rate),
            "1.5已将点击diary用户占比存入文件",
        ),
        (
            "#1.6无点击用户占比(=无点击用户数/有曝光用户数)\n",
            ("平台", "no点击用户数", "have曝光用户数", "no点击用户占比"),
            (get_all_click_zero_rate, get_ios_click_zero_rate, get_android_click_zero_rate),
            "1.6已将无点击用户占比存入文件",
        ),
    )


def _write_rate_section(out, title_line, headers, fetchers):
    """Write one section: title, header row, one data row per fetcher, blank line.

    The title and header are written before any fetcher runs, and all
    fetchers run before the first data row is written, matching the order of
    effects in the original hand-unrolled code.
    """
    out.write(title_line)
    out.write(_ROW_TEMPLATE.format(*headers))
    rows = [fetch() for fetch in fetchers]
    for row in rows:
        # Only the first four items of each result are used. They presumably
        # hold (platform, numerator, denominator, ratio) to match the header
        # labels -- confirm against the getter modules.
        out.write(_ROW_TEMPLATE.format(row[0], row[1], row[2], row[3]))
    out.write("\n")


def result2file(fpath) -> None:
    """Write the ratio-feature report to *fpath*, overwriting any existing file.

    The report consists of a preamble stamped with ``get_yesterday_date()``,
    a "#1." heading with a separator line, and six sections (1.1 - 1.6). Each
    section holds a header row followed by the all / iOS / Android rows
    returned by the corresponding getter functions. A progress message is
    printed to stdout after each section is written.

    Args:
        fpath: Destination path, passed straight to ``open()``.

    Raises:
        OSError: If the file cannot be opened for writing.
        Any exception raised by a getter propagates unchanged; the file then
        contains whatever had been written up to that point.
    """
    # The content is Chinese text, so pin the encoding instead of relying on
    # the process locale (a C/POSIX locale would raise UnicodeEncodeError).
    with open(fpath, "w", encoding="utf-8") as report:
        report.write(_REPORT_INTRO.format(get_yesterday_date()))
        report.write("#1. 比例特征\n")
        report.write("=================================================================\n")
        for title_line, headers, fetchers, done_message in _rate_sections():
            _write_rate_section(report, title_line, headers, fetchers)
            print(done_message)
def main():
    """Generate yesterday's ratio-feature report at its fixed output location.

    The target file name embeds the value of ``get_yesterday_date()``.
    Start and finish messages are printed to stdout around the call to
    ``result2file``.
    """
    date_tag = get_yesterday_date()
    report_path = "/data2/models/eda/recommended_indexs/1rate_features_%s.txt" % date_tag
    print("开始获取比例特征...")
    result2file(report_path)
    print("已完成所有比例特征提取")
# Script entry point: build the report only when this file is executed
# directly, not when it is imported.
if __name__ == "__main__":
    main()