Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in
Toggle navigation
F
ffm-baseline
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
ML
ffm-baseline
Commits
804777eb
Commit
804777eb
authored
Aug 22, 2018
by
高雅喆
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
add two question indexs
parent
99908daa
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
40 additions
and
10 deletions
+40
-10
main.py
eda/recommended_indexs/main.py
+40
-10
No files found.
eda/recommended_indexs/main.py
View file @
804777eb
...
...
@@ -8,6 +8,13 @@ from func import *
print
(
"开始获取特征数据..."
)
#1.0 question曝光占比(=question被曝光数/总cid被曝光数)
question_imp_rate_all
=
CidRate
(
"all"
,
"question"
)
.
get_cid_imp_rate
(
"所有"
)
question_imp_rate_ios
=
CidRate
(
"ios"
,
"question"
)
.
get_cid_imp_rate
(
"苹果"
)
question_imp_rate_android
=
CidRate
(
"android"
,
"question"
)
.
get_cid_imp_rate
(
"安卓"
)
question_imp_rate_result
=
[
question_imp_rate_all
,
question_imp_rate_ios
,
question_imp_rate_android
]
print
(
"已获取question曝光占比"
)
#1.1 answer曝光占比(=answer被曝光数/总cid被曝光数)
answer_imp_rate_all
=
CidRate
(
"all"
,
"answer"
)
.
get_cid_imp_rate
(
"所有"
)
answer_imp_rate_ios
=
CidRate
(
"ios"
,
"answer"
)
.
get_cid_imp_rate
(
"苹果"
)
...
...
@@ -42,21 +49,28 @@ click_answer_android = ClkCidUidRate("android","answer").get_clk_cid_uid_rate("
click_answer_result
=
[
click_answer_all
,
click_answer_ios
,
click_answer_android
]
print
(
"已获取点击answer用户占比"
)
#1.6 点击diary用户占比(=点击diary用户数/曝光diary用户数)
#1.6 点击question用户占比(=点击question用户数/曝光question用户数)
click_question_all
=
ClkCidUidRate
(
"all"
,
"question"
)
.
get_clk_cid_uid_rate
(
"所有"
)
click_question_ios
=
ClkCidUidRate
(
"ios"
,
"question"
)
.
get_clk_cid_uid_rate
(
"苹果"
)
click_question_android
=
ClkCidUidRate
(
"android"
,
"question"
)
.
get_clk_cid_uid_rate
(
"安卓"
)
click_question_result
=
[
click_question_all
,
click_question_ios
,
click_question_android
]
print
(
"已获取点击question用户占比"
)
#1.7 点击diary用户占比(=点击diary用户数/曝光diary用户数)
click_diary_all
=
ClkCidUidRate
(
"all"
,
"diary"
)
.
get_clk_cid_uid_rate
(
"所有"
)
click_diary_ios
=
ClkCidUidRate
(
"ios"
,
"diary"
)
.
get_clk_cid_uid_rate
(
"苹果"
)
click_diary_android
=
ClkCidUidRate
(
"android"
,
"diary"
)
.
get_clk_cid_uid_rate
(
"安卓"
)
click_diary_result
=
[
click_diary_all
,
click_diary_ios
,
click_diary_android
]
print
(
"已获取点击diary用户占比"
)
#1.
7
有点击用户占比(=有点击用户数/有曝光用户数)
#1.
8
有点击用户占比(=有点击用户数/有曝光用户数)
click_everything_all
=
ClkCidUidRate
(
"all"
,
"everything"
)
.
get_clk_cid_uid_rate
(
"所有"
)
click_everything_ios
=
ClkCidUidRate
(
"ios"
,
"everything"
)
.
get_clk_cid_uid_rate
(
"苹果"
)
click_everything_android
=
ClkCidUidRate
(
"android"
,
"everything"
)
.
get_clk_cid_uid_rate
(
"安卓"
)
click_everything_result
=
[
click_everything_all
,
click_everything_ios
,
click_everything_android
]
print
(
"已获取有点击用户占比"
)
#1.
8
无点击用户数分布(=无点击用户∩激活用户 / 激活用户数) #注意:(]里面的数字指的是距离当前时间的天数
#1.
9
无点击用户数分布(=无点击用户∩激活用户 / 激活用户数) #注意:(]里面的数字指的是距离当前时间的天数
try
:
click_zero_uid_detail_all
=
get_click_zero_uid_rate_detail
(
"all"
)
click_zero_uid_detail_all
[
"platform"
]
=
"所有"
...
...
@@ -107,14 +121,16 @@ def result2file():
line
=
"""数据日期:{}
内容概览:以下所有数据都是昨天一天的首页的
1. 比例特征
1.0 question曝光占比(=question被曝光数/总cid被曝光数)
1.1 answer曝光占比(=answer被曝光数/总cid被曝光数)
1.2 diary曝光占比(=diary被曝光数/总cid被曝光数)
1.3 活跃用户点击率(=有点击用户点击次数/有点击用户曝光次数)
1.4 活跃用户平均每天曝光次数(=活跃用户曝光次数/独立活跃用户数)
1.5 点击answer用户占比(=点击answer用户数/曝光answer用户数)
1.6 点击diary用户占比(=点击diary用户数/曝光diary用户数)
1.7 有点击用户占比(=有点击用户数/有曝光用户数)
1.8 无点击用户数分布(=无点击用户∩激活用户 / 激活用户数) #注意:平台一排的数字指的是距离当前时间的天数
1.6 点击question用户占比(=点击question用户数/曝光question用户数)
1.7 点击diary用户占比(=点击diary用户数/曝光diary用户数)
1.8 有点击用户占比(=有点击用户数/有曝光用户数)
1.9 无点击用户数分布(=无点击用户∩激活用户 / 激活用户数) #注意:平台一排的数字指的是距离当前时间的天数
2.Top特征
2.1 用户点击次数分布(第一列:用户点击次数;第二列:独立用户数量)
2.2 Top 100 diary (sorted by ctr)
...
...
@@ -128,6 +144,12 @@ def result2file():
f
.
write
(
line
)
f
.
write
(
"#1. 比例特征
\n
"
)
f
.
write
(
"=================================================================
\n
"
)
f
.
write
(
"#1.0question曝光占比(=question被曝光数/总cid被曝光数)
\n
"
)
f
.
write
(
tplt
.
format
(
"平台"
,
"question被曝光数"
,
"总cid被曝光数"
,
"question被曝光数占比"
))
for
i
in
question_imp_rate_result
:
line
=
tplt
.
format
(
i
[
0
],
i
[
1
],
i
[
2
],
"{}
%
"
.
format
(
round
(
i
[
3
]
*
100
,
2
)))
f
.
write
(
line
)
f
.
write
(
'
\n
'
)
f
.
write
(
"#1.1answer曝光占比(=answer被曝光数/总cid被曝光数)
\n
"
)
f
.
write
(
tplt
.
format
(
"平台"
,
"answer被曝光数"
,
"总cid被曝光数"
,
"answer被曝光数占比"
))
for
i
in
answer_imp_rate_result
:
...
...
@@ -158,20 +180,26 @@ def result2file():
line
=
tplt
.
format
(
i
[
0
],
i
[
1
],
i
[
2
],
"{}
%
"
.
format
(
round
(
i
[
3
]
*
100
,
2
)))
f
.
write
(
line
)
f
.
write
(
'
\n
'
)
f
.
write
(
"#1.6点击diary用户占比(=点击diary用户数/曝光diary用户数)
\n
"
)
f
.
write
(
"#1.6点击question用户占比(=点击question用户数/曝光question用户数)
\n
"
)
f
.
write
(
tplt
.
format
(
"平台"
,
"点击question用户数"
,
"曝光question用户数"
,
"击question用户占比"
))
for
i
in
click_question_result
:
line
=
tplt
.
format
(
i
[
0
],
i
[
1
],
i
[
2
],
"{}
%
"
.
format
(
round
(
i
[
3
]
*
100
,
2
)))
f
.
write
(
line
)
f
.
write
(
'
\n
'
)
f
.
write
(
"#1.7点击diary用户占比(=点击diary用户数/曝光diary用户数)
\n
"
)
f
.
write
(
tplt
.
format
(
"平台"
,
"点击diary用户数"
,
"曝光diary用户数"
,
"击diary用户占比"
))
for
i
in
click_diary_result
:
line
=
tplt
.
format
(
i
[
0
],
i
[
1
],
i
[
2
],
"{}
%
"
.
format
(
round
(
i
[
3
]
*
100
,
2
)))
f
.
write
(
line
)
f
.
write
(
'
\n
'
)
f
.
write
(
"#1.
7
有点击用户占比(=有点击用户数/有曝光用户数)
\n
"
)
f
.
write
(
"#1.
8
有点击用户占比(=有点击用户数/有曝光用户数)
\n
"
)
f
.
write
(
tplt
.
format
(
"平台"
,
"have点击用户数"
,
"have曝光用户数"
,
"have点击用户占比"
))
for
i
in
click_everything_result
:
line
=
tplt
.
format
(
i
[
0
],
i
[
1
],
i
[
2
],
"{}
%
"
.
format
(
round
(
i
[
3
]
*
100
,
2
)))
f
.
write
(
line
)
f
.
write
(
'
\n
'
)
if
"click_zero_uid_detail_result"
in
dir
():
f
.
write
(
"#1.
8
无点击用户数分布(=无点击用户∩激活用户 / 激活用户数) #注意:(]里面的数字指的是距离当前时间的天数
\n
"
)
f
.
write
(
"#1.
9
无点击用户数分布(=无点击用户∩激活用户 / 激活用户数) #注意:(]里面的数字指的是距离当前时间的天数
\n
"
)
f
.
write
(
"平台"
+
'
\t\t
'
+
"0-7"
+
'
\t\t
'
+
"7-14"
+
'
\t\t
'
+
\
"14-30"
+
'
\t\t
'
+
"30-60"
+
'
\t\t
'
+
"60-90"
+
'
\t\t
'
+
"90+"
+
'
\n
'
)
for
i
in
click_zero_uid_detail_result
:
...
...
@@ -243,7 +271,9 @@ def rate2file():
str
(
activate_uid_imp_all
[
3
])
+
','
+
str
(
activate_uid_imp_beijing
[
3
])
+
','
+
\
str
(
click_answer_all
[
3
])
+
','
+
str
(
click_answer_ios
[
3
])
+
','
+
str
(
click_answer_android
[
3
])
+
','
+
\
str
(
click_diary_all
[
3
])
+
','
+
str
(
click_diary_ios
[
3
])
+
','
+
str
(
click_diary_android
[
3
])
+
','
+
\
str
(
click_everything_all
[
3
])
+
','
+
str
(
click_everything_ios
[
3
])
+
','
+
str
(
click_everything_android
[
3
])
+
'
\n
'
str
(
click_everything_all
[
3
])
+
','
+
str
(
click_everything_ios
[
3
])
+
','
+
str
(
click_everything_android
[
3
])
+
','
+
\
str
(
question_imp_rate_all
[
3
])
+
','
+
str
(
question_imp_rate_ios
[
3
])
+
','
+
str
(
question_imp_rate_android
[
3
])
+
','
+
\
str
(
click_question_all
[
3
])
+
','
+
str
(
click_question_ios
[
3
])
+
','
+
str
(
click_question_android
[
3
])
+
'
\n
'
f
.
write
(
line
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment