Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in
Toggle navigation
F
ffm-baseline
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
ML
ffm-baseline
Commits
d9ed33cc
Commit
d9ed33cc
authored
Aug 22, 2018
by
张彦钊
Browse files
Options
Browse Files
Download
Plain Diff
Merge branch 'master' of git.wanmeizhensuo.com:ML/ffm-baseline
add filter cid
parents
eef1f0c7
7be44615
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
40 additions
and
10 deletions
+40
-10
main.py
eda/recommended_indexs/main.py
+40
-10
No files found.
eda/recommended_indexs/main.py
View file @
d9ed33cc
...
@@ -8,6 +8,13 @@ from func import *
...
@@ -8,6 +8,13 @@ from func import *
print
(
"开始获取特征数据..."
)
print
(
"开始获取特征数据..."
)
#1.0 question曝光占比(=question被曝光数/总cid被曝光数)
question_imp_rate_all
=
CidRate
(
"all"
,
"question"
)
.
get_cid_imp_rate
(
"所有"
)
question_imp_rate_ios
=
CidRate
(
"ios"
,
"question"
)
.
get_cid_imp_rate
(
"苹果"
)
question_imp_rate_android
=
CidRate
(
"android"
,
"question"
)
.
get_cid_imp_rate
(
"安卓"
)
question_imp_rate_result
=
[
question_imp_rate_all
,
question_imp_rate_ios
,
question_imp_rate_android
]
print
(
"已获取question曝光占比"
)
#1.1 answer曝光占比(=answer被曝光数/总cid被曝光数)
#1.1 answer曝光占比(=answer被曝光数/总cid被曝光数)
answer_imp_rate_all
=
CidRate
(
"all"
,
"answer"
)
.
get_cid_imp_rate
(
"所有"
)
answer_imp_rate_all
=
CidRate
(
"all"
,
"answer"
)
.
get_cid_imp_rate
(
"所有"
)
answer_imp_rate_ios
=
CidRate
(
"ios"
,
"answer"
)
.
get_cid_imp_rate
(
"苹果"
)
answer_imp_rate_ios
=
CidRate
(
"ios"
,
"answer"
)
.
get_cid_imp_rate
(
"苹果"
)
...
@@ -42,21 +49,28 @@ click_answer_android = ClkCidUidRate("android","answer").get_clk_cid_uid_rate("
...
@@ -42,21 +49,28 @@ click_answer_android = ClkCidUidRate("android","answer").get_clk_cid_uid_rate("
click_answer_result
=
[
click_answer_all
,
click_answer_ios
,
click_answer_android
]
click_answer_result
=
[
click_answer_all
,
click_answer_ios
,
click_answer_android
]
print
(
"已获取点击answer用户占比"
)
print
(
"已获取点击answer用户占比"
)
#1.6 点击diary用户占比(=点击diary用户数/曝光diary用户数)
#1.6 点击question用户占比(=点击question用户数/曝光question用户数)
click_question_all
=
ClkCidUidRate
(
"all"
,
"question"
)
.
get_clk_cid_uid_rate
(
"所有"
)
click_question_ios
=
ClkCidUidRate
(
"ios"
,
"question"
)
.
get_clk_cid_uid_rate
(
"苹果"
)
click_question_android
=
ClkCidUidRate
(
"android"
,
"question"
)
.
get_clk_cid_uid_rate
(
"安卓"
)
click_question_result
=
[
click_question_all
,
click_question_ios
,
click_question_android
]
print
(
"已获取点击question用户占比"
)
#1.7 点击diary用户占比(=点击diary用户数/曝光diary用户数)
click_diary_all
=
ClkCidUidRate
(
"all"
,
"diary"
)
.
get_clk_cid_uid_rate
(
"所有"
)
click_diary_all
=
ClkCidUidRate
(
"all"
,
"diary"
)
.
get_clk_cid_uid_rate
(
"所有"
)
click_diary_ios
=
ClkCidUidRate
(
"ios"
,
"diary"
)
.
get_clk_cid_uid_rate
(
"苹果"
)
click_diary_ios
=
ClkCidUidRate
(
"ios"
,
"diary"
)
.
get_clk_cid_uid_rate
(
"苹果"
)
click_diary_android
=
ClkCidUidRate
(
"android"
,
"diary"
)
.
get_clk_cid_uid_rate
(
"安卓"
)
click_diary_android
=
ClkCidUidRate
(
"android"
,
"diary"
)
.
get_clk_cid_uid_rate
(
"安卓"
)
click_diary_result
=
[
click_diary_all
,
click_diary_ios
,
click_diary_android
]
click_diary_result
=
[
click_diary_all
,
click_diary_ios
,
click_diary_android
]
print
(
"已获取点击diary用户占比"
)
print
(
"已获取点击diary用户占比"
)
#1.
7
有点击用户占比(=有点击用户数/有曝光用户数)
#1.
8
有点击用户占比(=有点击用户数/有曝光用户数)
click_everything_all
=
ClkCidUidRate
(
"all"
,
"everything"
)
.
get_clk_cid_uid_rate
(
"所有"
)
click_everything_all
=
ClkCidUidRate
(
"all"
,
"everything"
)
.
get_clk_cid_uid_rate
(
"所有"
)
click_everything_ios
=
ClkCidUidRate
(
"ios"
,
"everything"
)
.
get_clk_cid_uid_rate
(
"苹果"
)
click_everything_ios
=
ClkCidUidRate
(
"ios"
,
"everything"
)
.
get_clk_cid_uid_rate
(
"苹果"
)
click_everything_android
=
ClkCidUidRate
(
"android"
,
"everything"
)
.
get_clk_cid_uid_rate
(
"安卓"
)
click_everything_android
=
ClkCidUidRate
(
"android"
,
"everything"
)
.
get_clk_cid_uid_rate
(
"安卓"
)
click_everything_result
=
[
click_everything_all
,
click_everything_ios
,
click_everything_android
]
click_everything_result
=
[
click_everything_all
,
click_everything_ios
,
click_everything_android
]
print
(
"已获取有点击用户占比"
)
print
(
"已获取有点击用户占比"
)
#1.
8
无点击用户数分布(=无点击用户∩激活用户 / 激活用户数) #注意:(]里面的数字指的是距离当前时间的天数
#1.
9
无点击用户数分布(=无点击用户∩激活用户 / 激活用户数) #注意:(]里面的数字指的是距离当前时间的天数
try
:
try
:
click_zero_uid_detail_all
=
get_click_zero_uid_rate_detail
(
"all"
)
click_zero_uid_detail_all
=
get_click_zero_uid_rate_detail
(
"all"
)
click_zero_uid_detail_all
[
"platform"
]
=
"所有"
click_zero_uid_detail_all
[
"platform"
]
=
"所有"
...
@@ -107,14 +121,16 @@ def result2file():
...
@@ -107,14 +121,16 @@ def result2file():
line
=
"""数据日期:{}
line
=
"""数据日期:{}
内容概览:以下所有数据都是昨天一天的首页的
内容概览:以下所有数据都是昨天一天的首页的
1. 比例特征
1. 比例特征
1.0 question曝光占比(=question被曝光数/总cid被曝光数)
1.1 answer曝光占比(=answer被曝光数/总cid被曝光数)
1.1 answer曝光占比(=answer被曝光数/总cid被曝光数)
1.2 diary曝光占比(=diary被曝光数/总cid被曝光数)
1.2 diary曝光占比(=diary被曝光数/总cid被曝光数)
1.3 活跃用户点击率(=有点击用户点击次数/有点击用户曝光次数)
1.3 活跃用户点击率(=有点击用户点击次数/有点击用户曝光次数)
1.4 活跃用户平均每天曝光次数(=活跃用户曝光次数/独立活跃用户数)
1.4 活跃用户平均每天曝光次数(=活跃用户曝光次数/独立活跃用户数)
1.5 点击answer用户占比(=点击answer用户数/曝光answer用户数)
1.5 点击answer用户占比(=点击answer用户数/曝光answer用户数)
1.6 点击diary用户占比(=点击diary用户数/曝光diary用户数)
1.6 点击question用户占比(=点击question用户数/曝光question用户数)
1.7 有点击用户占比(=有点击用户数/有曝光用户数)
1.7 点击diary用户占比(=点击diary用户数/曝光diary用户数)
1.8 无点击用户数分布(=无点击用户∩激活用户 / 激活用户数) #注意:平台一排的数字指的是距离当前时间的天数
1.8 有点击用户占比(=有点击用户数/有曝光用户数)
1.9 无点击用户数分布(=无点击用户∩激活用户 / 激活用户数) #注意:平台一排的数字指的是距离当前时间的天数
2.Top特征
2.Top特征
2.1 用户点击次数分布(第一列:用户点击次数;第二列:独立用户数量)
2.1 用户点击次数分布(第一列:用户点击次数;第二列:独立用户数量)
2.2 Top 100 diary (sorted by ctr)
2.2 Top 100 diary (sorted by ctr)
...
@@ -128,6 +144,12 @@ def result2file():
...
@@ -128,6 +144,12 @@ def result2file():
f
.
write
(
line
)
f
.
write
(
line
)
f
.
write
(
"#1. 比例特征
\n
"
)
f
.
write
(
"#1. 比例特征
\n
"
)
f
.
write
(
"=================================================================
\n
"
)
f
.
write
(
"=================================================================
\n
"
)
f
.
write
(
"#1.0question曝光占比(=question被曝光数/总cid被曝光数)
\n
"
)
f
.
write
(
tplt
.
format
(
"平台"
,
"question被曝光数"
,
"总cid被曝光数"
,
"question被曝光数占比"
))
for
i
in
question_imp_rate_result
:
line
=
tplt
.
format
(
i
[
0
],
i
[
1
],
i
[
2
],
"{}
%
"
.
format
(
round
(
i
[
3
]
*
100
,
2
)))
f
.
write
(
line
)
f
.
write
(
'
\n
'
)
f
.
write
(
"#1.1answer曝光占比(=answer被曝光数/总cid被曝光数)
\n
"
)
f
.
write
(
"#1.1answer曝光占比(=answer被曝光数/总cid被曝光数)
\n
"
)
f
.
write
(
tplt
.
format
(
"平台"
,
"answer被曝光数"
,
"总cid被曝光数"
,
"answer被曝光数占比"
))
f
.
write
(
tplt
.
format
(
"平台"
,
"answer被曝光数"
,
"总cid被曝光数"
,
"answer被曝光数占比"
))
for
i
in
answer_imp_rate_result
:
for
i
in
answer_imp_rate_result
:
...
@@ -158,20 +180,26 @@ def result2file():
...
@@ -158,20 +180,26 @@ def result2file():
line
=
tplt
.
format
(
i
[
0
],
i
[
1
],
i
[
2
],
"{}
%
"
.
format
(
round
(
i
[
3
]
*
100
,
2
)))
line
=
tplt
.
format
(
i
[
0
],
i
[
1
],
i
[
2
],
"{}
%
"
.
format
(
round
(
i
[
3
]
*
100
,
2
)))
f
.
write
(
line
)
f
.
write
(
line
)
f
.
write
(
'
\n
'
)
f
.
write
(
'
\n
'
)
f
.
write
(
"#1.6点击diary用户占比(=点击diary用户数/曝光diary用户数)
\n
"
)
f
.
write
(
"#1.6点击question用户占比(=点击question用户数/曝光question用户数)
\n
"
)
f
.
write
(
tplt
.
format
(
"平台"
,
"点击question用户数"
,
"曝光question用户数"
,
"击question用户占比"
))
for
i
in
click_question_result
:
line
=
tplt
.
format
(
i
[
0
],
i
[
1
],
i
[
2
],
"{}
%
"
.
format
(
round
(
i
[
3
]
*
100
,
2
)))
f
.
write
(
line
)
f
.
write
(
'
\n
'
)
f
.
write
(
"#1.7点击diary用户占比(=点击diary用户数/曝光diary用户数)
\n
"
)
f
.
write
(
tplt
.
format
(
"平台"
,
"点击diary用户数"
,
"曝光diary用户数"
,
"击diary用户占比"
))
f
.
write
(
tplt
.
format
(
"平台"
,
"点击diary用户数"
,
"曝光diary用户数"
,
"击diary用户占比"
))
for
i
in
click_diary_result
:
for
i
in
click_diary_result
:
line
=
tplt
.
format
(
i
[
0
],
i
[
1
],
i
[
2
],
"{}
%
"
.
format
(
round
(
i
[
3
]
*
100
,
2
)))
line
=
tplt
.
format
(
i
[
0
],
i
[
1
],
i
[
2
],
"{}
%
"
.
format
(
round
(
i
[
3
]
*
100
,
2
)))
f
.
write
(
line
)
f
.
write
(
line
)
f
.
write
(
'
\n
'
)
f
.
write
(
'
\n
'
)
f
.
write
(
"#1.
7
有点击用户占比(=有点击用户数/有曝光用户数)
\n
"
)
f
.
write
(
"#1.
8
有点击用户占比(=有点击用户数/有曝光用户数)
\n
"
)
f
.
write
(
tplt
.
format
(
"平台"
,
"have点击用户数"
,
"have曝光用户数"
,
"have点击用户占比"
))
f
.
write
(
tplt
.
format
(
"平台"
,
"have点击用户数"
,
"have曝光用户数"
,
"have点击用户占比"
))
for
i
in
click_everything_result
:
for
i
in
click_everything_result
:
line
=
tplt
.
format
(
i
[
0
],
i
[
1
],
i
[
2
],
"{}
%
"
.
format
(
round
(
i
[
3
]
*
100
,
2
)))
line
=
tplt
.
format
(
i
[
0
],
i
[
1
],
i
[
2
],
"{}
%
"
.
format
(
round
(
i
[
3
]
*
100
,
2
)))
f
.
write
(
line
)
f
.
write
(
line
)
f
.
write
(
'
\n
'
)
f
.
write
(
'
\n
'
)
if
"click_zero_uid_detail_result"
in
dir
():
if
"click_zero_uid_detail_result"
in
dir
():
f
.
write
(
"#1.
8
无点击用户数分布(=无点击用户∩激活用户 / 激活用户数) #注意:(]里面的数字指的是距离当前时间的天数
\n
"
)
f
.
write
(
"#1.
9
无点击用户数分布(=无点击用户∩激活用户 / 激活用户数) #注意:(]里面的数字指的是距离当前时间的天数
\n
"
)
f
.
write
(
"平台"
+
'
\t\t
'
+
"0-7"
+
'
\t\t
'
+
"7-14"
+
'
\t\t
'
+
\
f
.
write
(
"平台"
+
'
\t\t
'
+
"0-7"
+
'
\t\t
'
+
"7-14"
+
'
\t\t
'
+
\
"14-30"
+
'
\t\t
'
+
"30-60"
+
'
\t\t
'
+
"60-90"
+
'
\t\t
'
+
"90+"
+
'
\n
'
)
"14-30"
+
'
\t\t
'
+
"30-60"
+
'
\t\t
'
+
"60-90"
+
'
\t\t
'
+
"90+"
+
'
\n
'
)
for
i
in
click_zero_uid_detail_result
:
for
i
in
click_zero_uid_detail_result
:
...
@@ -243,7 +271,9 @@ def rate2file():
...
@@ -243,7 +271,9 @@ def rate2file():
str
(
activate_uid_imp_all
[
3
])
+
','
+
str
(
activate_uid_imp_beijing
[
3
])
+
','
+
\
str
(
activate_uid_imp_all
[
3
])
+
','
+
str
(
activate_uid_imp_beijing
[
3
])
+
','
+
\
str
(
click_answer_all
[
3
])
+
','
+
str
(
click_answer_ios
[
3
])
+
','
+
str
(
click_answer_android
[
3
])
+
','
+
\
str
(
click_answer_all
[
3
])
+
','
+
str
(
click_answer_ios
[
3
])
+
','
+
str
(
click_answer_android
[
3
])
+
','
+
\
str
(
click_diary_all
[
3
])
+
','
+
str
(
click_diary_ios
[
3
])
+
','
+
str
(
click_diary_android
[
3
])
+
','
+
\
str
(
click_diary_all
[
3
])
+
','
+
str
(
click_diary_ios
[
3
])
+
','
+
str
(
click_diary_android
[
3
])
+
','
+
\
str
(
click_everything_all
[
3
])
+
','
+
str
(
click_everything_ios
[
3
])
+
','
+
str
(
click_everything_android
[
3
])
+
'
\n
'
str
(
click_everything_all
[
3
])
+
','
+
str
(
click_everything_ios
[
3
])
+
','
+
str
(
click_everything_android
[
3
])
+
','
+
\
str
(
question_imp_rate_all
[
3
])
+
','
+
str
(
question_imp_rate_ios
[
3
])
+
','
+
str
(
question_imp_rate_android
[
3
])
+
','
+
\
str
(
click_question_all
[
3
])
+
','
+
str
(
click_question_ios
[
3
])
+
','
+
str
(
click_question_android
[
3
])
+
'
\n
'
f
.
write
(
line
)
f
.
write
(
line
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment