Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in
Toggle navigation
F
ffm-baseline
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
ML
ffm-baseline
Commits
31f72a07
Commit
31f72a07
authored
Aug 09, 2018
by
张彦钊
Browse files
Options
Browse Files
Download
Plain Diff
新增实时预测
parents
9caf9b4e
aa7d043a
Hide whitespace changes
Inline
Side-by-side
Showing
14 changed files
with
238 additions
and
84 deletions
+238
-84
getActivateUidCtr.py
eda/recommended_indexs/code/getActivateUidCtr.py
+3
-3
getAnswerImpRate.py
eda/recommended_indexs/code/getAnswerImpRate.py
+3
-3
getClickAnswerUidRate.py
eda/recommended_indexs/code/getClickAnswerUidRate.py
+3
-3
getClickDiaryUidRate.py
eda/recommended_indexs/code/getClickDiaryUidRate.py
+3
-3
getClickTimes2CountUid.py
eda/recommended_indexs/code/getClickTimes2CountUid.py
+3
-2
getClickZeroUidRate.py
eda/recommended_indexs/code/getClickZeroUidRate.py
+3
-3
getRate.py
eda/recommended_indexs/code/getRate.py
+18
-28
getTop100Answer.py
eda/recommended_indexs/code/getTop100Answer.py
+7
-11
getTop100Diary.py
eda/recommended_indexs/code/getTop100Diary.py
+7
-11
getTop100Question.py
eda/recommended_indexs/code/getTop100Question.py
+11
-15
start.sh
eda/recommended_indexs/code/start.sh
+2
-2
config.py
eda/test/config.py
+2
-0
getTopFeatures.py
eda/test/getTopFeatures.py
+136
-0
utils.py
eda/test/utils.py
+37
-0
No files found.
eda/recommended_indexs/code/getActivateUidCtr.py
View file @
31f72a07
...
@@ -21,7 +21,7 @@ def get_all_click_one_rate():
...
@@ -21,7 +21,7 @@ def get_all_click_one_rate():
impression_one_count
=
con_sql
(
sql
)
impression_one_count
=
con_sql
(
sql
)
impression_one_count
=
impression_one_count
[
0
][
0
]
impression_one_count
=
impression_one_count
[
0
][
0
]
all_click_one_rate
=
click_one_count
/
impression_one_count
all_click_one_rate
=
click_one_count
/
impression_one_count
return
[
"
all
"
,
click_one_count
,
impression_one_count
,
round
(
all_click_one_rate
,
4
)]
return
[
"
所有
"
,
click_one_count
,
impression_one_count
,
round
(
all_click_one_rate
,
4
)]
...
@@ -35,7 +35,7 @@ def get_ios_click_one_rate():
...
@@ -35,7 +35,7 @@ def get_ios_click_one_rate():
impression_one_count
=
con_sql
(
sql
)
impression_one_count
=
con_sql
(
sql
)
impression_one_count
=
impression_one_count
[
0
][
0
]
impression_one_count
=
impression_one_count
[
0
][
0
]
ios_click_one_rate
=
click_one_count
/
impression_one_count
ios_click_one_rate
=
click_one_count
/
impression_one_count
return
[
"
ios
"
,
click_one_count
,
impression_one_count
,
round
(
ios_click_one_rate
,
4
)]
return
[
"
苹果
"
,
click_one_count
,
impression_one_count
,
round
(
ios_click_one_rate
,
4
)]
#3 获取安卓平台的有点击用户点击率
#3 获取安卓平台的有点击用户点击率
...
@@ -47,7 +47,7 @@ def get_android_click_one_rate():
...
@@ -47,7 +47,7 @@ def get_android_click_one_rate():
impression_one_count
=
con_sql
(
sql
)
impression_one_count
=
con_sql
(
sql
)
impression_one_count
=
impression_one_count
[
0
][
0
]
impression_one_count
=
impression_one_count
[
0
][
0
]
android_click_one_rate
=
click_one_count
/
impression_one_count
android_click_one_rate
=
click_one_count
/
impression_one_count
return
[
"
android
"
,
click_one_count
,
impression_one_count
,
round
(
android_click_one_rate
,
4
)]
return
[
"
安卓
"
,
click_one_count
,
impression_one_count
,
round
(
android_click_one_rate
,
4
)]
...
...
eda/recommended_indexs/code/getAnswerImpRate.py
View file @
31f72a07
...
@@ -21,7 +21,7 @@ def get_all_answer_imp_rate():
...
@@ -21,7 +21,7 @@ def get_all_answer_imp_rate():
all_imp_count
=
con_sql
(
sql
)
all_imp_count
=
con_sql
(
sql
)
all_imp_count
=
all_imp_count
[
0
][
0
]
all_imp_count
=
all_imp_count
[
0
][
0
]
all_answer_imp_rate
=
answer_imp_count
/
all_imp_count
all_answer_imp_rate
=
answer_imp_count
/
all_imp_count
return
[
"
all
"
,
answer_imp_count
,
all_imp_count
,
round
(
all_answer_imp_rate
,
4
)]
return
[
"
所有
"
,
answer_imp_count
,
all_imp_count
,
round
(
all_answer_imp_rate
,
4
)]
#2 获取ios平台的问答曝光占比
#2 获取ios平台的问答曝光占比
...
@@ -33,7 +33,7 @@ def get_ios_answer_imp_rate():
...
@@ -33,7 +33,7 @@ def get_ios_answer_imp_rate():
all_imp_count
=
con_sql
(
sql
)
all_imp_count
=
con_sql
(
sql
)
all_imp_count
=
all_imp_count
[
0
][
0
]
all_imp_count
=
all_imp_count
[
0
][
0
]
ios_answer_imp_rate
=
answer_imp_count
/
all_imp_count
ios_answer_imp_rate
=
answer_imp_count
/
all_imp_count
return
[
"
ios
"
,
answer_imp_count
,
all_imp_count
,
round
(
ios_answer_imp_rate
,
4
)]
return
[
"
苹果
"
,
answer_imp_count
,
all_imp_count
,
round
(
ios_answer_imp_rate
,
4
)]
#3 获取安卓平台的问答曝光占比
#3 获取安卓平台的问答曝光占比
...
@@ -45,7 +45,7 @@ def get_android_answer_imp_rate():
...
@@ -45,7 +45,7 @@ def get_android_answer_imp_rate():
all_imp_count
=
con_sql
(
sql
)
all_imp_count
=
con_sql
(
sql
)
all_imp_count
=
all_imp_count
[
0
][
0
]
all_imp_count
=
all_imp_count
[
0
][
0
]
android_answer_imp_rate
=
answer_imp_count
/
all_imp_count
android_answer_imp_rate
=
answer_imp_count
/
all_imp_count
return
[
"
android
"
,
answer_imp_count
,
all_imp_count
,
round
(
android_answer_imp_rate
,
4
)]
return
[
"
安卓
"
,
answer_imp_count
,
all_imp_count
,
round
(
android_answer_imp_rate
,
4
)]
...
...
eda/recommended_indexs/code/getClickAnswerUidRate.py
View file @
31f72a07
...
@@ -21,7 +21,7 @@ def get_all_click_answer_rate():
...
@@ -21,7 +21,7 @@ def get_all_click_answer_rate():
impression_answer_count
=
con_sql
(
sql
)
impression_answer_count
=
con_sql
(
sql
)
impression_answer_count
=
impression_answer_count
[
0
][
0
]
impression_answer_count
=
impression_answer_count
[
0
][
0
]
all_click_answer_rate
=
click_answer_count
/
impression_answer_count
all_click_answer_rate
=
click_answer_count
/
impression_answer_count
return
[
"
all
"
,
click_answer_count
,
impression_answer_count
,
round
(
all_click_answer_rate
,
4
)]
return
[
"
所有
"
,
click_answer_count
,
impression_answer_count
,
round
(
all_click_answer_rate
,
4
)]
...
@@ -34,7 +34,7 @@ def get_ios_click_answer_rate():
...
@@ -34,7 +34,7 @@ def get_ios_click_answer_rate():
impression_answer_count
=
con_sql
(
sql
)
impression_answer_count
=
con_sql
(
sql
)
impression_answer_count
=
impression_answer_count
[
0
][
0
]
impression_answer_count
=
impression_answer_count
[
0
][
0
]
ios_click_answer_rate
=
click_answer_count
/
impression_answer_count
ios_click_answer_rate
=
click_answer_count
/
impression_answer_count
return
[
"
ios
"
,
click_answer_count
,
impression_answer_count
,
round
(
ios_click_answer_rate
,
4
)]
return
[
"
苹果
"
,
click_answer_count
,
impression_answer_count
,
round
(
ios_click_answer_rate
,
4
)]
...
@@ -48,7 +48,7 @@ def get_android_click_answer_rate():
...
@@ -48,7 +48,7 @@ def get_android_click_answer_rate():
impression_answer_count
=
con_sql
(
sql
)
impression_answer_count
=
con_sql
(
sql
)
impression_answer_count
=
impression_answer_count
[
0
][
0
]
impression_answer_count
=
impression_answer_count
[
0
][
0
]
android_click_answer_rate
=
click_answer_count
/
impression_answer_count
android_click_answer_rate
=
click_answer_count
/
impression_answer_count
return
[
"
android
"
,
click_answer_count
,
impression_answer_count
,
round
(
android_click_answer_rate
,
4
)]
return
[
"
安卓
"
,
click_answer_count
,
impression_answer_count
,
round
(
android_click_answer_rate
,
4
)]
...
...
eda/recommended_indexs/code/getClickDiaryUidRate.py
View file @
31f72a07
...
@@ -21,7 +21,7 @@ def get_all_click_diary_rate():
...
@@ -21,7 +21,7 @@ def get_all_click_diary_rate():
impression_diary_count
=
con_sql
(
sql
)
impression_diary_count
=
con_sql
(
sql
)
impression_diary_count
=
impression_diary_count
[
0
][
0
]
impression_diary_count
=
impression_diary_count
[
0
][
0
]
all_click_diary_rate
=
click_diary_count
/
impression_diary_count
all_click_diary_rate
=
click_diary_count
/
impression_diary_count
return
[
"
all
"
,
click_diary_count
,
impression_diary_count
,
round
(
all_click_diary_rate
,
4
)]
return
[
"
所有
"
,
click_diary_count
,
impression_diary_count
,
round
(
all_click_diary_rate
,
4
)]
...
@@ -34,7 +34,7 @@ def get_ios_click_diary_rate():
...
@@ -34,7 +34,7 @@ def get_ios_click_diary_rate():
impression_diary_count
=
con_sql
(
sql
)
impression_diary_count
=
con_sql
(
sql
)
impression_diary_count
=
impression_diary_count
[
0
][
0
]
impression_diary_count
=
impression_diary_count
[
0
][
0
]
ios_click_diary_rate
=
click_diary_count
/
impression_diary_count
ios_click_diary_rate
=
click_diary_count
/
impression_diary_count
return
[
"
ios
"
,
click_diary_count
,
impression_diary_count
,
round
(
ios_click_diary_rate
,
4
)]
return
[
"
苹果
"
,
click_diary_count
,
impression_diary_count
,
round
(
ios_click_diary_rate
,
4
)]
#3 获取安卓平台的点击问答用户占比
#3 获取安卓平台的点击问答用户占比
...
@@ -46,7 +46,7 @@ def get_android_click_diary_rate():
...
@@ -46,7 +46,7 @@ def get_android_click_diary_rate():
impression_diary_count
=
con_sql
(
sql
)
impression_diary_count
=
con_sql
(
sql
)
impression_diary_count
=
impression_diary_count
[
0
][
0
]
impression_diary_count
=
impression_diary_count
[
0
][
0
]
android_click_diary_rate
=
click_diary_count
/
impression_diary_count
android_click_diary_rate
=
click_diary_count
/
impression_diary_count
return
[
"
android
"
,
click_diary_count
,
impression_diary_count
,
round
(
android_click_diary_rate
,
4
)]
return
[
"
安卓
"
,
click_diary_count
,
impression_diary_count
,
round
(
android_click_diary_rate
,
4
)]
...
...
eda/recommended_indexs/code/getClickTimes2CountUid.py
View file @
31f72a07
...
@@ -31,12 +31,13 @@ def get_click_times_to_count_uid_df():
...
@@ -31,12 +31,13 @@ def get_click_times_to_count_uid_df():
def
df2file
(
df
,
fpath
):
def
df2file
(
df
,
fpath
):
with
open
(
fpath
,
"w"
)
as
f
:
with
open
(
fpath
,
"w"
)
as
f
:
tplt
=
"{0:^10}
\t
{1:^10}
\n
"
f
.
write
(
"#2. Top特征
\n
"
)
f
.
write
(
"#2. Top特征
\n
"
)
f
.
write
(
"=================================================================
\n
"
)
f
.
write
(
"=================================================================
\n
"
)
f
.
write
(
"2.1用户点击次数分布(第一列:用户点击次数;第二列:独立用户数量)
\n
"
)
f
.
write
(
"2.1用户点击次数分布(第一列:用户点击次数;第二列:独立用户数量)
\n
"
)
f
.
write
(
"click_times"
+
"
\t
"
+
"count_uid"
+
"
\n
"
)
f
.
write
(
tplt
.
format
(
"click_times"
,
"count_uid"
)
)
for
row
in
df
.
iterrows
():
for
row
in
df
.
iterrows
():
line
=
str
(
row
[
1
][
0
])
+
"
\t
"
+
str
(
row
[
1
][
1
])
+
"
\n
"
line
=
tplt
.
format
(
row
[
1
][
0
],
row
[
1
][
1
])
f
.
write
(
line
)
f
.
write
(
line
)
f
.
write
(
"
\n\n
"
)
f
.
write
(
"
\n\n
"
)
...
...
eda/recommended_indexs/code/getClickZeroUidRate.py
View file @
31f72a07
...
@@ -23,7 +23,7 @@ def get_all_click_zero_rate():
...
@@ -23,7 +23,7 @@ def get_all_click_zero_rate():
impression_zero_count
=
impression_zero_count
[
0
][
0
]
impression_zero_count
=
impression_zero_count
[
0
][
0
]
click_zero_count
=
impression_zero_count
-
click_zero_count
click_zero_count
=
impression_zero_count
-
click_zero_count
all_click_zero_rate
=
click_zero_count
/
impression_zero_count
all_click_zero_rate
=
click_zero_count
/
impression_zero_count
return
[
"
all
"
,
click_zero_count
,
impression_zero_count
,
round
(
all_click_zero_rate
,
4
)]
return
[
"
所有
"
,
click_zero_count
,
impression_zero_count
,
round
(
all_click_zero_rate
,
4
)]
...
@@ -37,7 +37,7 @@ def get_ios_click_zero_rate():
...
@@ -37,7 +37,7 @@ def get_ios_click_zero_rate():
impression_zero_count
=
impression_zero_count
[
0
][
0
]
impression_zero_count
=
impression_zero_count
[
0
][
0
]
click_zero_count
=
impression_zero_count
-
click_zero_count
click_zero_count
=
impression_zero_count
-
click_zero_count
ios_click_zero_rate
=
click_zero_count
/
impression_zero_count
ios_click_zero_rate
=
click_zero_count
/
impression_zero_count
return
[
"
ios
"
,
click_zero_count
,
impression_zero_count
,
round
(
ios_click_zero_rate
,
4
)]
return
[
"
苹果
"
,
click_zero_count
,
impression_zero_count
,
round
(
ios_click_zero_rate
,
4
)]
#3 获取安卓平台的0点击用户占比
#3 获取安卓平台的0点击用户占比
...
@@ -50,7 +50,7 @@ def get_android_click_zero_rate():
...
@@ -50,7 +50,7 @@ def get_android_click_zero_rate():
impression_zero_count
=
impression_zero_count
[
0
][
0
]
impression_zero_count
=
impression_zero_count
[
0
][
0
]
click_zero_count
=
impression_zero_count
-
click_zero_count
click_zero_count
=
impression_zero_count
-
click_zero_count
android_click_zero_rate
=
click_zero_count
/
impression_zero_count
android_click_zero_rate
=
click_zero_count
/
impression_zero_count
return
[
"
android
"
,
click_zero_count
,
impression_zero_count
,
round
(
android_click_zero_rate
,
4
)]
return
[
"
安卓
"
,
click_zero_count
,
impression_zero_count
,
round
(
android_click_zero_rate
,
4
)]
...
...
eda/recommended_indexs/code/getRate.py
View file @
31f72a07
...
@@ -15,78 +15,68 @@ def get_yesterday_date():
...
@@ -15,78 +15,68 @@ def get_yesterday_date():
def
result2file
(
fpath
):
def
result2file
(
fpath
):
with
open
(
fpath
,
'w'
)
as
f
:
with
open
(
fpath
,
'w'
)
as
f
:
tplt
=
"{0:
\u3000
<6}
\t
{1:
\u3000
<15}
\t
{2:
\u3000
<15}
\t
{3:
\u3000
<15}
\n
"
f
.
write
(
"#注意:以下数据都是首页的
\n
"
)
f
.
write
(
"#注意:以下数据都是首页的
\n
"
)
f
.
write
(
"#1. 比例特征
\n
"
)
f
.
write
(
"#1. 比例特征
\n
"
)
f
.
write
(
"=================================================================
\n
"
)
f
.
write
(
"=================================================================
\n
"
)
f
.
write
(
"#1.1answer曝光占比(=answer被曝光数/总cid被曝光数)
\n
"
)
f
.
write
(
"#1.1answer曝光占比(=answer被曝光数/总cid被曝光数)
\n
"
)
f
.
write
(
"平台"
+
"
\t
"
+
"answer被曝光数"
+
"
\t
"
+
"总cid被曝光数"
+
"
\t
"
+
"answer被曝光数占比
\n
"
)
f
.
write
(
tplt
.
format
(
"平台"
,
"answer被曝光数"
,
"总cid被曝光数"
,
"answer被曝光数占比"
)
)
all_answer_imp_rate
=
get_all_answer_imp_rate
()
all_answer_imp_rate
=
get_all_answer_imp_rate
()
ios_answer_imp_rate
=
get_ios_answer_imp_rate
()
ios_answer_imp_rate
=
get_ios_answer_imp_rate
()
android_answer_imp_rate
=
get_android_answer_imp_rate
()
android_answer_imp_rate
=
get_android_answer_imp_rate
()
lst
=
[
all_answer_imp_rate
,
ios_answer_imp_rate
,
android_answer_imp_rate
]
lst
=
[
all_answer_imp_rate
,
ios_answer_imp_rate
,
android_answer_imp_rate
]
for
i
in
lst
:
for
i
in
lst
:
line
=
""
line
=
tplt
.
format
(
i
[
0
],
i
[
1
],
i
[
2
],
i
[
3
])
for
j
in
i
:
line
+=
str
(
j
)
+
'
\t
'
line
=
line
[:
-
1
]
+
'
\n
'
f
.
write
(
line
)
f
.
write
(
line
)
f
.
write
(
'
\n
'
)
print
(
"1.1已将answer曝光占比存入文件"
)
print
(
"1.1已将answer曝光占比存入文件"
)
f
.
write
(
"#1.2
有点击用户点击率(=有点击用户点击次数/有点击
用户曝光次数)
\n
"
)
f
.
write
(
"#1.2
活跃用户点击率(=活跃用户点击次数/活跃
用户曝光次数)
\n
"
)
f
.
write
(
"平台"
+
"
\t
"
+
"有点击用户点击次数"
+
"
\t
"
+
"有点击用户曝光次数"
+
"
\t
"
+
"有点击用户点击率
\n
"
)
f
.
write
(
tplt
.
format
(
"平台"
,
"active用户点击次数"
,
"active用户曝光次数"
,
"active用户点击率"
)
)
all_click_one_rate
=
get_all_click_one_rate
()
all_click_one_rate
=
get_all_click_one_rate
()
ios_click_one_rate
=
get_ios_click_one_rate
()
ios_click_one_rate
=
get_ios_click_one_rate
()
android_click_one_rate
=
get_android_click_one_rate
()
android_click_one_rate
=
get_android_click_one_rate
()
lst
=
[
all_click_one_rate
,
ios_click_one_rate
,
android_click_one_rate
]
lst
=
[
all_click_one_rate
,
ios_click_one_rate
,
android_click_one_rate
]
for
i
in
lst
:
for
i
in
lst
:
line
=
""
line
=
tplt
.
format
(
i
[
0
],
i
[
1
],
i
[
2
],
i
[
3
])
for
j
in
i
:
line
+=
str
(
j
)
+
'
\t
'
line
=
line
[:
-
1
]
+
'
\n
'
f
.
write
(
line
)
f
.
write
(
line
)
print
(
"1.2已将有点击用户点击率存入文件"
)
f
.
write
(
'
\n
'
)
print
(
"1.2已将活跃用户点击率存入文件"
)
f
.
write
(
"#1.3点击answer用户占比(=点击answer用户数/曝光answer用户数)
\n
"
)
f
.
write
(
"#1.3点击answer用户占比(=点击answer用户数/曝光answer用户数)
\n
"
)
f
.
write
(
"平台"
+
"
\t
"
+
"点击answer用户数"
+
"
\t
"
+
"曝光answer用户数"
+
"
\t
"
+
"点击answer用户占比
\n
"
)
f
.
write
(
tplt
.
format
(
"平台"
,
"点击answer用户数"
,
"曝光answer用户数"
,
"击answer用户占比"
)
)
all_click_answer_rate
=
get_all_click_answer_rate
()
all_click_answer_rate
=
get_all_click_answer_rate
()
ios_click_answer_rate
=
get_ios_click_answer_rate
()
ios_click_answer_rate
=
get_ios_click_answer_rate
()
android_click_answer_rate
=
get_android_click_answer_rate
()
android_click_answer_rate
=
get_android_click_answer_rate
()
lst
=
[
all_click_answer_rate
,
ios_click_answer_rate
,
android_click_answer_rate
]
lst
=
[
all_click_answer_rate
,
ios_click_answer_rate
,
android_click_answer_rate
]
for
i
in
lst
:
for
i
in
lst
:
line
=
""
line
=
tplt
.
format
(
i
[
0
],
i
[
1
],
i
[
2
],
i
[
3
])
for
j
in
i
:
line
+=
str
(
j
)
+
'
\t
'
line
=
line
[:
-
1
]
+
'
\n
'
f
.
write
(
line
)
f
.
write
(
line
)
f
.
write
(
'
\n
'
)
print
(
"1.3已将点击answer用户占比存入文件"
)
print
(
"1.3已将点击answer用户占比存入文件"
)
f
.
write
(
"#1.4点击diary用户占比(=点击diary用户数/曝光diary用户数)
\n
"
)
f
.
write
(
"#1.4点击diary用户占比(=点击diary用户数/曝光diary用户数)
\n
"
)
f
.
write
(
"平台"
+
"
\t
"
+
"点击diary用户数"
+
"
\t
"
+
"曝光diary用户数"
+
"
\t
"
+
"点击diary用户占比
\n
"
)
f
.
write
(
tplt
.
format
(
"平台"
,
"点击diary用户数"
,
"曝光diary用户数"
,
"击diary用户占比"
)
)
all_click_diary_rate
=
get_all_click_diary_rate
()
all_click_diary_rate
=
get_all_click_diary_rate
()
ios_click_diary_rate
=
get_ios_click_diary_rate
()
ios_click_diary_rate
=
get_ios_click_diary_rate
()
android_click_diary_rate
=
get_android_click_diary_rate
()
android_click_diary_rate
=
get_android_click_diary_rate
()
lst
=
[
all_click_diary_rate
,
ios_click_diary_rate
,
android_click_diary_rate
]
lst
=
[
all_click_diary_rate
,
ios_click_diary_rate
,
android_click_diary_rate
]
for
i
in
lst
:
for
i
in
lst
:
line
=
""
line
=
tplt
.
format
(
i
[
0
],
i
[
1
],
i
[
2
],
i
[
3
])
for
j
in
i
:
line
+=
str
(
j
)
+
'
\t
'
line
=
line
[:
-
1
]
+
'
\n
'
f
.
write
(
line
)
f
.
write
(
line
)
f
.
write
(
'
\n
'
)
print
(
"1.4已将点击diary用户占比存入文件"
)
print
(
"1.4已将点击diary用户占比存入文件"
)
f
.
write
(
"#1.5无点击用户占比(=无点击用户数/有曝光用户数)
\n
"
)
f
.
write
(
"#1.5无点击用户占比(=无点击用户数/有曝光用户数)
\n
"
)
f
.
write
(
"平台"
+
"
\t
"
+
"无点击用户数"
+
"
\t
"
+
"有曝光用户数"
+
"
\t
"
+
"无点击用户占比
\n
"
)
f
.
write
(
tplt
.
format
(
"平台"
,
"no点击用户数"
,
"have曝光用户数"
,
"no点击用户占比"
)
)
all_click_zero_rate
=
get_all_click_zero_rate
()
all_click_zero_rate
=
get_all_click_zero_rate
()
ios_click_zero_rate
=
get_ios_click_zero_rate
()
ios_click_zero_rate
=
get_ios_click_zero_rate
()
android_click_zero_rate
=
get_android_click_zero_rate
()
android_click_zero_rate
=
get_android_click_zero_rate
()
lst
=
[
all_click_zero_rate
,
ios_click_zero_rate
,
android_click_zero_rate
]
lst
=
[
all_click_zero_rate
,
ios_click_zero_rate
,
android_click_zero_rate
]
for
i
in
lst
:
for
i
in
lst
:
line
=
""
line
=
tplt
.
format
(
i
[
0
],
i
[
1
],
i
[
2
],
i
[
3
])
for
j
in
i
:
line
+=
str
(
j
)
+
'
\t
'
line
=
line
[:
-
1
]
+
'
\n
'
f
.
write
(
line
)
f
.
write
(
line
)
f
.
write
(
'
\n
\n
'
)
f
.
write
(
'
\n
'
)
print
(
"1.5已将无点击用户占比存入文件"
)
print
(
"1.5已将无点击用户占比存入文件"
)
...
...
eda/recommended_indexs/code/getTop100Answer.py
View file @
31f72a07
...
@@ -20,20 +20,16 @@ def tuple2dict(tuple_result):
...
@@ -20,20 +20,16 @@ def tuple2dict(tuple_result):
def
result2file
(
result_lst
,
fpath
):
def
result2file
(
result_lst
,
fpath
):
with
open
(
fpath
,
'w'
)
as
f
:
with
open
(
fpath
,
'w'
)
as
f
:
header
=
"平台"
+
'
\t
'
+
"answer_id"
+
'
\t
'
+
"answer被点击数"
+
'
\t
'
+
"answer被曝光数"
+
'
\t
'
+
"answer被点击率"
+
'
\t
'
+
"answer链接"
+
'
\n
'
tplt
=
"{0:
\u3000
<4}
\t
{1:
\u3000
<12}
\t
{2:
\u3000
^6}
\t
{3:
\u3000
^6}
\t
{4:
\u3000
<8}
\t
{5:
\u3000
^15}
\n
"
f
.
write
(
"Top 100 Answer
\n
"
)
f
.
write
(
"Top 100 Answer
\n
"
)
f
.
write
(
"=================================================================
\n
"
)
f
.
write
(
"=================================================================
\n
"
)
f
.
write
(
header
)
f
.
write
(
tplt
.
format
(
"平台"
,
"answer_id"
,
"点击数"
,
"曝光数"
,
"点击率"
,
"answer链接"
)
)
for
i
in
result_lst
:
for
i
in
result_lst
:
for
j
in
i
:
for
j
in
i
:
line
=
""
f
.
write
(
tplt
.
format
(
j
[
0
],
j
[
1
],
j
[
2
],
j
[
3
],
j
[
4
],
j
[
5
]))
for
k
in
j
:
line
+=
str
(
k
)
+
'
\t
'
line
=
line
[:
-
1
]
+
'
\n
'
f
.
write
(
line
)
f
.
write
(
"=================================================================
\n
"
)
f
.
write
(
"=================================================================
\n
"
)
if
i
!=
result_lst
[
-
1
]:
if
i
!=
result_lst
[
-
1
]:
f
.
write
(
header
)
f
.
write
(
tplt
.
format
(
"平台"
,
"answer_id"
,
"点击数"
,
"曝光数"
,
"点击率"
,
"answer链接"
)
)
f
.
write
(
"
\n\n
"
)
f
.
write
(
"
\n\n
"
)
...
@@ -58,7 +54,7 @@ def get_all_top100_answer_rate_by_ctr(all_answer_count_by_click,all_answer_count
...
@@ -58,7 +54,7 @@ def get_all_top100_answer_rate_by_ctr(all_answer_count_by_click,all_answer_count
for
i
in
all_answer_count_by_click
:
for
i
in
all_answer_count_by_click
:
if
i
in
all_answer_count_by_imp
.
keys
()
and
all_answer_count_by_click
[
i
]
>
2
:
if
i
in
all_answer_count_by_imp
.
keys
()
and
all_answer_count_by_click
[
i
]
>
2
:
url
=
"http://m.igengmei.com/answer/"
+
i
[
i
.
index
(
'|'
)
+
1
:]
+
'/'
url
=
"http://m.igengmei.com/answer/"
+
i
[
i
.
index
(
'|'
)
+
1
:]
+
'/'
all_top100_answer_rate_by_ctr
.
append
((
"
all
"
,
i
,
all_answer_count_by_click
[
i
],
all_answer_count_by_imp
[
i
],
round
(
all_answer_count_by_click
[
i
]
/
all_answer_count_by_imp
[
i
],
4
),
url
))
all_top100_answer_rate_by_ctr
.
append
((
"
所有
"
,
i
,
all_answer_count_by_click
[
i
],
all_answer_count_by_imp
[
i
],
round
(
all_answer_count_by_click
[
i
]
/
all_answer_count_by_imp
[
i
],
4
),
url
))
all_top100_answer_rate_by_ctr
.
sort
(
key
=
lambda
x
:
x
[
4
],
reverse
=
True
)
all_top100_answer_rate_by_ctr
.
sort
(
key
=
lambda
x
:
x
[
4
],
reverse
=
True
)
return
all_top100_answer_rate_by_ctr
[:
100
]
if
len
(
all_top100_answer_rate_by_ctr
)
>
100
else
all_top100_answer_rate_by_ctr
return
all_top100_answer_rate_by_ctr
[:
100
]
if
len
(
all_top100_answer_rate_by_ctr
)
>
100
else
all_top100_answer_rate_by_ctr
...
@@ -82,7 +78,7 @@ def get_ios_top100_answer_rate_by_ctr(ios_answer_count_by_click,ios_answer_count
...
@@ -82,7 +78,7 @@ def get_ios_top100_answer_rate_by_ctr(ios_answer_count_by_click,ios_answer_count
for
i
in
ios_answer_count_by_click
:
for
i
in
ios_answer_count_by_click
:
if
i
in
ios_answer_count_by_imp
.
keys
()
and
ios_answer_count_by_click
[
i
]
>
2
:
if
i
in
ios_answer_count_by_imp
.
keys
()
and
ios_answer_count_by_click
[
i
]
>
2
:
url
=
"http://m.igengmei.com/answer/"
+
i
[
i
.
index
(
'|'
)
+
1
:]
+
'/'
url
=
"http://m.igengmei.com/answer/"
+
i
[
i
.
index
(
'|'
)
+
1
:]
+
'/'
ios_top100_answer_rate_by_ctr
.
append
((
"
ios
"
,
i
,
ios_answer_count_by_click
[
i
],
ios_answer_count_by_imp
[
i
],
round
(
ios_answer_count_by_click
[
i
]
/
ios_answer_count_by_imp
[
i
],
4
),
url
))
ios_top100_answer_rate_by_ctr
.
append
((
"
苹果
"
,
i
,
ios_answer_count_by_click
[
i
],
ios_answer_count_by_imp
[
i
],
round
(
ios_answer_count_by_click
[
i
]
/
ios_answer_count_by_imp
[
i
],
4
),
url
))
ios_top100_answer_rate_by_ctr
.
sort
(
key
=
lambda
x
:
x
[
4
],
reverse
=
True
)
ios_top100_answer_rate_by_ctr
.
sort
(
key
=
lambda
x
:
x
[
4
],
reverse
=
True
)
return
ios_top100_answer_rate_by_ctr
[:
100
]
if
len
(
ios_top100_answer_rate_by_ctr
)
>
100
else
ios_top100_answer_rate_by_ctr
return
ios_top100_answer_rate_by_ctr
[:
100
]
if
len
(
ios_top100_answer_rate_by_ctr
)
>
100
else
ios_top100_answer_rate_by_ctr
...
@@ -106,7 +102,7 @@ def get_android_top100_answer_rate_by_ctr(android_answer_count_by_click,android_
...
@@ -106,7 +102,7 @@ def get_android_top100_answer_rate_by_ctr(android_answer_count_by_click,android_
for
i
in
android_answer_count_by_click
:
for
i
in
android_answer_count_by_click
:
if
i
in
android_answer_count_by_imp
.
keys
()
and
android_answer_count_by_click
[
i
]
>
2
:
if
i
in
android_answer_count_by_imp
.
keys
()
and
android_answer_count_by_click
[
i
]
>
2
:
url
=
"http://m.igengmei.com/answer/"
+
i
[
i
.
index
(
'|'
)
+
1
:]
+
'/'
url
=
"http://m.igengmei.com/answer/"
+
i
[
i
.
index
(
'|'
)
+
1
:]
+
'/'
android_top100_answer_rate_by_ctr
.
append
((
"
android
"
,
i
,
android_answer_count_by_click
[
i
],
android_answer_count_by_imp
[
i
],
round
(
android_answer_count_by_click
[
i
]
/
android_answer_count_by_imp
[
i
],
4
),
url
))
android_top100_answer_rate_by_ctr
.
append
((
"
安卓
"
,
i
,
android_answer_count_by_click
[
i
],
android_answer_count_by_imp
[
i
],
round
(
android_answer_count_by_click
[
i
]
/
android_answer_count_by_imp
[
i
],
4
),
url
))
android_top100_answer_rate_by_ctr
.
sort
(
key
=
lambda
x
:
x
[
4
],
reverse
=
True
)
android_top100_answer_rate_by_ctr
.
sort
(
key
=
lambda
x
:
x
[
4
],
reverse
=
True
)
return
android_top100_answer_rate_by_ctr
[:
100
]
if
len
(
android_top100_answer_rate_by_ctr
)
>
100
else
android_top100_answer_rate_by_ctr
return
android_top100_answer_rate_by_ctr
[:
100
]
if
len
(
android_top100_answer_rate_by_ctr
)
>
100
else
android_top100_answer_rate_by_ctr
...
...
eda/recommended_indexs/code/getTop100Diary.py
View file @
31f72a07
...
@@ -20,20 +20,16 @@ def tuple2dict(tuple_result):
...
@@ -20,20 +20,16 @@ def tuple2dict(tuple_result):
def
result2file
(
result_lst
,
fpath
):
def
result2file
(
result_lst
,
fpath
):
with
open
(
fpath
,
'w'
)
as
f
:
with
open
(
fpath
,
'w'
)
as
f
:
header
=
"平台"
+
'
\t
'
+
"diary_id"
+
'
\t
'
+
"diary被点击数"
+
'
\t
'
+
"diary被曝光数"
+
'
\t
'
+
"diary被点击率"
+
'
\t
'
+
"diary链接"
+
'
\n
'
tplt
=
"{0:
\u3000
<4}
\t
{1:
\u3000
<12}
\t
{2:
\u3000
^6}
\t
{3:
\u3000
^6}
\t
{4:
\u3000
<8}
\t
{5:
\u3000
^15}
\n
"
f
.
write
(
"Top 100 diary
\n
"
)
f
.
write
(
"Top 100 diary
\n
"
)
f
.
write
(
"=================================================================
\n
"
)
f
.
write
(
"=================================================================
\n
"
)
f
.
write
(
header
)
f
.
write
(
tplt
.
format
(
"平台"
,
"diary_id"
,
"点击数"
,
"曝光数"
,
"点击率"
,
"diary链接"
)
)
for
i
in
result_lst
:
for
i
in
result_lst
:
for
j
in
i
:
for
j
in
i
:
line
=
""
f
.
write
(
tplt
.
format
(
j
[
0
],
j
[
1
],
j
[
2
],
j
[
3
],
j
[
4
],
j
[
5
]))
for
k
in
j
:
line
+=
str
(
k
)
+
'
\t
'
line
=
line
[:
-
1
]
+
'
\n
'
f
.
write
(
line
)
f
.
write
(
"=================================================================
\n
"
)
f
.
write
(
"=================================================================
\n
"
)
if
i
!=
result_lst
[
-
1
]:
if
i
!=
result_lst
[
-
1
]:
f
.
write
(
header
)
f
.
write
(
tplt
.
format
(
"平台"
,
"diary_id"
,
"点击数"
,
"曝光数"
,
"点击率"
,
"diary链接"
)
)
f
.
write
(
"
\n\n
"
)
f
.
write
(
"
\n\n
"
)
...
@@ -57,7 +53,7 @@ def get_all_top100_diary_rate_by_ctr(all_diary_count_by_click,all_diary_count_by
...
@@ -57,7 +53,7 @@ def get_all_top100_diary_rate_by_ctr(all_diary_count_by_click,all_diary_count_by
for
i
in
all_diary_count_by_click
:
for
i
in
all_diary_count_by_click
:
if
i
in
all_diary_count_by_imp
.
keys
()
and
all_diary_count_by_click
[
i
]
>
4
:
if
i
in
all_diary_count_by_imp
.
keys
()
and
all_diary_count_by_click
[
i
]
>
4
:
url
=
"http://m.igengmei.com/diary_book/"
+
i
[
i
.
index
(
'|'
)
+
1
:]
+
'/'
url
=
"http://m.igengmei.com/diary_book/"
+
i
[
i
.
index
(
'|'
)
+
1
:]
+
'/'
all_top100_diary_rate_by_ctr
.
append
((
"
all
"
,
i
,
all_diary_count_by_click
[
i
],
all_diary_count_by_imp
[
i
],
round
(
all_diary_count_by_click
[
i
]
/
all_diary_count_by_imp
[
i
],
4
),
url
))
all_top100_diary_rate_by_ctr
.
append
((
"
所有
"
,
i
,
all_diary_count_by_click
[
i
],
all_diary_count_by_imp
[
i
],
round
(
all_diary_count_by_click
[
i
]
/
all_diary_count_by_imp
[
i
],
4
),
url
))
all_top100_diary_rate_by_ctr
.
sort
(
key
=
lambda
x
:
x
[
4
],
reverse
=
True
)
all_top100_diary_rate_by_ctr
.
sort
(
key
=
lambda
x
:
x
[
4
],
reverse
=
True
)
return
all_top100_diary_rate_by_ctr
[:
100
]
if
len
(
all_top100_diary_rate_by_ctr
)
>
100
else
all_top100_diary_rate_by_ctr
return
all_top100_diary_rate_by_ctr
[:
100
]
if
len
(
all_top100_diary_rate_by_ctr
)
>
100
else
all_top100_diary_rate_by_ctr
...
@@ -81,7 +77,7 @@ def get_ios_top100_diary_rate_by_ctr(ios_top100_diary_count_by_click,ios_top100_
...
@@ -81,7 +77,7 @@ def get_ios_top100_diary_rate_by_ctr(ios_top100_diary_count_by_click,ios_top100_
for
i
in
ios_diary_count_by_click
:
for
i
in
ios_diary_count_by_click
:
if
i
in
ios_diary_count_by_imp
.
keys
()
and
ios_diary_count_by_click
[
i
]
>
4
:
if
i
in
ios_diary_count_by_imp
.
keys
()
and
ios_diary_count_by_click
[
i
]
>
4
:
url
=
"http://m.igengmei.com/diary_book/"
+
i
[
i
.
index
(
'|'
)
+
1
:]
+
'/'
url
=
"http://m.igengmei.com/diary_book/"
+
i
[
i
.
index
(
'|'
)
+
1
:]
+
'/'
ios_top100_diary_rate_by_ctr
.
append
((
"
ios
"
,
i
,
ios_diary_count_by_click
[
i
],
ios_diary_count_by_imp
[
i
],
round
(
ios_diary_count_by_click
[
i
]
/
ios_diary_count_by_imp
[
i
],
4
),
url
))
ios_top100_diary_rate_by_ctr
.
append
((
"
苹果
"
,
i
,
ios_diary_count_by_click
[
i
],
ios_diary_count_by_imp
[
i
],
round
(
ios_diary_count_by_click
[
i
]
/
ios_diary_count_by_imp
[
i
],
4
),
url
))
ios_top100_diary_rate_by_ctr
.
sort
(
key
=
lambda
x
:
x
[
4
],
reverse
=
True
)
ios_top100_diary_rate_by_ctr
.
sort
(
key
=
lambda
x
:
x
[
4
],
reverse
=
True
)
return
ios_top100_diary_rate_by_ctr
[:
100
]
if
len
(
ios_top100_diary_rate_by_ctr
)
>
100
else
ios_top100_diary_rate_by_ctr
return
ios_top100_diary_rate_by_ctr
[:
100
]
if
len
(
ios_top100_diary_rate_by_ctr
)
>
100
else
ios_top100_diary_rate_by_ctr
...
@@ -105,7 +101,7 @@ def get_android_top100_diary_rate_by_ctr(android_top100_diary_count_by_click,and
...
@@ -105,7 +101,7 @@ def get_android_top100_diary_rate_by_ctr(android_top100_diary_count_by_click,and
for
i
in
android_diary_count_by_click
:
for
i
in
android_diary_count_by_click
:
if
i
in
android_diary_count_by_imp
.
keys
()
and
android_diary_count_by_click
[
i
]
>
4
:
if
i
in
android_diary_count_by_imp
.
keys
()
and
android_diary_count_by_click
[
i
]
>
4
:
url
=
"http://m.igengmei.com/diary_book/"
+
i
[
i
.
index
(
'|'
)
+
1
:]
+
'/'
url
=
"http://m.igengmei.com/diary_book/"
+
i
[
i
.
index
(
'|'
)
+
1
:]
+
'/'
android_top100_diary_rate_by_ctr
.
append
((
"
android
"
,
i
,
android_diary_count_by_click
[
i
],
android_diary_count_by_imp
[
i
],
round
(
android_diary_count_by_click
[
i
]
/
android_diary_count_by_imp
[
i
],
4
),
url
))
android_top100_diary_rate_by_ctr
.
append
((
"
安卓
"
,
i
,
android_diary_count_by_click
[
i
],
android_diary_count_by_imp
[
i
],
round
(
android_diary_count_by_click
[
i
]
/
android_diary_count_by_imp
[
i
],
4
),
url
))
android_top100_diary_rate_by_ctr
.
sort
(
key
=
lambda
x
:
x
[
4
],
reverse
=
True
)
android_top100_diary_rate_by_ctr
.
sort
(
key
=
lambda
x
:
x
[
4
],
reverse
=
True
)
return
android_top100_diary_rate_by_ctr
[:
100
]
if
len
(
android_top100_diary_rate_by_ctr
)
>
100
else
android_top100_diary_rate_by_ctr
return
android_top100_diary_rate_by_ctr
[:
100
]
if
len
(
android_top100_diary_rate_by_ctr
)
>
100
else
android_top100_diary_rate_by_ctr
...
...
eda/recommended_indexs/code/getTop100Question.py
View file @
31f72a07
...
@@ -20,20 +20,16 @@ def tuple2dict(tuple_result):
...
@@ -20,20 +20,16 @@ def tuple2dict(tuple_result):
def
result2file
(
result_lst
,
fpath
):
def
result2file
(
result_lst
,
fpath
):
with
open
(
fpath
,
'w'
)
as
f
:
with
open
(
fpath
,
'w'
)
as
f
:
header
=
"平台"
+
'
\t
'
+
"question_id"
+
'
\t
'
+
"question被点击数"
+
'
\t
'
+
"question被曝光数"
+
'
\t
'
+
"question被点击率"
+
'
\t
'
+
"question链接"
+
'
\n
'
tplt
=
"{0:
\u3000
<4}
\t
{1:
\u3000
<12}
\t
{2:
\u3000
^6}
\t
{3:
\u3000
^6}
\t
{4:
\u3000
<8}
\t
{5:
\u3000
^15}
\n
"
f
.
write
(
"Top 100
q
uestion
\n
"
)
f
.
write
(
"Top 100
Q
uestion
\n
"
)
f
.
write
(
"=================================================================
\n
"
)
f
.
write
(
"=================================================================
\n
"
)
f
.
write
(
header
)
f
.
write
(
tplt
.
format
(
"平台"
,
"question_id"
,
"点击数"
,
"曝光数"
,
"点击率"
,
"question链接"
)
)
for
i
in
result_lst
:
for
i
in
result_lst
:
for
j
in
i
:
for
j
in
i
:
line
=
""
f
.
write
(
tplt
.
format
(
j
[
0
],
j
[
1
],
j
[
2
],
j
[
3
],
j
[
4
],
j
[
5
]))
for
k
in
j
:
line
+=
str
(
k
)
+
'
\t
'
line
=
line
[:
-
1
]
+
'
\n
'
f
.
write
(
line
)
f
.
write
(
"=================================================================
\n
"
)
f
.
write
(
"=================================================================
\n
"
)
if
i
!=
result_lst
[
-
1
]:
if
i
!=
result_lst
[
-
1
]:
f
.
write
(
header
)
f
.
write
(
tplt
.
format
(
"平台"
,
"question_id"
,
"点击数"
,
"曝光数"
,
"点击率"
,
"question链接"
)
)
f
.
write
(
"
\n\n
"
)
f
.
write
(
"
\n\n
"
)
...
@@ -58,14 +54,14 @@ def get_all_top100_question_rate_by_ctr(all_question_count_by_click,all_question
...
@@ -58,14 +54,14 @@ def get_all_top100_question_rate_by_ctr(all_question_count_by_click,all_question
if
all_question_count_by_imp
==
{}:
if
all_question_count_by_imp
==
{}:
for
i
in
all_question_count_by_click
:
for
i
in
all_question_count_by_click
:
url
=
"http://m.igengmei.com/question/"
+
i
[
i
.
index
(
'|'
)
+
1
:]
+
'/'
url
=
"http://m.igengmei.com/question/"
+
i
[
i
.
index
(
'|'
)
+
1
:]
+
'/'
all_top100_question_rate_by_ctr
.
append
((
"
all
"
,
i
,
all_question_count_by_click
[
i
],
0
,
0
,
url
))
all_top100_question_rate_by_ctr
.
append
((
"
所有
"
,
i
,
all_question_count_by_click
[
i
],
0
,
0
,
url
))
all_top100_question_rate_by_ctr
.
sort
(
key
=
lambda
x
:
x
[
2
],
reverse
=
True
)
all_top100_question_rate_by_ctr
.
sort
(
key
=
lambda
x
:
x
[
2
],
reverse
=
True
)
return
all_top100_question_rate_by_ctr
[:
100
]
if
len
(
all_top100_question_rate_by_ctr
)
>
100
else
all_top100_question_rate_by_ctr
return
all_top100_question_rate_by_ctr
[:
100
]
if
len
(
all_top100_question_rate_by_ctr
)
>
100
else
all_top100_question_rate_by_ctr
else
:
else
:
for
i
in
all_question_count_by_click
:
for
i
in
all_question_count_by_click
:
if
i
in
all_question_count_by_imp
.
keys
()
and
all_question_count_by_click
[
i
]
>
2
:
if
i
in
all_question_count_by_imp
.
keys
()
and
all_question_count_by_click
[
i
]
>
2
:
url
=
"http://m.igengmei.com/question/"
+
i
[
i
.
index
(
'|'
)
+
1
:]
+
'/'
url
=
"http://m.igengmei.com/question/"
+
i
[
i
.
index
(
'|'
)
+
1
:]
+
'/'
all_top100_question_rate_by_ctr
.
append
((
"
all
"
,
i
,
all_question_count_by_click
[
i
],
all_question_count_by_imp
[
i
],
round
(
all_question_count_by_click
[
i
]
/
all_question_count_by_imp
[
i
],
4
),
url
))
all_top100_question_rate_by_ctr
.
append
((
"
所有
"
,
i
,
all_question_count_by_click
[
i
],
all_question_count_by_imp
[
i
],
round
(
all_question_count_by_click
[
i
]
/
all_question_count_by_imp
[
i
],
4
),
url
))
all_top100_question_rate_by_ctr
.
sort
(
key
=
lambda
x
:
x
[
4
],
reverse
=
True
)
all_top100_question_rate_by_ctr
.
sort
(
key
=
lambda
x
:
x
[
4
],
reverse
=
True
)
return
all_top100_question_rate_by_ctr
[:
100
]
if
len
(
all_top100_question_rate_by_ctr
)
>
100
else
all_top100_question_rate_by_ctr
return
all_top100_question_rate_by_ctr
[:
100
]
if
len
(
all_top100_question_rate_by_ctr
)
>
100
else
all_top100_question_rate_by_ctr
...
@@ -89,14 +85,14 @@ def get_ios_top100_question_rate_by_ctr(ios_question_count_by_click,ios_question
...
@@ -89,14 +85,14 @@ def get_ios_top100_question_rate_by_ctr(ios_question_count_by_click,ios_question
if
ios_question_count_by_imp
==
{}:
if
ios_question_count_by_imp
==
{}:
for
i
in
ios_question_count_by_click
:
for
i
in
ios_question_count_by_click
:
url
=
"http://m.igengmei.com/question/"
+
i
[
i
.
index
(
'|'
)
+
1
:]
+
'/'
url
=
"http://m.igengmei.com/question/"
+
i
[
i
.
index
(
'|'
)
+
1
:]
+
'/'
ios_top100_question_rate_by_ctr
.
append
((
"
ios
"
,
i
,
ios_question_count_by_click
[
i
],
0
,
0
,
url
))
ios_top100_question_rate_by_ctr
.
append
((
"
苹果
"
,
i
,
ios_question_count_by_click
[
i
],
0
,
0
,
url
))
ios_top100_question_rate_by_ctr
.
sort
(
key
=
lambda
x
:
x
[
2
],
reverse
=
True
)
ios_top100_question_rate_by_ctr
.
sort
(
key
=
lambda
x
:
x
[
2
],
reverse
=
True
)
return
ios_top100_question_rate_by_ctr
[:
100
]
if
len
(
ios_top100_question_rate_by_ctr
)
>
100
else
ios_top100_question_rate_by_ctr
return
ios_top100_question_rate_by_ctr
[:
100
]
if
len
(
ios_top100_question_rate_by_ctr
)
>
100
else
ios_top100_question_rate_by_ctr
else
:
else
:
for
i
in
ios_question_count_by_click
:
for
i
in
ios_question_count_by_click
:
if
i
in
ios_question_count_by_imp
.
keys
()
and
ios_question_count_by_click
[
i
]
>
2
:
if
i
in
ios_question_count_by_imp
.
keys
()
and
ios_question_count_by_click
[
i
]
>
2
:
url
=
"http://m.igengmei.com/question/"
+
i
[
i
.
index
(
'|'
)
+
1
:]
+
'/'
url
=
"http://m.igengmei.com/question/"
+
i
[
i
.
index
(
'|'
)
+
1
:]
+
'/'
ios_top100_question_rate_by_ctr
.
append
((
"
ios
"
,
i
,
ios_question_count_by_click
[
i
],
ios_question_count_by_imp
[
i
],
round
(
ios_question_count_by_click
[
i
]
/
ios_question_count_by_imp
[
i
],
4
),
url
))
ios_top100_question_rate_by_ctr
.
append
((
"
苹果
"
,
i
,
ios_question_count_by_click
[
i
],
ios_question_count_by_imp
[
i
],
round
(
ios_question_count_by_click
[
i
]
/
ios_question_count_by_imp
[
i
],
4
),
url
))
ios_top100_question_rate_by_ctr
.
sort
(
key
=
lambda
x
:
x
[
4
],
reverse
=
True
)
ios_top100_question_rate_by_ctr
.
sort
(
key
=
lambda
x
:
x
[
4
],
reverse
=
True
)
return
ios_top100_question_rate_by_ctr
[:
100
]
if
len
(
ios_top100_question_rate_by_ctr
)
>
100
else
ios_top100_question_rate_by_ctr
return
ios_top100_question_rate_by_ctr
[:
100
]
if
len
(
ios_top100_question_rate_by_ctr
)
>
100
else
ios_top100_question_rate_by_ctr
...
@@ -120,14 +116,14 @@ def get_android_top100_question_rate_by_ctr(android_question_count_by_click,andr
...
@@ -120,14 +116,14 @@ def get_android_top100_question_rate_by_ctr(android_question_count_by_click,andr
if
android_question_count_by_imp
==
{}:
if
android_question_count_by_imp
==
{}:
for
i
in
android_question_count_by_click
:
for
i
in
android_question_count_by_click
:
url
=
"http://m.igengmei.com/question/"
+
i
[
i
.
index
(
'|'
)
+
1
:]
+
'/'
url
=
"http://m.igengmei.com/question/"
+
i
[
i
.
index
(
'|'
)
+
1
:]
+
'/'
android_top100_question_rate_by_ctr
.
append
((
"
android
"
,
i
,
android_question_count_by_click
[
i
],
0
,
0
,
url
))
android_top100_question_rate_by_ctr
.
append
((
"
安卓
"
,
i
,
android_question_count_by_click
[
i
],
0
,
0
,
url
))
android_top100_question_rate_by_ctr
.
sort
(
key
=
lambda
x
:
x
[
2
],
reverse
=
True
)
android_top100_question_rate_by_ctr
.
sort
(
key
=
lambda
x
:
x
[
2
],
reverse
=
True
)
return
android_top100_question_rate_by_ctr
[:
100
]
if
len
(
android_top100_question_rate_by_ctr
)
>
100
else
android_top100_question_rate_by_ctr
return
android_top100_question_rate_by_ctr
[:
100
]
if
len
(
android_top100_question_rate_by_ctr
)
>
100
else
android_top100_question_rate_by_ctr
else
:
else
:
for
i
in
android_question_count_by_click
:
for
i
in
android_question_count_by_click
:
if
i
in
android_question_count_by_imp
.
keys
()
and
android_question_count_by_click
[
i
]
>
2
:
if
i
in
android_question_count_by_imp
.
keys
()
and
android_question_count_by_click
[
i
]
>
2
:
url
=
"http://m.igengmei.com/question/"
+
i
[
i
.
index
(
'|'
)
+
1
:]
+
'/'
url
=
"http://m.igengmei.com/question/"
+
i
[
i
.
index
(
'|'
)
+
1
:]
+
'/'
android_top100_question_rate_by_ctr
.
append
((
"
android
"
,
i
,
android_question_count_by_click
[
i
],
android_question_count_by_imp
[
i
],
round
(
android_question_count_by_click
[
i
]
/
android_question_count_by_imp
[
i
],
4
),
url
))
android_top100_question_rate_by_ctr
.
append
((
"
安卓
"
,
i
,
android_question_count_by_click
[
i
],
android_question_count_by_imp
[
i
],
round
(
android_question_count_by_click
[
i
]
/
android_question_count_by_imp
[
i
],
4
),
url
))
android_top100_question_rate_by_ctr
.
sort
(
key
=
lambda
x
:
x
[
4
],
reverse
=
True
)
android_top100_question_rate_by_ctr
.
sort
(
key
=
lambda
x
:
x
[
4
],
reverse
=
True
)
return
android_top100_question_rate_by_ctr
[:
100
]
if
len
(
android_top100_question_rate_by_ctr
)
>
100
else
android_top100_question_rate_by_ctr
return
android_top100_question_rate_by_ctr
[:
100
]
if
len
(
android_top100_question_rate_by_ctr
)
>
100
else
android_top100_question_rate_by_ctr
...
...
eda/recommended_indexs/code/start.sh
View file @
31f72a07
...
@@ -4,4 +4,4 @@ python getTop100Diary.py
...
@@ -4,4 +4,4 @@ python getTop100Diary.py
python getTop100Answer.py
python getTop100Answer.py
python getTop100Question.py
python getTop100Question.py
dt
=
$(
date
-d
last-day +%Y%m%d
)
dt
=
$(
date
-d
last-day +%Y%m%d
)
cat
1rate_features_
$dt
.txt 2click_times_to_count_uid_
$dt
.txt 3top100_ctr_diary_
$dt
.txt 4top100_ctr_answer_
$dt
.txt 5top100_ctr_question_
$dt
.txt
>
result_all_
$dt
.txt
cat
/data2/models/eda/recommended_indexs/1rate_features_
$dt
.txt /data2/models/eda/recommended_indexs/2click_times_to_count_uid_
$dt
.txt /data2/models/eda/recommended_indexs/3top100_ctr_diary_
$dt
.txt /data2/models/eda/recommended_indexs/4top100_ctr_answer_
$dt
.txt /data2/models/eda/recommended_indexs/5top100_ctr_question_
$dt
.txt
>
/data2/models/eda/recommended_indexs/result_all_
$dt
.txt
\ No newline at end of file
\ No newline at end of file
eda/test/config.py
0 → 100644
View file @
31f72a07
# Directory prefix for the generated EDA report files; it ends with '/'
# so that a file name can be appended to it directly.
DIRECTORY_PATH = "/data2/models/eda/recommended_indexs/"
\ No newline at end of file
eda/test/getTopFeatures.py
0 → 100644
View file @
31f72a07
# -*- coding: UTF-8 -*-
from
utils
import
con_sql
,
tuple2dict
,
get_yesterday_date
from
config
import
DIRECTORY_PATH
class TopFeatures(object):
    """Rank content ids of one type by clicks, impressions or click-through rate.

    Click counts are read from ``data_feed_click`` and impression counts from
    ``data_feed_exposure`` via the ``con_sql`` / ``tuple2dict`` helpers.
    """

    def __init__(self, ndays, platform, cid_type, top_n=-1):
        """
        ndays : how many days back the queried day lies (1;2;3;4..)
        platform : 'all';'ios';'android'
        cid_type : 'diary';'answer';'question'...
        top_n : maximum number of rows in a result; a negative value
                (the default) means "no limit"
        """
        self.ndays = ndays
        # Stored as a ready-made SQL fragment that is appended to "device_type".
        if platform == "ios":
            self.platform = "='AppStore'"
        elif platform == "android":
            self.platform = "!='AppStore'"
        else:
            self.platform = " is not null"
        self.cid_type = cid_type
        self.top_n = top_n

    def _count_sql(self, table):
        """Build the "count per cid" query against *table* for the current settings."""
        # The original template contained "device_type{1}}"; the stray '}'
        # made str.format raise ValueError before any query could be sent.
        # NOTE(review): values are interpolated into the SQL text because
        # con_sql only accepts a finished string - keep cid_type/ndays trusted.
        return ("select cid,count(cid) from {0} "
                "where from_unixtime(time,'%Y-%m-%d')=date_add(curdate(), interval -{1} day) "
                "and device_type{2} and cid_type='{3}' "
                "group by cid order by count(cid) desc").format(
                    table, self.ndays, self.platform, self.cid_type)

    def _content_url(self, cid):
        """Return the mobile-site link for *cid* (the part after '|' is used as the id)."""
        suffix = cid[cid.index('|') + 1:] + '/'
        if self.cid_type == "diary":
            return "http://m.igengmei.com/diary_book/" + suffix
        return "http://m.igengmei.com/{0}/".format(self.cid_type) + suffix

    def _limit(self, rows):
        """Cut *rows* down to top_n entries; a negative top_n keeps every row."""
        limit = int(self.top_n)
        # Slicing with the default -1 used to drop the last row silently.
        return rows if limit < 0 else rows[:limit]

    def get_click_times(self):
        """
        Query the click table for the configured day, platform and cid_type.
        rtype : dict  (cid -> click count)
        """
        # NOTE(review): when the second-to-last character of cid_type is 'e'
        # all spaces are removed from it (and the change is kept on self);
        # the intent is not visible here - confirm. A cid_type shorter than
        # two characters raises IndexError.
        if self.cid_type[-2] == 'e':
            self.cid_type = self.cid_type.replace(' ', '')
        return tuple2dict(con_sql(self._count_sql("data_feed_click")))

    def get_impression_times(self):
        """
        Query the exposure table for the configured day, platform and cid_type.
        rtype : dict  (cid -> impression count)
        """
        # NOTE(review): kept exactly as written - for a cid_type whose
        # second-to-last character is 'e' the value is rebuilt from
        # cid_type[:-6] on both sides of a space. This looks like it may have
        # been meant to be cid_type[:-6] + ' ' + cid_type[-6:]; confirm.
        if self.cid_type[-2] == 'e':
            self.cid_type = self.cid_type[:-6] + ' ' + self.cid_type[:-6:]
        return tuple2dict(con_sql(self._count_sql("data_feed_exposure")))

    def get_result(self, clk, imp, clk_n=2, result_types="ctr"):
        """
        result_types : "clk";"imp";"ctr"
        clk : dict  (cid -> click count)
        imp : dict  (cid -> impression count)
        clk_n : click threshold used when ranking by CTR; only ids with
                more than clk_n clicks are kept
        rtype : list of (cid_type, cid, clicks, impressions, ctr, url)
        """
        # NOTE(review): the first tuple field is the cid_type, although
        # result2file prints it under the "platform" column - confirm.
        label = self.cid_type.strip()
        topn = []

        # Top N by clicks (also used when no impression data is available).
        if not imp or result_types == "clk":
            for cid in clk:
                topn.append((label, cid, clk[cid], 0, 0, self._content_url(cid)))
            topn.sort(key=lambda x: x[2], reverse=True)
            return self._limit(topn)

        # Top N by impressions (also used when no click data is available).
        if not clk or result_types == "imp":
            for cid in imp:
                topn.append((label, cid, 0, imp[cid], 0, self._content_url(cid)))
            topn.sort(key=lambda x: x[3], reverse=True)
            return self._limit(topn)

        # Top N by click-through rate, restricted to ids seen in both dicts.
        for cid in clk:
            if cid in imp and clk[cid] > clk_n:
                topn.append((label, cid, clk[cid], imp[cid],
                             round(clk[cid] / imp[cid], 4), self._content_url(cid)))
        topn.sort(key=lambda x: x[4], reverse=True)
        return self._limit(topn)

    def result2file(self, result_lst, fpath):
        """
        result_lst : [all,ios,android]  (one list of result rows per section)
        fpath : output filename
        rtype : none
        """
        # \u3000 (ideographic space) is the fill character of every column.
        tplt = "{0:\u3000<4}\t{1:\u3000<12}\t{2:\u3000^6}\t{3:\u3000^6}\t{4:\u3000<8}\t{5:\u3000^15}\n"
        sep = "=================================================================\n"
        header = tplt.format("平台", "{}_id".format(self.cid_type), "点击数", "曝光数", "点击率",
                             "{}链接".format(self.cid_type))
        last_index = len(result_lst) - 1
        # The report contains Chinese text, so the encoding is fixed instead
        # of depending on the locale of the machine running the job.
        with open(fpath, 'w', encoding='utf-8') as f:
            f.write("Top {0} {1}\n".format(self.top_n, self.cid_type))
            f.write(sep)
            f.write(header)
            for index, rows in enumerate(result_lst):
                for row in rows:
                    f.write(tplt.format(row[0], row[1], row[2], row[3], row[4], row[5]))
                f.write(sep)
                # Repeat the header before every following section. The
                # position is compared rather than the content, so sections
                # with equal rows still get their header.
                if index != last_index:
                    f.write(header)
            f.write("\n\n")
def main():
    """Write yesterday's top-100 diary CTR report for all, ios and android.

    One TopFeatures instance is created per platform so that every section
    is queried with its own device filter, and the three result lists are
    written to a single file under DIRECTORY_PATH.
    """
    result_lst = []
    top_diary = None
    for platform in ("all", "ios", "android"):
        # top_n belongs to the constructor; get_result takes (clk, imp, ...),
        # so the previous get_result("ctr", clk, imp, top_n=100) call failed.
        top_diary = TopFeatures(1, platform, "diary", top_n=100)
        clk_diary_times = top_diary.get_click_times()
        imp_diary_times = top_diary.get_impression_times()
        result_lst.append(
            top_diary.get_result(clk_diary_times, imp_diary_times, result_types="ctr"))

    # The file name carries yesterday's date; the "%s" placeholder was
    # previously left unfilled.
    output_path = DIRECTORY_PATH + "5top100_ctr_diary_%s.txt" % get_yesterday_date()
    top_diary.result2file(result_lst, output_path)


if __name__ == '__main__':
    main()
\ No newline at end of file
eda/test/utils.py
0 → 100644
View file @
31f72a07
# -*- coding: UTF-8 -*-
import
pymysql
import
datetime
def con_sql(sql):
    """Run *sql* against the database and return every fetched row.

    :type sql : str
    :rtype : tuple
    """
    # NOTE(review): host and credentials are hard-coded here; consider moving
    # them to configuration / environment variables.
    db = pymysql.connect(host='10.66.157.22', port=4000, user='root',
                         passwd='3SYz54LS9#^9sBvC', db='jerry_test')
    try:
        cursor = db.cursor()
        cursor.execute(sql)
        return cursor.fetchall()
    finally:
        # Close the connection even when the query raises, so a failing
        # statement does not leak it.
        db.close()
def tuple2dict(tuple_result):
    """Convert SQL rows of the form ((key, value), ...) into a dict.

    Only the first two fields of each row are used; when a key occurs more
    than once the value of the last row wins.

    :type tuple_result : tuple (any iterable of indexable rows is accepted)
    :rtype : dict
    """
    return {row[0]: row[1] for row in tuple_result}
def get_yesterday_date():
    """Return yesterday's date formatted as YYYYMMDD, e.g. "20180808".

    :rtype : str
    """
    one_day = datetime.timedelta(days=1)
    return (datetime.date.today() - one_day).strftime("%Y%m%d")
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment