Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in
Toggle navigation
F
ffm-baseline
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
ML
ffm-baseline
Commits
b20353fd
Commit
b20353fd
authored
Aug 09, 2018
by
张彦钊
Browse files
Options
Browse Files
Download
Plain Diff
Merge branch 'master' of git.wanmeizhensuo.com:ML/ffm-baseline
modify add_data_to_redis
parents
5b36cb84
a6f24bd0
Hide whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
26 additions
and
20 deletions
+26
-20
getClickTimes2CountUid.py
eda/recommended_indexs/code/getClickTimes2CountUid.py
+2
-2
getRate.py
eda/recommended_indexs/code/getRate.py
+19
-2
getTop100Answer.py
eda/recommended_indexs/code/getTop100Answer.py
+1
-5
getTop100Diary.py
eda/recommended_indexs/code/getTop100Diary.py
+1
-5
getTop100Question.py
eda/recommended_indexs/code/getTop100Question.py
+3
-6
No files found.
eda/recommended_indexs/code/getClickTimes2CountUid.py
View file @
b20353fd
...
...
@@ -43,11 +43,11 @@ def df2file(df,fpath):
def
main
():
print
(
"2.开始获取
用户点击次数表
..."
)
print
(
"2.开始获取
Top特征
..."
)
output_path
=
"/data2/models/eda/recommended_indexs/2click_times_to_count_uid_
%
s.txt"
%
get_yesterday_date
()
df
=
get_click_times_to_count_uid_df
()
df2file
(
df
,
output_path
)
print
(
"
获取完成
"
)
print
(
"
2.1已将用户点击次数分布存入文件
"
)
...
...
eda/recommended_indexs/code/getRate.py
View file @
b20353fd
...
...
@@ -16,7 +16,24 @@ def get_yesterday_date():
def
result2file
(
fpath
):
with
open
(
fpath
,
'w'
)
as
f
:
tplt
=
"{0:
\u3000
<6}
\t
{1:
\u3000
<15}
\t
{2:
\u3000
<15}
\t
{3:
\u3000
<15}
\n
"
f
.
write
(
"#注意:以下数据都是首页的
\n
"
)
line
=
"""内容概览:以下所有数据都是首页的
1. 比例特征
1.1 answer曝光占比(=answer被曝光数/总cid被曝光数)
1.2 活跃用户点击率(=活跃用户点击次数/活跃用户曝光次数)
1.3 点击answer用户占比(=点击answer用户数/曝光answer用户数)
1.4 点击diary用户占比(=点击diary用户数/曝光diary用户数)
1.5 无点击用户占比(=无点击用户数/有曝光用户数)
2.Top特征
2.1 用户点击次数分布(第一列:用户点击次数;第二列:独立用户数量)
2.2 Top 100 diary(sorted by ctr)
2.3 Top 100 Answer(sorted by ctr)
2.4 Top 100 Question(sorted by click times)
具体内容:以下所有数据都是首页的
"""
f
.
write
(
line
)
f
.
write
(
"#1. 比例特征
\n
"
)
f
.
write
(
"=================================================================
\n
"
)
f
.
write
(
"#1.1answer曝光占比(=answer被曝光数/总cid被曝光数)
\n
"
)
...
...
@@ -84,7 +101,7 @@ def main():
output_path
=
"/data2/models/eda/recommended_indexs/1rate_features_
%
s.txt"
%
get_yesterday_date
()
print
(
"开始获取比例特征..."
)
result2file
(
output_path
)
print
(
"已
将所有比例特征存入文件
"
)
print
(
"已
完成所有比例特征提取
"
)
if
__name__
==
'__main__'
:
...
...
eda/recommended_indexs/code/getTop100Answer.py
View file @
b20353fd
...
...
@@ -107,21 +107,17 @@ def get_android_top100_answer_rate_by_ctr(android_answer_count_by_click,android_
return
android_top100_answer_rate_by_ctr
[:
100
]
if
len
(
android_top100_answer_rate_by_ctr
)
>
100
else
android_top100_answer_rate_by_ctr
if
__name__
==
"__main__"
:
print
(
"开始获取top100点击率的answer..."
)
all_answer_count_by_click
=
get_all_answer_count_by_click
()
all_answer_count_by_imp
=
get_all_answer_count_by_imp
()
all_top100_answer_rate_by_ctr
=
get_all_top100_answer_rate_by_ctr
(
all_answer_count_by_click
,
all_answer_count_by_imp
)
print
(
"4.1已获得所有平台的top100点击率的answer"
)
ios_answer_count_by_click
=
get_ios_answer_count_by_click
()
ios_answer_count_by_imp
=
get_ios_answer_count_by_imp
()
ios_top100_answer_rate_by_ctr
=
get_ios_top100_answer_rate_by_ctr
(
ios_answer_count_by_click
,
ios_answer_count_by_imp
)
print
(
"4.2已获得ios平台的top100点击率的answer"
)
android_answer_count_by_click
=
get_android_answer_rate_by_click
()
android_answer_count_by_imp
=
get_android_answer_rate_by_imp
()
android_top100_answer_rate_by_ctr
=
get_android_top100_answer_rate_by_ctr
(
android_answer_count_by_click
,
android_answer_count_by_imp
)
print
(
"4.3已获得安卓平台的top100点击率的answer"
)
result_lst
=
[
all_top100_answer_rate_by_ctr
,
ios_top100_answer_rate_by_ctr
,
android_top100_answer_rate_by_ctr
]
today
=
datetime
.
date
.
today
()
...
...
@@ -129,4 +125,4 @@ if __name__ == "__main__":
yesterday
=
yesterday
.
strftime
(
"
%
Y
%
m
%
d"
)
output_path
=
"/data2/models/eda/recommended_indexs/4top100_ctr_answer_
%
s.txt"
%
yesterday
result2file
(
result_lst
,
output_path
)
print
(
"已将top100点击率的answer存入文件"
)
print
(
"
2.3
已将top100点击率的answer存入文件"
)
eda/recommended_indexs/code/getTop100Diary.py
View file @
b20353fd
...
...
@@ -106,21 +106,17 @@ def get_android_top100_diary_rate_by_ctr(android_top100_diary_count_by_click,and
return
android_top100_diary_rate_by_ctr
[:
100
]
if
len
(
android_top100_diary_rate_by_ctr
)
>
100
else
android_top100_diary_rate_by_ctr
if
__name__
==
"__main__"
:
print
(
"开始获取top100点击率的diary..."
)
all_diary_count_by_click
=
get_all_diary_count_by_click
()
all_diary_count_by_imp
=
get_all_diary_count_by_imp
()
all_top100_diary_rate_by_ctr
=
get_all_top100_diary_rate_by_ctr
(
all_diary_count_by_click
,
all_diary_count_by_imp
)
print
(
"3.1已获得所有平台的top100点击率的diary"
)
ios_diary_count_by_click
=
get_ios_diary_count_by_click
()
ios_diary_count_by_imp
=
get_ios_diary_count_by_imp
()
ios_top100_diary_rate_by_ctr
=
get_ios_top100_diary_rate_by_ctr
(
ios_diary_count_by_click
,
ios_diary_count_by_imp
)
print
(
"3.2已获得ios平台的top100点击率的diary"
)
android_diary_count_by_click
=
get_android_diary_rate_by_click
()
android_diary_count_by_imp
=
get_android_diary_rate_by_imp
()
android_top100_diary_rate_by_ctr
=
get_android_top100_diary_rate_by_ctr
(
android_diary_count_by_click
,
android_diary_count_by_imp
)
print
(
"3.3已获得安卓平台的top100点击率的diary"
)
result_lst
=
[
all_top100_diary_rate_by_ctr
,
ios_top100_diary_rate_by_ctr
,
android_top100_diary_rate_by_ctr
]
today
=
datetime
.
date
.
today
()
...
...
@@ -128,4 +124,4 @@ if __name__ == "__main__":
yesterday
=
yesterday
.
strftime
(
"
%
Y
%
m
%
d"
)
output_path
=
"/data2/models/eda/recommended_indexs/3top100_ctr_diary_
%
s.txt"
%
yesterday
result2file
(
result_lst
,
output_path
)
print
(
"已将top100点击率的diary存入文件"
)
print
(
"
2.2
已将top100点击率的diary存入文件"
)
eda/recommended_indexs/code/getTop100Question.py
View file @
b20353fd
...
...
@@ -21,7 +21,7 @@ def tuple2dict(tuple_result):
def
result2file
(
result_lst
,
fpath
):
with
open
(
fpath
,
'w'
)
as
f
:
tplt
=
"{0:
\u3000
<4}
\t
{1:
\u3000
<12}
\t
{2:
\u3000
^6}
\t
{3:
\u3000
^6}
\t
{4:
\u3000
<8}
\t
{5:
\u3000
^15}
\n
"
f
.
write
(
"Top 100 Question
\n
"
)
f
.
write
(
"Top 100 Question
(曝光表里cid类型没有question,因此下面的曝光数和点击率都为0)
\n
"
)
f
.
write
(
"=================================================================
\n
"
)
f
.
write
(
tplt
.
format
(
"平台"
,
"question_id"
,
"点击数"
,
"曝光数"
,
"点击率"
,
"question链接"
))
for
i
in
result_lst
:
...
...
@@ -128,21 +128,17 @@ def get_android_top100_question_rate_by_ctr(android_question_count_by_click,andr
return
android_top100_question_rate_by_ctr
[:
100
]
if
len
(
android_top100_question_rate_by_ctr
)
>
100
else
android_top100_question_rate_by_ctr
if
__name__
==
"__main__"
:
print
(
"开始获取top100点击率的question..."
)
all_question_count_by_click
=
get_all_question_count_by_click
()
all_question_count_by_imp
=
get_all_question_count_by_imp
()
all_top100_question_rate_by_ctr
=
get_all_top100_question_rate_by_ctr
(
all_question_count_by_click
,
all_question_count_by_imp
)
print
(
"5.1已获得所有平台的top100点击率的question"
)
ios_question_count_by_click
=
get_ios_question_count_by_click
()
ios_question_count_by_imp
=
get_ios_question_count_by_imp
()
ios_top100_question_rate_by_ctr
=
get_ios_top100_question_rate_by_ctr
(
ios_question_count_by_click
,
ios_question_count_by_imp
)
print
(
"5.2已获得ios平台的top100点击率的question"
)
android_question_count_by_click
=
get_android_question_rate_by_click
()
android_question_count_by_imp
=
get_android_question_rate_by_imp
()
android_top100_question_rate_by_ctr
=
get_android_top100_question_rate_by_ctr
(
android_question_count_by_click
,
android_question_count_by_imp
)
print
(
"5.3已获得安卓平台的top100点击率的question"
)
result_lst
=
[
all_top100_question_rate_by_ctr
,
ios_top100_question_rate_by_ctr
,
android_top100_question_rate_by_ctr
]
today
=
datetime
.
date
.
today
()
...
...
@@ -150,4 +146,5 @@ if __name__ == "__main__":
yesterday
=
yesterday
.
strftime
(
"
%
Y
%
m
%
d"
)
output_path
=
"/data2/models/eda/recommended_indexs/5top100_ctr_question_
%
s.txt"
%
yesterday
result2file
(
result_lst
,
output_path
)
print
(
"已将top100点击率的question存入文件"
)
print
(
"2.4已将top100点击率的question存入文件"
)
print
(
"已完成所有Top特征提取"
)
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment