Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in
Toggle navigation
F
ffm-baseline
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
ML
ffm-baseline
Commits
62b8bc34
Commit
62b8bc34
authored
Aug 10, 2018
by
高雅喆
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Add a comment and a new class ClkCidUidRate
parent
b49bb083
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
82 additions
and
17 deletions
+82
-17
getClkCidUidRate.py
eda/test/getClkCidUidRate.py
+61
-0
getTopFeatures.py
eda/test/getTopFeatures.py
+21
-17
No files found.
eda/test/getClkCidUidRate.py
0 → 100644
View file @
62b8bc34
# -*- coding: UTF-8 -*-
from
utils
import
con_sql
,
tuple2dict
,
get_yesterday_date
from
config
import
DIRECTORY_PATH
class ClkCidUidRate(object):
    """Parameter holder for a "share of users who clicked" statistic.

    The query and the file export are placeholders in this revision:
    both methods do nothing and return ``None``.
    """

    def __init__(self, ndays, platform, cid_type):
        """Store the query parameters.

        ndays    : 1;2;3;4..  The number of days from the current time.
        platform : 'all';'ios';'android'. Stored on the instance as a
                   comparison fragment rather than the raw name; any value
                   other than 'ios'/'android' maps to " is not null".
                   NOTE(review): presumably appended to a column name when
                   building SQL -- no query is visible in this class.
        cid_type : 'diary';'answer';'question';"everything";"nothing"...
        """
        # Start from the catch-all fragment, then narrow for known platforms.
        platform_filter = " is not null"
        if platform == "ios":
            platform_filter = "='AppStore'"
        elif platform == "android":
            platform_filter = "!='AppStore'"

        self.cid_type = cid_type
        self.platform = platform_filter
        self.ndays = ndays

    def get_clk_cid_uid_rate(self, platform):
        """Placeholder for the click-user-share query.

        platform : display label, "所有" (all); "苹果" (Apple);
                   "安卓" (Android) -- used for readable output.
        rtype    : intended to be a list; not implemented yet, so this
                   currently returns None.
        """
        return None

    def result2file(self, result_lst, fpath):
        """Placeholder for writing result_lst to fpath; currently a no-op."""
        return None
def main():
    """Query the click-user share for every content type on every platform.

    For each content type, in report order, a ``ClkCidUidRate`` is built
    for the last 1 day on each platform and ``get_clk_cid_uid_rate`` is
    called with that platform's display label. The calls, their arguments
    and their order are the same as in the previous hand-unrolled version.
    """
    # (platform key given to the constructor, display label given to the query)
    platforms = (
        ("all", "所有"),
        ("ios", "苹果"),
        ("android", "安卓"),
    )
    # Report sections, in order:
    #   1. share of users who clicked a diary
    #   2. share of users who clicked an answer
    #   3. share of users who clicked a question
    #   4. share of users with at least one click
    #   5. share of users with no click
    cid_types = ("diary", "answer", "question", "everything", "nothing")

    # One [all, ios, android] result list per content type.
    # NOTE(review): nothing in the visible code consumes these lists yet --
    # presumably they are meant to be passed to result2file(); confirm.
    results = {}
    for cid_type in cid_types:
        results[cid_type] = [
            ClkCidUidRate(1, platform, cid_type).get_clk_cid_uid_rate(label)
            for platform, label in platforms
        ]
eda/test/getTopFeatures.py
View file @
62b8bc34
...
...
@@ -6,10 +6,10 @@ from config import DIRECTORY_PATH
class
TopFeatures
(
object
):
def
__init__
(
self
,
ndays
,
platform
,
cid_type
,
top_n
=-
1
):
"""
ndays : 1;2;3;4..
ndays : 1;2;3;4..
#The number of days from the current time
platform : 'all';'ios';'android'
cid_type : 'diary';'answer';'question'...
top_n : the rows of the result
top_n : the
top
rows of the result
"""
self
.
ndays
=
ndays
if
platform
==
"ios"
:
...
...
@@ -36,7 +36,7 @@ class TopFeatures(object):
def
get_impression_times
(
self
):
# rtype : dict
if
self
.
platform
[
-
2
]
==
'e'
:
if
self
.
platform
[
-
2
]
==
'e'
:
#注意:曝光表中AppStore有空格
self
.
platform
=
self
.
platform
[:
-
6
]
+
' '
+
self
.
platform
[
-
6
:]
sql
=
"select cid,count(cid) from data_feed_exposure
\
where from_unixtime(time,'
%
Y-
%
m-
%
d')=date_add(curdate(), interval -{0} day)
\
...
...
@@ -45,12 +45,13 @@ class TopFeatures(object):
imp_times
=
tuple2dict
(
con_sql
(
sql
))
return
imp_times
def
get_result
(
self
,
clk
=
{},
imp
=
{},
clk_n
=
2
,
result_types
=
"ctr"
):
def
get_result
(
self
,
platform
,
clk
=
{},
imp
=
{},
clk_n
=
2
,
result_types
=
"ctr"
):
"""
platform : "所有";"苹果","安卓" #方便显示
clk : dict
imp : dict
clk_n : 获取topN点击率时,过滤的点击数
result_types :
"clk";"imp";"ctr"
result_types :
sorted by ["clk","imp","ctr"]
rtype : list
"""
topn
=
[]
...
...
@@ -61,7 +62,7 @@ class TopFeatures(object):
url
=
"http://m.igengmei.com/diary_book/"
+
i
[
i
.
index
(
'|'
)
+
1
:]
+
'/'
else
:
url
=
"http://m.igengmei.com/{0}/"
.
format
(
self
.
cid_type
)
+
i
[
i
.
index
(
'|'
)
+
1
:]
+
'/'
topn
.
append
((
self
.
platform
.
strip
()
,
i
,
clk
[
i
],
0
,
0
,
url
))
topn
.
append
((
platform
,
i
,
clk
[
i
],
0
,
0
,
url
))
topn
.
sort
(
key
=
lambda
x
:
x
[
2
],
reverse
=
True
)
return
topn
[:
int
(
self
.
top_n
)]
#获取topN的曝光
...
...
@@ -71,7 +72,7 @@ class TopFeatures(object):
url
=
"http://m.igengmei.com/diary_book/"
+
i
[
i
.
index
(
'|'
)
+
1
:]
+
'/'
else
:
url
=
"http://m.igengmei.com/{0}/"
.
format
(
self
.
cid_type
)
+
i
[
i
.
index
(
'|'
)
+
1
:]
+
'/'
topn
.
append
((
self
.
platform
.
strip
()
,
i
,
0
,
imp
[
i
],
0
,
url
))
topn
.
append
((
platform
,
i
,
0
,
imp
[
i
],
0
,
url
))
topn
.
sort
(
key
=
lambda
x
:
x
[
3
],
reverse
=
True
)
return
topn
[:
int
(
self
.
top_n
)]
#获取topN的ctr
...
...
@@ -82,7 +83,7 @@ class TopFeatures(object):
url
=
"http://m.igengmei.com/diary_book/"
+
i
[
i
.
index
(
'|'
)
+
1
:]
+
'/'
else
:
url
=
"http://m.igengmei.com/{0}/"
.
format
(
self
.
cid_type
)
+
i
[
i
.
index
(
'|'
)
+
1
:]
+
'/'
topn
.
append
((
self
.
platform
.
strip
()
,
i
,
clk
[
i
],
imp
[
i
],
round
(
clk
[
i
]
/
imp
[
i
],
4
),
url
))
topn
.
append
((
platform
,
i
,
clk
[
i
],
imp
[
i
],
round
(
clk
[
i
]
/
imp
[
i
],
4
),
url
))
topn
.
sort
(
key
=
lambda
x
:
x
[
4
],
reverse
=
True
)
return
topn
[:
int
(
self
.
top_n
)]
...
...
@@ -117,58 +118,61 @@ def main():
top_diary_all
=
TopFeatures
(
1
,
"all"
,
"diary"
,
100
)
clk_diary_times_all
=
top_diary_all
.
get_click_times
()
imp_diary_times_all
=
top_diary_all
.
get_impression_times
()
clk_diary_ctr_all
=
top_diary_all
.
get_result
(
clk_diary_times_all
,
imp_diary_times_all
,
2
,
"ctr"
)
clk_diary_ctr_all
=
top_diary_all
.
get_result
(
"所有"
,
clk_diary_times_all
,
imp_diary_times_all
,
4
,
"ctr"
)
top_diary_ios
=
TopFeatures
(
1
,
"ios"
,
"diary"
,
100
)
clk_diary_times_ios
=
top_diary_ios
.
get_click_times
()
imp_diary_times_ios
=
top_diary_ios
.
get_impression_times
()
clk_diary_ctr_ios
=
top_diary_ios
.
get_result
(
clk_diary_times_ios
,
imp_diary_times_ios
,
2
,
"ctr"
)
clk_diary_ctr_ios
=
top_diary_ios
.
get_result
(
"苹果"
,
clk_diary_times_ios
,
imp_diary_times_ios
,
4
,
"ctr"
)
top_diary_android
=
TopFeatures
(
1
,
"android"
,
"diary"
,
100
)
clk_diary_times_android
=
top_diary_android
.
get_click_times
()
imp_diary_times_android
=
top_diary_android
.
get_impression_times
()
clk_diary_ctr_android
=
top_diary_android
.
get_result
(
clk_diary_times_android
,
imp_diary_times_android
,
2
,
"ctr"
)
clk_diary_ctr_android
=
top_diary_android
.
get_result
(
"安卓"
,
clk_diary_times_android
,
imp_diary_times_android
,
4
,
"ctr"
)
result_lst
=
[
clk_diary_ctr_all
,
clk_diary_ctr_ios
,
clk_diary_ctr_android
]
output_path
=
DIRECTORY_PATH
+
"top100_ctr_diary_{}.txt"
.
format
(
get_yesterday_date
())
top_diary_all
.
result2file
(
result_lst
,
output_path
)
print
(
"已获取 Top diary 特征"
)
#2. Top answer
top_answer_all
=
TopFeatures
(
1
,
"all"
,
"answer"
,
100
)
clk_answer_times_all
=
top_answer_all
.
get_click_times
()
imp_answer_times_all
=
top_answer_all
.
get_impression_times
()
clk_answer_ctr_all
=
top_answer_all
.
get_result
(
clk_answer_times_all
,
imp_answer_times_all
,
2
,
"ctr"
)
clk_answer_ctr_all
=
top_answer_all
.
get_result
(
"所有"
,
clk_answer_times_all
,
imp_answer_times_all
,
2
,
"ctr"
)
top_answer_ios
=
TopFeatures
(
1
,
"ios"
,
"answer"
,
100
)
clk_answer_times_ios
=
top_answer_ios
.
get_click_times
()
imp_answer_times_ios
=
top_answer_ios
.
get_impression_times
()
clk_answer_ctr_ios
=
top_answer_ios
.
get_result
(
clk_answer_times_ios
,
imp_answer_times_ios
,
2
,
"ctr"
)
clk_answer_ctr_ios
=
top_answer_ios
.
get_result
(
"苹果"
,
clk_answer_times_ios
,
imp_answer_times_ios
,
2
,
"ctr"
)
top_answer_android
=
TopFeatures
(
1
,
"android"
,
"answer"
,
100
)
clk_answer_times_android
=
top_answer_android
.
get_click_times
()
imp_answer_times_android
=
top_answer_android
.
get_impression_times
()
clk_answer_ctr_android
=
top_answer_android
.
get_result
(
clk_answer_times_android
,
imp_answer_times_android
,
2
,
"ctr"
)
clk_answer_ctr_android
=
top_answer_android
.
get_result
(
"安卓"
,
clk_answer_times_android
,
imp_answer_times_android
,
2
,
"ctr"
)
result_lst
=
[
clk_answer_ctr_all
,
clk_answer_ctr_ios
,
clk_answer_ctr_android
]
output_path
=
DIRECTORY_PATH
+
"top100_ctr_answer_{}.txt"
.
format
(
get_yesterday_date
())
top_answer_all
.
result2file
(
result_lst
,
output_path
)
print
(
"已获取 Top answer 特征"
)
#3. Top question
top_question_all
=
TopFeatures
(
1
,
"all"
,
"question"
,
100
)
clk_question_times_all
=
top_question_all
.
get_click_times
()
imp_question_times_all
=
top_question_all
.
get_impression_times
()
clk_question_ctr_all
=
top_question_all
.
get_result
(
clk_question_times_all
,
imp_question_times_all
,
2
,
"ctr"
)
clk_question_ctr_all
=
top_question_all
.
get_result
(
"所有"
,
clk_question_times_all
,
imp_question_times_all
,
2
,
"ctr"
)
top_question_ios
=
TopFeatures
(
1
,
"ios"
,
"question"
,
100
)
clk_question_times_ios
=
top_question_ios
.
get_click_times
()
imp_question_times_ios
=
top_question_ios
.
get_impression_times
()
clk_question_ctr_ios
=
top_question_ios
.
get_result
(
clk_question_times_ios
,
imp_question_times_ios
,
2
,
"ctr"
)
clk_question_ctr_ios
=
top_question_ios
.
get_result
(
"苹果"
,
clk_question_times_ios
,
imp_question_times_ios
,
2
,
"ctr"
)
top_question_android
=
TopFeatures
(
1
,
"android"
,
"question"
,
100
)
clk_question_times_android
=
top_question_android
.
get_click_times
()
imp_question_times_android
=
top_question_android
.
get_impression_times
()
clk_question_ctr_android
=
top_question_android
.
get_result
(
clk_question_times_android
,
imp_question_times_android
,
2
,
"ctr"
)
clk_question_ctr_android
=
top_question_android
.
get_result
(
"安卓"
,
clk_question_times_android
,
imp_question_times_android
,
2
,
"ctr"
)
print
(
"已获取 Top question 特征"
)
result_lst
=
[
clk_question_ctr_all
,
clk_question_ctr_ios
,
clk_question_ctr_android
]
output_path
=
DIRECTORY_PATH
+
"top100_ctr_question_{}.txt"
.
format
(
get_yesterday_date
())
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment