Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in
Toggle navigation
F
ffm-baseline
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
ML
ffm-baseline
Commits
716da1d4
Commit
716da1d4
authored
Aug 10, 2018
by
高雅喆
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
add a new class getClkCidUidRate.py
parent
3a6de1e5
Show whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
42 additions
and
16 deletions
+42
-16
getClickZeroUidRate.py
eda/recommended_indexs/code/getClickZeroUidRate.py
+0
-1
getClkCidUidRate.py
eda/test/getClkCidUidRate.py
+41
-14
getTopFeatures.py
eda/test/getTopFeatures.py
+1
-1
No files found.
eda/recommended_indexs/code/getClickZeroUidRate.py
View file @
716da1d4
...
...
@@ -12,7 +12,6 @@ def con_sql(sql):
return
result
#1 获取所有平台的0点击用户占比
def
get_all_click_zero_rate
():
sql
=
"select count(distinct(device_id)) from data_feed_click where from_unixtime(time,'
%
Y-
%
m-
%
d')=date_add(curdate(), interval -1 day)"
...
...
eda/test/getClkCidUidRate.py
View file @
716da1d4
...
...
@@ -8,7 +8,7 @@ class ClkCidUidRate(object):
"""
ndays : 1;2;3;4.. #The number of days from the current time
platform : 'all';'ios';'android'
cid_type : 'diary';'answer';'question';"
everything";"
nothing"...
cid_type : 'diary';'answer';'question';"nothing"...
"""
self
.
ndays
=
ndays
if
platform
==
"ios"
:
...
...
@@ -17,14 +17,37 @@ class ClkCidUidRate(object):
self
.
platform
=
"!='AppStore'"
else
:
self
.
platform
=
" is not null"
self
.
cid_type
=
cid_type
if
cid_type
==
"nothing"
:
self
.
cid_type
=
" is not null"
else
:
self
.
cid_type
=
"='"
+
cid_type
+
"'"
def get_clk_cid_uid_rate(self, platform):
    """Return the click (or no-click) device rate for the configured filters.

    Counts distinct ``device_id`` values in ``data_feed_click`` and in
    ``data_feed_exposure`` for the day ``self.ndays`` days before today,
    restricted by the ``self.platform`` and ``self.cid_type`` SQL predicates,
    and divides one by the other.

    Args:
        platform: Display label for the result row (callers pass e.g.
            "所有", "苹果", "安卓"). It is only echoed back in the result
            and does not affect the queries.

    Returns:
        list: ``[platform, count, imp_count, rate]`` where ``imp_count`` is
        the number of exposed devices and ``rate`` is ``count / imp_count``
        rounded to 4 decimals. When ``self.cid_type`` is ``" is not null"``
        (the "nothing" mode) ``count`` is the number of exposed devices
        without a click (``imp_count - clk_count``); otherwise it is the
        number of devices that clicked. If there are no exposures the rate
        is reported as ``0.0`` instead of raising ``ZeroDivisionError``.
    """
    # Work on local copies so that calling this method never changes
    # self.platform (the original rewrote the attribute in place).
    click_device_filter = self.platform
    exposure_device_filter = self.platform
    if self.platform[-2] == 'e':
        # Predicate ends with "...Store'": the click table uses the value
        # without a space, while (per the original author's note) the
        # exposure table stores it with a space between "App" and "Store".
        click_device_filter = self.platform.replace(' ', '')
        exposure_device_filter = (
            click_device_filter[:-6] + ' ' + click_device_filter[-6:]
        )

    # One template serves both tables; only the table name and the
    # device_type predicate differ between the two queries.
    count_sql = (
        "select count(distinct(device_id)) from {table} "
        "where from_unixtime(time,'%Y-%m-%d')"
        "=date_add(curdate(), interval -{ndays} day) "
        "and device_type{device} "
        "and cid_type{cid}"
    )

    sql_clk = count_sql.format(
        table="data_feed_click",
        ndays=self.ndays,
        device=click_device_filter,
        cid=self.cid_type,
    )
    clk_count = con_sql(sql_clk)[0][0]

    sql_imp = count_sql.format(
        table="data_feed_exposure",
        ndays=self.ndays,
        device=exposure_device_filter,
        cid=self.cid_type,
    )
    imp_count = con_sql(sql_imp)[0][0]

    if self.cid_type == " is not null":
        # "nothing" mode: report devices that were exposed but never clicked.
        matched_count = imp_count - clk_count
    else:
        matched_count = clk_count

    # Guard against an empty exposure set (e.g. a cid_type that never
    # appears in the exposure table), which previously crashed the run.
    rate = round(matched_count / imp_count, 4) if imp_count else 0.0
    return [platform, matched_count, imp_count, rate]
def
result2file
(
self
,
result_lst
,
fpath
):
...
...
@@ -38,24 +61,28 @@ def main():
click_diary_ios
=
ClkCidUidRate
(
1
,
"ios"
,
"diary"
)
.
get_clk_cid_uid_rate
(
"苹果"
)
click_diary_android
=
ClkCidUidRate
(
1
,
"android"
,
"diary"
)
.
get_clk_cid_uid_rate
(
"安卓"
)
click_diary_result
=
[
click_diary_all
,
click_diary_ios
,
click_diary_android
]
print
(
"已获取点击diary用户占比"
)
#2.点击answer用户占比
click_answer_all
=
ClkCidUidRate
(
1
,
"all"
,
"answer"
)
.
get_clk_cid_uid_rate
(
"所有"
)
click_answer_ios
=
ClkCidUidRate
(
1
,
"ios"
,
"answer"
)
.
get_clk_cid_uid_rate
(
"苹果"
)
click_answer_android
=
ClkCidUidRate
(
1
,
"android"
,
"answer"
)
.
get_clk_cid_uid_rate
(
"安卓"
)
click_answer_result
=
[
click_answer_all
,
click_answer_ios
,
click_answer_android
]
#3.点击question用户占比
click_question_all
=
ClkCidUidRate
(
1
,
"all"
,
"question"
)
.
get_clk_cid_uid_rate
(
"所有"
)
click_question_ios
=
ClkCidUidRate
(
1
,
"ios"
,
"question"
)
.
get_clk_cid_uid_rate
(
"苹果"
)
click_question_android
=
ClkCidUidRate
(
1
,
"android"
,
"question"
)
.
get_clk_cid_uid_rate
(
"安卓"
)
click_question_result
=
[
click_question_all
,
click_question_ios
,
click_question_android
]
#4.有点击用户占比
click_everything_all
=
ClkCidUidRate
(
1
,
"all"
,
"everything"
)
.
get_clk_cid_uid_rate
(
"所有"
)
click_everything_ios
=
ClkCidUidRate
(
1
,
"ios"
,
"everything"
)
.
get_clk_cid_uid_rate
(
"苹果"
)
click_everything_android
=
ClkCidUidRate
(
1
,
"android"
,
"everything"
)
.
get_clk_cid_uid_rate
(
"安卓"
)
click_everything_result
=
[
click_everything_all
,
click_everything_ios
,
click_everything_android
]
#5.无点击用户占比
print
(
"已获取点击answer用户占比"
)
#3.点击question用户占比(曝光表里cid类型没有question,因此下面的曝光数为0,0不能作分母)
#click_question_all = ClkCidUidRate(1,"all","question").get_clk_cid_uid_rate("所有")
#click_question_ios = ClkCidUidRate(1,"ios","question").get_clk_cid_uid_rate("苹果")
#click_question_android = ClkCidUidRate(1,"android","question").get_clk_cid_uid_rate("安卓")
#click_question_result = [click_question_all,click_question_ios,click_question_android]
#print("已获取点击question用户占比")
#4.无点击用户占比
click_nothing_all
=
ClkCidUidRate
(
1
,
"all"
,
"nothing"
)
.
get_clk_cid_uid_rate
(
"所有"
)
click_nothing_ios
=
ClkCidUidRate
(
1
,
"ios"
,
"nothing"
)
.
get_clk_cid_uid_rate
(
"苹果"
)
click_nothing_android
=
ClkCidUidRate
(
1
,
"android"
,
"nothing"
)
.
get_clk_cid_uid_rate
(
"安卓"
)
click_nothing_result
=
[
click_nothing_all
,
click_nothing_ios
,
click_nothing_android
]
print
(
"已获取无点击用户占比"
)
if
__name__
==
'__main__'
:
main
()
eda/test/getTopFeatures.py
View file @
716da1d4
...
...
@@ -172,11 +172,11 @@ def main():
clk_question_times_android
=
top_question_android
.
get_click_times
()
imp_question_times_android
=
top_question_android
.
get_impression_times
()
clk_question_ctr_android
=
top_question_android
.
get_result
(
"安卓"
,
clk_question_times_android
,
imp_question_times_android
,
2
,
"ctr"
)
print
(
"已获取 Top question 特征"
)
result_lst
=
[
clk_question_ctr_all
,
clk_question_ctr_ios
,
clk_question_ctr_android
]
output_path
=
DIRECTORY_PATH
+
"top100_ctr_question_{}.txt"
.
format
(
get_yesterday_date
())
top_question_all
.
result2file
(
result_lst
,
output_path
)
print
(
"已获取 Top question 特征"
)
if
__name__
==
'__main__'
:
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment