Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in
Toggle navigation
F
ffm-baseline
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
ML
ffm-baseline
Commits
028e45f8
Commit
028e45f8
authored
Aug 13, 2018
by
高雅喆
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
fix bug in test/platform format [is not null]
parent
5f50a255
Hide whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
23 additions
and
13 deletions
+23
-13
getCidRate.py
eda/test/getCidRate.py
+2
-2
getClkCidUidRate.py
eda/test/getClkCidUidRate.py
+1
-3
getTopFeatures.py
eda/test/getTopFeatures.py
+1
-1
main.py
eda/test/main.py
+17
-5
utils.py
eda/test/utils.py
+2
-2
No files found.
eda/test/getCidRate.py
View file @
028e45f8
...
...
@@ -27,11 +27,11 @@ class CidRate(object):
sql_cid
=
"select count(cid) from data_feed_click
\
where from_unixtime(time,'
%
Y-
%
m-
%
d')=date_add(curdate(), interval -{0} day)
\
and device_type{1}
\
and cid_type='{2}'"
.
format
(
self
.
ndays
,
self
.
platform
.
replace
(
' '
,
''
),
self
.
cid_type
)
and cid_type='{2}'"
.
format
(
self
.
ndays
,
self
.
platform
.
replace
(
' '
,
''
)
if
self
.
platform
[
0
]
==
'A'
else
self
.
platform
,
self
.
cid_type
)
cid_clk_count
=
con_sql
(
sql_cid
)[
0
][
0
]
sql_all
=
"select count(cid) from data_feed_click
\
where from_unixtime(time,'
%
Y-
%
m-
%
d')=date_add(curdate(), interval -{0} day)
\
and device_type{1}"
.
format
(
self
.
ndays
,
self
.
platform
.
replace
(
' '
,
''
))
and device_type{1}"
.
format
(
self
.
ndays
,
self
.
platform
.
replace
(
' '
,
''
)
if
self
.
platform
[
0
]
==
'A'
else
self
.
platform
)
all_clk_count
=
con_sql
(
sql_all
)[
0
][
0
]
cid_clk_rate
=
round
(
cid_clk_count
/
all_clk_count
,
4
)
return
[
platform
,
cid_clk_count
,
all_clk_count
,
cid_clk_rate
]
...
...
eda/test/getClkCidUidRate.py
View file @
028e45f8
...
...
@@ -29,11 +29,9 @@ class ClkCidUidRate(object):
sql_clk
=
"select count(distinct(device_id)) from data_feed_click
\
where from_unixtime(time,'
%
Y-
%
m-
%
d')=date_add(curdate(), interval -{0} day)
\
and device_type{1}
\
and cid_type{2}"
.
format
(
self
.
ndays
,
self
.
platform
.
replace
(
' '
,
''
),
self
.
cid_type
)
and cid_type{2}"
.
format
(
self
.
ndays
,
self
.
platform
.
replace
(
' '
,
''
)
if
self
.
platform
[
0
]
==
'A'
else
self
.
platform
,
self
.
cid_type
)
clk_count
=
con_sql
(
sql_clk
)[
0
][
0
]
if
self
.
platform
[
-
2
]
==
'e'
:
#注意:曝光表中AppStore有空格
self
.
platform
=
self
.
platform
[:
-
6
]
+
' '
+
self
.
platform
[
-
6
:]
sql_imp
=
"select count(distinct(device_id)) from data_feed_exposure
\
where from_unixtime(time,'
%
Y-
%
m-
%
d')=date_add(curdate(), interval -{0} day)
\
and device_type{1}
\
...
...
eda/test/getTopFeatures.py
View file @
028e45f8
...
...
@@ -28,7 +28,7 @@ class TopFeatures(object):
where from_unixtime(time,'
%
Y-
%
m-
%
d')=date_add(curdate(), interval -{0} day)
\
and device_type{1} and cid_type='{2}'
\
group by cid
\
order by count(cid) desc"
.
format
(
self
.
ndays
,
self
.
platform
.
replace
(
' '
,
''
),
self
.
cid_type
)
order by count(cid) desc"
.
format
(
self
.
ndays
,
self
.
platform
.
replace
(
' '
,
''
)
if
self
.
platform
[
0
]
==
'A'
else
self
.
platform
,
self
.
cid_type
)
clk_times
=
tuple2dict
(
con_sql
(
sql
))
return
clk_times
...
...
eda/test/main.py
View file @
028e45f8
...
...
@@ -18,27 +18,34 @@ def main():
answer_imp_rate_result
=
[
answer_imp_rate_all
,
answer_imp_rate_ios
,
answer_imp_rate_android
]
print
(
"已获取answer曝光占比"
)
#1.2 活跃用户点击率(=活跃用户点击次数/活跃用户曝光次数)
#1.2 diary曝光占比(=answer被曝光数/总cid被曝光数)
diary_imp_rate_all
=
CidRate
(
"all"
,
"diary"
)
.
get_cid_imp_rate
(
"所有"
)
diary_imp_rate_ios
=
CidRate
(
"ios"
,
"diary"
)
.
get_cid_imp_rate
(
"苹果"
)
diary_imp_rate_android
=
CidRate
(
"android"
,
"diary"
)
.
get_cid_imp_rate
(
"安卓"
)
diary_imp_rate_result
=
[
diary_imp_rate_all
,
diary_imp_rate_ios
,
diary_imp_rate_android
]
print
(
"已获取diary曝光占比"
)
#1.3 活跃用户点击率(=活跃用户点击次数/活跃用户曝光次数)
activate_uid_ctr_all
=
get_activate_uid_ctr
(
"all"
)
activate_uid_ctr_ios
=
get_activate_uid_ctr
(
"ios"
)
activate_uid_ctr_android
=
get_activate_uid_ctr
(
"android"
)
print
(
"已获取活跃用户点击率"
)
#1.
3
点击answer用户占比(=点击answer用户数/曝光answer用户数)
#1.
4
点击answer用户占比(=点击answer用户数/曝光answer用户数)
click_answer_all
=
ClkCidUidRate
(
"all"
,
"answer"
)
.
get_clk_cid_uid_rate
(
"所有"
)
click_answer_ios
=
ClkCidUidRate
(
"ios"
,
"answer"
)
.
get_clk_cid_uid_rate
(
"苹果"
)
click_answer_android
=
ClkCidUidRate
(
"android"
,
"answer"
)
.
get_clk_cid_uid_rate
(
"安卓"
)
click_answer_result
=
[
click_answer_all
,
click_answer_ios
,
click_answer_android
]
print
(
"已获取点击answer用户占比"
)
#1.
4
点击diary用户占比(=点击diary用户数/曝光diary用户数)
#1.
5
点击diary用户占比(=点击diary用户数/曝光diary用户数)
click_diary_all
=
ClkCidUidRate
(
"all"
,
"diary"
)
.
get_clk_cid_uid_rate
(
"所有"
)
click_diary_ios
=
ClkCidUidRate
(
"ios"
,
"diary"
)
.
get_clk_cid_uid_rate
(
"苹果"
)
click_diary_android
=
ClkCidUidRate
(
"android"
,
"diary"
)
.
get_clk_cid_uid_rate
(
"安卓"
)
click_diary_result
=
[
click_diary_all
,
click_diary_ios
,
click_diary_android
]
print
(
"已获取点击diary用户占比"
)
#1.
5
有点击用户占比(=有点击用户数/有曝光用户数)
#1.
6
有点击用户占比(=有点击用户数/有曝光用户数)
click_everything_all
=
ClkCidUidRate
(
"all"
,
"everything"
)
.
get_clk_cid_uid_rate
(
"所有"
)
click_everything_ios
=
ClkCidUidRate
(
"ios"
,
"everything"
)
.
get_clk_cid_uid_rate
(
"苹果"
)
click_everything_android
=
ClkCidUidRate
(
"android"
,
"everything"
)
.
get_clk_cid_uid_rate
(
"安卓"
)
...
...
@@ -46,7 +53,6 @@ def main():
print
(
"已获取有点击用户占比"
)
#2. Top特征
#2.1 用户点击次数分布(第一列:用户点击次数;第二列:独立用户数量)
df
=
get_click_times_to_count_uid_df
()
...
...
@@ -71,6 +77,12 @@ def main():
output_path
=
DIRECTORY_PATH
+
"result_{}.txt"
.
format
(
get_yesterday_date
())
with
open
(
output_path
,
'w'
)
as
f
:
if
__name__
==
'__main__'
:
main
()
...
...
eda/test/utils.py
View file @
028e45f8
...
...
@@ -52,7 +52,7 @@ def get_activate_uid_ctr(platform, ndays=1):
platform
=
" is not null"
sql_clk
=
"select count(device_id) from data_feed_click
\
where from_unixtime(time,'
%
Y-
%
m-
%
d')=date_add(curdate(), interval -{0} day)
\
and device_type{1}"
.
format
(
ndays
,
platform
.
replace
(
' '
,
''
))
and device_type{1}"
.
format
(
ndays
,
platform
.
replace
(
' '
,
''
)
if
platform
[
0
]
==
'A'
else
platform
)
clk_count
=
con_sql
(
sql_clk
)[
0
][
0
]
sql_imp
=
"select count(device_id) from data_feed_exposure
\
where from_unixtime(time,'
%
Y-
%
m-
%
d')=date_add(curdate(), interval -{0} day)
\
...
...
@@ -60,7 +60,7 @@ def get_activate_uid_ctr(platform, ndays=1):
(select device_id from data_feed_click
\
where from_unixtime(time,'
%
Y-
%
m-
%
d')=date_add(curdate(), interval -{1} day)
\
and device_type{2})
\
and device_type{3}"
.
format
(
ndays
,
ndays
,
platform
.
replace
(
' '
,
''
),
platform
)
and device_type{3}"
.
format
(
ndays
,
ndays
,
platform
.
replace
(
' '
,
''
)
if
platform
[
0
]
==
'A'
else
platform
,
platform
)
imp_count
=
con_sql
(
sql_imp
)[
0
][
0
]
clk_rate
=
round
(
clk_count
/
imp_count
,
4
)
if
platform
==
"='App Store'"
:
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment