Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in
Toggle navigation
F
ffm-baseline
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
ML
ffm-baseline
Commits
c8a1b61c
Commit
c8a1b61c
authored
Aug 20, 2018
by
高雅喆
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
update recommented_index_v2
parent
2cce8dc0
Expand all
Hide whitespace changes
Inline
Side-by-side
Showing
10 changed files
with
348 additions
and
81 deletions
+348
-81
getClickZeroUidRateDetail.py
eda/recommended_indexs/getClickZeroUidRateDetail.py
+10
-16
getRegisterUidDetail.py
eda/recommended_indexs/getRegisterUidDetail.py
+7
-16
cidRate.py
eda/recommended_indexs_v2/cidRate.py
+0
-0
clkCidUidRate.py
eda/recommended_indexs_v2/clkCidUidRate.py
+0
-0
func.py
eda/recommended_indexs_v2/func.py
+98
-0
getClickZeroUidDetail.py
eda/recommended_indexs_v2/getClickZeroUidDetail.py
+145
-0
getRegisterUidDetail.py
eda/recommended_indexs_v2/getRegisterUidDetail.py
+88
-0
main.py
eda/recommended_indexs_v2/main.py
+0
-0
topFeatures.py
eda/recommended_indexs_v2/topFeatures.py
+0
-0
utils.py
eda/recommended_indexs_v2/utils.py
+0
-49
No files found.
eda/recommended_indexs/getClickZeroUidRateDetail.py
View file @
c8a1b61c
...
...
@@ -19,7 +19,11 @@ my_date5 = datetime.date.today() - datetime.timedelta(days=90)
my_tm5
=
int
(
my_date5
.
strftime
(
"
%
s"
))
def
get_rate_detail
(
platform
):
def
get_click_zero_uid_count
(
platform
):
"""
platform : "ios","android","all"
rtype : dict
"""
if
platform
==
"ios"
:
platform
=
"='App Store'"
elif
platform
==
"android"
:
...
...
@@ -126,26 +130,16 @@ def get_rate_detail(platform):
cursor
.
execute
(
sql
)
result
=
cursor
.
fetchall
()
db
.
close
()
return
result
def
result2dict
(
result
):
"""
result : tuple2
rtype : dict
"""
dct
=
{}
sum_count
=
0
for
i
in
result
:
sum_count
+=
i
[
1
]
for
i
in
result
:
dct
[
i
[
0
]]
=
"{}--{}
%
"
.
format
(
i
[
1
],
round
(
i
[
1
]
/
sum_count
*
100
,
2
))
print
(
"sum:{}"
.
format
(
sum_count
))
dct
[
i
[
0
]]
=
i
[
1
]
return
dct
if
__name__
==
'__main__'
:
no_click_uid_detail_all
=
result2dict
(
get_rate_detail
(
"all"
)
)
no_click_uid_detail_ios
=
result2dict
(
get_rate_detail
(
"ios"
)
)
no_click_uid_detail_android
=
result2dict
(
get_rate_detail
(
"android"
)
)
no_click_uid_detail_all
=
get_click_zero_uid_count
(
"all"
)
no_click_uid_detail_ios
=
get_click_zero_uid_count
(
"ios"
)
no_click_uid_detail_android
=
get_click_zero_uid_count
(
"android"
)
eda/recommended_indexs/getRegisterUidDetail.py
View file @
c8a1b61c
...
...
@@ -19,7 +19,10 @@ my_date5 = datetime.date.today() - datetime.timedelta(days=90)
my_tm5
=
int
(
my_date5
.
strftime
(
"
%
s"
))
def
get_rate_detail
():
def
get_register_uid_count
():
"""
rtype : dict
"""
db
=
pymysql
.
connect
(
host
=
'10.66.157.22'
,
port
=
4000
,
user
=
'root'
,
passwd
=
'3SYz54LS9#^9sBvC'
,
db
=
'jerry_test'
)
cursor
=
db
.
cursor
()
sql
=
"select '0-7' as label,count(distinct(device_id))
\
...
...
@@ -72,26 +75,14 @@ def get_rate_detail():
cursor
.
execute
(
sql
)
result
=
cursor
.
fetchall
()
db
.
close
()
return
result
def
result2dict
(
result
):
"""
result : tuple2
rtype : dict
"""
dct
=
{}
sum_count
=
0
for
i
in
result
:
sum_count
+=
i
[
1
]
for
i
in
result
:
dct
[
i
[
0
]]
=
"{}--{}
%
"
.
format
(
i
[
1
],
round
(
i
[
1
]
/
sum_count
*
100
,
2
))
print
(
"sum:{}"
.
format
(
sum_count
))
dct
[
i
[
0
]]
=
i
[
1
]
return
dct
if
__name__
==
'__main__'
:
register_uid_detail_all
=
result2dict
(
get_rate_detail
())
register_uid_detail_ios
=
result2dict
(
get_rate_detail
())
register_uid_detail_android
=
result2dict
(
get_rate_detail
())
register_uid_detail
=
get_register_uid_count
()
eda/recommended_indexs_v2/
getC
idRate.py
→
eda/recommended_indexs_v2/
c
idRate.py
View file @
c8a1b61c
File moved
eda/recommended_indexs_v2/
getC
lkCidUidRate.py
→
eda/recommended_indexs_v2/
c
lkCidUidRate.py
View file @
c8a1b61c
File moved
eda/recommended_indexs_v2/func.py
0 → 100644
View file @
c8a1b61c
from
utils
import
con_sql
from
getClickZeroUidDetail
import
get_click_zero_uid_count
from
getRegisterUidDetail
import
get_register_uid_count
# Click-through rate of active users for each platform
def get_activate_uid_ctr(platform, ndays=1):
    """
    Compute the click-through rate of active users on one platform.

    Clicks are counted from data_feed_click for the target day. Impressions
    are counted from data_feed_exposure for the same day, restricted to
    devices that have at least one click row on that day.

    platform : 'all';'ios';'android' (any other value is treated as 'all')
    ndays : 1;2;3;4.. #The number of days from the current time
    rtype : list -- [platform label, click count, impression count, click rate]
                    the click rate is 0.0 when there are no impressions
    """
    # platform -> (exposure-table filter, click-table filter, display label).
    # data_feed_click is queried with the store name spelled without the
    # space ('AppStore'), data_feed_exposure with the space ('App Store').
    filters = {
        "ios": ("='App Store'", "='AppStore'", "苹果"),
        "android": ("!='App Store'", "!='AppStore'", "安卓"),
    }
    exposure_filter, click_filter, label = filters.get(
        platform, (" is not null", " is not null", "所有"))

    # Shared "rows from ndays days ago" predicate.
    day_clause = ("from_unixtime(time,'%Y-%m-%d')"
                  "=date_add(curdate(), interval -{0} day)").format(ndays)

    sql_clk = (
        "select count(device_id) from data_feed_click "
        "where {day} "
        "and device_type{click}"
    ).format(day=day_clause, click=click_filter)
    clk_count = con_sql(sql_clk)[0][0]

    sql_imp = (
        "select count(device_id) from data_feed_exposure "
        "where {day} "
        "and device_id in "
        "(select device_id from data_feed_click "
        "where {day} "
        "and device_type{click}) "
        "and device_type{exposure}"
    ).format(day=day_clause, click=click_filter, exposure=exposure_filter)
    imp_count = con_sql(sql_imp)[0][0]

    # A day without impressions would otherwise raise ZeroDivisionError.
    clk_rate = round(clk_count / imp_count, 4) if imp_count else 0.0
    return [label, clk_count, imp_count, clk_rate]
# Average number of impressions per active user per day
def get_activate_uid_imp_times(city, ndays=1):
    """
    Compute how many impressions an active user received on the target day.

    Active users are the distinct devices with a click row in
    data_feed_click on the target day; impressions are the
    data_feed_exposure rows of those devices on the same day.

    ndays : 1;2;3;4.. #The number of days from the current time
    city : 'beijing';'all' (any other value is treated as 'all')
    rtype : list -- [city label, active user count, impression count,
                     impressions per user]; the ratio is 0.0 when there
                     are no active users
    """
    # Keep the SQL fragment and the display label in separate variables.
    # Overwriting `city` with the SQL fragment and then comparing it with
    # "beijing" made the label always come out as "所有".
    if city == "beijing":
        city_filter, label = "='beijing'", "北京"
    else:
        city_filter, label = " is not null", "所有"

    # Shared "rows from ndays days ago" predicate.
    day_clause = ("from_unixtime(time,'%Y-%m-%d')"
                  "=date_add(curdate(), interval -{0} day)").format(ndays)

    sql_uid = (
        "select count(distinct(device_id)) from data_feed_click "
        "where {day} "
        "and city_id{city}"
    ).format(day=day_clause, city=city_filter)
    sql_uid_count = con_sql(sql_uid)[0][0]

    sql_imp = (
        "select count(device_id) from data_feed_exposure "
        "where device_id in "
        "(select device_id from data_feed_click "
        "where {day} "
        "and city_id{city}) "
        "and {day} "
        "and city_id{city}"
    ).format(day=day_clause, city=city_filter)
    sql_imp_times = con_sql(sql_imp)[0][0]

    # A day without active users would otherwise raise ZeroDivisionError.
    per_user = round(sql_imp_times / sql_uid_count, 2) if sql_uid_count else 0.0
    return [label, sql_uid_count, sql_imp_times, per_user]
# Distribution of zero-click users (= zero-click users ∩ activated users /
# activated users), broken down by platform and activation date
def get_click_zero_uid_rate_detail(platform):
    """
    Divide the zero-click device count of every cohort by that cohort's
    registered device count.

    platform : "ios","android","all"
    rtype : dict -- {cohort label: ratio}; a cohort whose registered count
                    is zero or missing gets 0.0 instead of raising
    """
    zero_click = get_click_zero_uid_count(platform)
    # NOTE(review): get_register_uid_count() takes no platform argument, so
    # the denominator is not filtered by platform while the numerator is --
    # confirm this is intended.
    registered = get_register_uid_count()

    result = {}
    for label, count in zero_click.items():
        total = registered.get(label)
        # Guard against an empty cohort (ZeroDivisionError) or a label the
        # register query did not return (KeyError).
        result[label] = count / total if total else 0.0
    return result
# (clicks per user : number of distinct users)
def get_click_times_to_count_uid():
    """
    Histogram of yesterday's clicks per device.

    The inner query counts click rows per device_id for yesterday; the outer
    query groups devices by that count and orders by it.

    rtype : tuple -- rows of (times, device count) as returned by con_sql
                     (tuple per the original author's note; con_sql is
                     defined elsewhere)
    """
    histogram_sql = (
        "select times,count(device_id) "
        "from (select device_id,count(cid_type) as times "
        "from data_feed_click "
        "where from_unixtime(time,'%Y-%m-%d')=date_add(curdate(), interval -1 day) "
        "group by device_id) as t "
        "group by times order by times"
    )
    return con_sql(histogram_sql)
\ No newline at end of file
eda/recommended_indexs_v2/getClickZeroUidDetail.py
0 → 100644
View file @
c8a1b61c
import
datetime
import
pymysql
# Cut-off dates and their Unix timestamps (local midnight) for 7/14/30/60/90
# days before today.
# The timestamps are computed with datetime.timestamp() instead of
# strftime("%s"), because "%s" is a platform-specific extension that is not
# available on every system.
_today = datetime.date.today()  # read once so every cut-off uses the same day


def _local_midnight_epoch(day):
    """Return the Unix timestamp (int) of local midnight at the start of *day*."""
    return int(datetime.datetime.combine(day, datetime.time()).timestamp())


# timestamp one week ago (7)
my_date1 = _today - datetime.timedelta(days=7)
my_tm1 = _local_midnight_epoch(my_date1)
# timestamp two weeks ago (14)
my_date2 = _today - datetime.timedelta(days=14)
my_tm2 = _local_midnight_epoch(my_date2)
# timestamp one month ago (30)
my_date3 = _today - datetime.timedelta(days=30)
my_tm3 = _local_midnight_epoch(my_date3)
# timestamp two months ago (60)
my_date4 = _today - datetime.timedelta(days=60)
my_tm4 = _local_midnight_epoch(my_date4)
# timestamp three months ago (90)
my_date5 = _today - datetime.timedelta(days=90)
my_tm5 = _local_midnight_epoch(my_date5)
def get_click_zero_uid_count(platform):
    """
    Count the devices that were exposed yesterday but did not click
    yesterday, split into cohorts by how far back the device's rows in
    data_feed_exposure go (cut-offs my_tm1..my_tm5).

    platform : "ios","android","all" (any other value is treated as "all")
    rtype : dict -- {cohort label: distinct device count} with the labels
                    '0-7', '7-14', '14-30', '30-60', '60-90', '90+'
    """
    # data_feed_click is queried with the store name spelled without the
    # space ('AppStore'), data_feed_exposure with the space ('App Store').
    if platform == "ios":
        exposure_filter, click_filter = "='App Store'", "='AppStore'"
    elif platform == "android":
        exposure_filter, click_filter = "!='App Store'", "!='AppStore'"
    else:
        exposure_filter = click_filter = " is not null"

    # Building blocks for the cohort predicates: whether the device has any
    # exposure row older than the given cut-off timestamp.
    seen_before = ("device_id in "
                   "(select distinct(device_id) from data_feed_exposure "
                   "where time < {0})")
    not_seen_before = ("device_id not in "
                       "(select distinct(device_id) from data_feed_exposure "
                       "where time < {0})")
    cohorts = [
        ('0-7', not_seen_before.format(my_tm1)),
        ('7-14', not_seen_before.format(my_tm2) + " and " + seen_before.format(my_tm1)),
        ('14-30', not_seen_before.format(my_tm3) + " and " + seen_before.format(my_tm2)),
        ('30-60', not_seen_before.format(my_tm4) + " and " + seen_before.format(my_tm3)),
        ('60-90', not_seen_before.format(my_tm5) + " and " + seen_before.format(my_tm4)),
        ('90+', seen_before.format(my_tm5)),
    ]

    # One SELECT per cohort; only the label and the cohort predicate vary.
    segment = (
        "select '{label}' as label,count(distinct(device_id)) from data_feed_exposure "
        "where device_type{exposure} "
        "and from_unixtime(time,'%Y-%m-%d')=date_add(curdate(), interval -1 day) "
        "and device_id not in "
        "(select distinct(device_id) from data_feed_click "
        "where from_unixtime(time,'%Y-%m-%d')=date_add(curdate(), interval -1 day) "
        "and device_type{click}) "
        "and device_id in "
        "(select distinct(device_id) "
        "from data_feed_exposure "
        "where {cohort})"
    )
    sql = " union all ".join(
        segment.format(label=label, exposure=exposure_filter,
                       click=click_filter, cohort=cohort)
        for label, cohort in cohorts
    )

    # NOTE(review): the connection settings, including the root password,
    # are hard-coded here; they should be moved to configuration.
    db = pymysql.connect(host='10.66.157.22', port=4000, user='root',
                         passwd='3SYz54LS9#^9sBvC', db='jerry_test')
    try:
        cursor = db.cursor()
        cursor.execute(sql)
        result = cursor.fetchall()
    finally:
        # Close the connection even when the query raises.
        db.close()

    # Each row is (label, count).
    return {row[0]: row[1] for row in result}
if __name__ == '__main__':
    # Script entry point: fetch the zero-click device counts for every
    # platform, in the order all -> ios -> android.
    (no_click_uid_detail_all,
     no_click_uid_detail_ios,
     no_click_uid_detail_android) = [
        get_click_zero_uid_count(target)
        for target in ("all", "ios", "android")
    ]
eda/recommended_indexs_v2/getRegisterUidDetail.py
0 → 100644
View file @
c8a1b61c
import
datetime
import
pymysql
# Cut-off dates and their Unix timestamps (local midnight) for 7/14/30/60/90
# days before today.
# The timestamps are computed with datetime.timestamp() instead of
# strftime("%s"), because "%s" is a platform-specific extension that is not
# available on every system.
_today = datetime.date.today()  # read once so every cut-off uses the same day


def _local_midnight_epoch(day):
    """Return the Unix timestamp (int) of local midnight at the start of *day*."""
    return int(datetime.datetime.combine(day, datetime.time()).timestamp())


# timestamp one week ago (7)
my_date1 = _today - datetime.timedelta(days=7)
my_tm1 = _local_midnight_epoch(my_date1)
# timestamp two weeks ago (14)
my_date2 = _today - datetime.timedelta(days=14)
my_tm2 = _local_midnight_epoch(my_date2)
# timestamp one month ago (30)
my_date3 = _today - datetime.timedelta(days=30)
my_tm3 = _local_midnight_epoch(my_date3)
# timestamp two months ago (60)
my_date4 = _today - datetime.timedelta(days=60)
my_tm4 = _local_midnight_epoch(my_date4)
# timestamp three months ago (90)
my_date5 = _today - datetime.timedelta(days=90)
my_tm5 = _local_midnight_epoch(my_date5)
def get_register_uid_count():
    """
    Count the distinct devices in data_feed_exposure, split into cohorts by
    how far back the device's exposure rows go (cut-offs my_tm1..my_tm5).

    rtype : dict -- {cohort label: distinct device count} with the labels
                    '0-7', '7-14', '14-30', '30-60', '60-90', '90+'
    """
    # Building blocks for the cohort predicates: whether the device has any
    # exposure row older than the given cut-off timestamp.
    seen_before = ("device_id in "
                   "(select distinct(device_id) from data_feed_exposure "
                   "where time < {0})")
    not_seen_before = ("device_id not in "
                       "(select distinct(device_id) from data_feed_exposure "
                       "where time < {0})")
    cohorts = [
        ('0-7', not_seen_before.format(my_tm1)),
        ('7-14', not_seen_before.format(my_tm2) + " and " + seen_before.format(my_tm1)),
        ('14-30', not_seen_before.format(my_tm3) + " and " + seen_before.format(my_tm2)),
        ('30-60', not_seen_before.format(my_tm4) + " and " + seen_before.format(my_tm3)),
        ('60-90', not_seen_before.format(my_tm5) + " and " + seen_before.format(my_tm4)),
        ('90+', seen_before.format(my_tm5)),
    ]

    # One SELECT per cohort; only the label and the cohort predicate vary.
    segment = (
        "select '{label}' as label,count(distinct(device_id)) "
        "from data_feed_exposure "
        "where {cohort}"
    )
    sql = " union all ".join(
        segment.format(label=label, cohort=cohort) for label, cohort in cohorts
    )

    # NOTE(review): the connection settings, including the root password,
    # are hard-coded here; they should be moved to configuration.
    db = pymysql.connect(host='10.66.157.22', port=4000, user='root',
                         passwd='3SYz54LS9#^9sBvC', db='jerry_test')
    try:
        cursor = db.cursor()
        cursor.execute(sql)
        result = cursor.fetchall()
    finally:
        # Close the connection even when the query raises.
        db.close()

    # Each row is (label, count).
    return {row[0]: row[1] for row in result}
if __name__ == '__main__':
    # When run as a script, fetch the per-cohort device counts once.
    register_uid_detail = get_register_uid_count()
eda/recommended_indexs_v2/main.py
View file @
c8a1b61c
This diff is collapsed.
Click to expand it.
eda/recommended_indexs_v2/
getT
opFeatures.py
→
eda/recommended_indexs_v2/
t
opFeatures.py
View file @
c8a1b61c
File moved
eda/recommended_indexs_v2/utils.py
View file @
c8a1b61c
...
...
@@ -35,52 +35,3 @@ def get_yesterday_date():
yesterday
=
today
-
datetime
.
timedelta
(
days
=
1
)
yesterday
=
yesterday
.
strftime
(
"
%
Y
%
m
%
d"
)
return
yesterday
# Click-through rate of active users for each platform
def get_activate_uid_ctr(platform, ndays=1):
    """
    Compute the click-through rate of active users on one platform.

    Clicks are counted from data_feed_click for the target day. Impressions
    are counted from data_feed_exposure for the same day, restricted to
    devices that have at least one click row on that day.

    platform : 'all';'ios';'android' (any other value is treated as 'all')
    ndays : 1;2;3;4.. #The number of days from the current time
    rtype : list -- [platform label, click count, impression count, click rate]
                    the click rate is 0.0 when there are no impressions
    """
    # platform -> (exposure-table filter, click-table filter, display label).
    # data_feed_click is queried with the store name spelled without the
    # space ('AppStore'), data_feed_exposure with the space ('App Store').
    filters = {
        "ios": ("='App Store'", "='AppStore'", "苹果"),
        "android": ("!='App Store'", "!='AppStore'", "安卓"),
    }
    exposure_filter, click_filter, label = filters.get(
        platform, (" is not null", " is not null", "所有"))

    # Shared "rows from ndays days ago" predicate.
    day_clause = ("from_unixtime(time,'%Y-%m-%d')"
                  "=date_add(curdate(), interval -{0} day)").format(ndays)

    sql_clk = (
        "select count(device_id) from data_feed_click "
        "where {day} "
        "and device_type{click}"
    ).format(day=day_clause, click=click_filter)
    clk_count = con_sql(sql_clk)[0][0]

    sql_imp = (
        "select count(device_id) from data_feed_exposure "
        "where {day} "
        "and device_id in "
        "(select device_id from data_feed_click "
        "where {day} "
        "and device_type{click}) "
        "and device_type{exposure}"
    ).format(day=day_clause, click=click_filter, exposure=exposure_filter)
    imp_count = con_sql(sql_imp)[0][0]

    # A day without impressions would otherwise raise ZeroDivisionError.
    clk_rate = round(clk_count / imp_count, 4) if imp_count else 0.0
    return [label, clk_count, imp_count, clk_rate]
# (click count : number of distinct users)
def get_click_times_to_count_uid_df():
    """
    Histogram of yesterday's clicks per device.

    The inner query counts click rows per device_id for yesterday; the outer
    query groups devices by that count and orders by it.

    rtype : tuple -- rows of (times, device count) as returned by con_sql
                     (tuple per the original author's note)
    """
    histogram_sql = (
        "select times,count(device_id) "
        "from (select device_id,count(cid_type) as times "
        "from data_feed_click "
        "where from_unixtime(time,'%Y-%m-%d')=date_add(curdate(), interval -1 day) "
        "group by device_id) as t "
        "group by times order by times"
    )
    return con_sql(histogram_sql)
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment