Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in
Toggle navigation
F
ffm-baseline
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
ML
ffm-baseline
Commits
4758923b
Commit
4758923b
authored
Aug 20, 2018
by
张彦钊
Browse files
Options
Browse Files
Download
Plain Diff
add new file
parents
11eee554
c8a1b61c
Expand all
Hide whitespace changes
Inline
Side-by-side
Showing
11 changed files
with
349 additions
and
82 deletions
+349
-82
getClickZeroUidRateDetail.py
eda/recommended_indexs/getClickZeroUidRateDetail.py
+10
-16
getRate.py
eda/recommended_indexs/getRate.py
+1
-1
getRegisterUidDetail.py
eda/recommended_indexs/getRegisterUidDetail.py
+7
-16
cidRate.py
eda/recommended_indexs_v2/cidRate.py
+0
-0
clkCidUidRate.py
eda/recommended_indexs_v2/clkCidUidRate.py
+0
-0
func.py
eda/recommended_indexs_v2/func.py
+98
-0
getClickZeroUidDetail.py
eda/recommended_indexs_v2/getClickZeroUidDetail.py
+145
-0
getRegisterUidDetail.py
eda/recommended_indexs_v2/getRegisterUidDetail.py
+88
-0
main.py
eda/recommended_indexs_v2/main.py
+0
-0
topFeatures.py
eda/recommended_indexs_v2/topFeatures.py
+0
-0
utils.py
eda/recommended_indexs_v2/utils.py
+0
-49
No files found.
eda/recommended_indexs/getClickZeroUidRateDetail.py
View file @
4758923b
...
...
@@ -19,7 +19,11 @@ my_date5 = datetime.date.today() - datetime.timedelta(days=90)
my_tm5
=
int
(
my_date5
.
strftime
(
"
%
s"
))
def
get_rate_detail
(
platform
):
def
get_click_zero_uid_count
(
platform
):
"""
platform : "ios","android","all"
rtype : dict
"""
if
platform
==
"ios"
:
platform
=
"='App Store'"
elif
platform
==
"android"
:
...
...
@@ -126,26 +130,16 @@ def get_rate_detail(platform):
cursor
.
execute
(
sql
)
result
=
cursor
.
fetchall
()
db
.
close
()
return
result
def
result2dict
(
result
):
"""
result : tuple2
rtype : dict
"""
dct
=
{}
sum_count
=
0
for
i
in
result
:
sum_count
+=
i
[
1
]
for
i
in
result
:
dct
[
i
[
0
]]
=
"{}--{}
%
"
.
format
(
i
[
1
],
round
(
i
[
1
]
/
sum_count
*
100
,
2
))
print
(
"sum:{}"
.
format
(
sum_count
))
dct
[
i
[
0
]]
=
i
[
1
]
return
dct
if
__name__
==
'__main__'
:
no_click_uid_detail_all
=
result2dict
(
get_rate_detail
(
"all"
)
)
no_click_uid_detail_ios
=
result2dict
(
get_rate_detail
(
"ios"
)
)
no_click_uid_detail_android
=
result2dict
(
get_rate_detail
(
"android"
)
)
no_click_uid_detail_all
=
get_click_zero_uid_count
(
"all"
)
no_click_uid_detail_ios
=
get_click_zero_uid_count
(
"ios"
)
no_click_uid_detail_android
=
get_click_zero_uid_count
(
"android"
)
eda/recommended_indexs/getRate.py
View file @
4758923b
...
...
@@ -27,7 +27,7 @@ def result2file(fpath):
1.5 点击answer用户占比(=点击answer用户数/曝光answer用户数)
1.6 点击diary用户占比(=点击diary用户数/曝光diary用户数)
1.7 无点击用户占比(=无点击用户数/有曝光用户数)
1.8 无点击用户数分布(
根据激活日期和平台来分
) #注意:(]里面的数字指的是距离当前时间的天数
1.8 无点击用户数分布(
=无点击用户∩激活用户数 / 激活用户数
) #注意:(]里面的数字指的是距离当前时间的天数
2.Top特征
2.1 用户点击次数分布(第一列:用户点击次数;第二列:独立用户数量)
2.2 Top 100 diary (sorted by ctr)
...
...
eda/recommended_indexs/getRegisterUidDetail.py
View file @
4758923b
...
...
@@ -19,7 +19,10 @@ my_date5 = datetime.date.today() - datetime.timedelta(days=90)
my_tm5
=
int
(
my_date5
.
strftime
(
"
%
s"
))
def
get_rate_detail
():
def
get_register_uid_count
():
"""
rtype : dict
"""
db
=
pymysql
.
connect
(
host
=
'10.66.157.22'
,
port
=
4000
,
user
=
'root'
,
passwd
=
'3SYz54LS9#^9sBvC'
,
db
=
'jerry_test'
)
cursor
=
db
.
cursor
()
sql
=
"select '0-7' as label,count(distinct(device_id))
\
...
...
@@ -72,26 +75,14 @@ def get_rate_detail():
cursor
.
execute
(
sql
)
result
=
cursor
.
fetchall
()
db
.
close
()
return
result
def
result2dict
(
result
):
"""
result : tuple2
rtype : dict
"""
dct
=
{}
sum_count
=
0
for
i
in
result
:
sum_count
+=
i
[
1
]
for
i
in
result
:
dct
[
i
[
0
]]
=
"{}--{}
%
"
.
format
(
i
[
1
],
round
(
i
[
1
]
/
sum_count
*
100
,
2
))
print
(
"sum:{}"
.
format
(
sum_count
))
dct
[
i
[
0
]]
=
i
[
1
]
return
dct
if
__name__
==
'__main__'
:
register_uid_detail_all
=
result2dict
(
get_rate_detail
())
register_uid_detail_ios
=
result2dict
(
get_rate_detail
())
register_uid_detail_android
=
result2dict
(
get_rate_detail
())
register_uid_detail
=
get_register_uid_count
()
eda/recommended_indexs_v2/
getC
idRate.py
→
eda/recommended_indexs_v2/
c
idRate.py
View file @
4758923b
File moved
eda/recommended_indexs_v2/
getC
lkCidUidRate.py
→
eda/recommended_indexs_v2/
c
lkCidUidRate.py
View file @
4758923b
File moved
eda/recommended_indexs_v2/func.py
0 → 100644
View file @
4758923b
from
utils
import
con_sql
from
getClickZeroUidDetail
import
get_click_zero_uid_count
from
getRegisterUidDetail
import
get_register_uid_count
# Click-through rate of active users, per platform.
def get_activate_uid_ctr(platform, ndays=1):
    """Return click count, exposure count and click rate of active users.

    Clicks are counted in data_feed_click for the target day; exposures are
    counted in data_feed_exposure for the same day, restricted to devices
    that have a matching click row on that day.

    platform : 'all';'ios';'android' (any other value is treated as 'all')
    ndays : 1;2;3;4.. #The number of days from the current time
    rtype : list -- [platform label, click count, exposure count, click rate]

    The click rate is clicks / exposures rounded to 4 places. When there are
    no exposures it is 0.0 (previously this raised ZeroDivisionError).
    """
    # Keep the SQL predicate and the display label in separate names instead
    # of repeatedly overwriting the ``platform`` argument.
    if platform == "ios":
        exposure_filter, label = "='App Store'", "苹果"
    elif platform == "android":
        exposure_filter, label = "!='App Store'", "安卓"
    else:
        exposure_filter, label = " is not null", "所有"

    # The click table is filtered on the store name with its spaces removed
    # ('AppStore'); " is not null" must keep its spaces. The check on the
    # second-to-last character tells the two kinds of predicate apart.
    if exposure_filter[-2] == 'e':
        click_filter = exposure_filter.replace(' ', '')
    else:
        click_filter = exposure_filter

    on_target_day = ("from_unixtime(time,'%Y-%m-%d')"
                     "=date_add(curdate(), interval -{0} day)").format(ndays)

    sql_clk = ("select count(device_id) from data_feed_click"
               " where {0}"
               " and device_type{1}").format(on_target_day, click_filter)
    clk_count = con_sql(sql_clk)[0][0]

    sql_imp = ("select count(device_id) from data_feed_exposure"
               " where {0}"
               " and device_id in"
               " (select device_id from data_feed_click"
               " where {0}"
               " and device_type{1})"
               " and device_type{2}").format(
                   on_target_day, click_filter, exposure_filter)
    imp_count = con_sql(sql_imp)[0][0]

    # Guard the division: a day without exposures yields a rate of 0.0.
    clk_rate = round(clk_count / imp_count, 4) if imp_count else 0.0
    return [label, clk_count, imp_count, clk_rate]
# Average number of exposures per active user on a given day.
def get_activate_uid_imp_times(city, ndays=1):
    """Return active-device count, exposure count and exposures per device.

    Active devices are the distinct device_ids in data_feed_click for the
    target day; exposures are the data_feed_exposure rows of those devices
    on the same day. Both queries apply the same city_id predicate.

    city : 'beijing';'all' (any other value is treated as 'all')
    ndays : 1;2;3;4.. #The number of days from the current time
    rtype : list -- [city label, active devices, exposures, average]

    The average is exposures / active devices rounded to 2 places, or 0.0
    when there are no active devices (previously ZeroDivisionError).
    """
    # Decide the label together with the predicate. The original compared
    # the already-rewritten predicate with "beijing" afterwards, so the
    # label was always "所有", even for Beijing.
    if city == "beijing":
        city_filter, label = "='beijing'", "北京"
    else:
        city_filter, label = " is not null", "所有"

    on_target_day = ("from_unixtime(time,'%Y-%m-%d')"
                     "=date_add(curdate(), interval -{0} day)").format(ndays)

    sql_uid = ("select count(distinct(device_id)) from data_feed_click"
               " where {0}"
               " and city_id{1}").format(on_target_day, city_filter)
    sql_uid_count = con_sql(sql_uid)[0][0]

    sql_imp = ("select count(device_id) from data_feed_exposure"
               " where device_id in"
               " (select device_id from data_feed_click"
               " where {0}"
               " and city_id{1})"
               " and {0}"
               " and city_id{1}").format(on_target_day, city_filter)
    sql_imp_times = con_sql(sql_imp)[0][0]

    if sql_uid_count:
        avg_imp_times = round(sql_imp_times / sql_uid_count, 2)
    else:
        avg_imp_times = 0.0
    return [label, sql_uid_count, sql_imp_times, avg_imp_times]
# Share of no-click users per activation-age bucket
# (= no-click users ∩ activated users / activated users), split by platform.
def get_click_zero_uid_rate_detail(platform):
    """Return, per age bucket, no-click device count / registered device count.

    platform : "ios","android","all"
    rtype : dict -- bucket label -> ratio

    The numerator comes from get_click_zero_uid_count(platform) and the
    denominator from get_register_uid_count(). A bucket whose denominator is
    missing or zero gets a ratio of 0.0 instead of raising KeyError or
    ZeroDivisionError.
    """
    no_click_counts = get_click_zero_uid_count(platform)
    # NOTE(review): the denominator is not filtered by platform while the
    # numerator is -- confirm that this is the intended definition.
    registered_counts = get_register_uid_count()

    result = {}
    for label, no_click in no_click_counts.items():
        registered = registered_counts.get(label)
        result[label] = no_click / registered if registered else 0.0
    return result
# Distribution of clicks per user: (click count : number of distinct users)
def
get_click_times_to_count_uid
():
"""
Return how many devices clicked a given number of times yesterday.

The inner query groups the data_feed_click rows dated curdate() - 1 day
by device_id and counts them as ``times``; the outer query groups by
``times`` and counts the devices in each group, ordered by ``times``.

rtype : tuple (as stated by the original author; the value is whatever
        con_sql returns -- con_sql is not visible here, confirm there)
"""
sql
=
"select times,count(device_id)
\
from (select device_id,count(cid_type) as times
\
from data_feed_click
\
where from_unixtime(time,'
%
Y-
%
m-
%
d')=date_add(curdate(), interval -1 day)
\
group by device_id) as t
\
group by times order by times"
result
=
con_sql
(
sql
)
return
result
\ No newline at end of file
eda/recommended_indexs_v2/getClickZeroUidDetail.py
0 → 100644
View file @
4758923b
import
datetime
import
pymysql
def _midnight_timestamp(day):
    """Return the Unix timestamp (int) of local midnight at the start of *day*.

    Replaces ``day.strftime("%s")``: ``%s`` is not one of the format codes
    Python documents as portable and is unavailable on some platforms.
    """
    return int(datetime.datetime.combine(day, datetime.time.min).timestamp())


# Read the clock once so every cut-off is relative to the same day, even if
# the module happens to be imported around midnight.
_today = datetime.date.today()

# Timestamp of one week ago (7 days)
my_date1 = _today - datetime.timedelta(days=7)
my_tm1 = _midnight_timestamp(my_date1)

# Timestamp of two weeks ago (14 days)
my_date2 = _today - datetime.timedelta(days=14)
my_tm2 = _midnight_timestamp(my_date2)

# Timestamp of one month ago (30 days)
my_date3 = _today - datetime.timedelta(days=30)
my_tm3 = _midnight_timestamp(my_date3)

# Timestamp of two months ago (60 days)
my_date4 = _today - datetime.timedelta(days=60)
my_tm4 = _midnight_timestamp(my_date4)

# Timestamp of three months ago (90 days)
my_date5 = _today - datetime.timedelta(days=90)
my_tm5 = _midnight_timestamp(my_date5)
def _click_zero_bucket_sql(label, exposure_filter, click_filter,
                           not_seen_before, seen_before):
    """Build the SELECT counting yesterday's no-click devices in one bucket.

    The statement counts distinct devices that had an exposure yesterday
    matching ``exposure_filter``, had no click yesterday matching
    ``click_filter``, and fall into the age bucket described by the two
    timestamps: the device must have no exposure earlier than
    ``not_seen_before`` and must have one earlier than ``seen_before``.
    Either bound may be None to leave that side of the bucket open.
    """
    yesterday = ("from_unixtime(time,'%Y-%m-%d')"
                 "=date_add(curdate(), interval -1 day)")
    exposed_before = ("(select distinct(device_id) from data_feed_exposure"
                      " where time < {0})")

    age_conditions = []
    if not_seen_before is not None:
        age_conditions.append(
            "device_id not in " + exposed_before.format(not_seen_before))
    if seen_before is not None:
        age_conditions.append(
            "device_id in " + exposed_before.format(seen_before))

    return ("select '{label}' as label,count(distinct(device_id))"
            " from data_feed_exposure"
            " where device_type{exposure_filter}"
            " and {yesterday}"
            " and device_id not in"
            " (select distinct(device_id) from data_feed_click"
            " where {yesterday}"
            " and device_type{click_filter})"
            " and device_id in"
            " (select distinct(device_id)"
            " from data_feed_exposure"
            " where {age})").format(
                label=label,
                exposure_filter=exposure_filter,
                click_filter=click_filter,
                yesterday=yesterday,
                age=" and ".join(age_conditions))


def get_click_zero_uid_count(platform):
    """Count yesterday's exposed-but-never-clicked devices per age bucket.

    platform : "ios","android","all" (any other value is treated as "all")
    rtype : dict -- bucket label ('0-7', '7-14', '14-30', '30-60', '60-90',
            '90+') -> number of distinct devices

    A device's bucket is derived from whether it has exposure rows older
    than the module-level cut-off timestamps my_tm1 .. my_tm5.
    """
    if platform == "ios":
        platform = "='App Store'"
    elif platform == "android":
        platform = "!='App Store'"
    else:
        platform = " is not null"
    # The click table is filtered on the store name with its spaces removed;
    # " is not null" has to keep its spaces, hence the character check.
    if platform[-2] == 'e':
        click_platform = platform.replace(' ', '')
    else:
        click_platform = platform

    # (label, no exposure earlier than, some exposure earlier than)
    buckets = [
        ('0-7', my_tm1, None),
        ('7-14', my_tm2, my_tm1),
        ('14-30', my_tm3, my_tm2),
        ('30-60', my_tm4, my_tm3),
        ('60-90', my_tm5, my_tm4),
        ('90+', None, my_tm5),
    ]
    sql = " union all ".join(
        _click_zero_bucket_sql(label, platform, click_platform,
                               not_seen_before, seen_before)
        for label, not_seen_before, seen_before in buckets)

    # NOTE(review): connection credentials are hard-coded in source; they
    # should be moved to configuration or a secret store.
    db = pymysql.connect(host='10.66.157.22', port=4000, user='root',
                         passwd='3SYz54LS9#^9sBvC', db='jerry_test')
    try:
        cursor = db.cursor()
        cursor.execute(sql)
        result = cursor.fetchall()
    finally:
        # Close the connection even when the query fails.
        db.close()

    return {row[0]: row[1] for row in result}
if __name__ == '__main__':
    # Ad-hoc run: query the no-click device counts for each platform filter.
    no_click_uid_detail_all = get_click_zero_uid_count("all")
    no_click_uid_detail_ios = get_click_zero_uid_count("ios")
    no_click_uid_detail_android = get_click_zero_uid_count("android")
eda/recommended_indexs_v2/getRegisterUidDetail.py
0 → 100644
View file @
4758923b
import
datetime
import
pymysql
def _midnight_timestamp(day):
    """Return the Unix timestamp (int) of local midnight at the start of *day*.

    Replaces ``day.strftime("%s")``: ``%s`` is not one of the format codes
    Python documents as portable and is unavailable on some platforms.
    """
    return int(datetime.datetime.combine(day, datetime.time.min).timestamp())


# Read the clock once so every cut-off is relative to the same day, even if
# the module happens to be imported around midnight.
_today = datetime.date.today()

# Timestamp of one week ago (7 days)
my_date1 = _today - datetime.timedelta(days=7)
my_tm1 = _midnight_timestamp(my_date1)

# Timestamp of two weeks ago (14 days)
my_date2 = _today - datetime.timedelta(days=14)
my_tm2 = _midnight_timestamp(my_date2)

# Timestamp of one month ago (30 days)
my_date3 = _today - datetime.timedelta(days=30)
my_tm3 = _midnight_timestamp(my_date3)

# Timestamp of two months ago (60 days)
my_date4 = _today - datetime.timedelta(days=60)
my_tm4 = _midnight_timestamp(my_date4)

# Timestamp of three months ago (90 days)
my_date5 = _today - datetime.timedelta(days=90)
my_tm5 = _midnight_timestamp(my_date5)
def _register_bucket_sql(label, not_seen_before, seen_before):
    """Build the SELECT counting the devices of one activation-age bucket.

    The statement counts distinct devices in data_feed_exposure that have no
    exposure earlier than ``not_seen_before`` and have one earlier than
    ``seen_before``. Either bound may be None to leave that side open.
    """
    exposed_before = ("(select distinct(device_id) from data_feed_exposure"
                      " where time < {0})")

    conditions = []
    if not_seen_before is not None:
        conditions.append(
            "device_id not in " + exposed_before.format(not_seen_before))
    if seen_before is not None:
        conditions.append(
            "device_id in " + exposed_before.format(seen_before))

    return ("select '{0}' as label,count(distinct(device_id))"
            " from data_feed_exposure"
            " where {1}").format(label, " and ".join(conditions))


def get_register_uid_count():
    """Count distinct devices per activation-age bucket.

    rtype : dict -- bucket label ('0-7', '7-14', '14-30', '30-60', '60-90',
            '90+') -> number of distinct devices

    A device's bucket is derived from whether it has exposure rows older
    than the module-level cut-off timestamps my_tm1 .. my_tm5.
    """
    # (label, no exposure earlier than, some exposure earlier than)
    buckets = [
        ('0-7', my_tm1, None),
        ('7-14', my_tm2, my_tm1),
        ('14-30', my_tm3, my_tm2),
        ('30-60', my_tm4, my_tm3),
        ('60-90', my_tm5, my_tm4),
        ('90+', None, my_tm5),
    ]
    sql = " union all ".join(
        _register_bucket_sql(label, not_seen_before, seen_before)
        for label, not_seen_before, seen_before in buckets)

    # NOTE(review): connection credentials are hard-coded in source; they
    # should be moved to configuration or a secret store.
    db = pymysql.connect(host='10.66.157.22', port=4000, user='root',
                         passwd='3SYz54LS9#^9sBvC', db='jerry_test')
    try:
        cursor = db.cursor()
        cursor.execute(sql)
        result = cursor.fetchall()
    finally:
        # Close the connection even when the query fails.
        db.close()

    return {row[0]: row[1] for row in result}
if __name__ == '__main__':
    # Ad-hoc run: fetch the per-bucket device counts once.
    register_uid_detail = get_register_uid_count()
eda/recommended_indexs_v2/main.py
View file @
4758923b
This diff is collapsed.
Click to expand it.
eda/recommended_indexs_v2/
getT
opFeatures.py
→
eda/recommended_indexs_v2/
t
opFeatures.py
View file @
4758923b
File moved
eda/recommended_indexs_v2/utils.py
View file @
4758923b
...
...
@@ -35,52 +35,3 @@ def get_yesterday_date():
yesterday
=
today
-
datetime
.
timedelta
(
days
=
1
)
yesterday
=
yesterday
.
strftime
(
"
%
Y
%
m
%
d"
)
return
yesterday
# Click-through rate of active users, per platform.
def get_activate_uid_ctr(platform, ndays=1):
    """Return click count, exposure count and click rate of active users.

    Clicks are counted in data_feed_click for the target day; exposures are
    counted in data_feed_exposure for the same day, restricted to devices
    that have a matching click row on that day.

    platform : 'all';'ios';'android' (any other value is treated as 'all')
    ndays : 1;2;3;4.. #The number of days from the current time
    rtype : list -- [platform label, click count, exposure count, click rate]

    The click rate is clicks / exposures rounded to 4 places. When there are
    no exposures it is 0.0 (previously this raised ZeroDivisionError).
    """
    # Keep the SQL predicate and the display label in separate names instead
    # of repeatedly overwriting the ``platform`` argument.
    if platform == "ios":
        exposure_filter, label = "='App Store'", "苹果"
    elif platform == "android":
        exposure_filter, label = "!='App Store'", "安卓"
    else:
        exposure_filter, label = " is not null", "所有"

    # The click table is filtered on the store name with its spaces removed
    # ('AppStore'); " is not null" must keep its spaces. The check on the
    # second-to-last character tells the two kinds of predicate apart.
    if exposure_filter[-2] == 'e':
        click_filter = exposure_filter.replace(' ', '')
    else:
        click_filter = exposure_filter

    on_target_day = ("from_unixtime(time,'%Y-%m-%d')"
                     "=date_add(curdate(), interval -{0} day)").format(ndays)

    sql_clk = ("select count(device_id) from data_feed_click"
               " where {0}"
               " and device_type{1}").format(on_target_day, click_filter)
    clk_count = con_sql(sql_clk)[0][0]

    sql_imp = ("select count(device_id) from data_feed_exposure"
               " where {0}"
               " and device_id in"
               " (select device_id from data_feed_click"
               " where {0}"
               " and device_type{1})"
               " and device_type{2}").format(
                   on_target_day, click_filter, exposure_filter)
    imp_count = con_sql(sql_imp)[0][0]

    # Guard the division: a day without exposures yields a rate of 0.0.
    clk_rate = round(clk_count / imp_count, 4) if imp_count else 0.0
    return [label, clk_count, imp_count, clk_rate]
# Distribution of clicks per user: (click count : number of distinct users)
def
get_click_times_to_count_uid_df
():
"""
Return how many devices clicked a given number of times yesterday.

The inner query groups the data_feed_click rows dated curdate() - 1 day
by device_id and counts them as ``times``; the outer query groups by
``times`` and counts the devices in each group, ordered by ``times``.

rtype : tuple (as stated by the original author; the value is whatever
        con_sql returns -- con_sql is not visible here, confirm there)
"""
sql
=
"select times,count(device_id)
\
from (select device_id,count(cid_type) as times
\
from data_feed_click
\
where from_unixtime(time,'
%
Y-
%
m-
%
d')=date_add(curdate(), interval -1 day)
\
group by device_id) as t
\
group by times order by times"
result
=
con_sql
(
sql
)
return
result
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment