Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in
Toggle navigation
F
ffm-baseline
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
ML
ffm-baseline
Commits
9fbc4300
Commit
9fbc4300
authored
Aug 28, 2018
by
高雅喆
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
optimize sql with stat_date
parent
e4092f53
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
35 additions
and
43 deletions
+35
-43
cidRate.py
eda/recommended_indexs/cidRate.py
+10
-12
clkCidUidRate.py
eda/recommended_indexs/clkCidUidRate.py
+6
-8
func.py
eda/recommended_indexs/func.py
+14
-16
topFeatures.py
eda/recommended_indexs/topFeatures.py
+5
-7
No files found.
eda/recommended_indexs/cidRate.py
View file @
9fbc4300
# -*- coding: UTF-8 -*-
from
utils
import
con_sql
from
utils
import
con_sql
,
get_yesterday_date
class
CidRate
(
object
):
def
__init__
(
self
,
platform
,
cid_type
,
ndays
=
1
):
def
__init__
(
self
,
platform
,
cid_type
):
"""
ndays : 1;2;3;4.. #The number of days from the current time
platform : 'all';'ios';'android'
cid_type : 'diary';'answer';'question'...
"""
self
.
ndays
=
ndays
if
platform
==
"ios"
:
self
.
platform
=
"='App Store'"
elif
platform
==
"android"
:
...
...
@@ -25,13 +23,13 @@ class CidRate(object):
rtype : list
"""
sql_cid
=
"select count(cid) from data_feed_click2
\
where
from_unixtime(time,'
%
Y-
%
m-
%
d')=date_add(curdate(), interval -{0} day)
\
where
stat_date > '{0}'
\
and device_type{1}
\
and cid_type='{2}'"
.
format
(
self
.
ndays
,
self
.
platform
.
replace
(
' '
,
''
)
if
self
.
platform
[
-
2
]
==
'e'
else
self
.
platform
,
self
.
cid_type
)
and cid_type='{2}'"
.
format
(
get_yesterday_date
()
,
self
.
platform
.
replace
(
' '
,
''
)
if
self
.
platform
[
-
2
]
==
'e'
else
self
.
platform
,
self
.
cid_type
)
cid_clk_count
=
con_sql
(
sql_cid
)[
0
][
0
]
sql_all
=
"select count(cid) from data_feed_click2
\
where
from_unixtime(time,'
%
Y-
%
m-
%
d')=date_add(curdate(), interval -{0} day)
\
and device_type{1}"
.
format
(
self
.
ndays
,
self
.
platform
.
replace
(
' '
,
''
)
if
self
.
platform
[
-
2
]
==
'e'
else
self
.
platform
)
where
stat_date > '{0}'
\
and device_type{1}"
.
format
(
get_yesterday_date
()
,
self
.
platform
.
replace
(
' '
,
''
)
if
self
.
platform
[
-
2
]
==
'e'
else
self
.
platform
)
all_clk_count
=
con_sql
(
sql_all
)[
0
][
0
]
cid_clk_rate
=
round
(
cid_clk_count
/
all_clk_count
,
4
)
return
[
platform
,
cid_clk_count
,
all_clk_count
,
cid_clk_rate
]
...
...
@@ -43,12 +41,12 @@ class CidRate(object):
rtype : list
"""
sql_cid
=
"select count(cid) from data_feed_exposure2
\
where
from_unixtime(time,'
%
Y-
%
m-
%
d')=date_add(curdate(), interval -{0} day)
\
and device_type{1} and cid_type='{2}'"
.
format
(
self
.
ndays
,
self
.
platform
,
self
.
cid_type
)
where
stat_date > '{0}'
\
and device_type{1} and cid_type='{2}'"
.
format
(
get_yesterday_date
()
,
self
.
platform
,
self
.
cid_type
)
cid_imp_count
=
con_sql
(
sql_cid
)[
0
][
0
]
sql_all
=
"select count(cid) from data_feed_exposure2
\
where
from_unixtime(time,'
%
Y-
%
m-
%
d')=date_add(curdate(), interval -{0} day)
\
and device_type{1}"
.
format
(
self
.
ndays
,
self
.
platform
)
where
stat_date > '{0}'
\
and device_type{1}"
.
format
(
get_yesterday_date
()
,
self
.
platform
)
all_imp_count
=
con_sql
(
sql_all
)[
0
][
0
]
cid_imp_rate
=
round
(
cid_imp_count
/
all_imp_count
,
4
)
return
[
platform
,
cid_imp_count
,
all_imp_count
,
cid_imp_rate
]
...
...
eda/recommended_indexs/clkCidUidRate.py
View file @
9fbc4300
# -*- coding: UTF-8 -*-
from
utils
import
con_sql
from
utils
import
con_sql
,
get_yesterday_date
class
ClkCidUidRate
(
object
):
def
__init__
(
self
,
platform
,
cid_type
,
ndays
=
1
):
def
__init__
(
self
,
platform
,
cid_type
):
"""
ndays : 1;2;3;4.. #The number of days from the current time
platform : 'all';'ios';'android'
cid_type : 'diary';'answer';'question';"everything"...
"""
self
.
ndays
=
ndays
if
platform
==
"ios"
:
self
.
platform
=
"='App Store'"
elif
platform
==
"android"
:
...
...
@@ -27,15 +25,15 @@ class ClkCidUidRate(object):
rtype : list
"""
sql_clk
=
"select count(distinct(device_id)) from data_feed_click2
\
where
from_unixtime(time,'
%
Y-
%
m-
%
d')=date_add(curdate(), interval -{0} day)
\
where
stat_date > '{0}'
\
and device_type{1}
\
and cid_type{2}"
.
format
(
self
.
ndays
,
self
.
platform
.
replace
(
' '
,
''
)
if
self
.
platform
[
-
2
]
==
'e'
else
self
.
platform
,
self
.
cid_type
)
and cid_type{2}"
.
format
(
get_yesterday_date
()
,
self
.
platform
.
replace
(
' '
,
''
)
if
self
.
platform
[
-
2
]
==
'e'
else
self
.
platform
,
self
.
cid_type
)
clk_count
=
con_sql
(
sql_clk
)[
0
][
0
]
sql_imp
=
"select count(distinct(device_id)) from data_feed_exposure2
\
where
from_unixtime(time,'
%
Y-
%
m-
%
d')=date_add(curdate(), interval -{0} day)
\
where
stat_date > '{0}'
\
and device_type{1}
\
and cid_type{2}"
.
format
(
self
.
ndays
,
self
.
platform
,
self
.
cid_type
)
and cid_type{2}"
.
format
(
get_yesterday_date
()
,
self
.
platform
,
self
.
cid_type
)
imp_count
=
con_sql
(
sql_imp
)[
0
][
0
]
clk_rate
=
round
(
clk_count
/
imp_count
,
4
)
...
...
eda/recommended_indexs/func.py
View file @
9fbc4300
from
utils
import
con_sql
from
utils
import
con_sql
,
get_yesterday_date
from
getClickZeroUidDetail
import
get_click_zero_uid_count
from
getRegisterUidDetail
import
get_register_uid_count
#获取各个平台下的活跃用户点击率
def
get_activate_uid_ctr
(
platform
,
ndays
=
1
):
def
get_activate_uid_ctr
(
platform
):
"""
ndays : 1;2;3;4.. #The number of days from the current time
platform : 'all';'ios';'android'
rtype : list
"""
...
...
@@ -18,16 +17,16 @@ def get_activate_uid_ctr(platform, ndays=1):
else
:
platform
=
" is not null"
sql_clk
=
"select count(device_id) from data_feed_click2
\
where
from_unixtime(time,'
%
Y-
%
m-
%
d')=date_add(curdate(), interval -{0} day)
\
and device_type{1}"
.
format
(
ndays
,
platform
.
replace
(
' '
,
''
)
if
platform
[
-
2
]
==
'e'
else
platform
)
where
stat_date > '{0}'
\
and device_type{1}"
.
format
(
get_yesterday_date
()
,
platform
.
replace
(
' '
,
''
)
if
platform
[
-
2
]
==
'e'
else
platform
)
clk_count
=
con_sql
(
sql_clk
)[
0
][
0
]
sql_imp
=
"select count(device_id) from data_feed_exposure2
\
where
from_unixtime(time,'
%
Y-
%
m-
%
d')=date_add(curdate(), interval -{0} day)
\
where
stat_date > '{0}'
\
and device_id in
\
(select device_id from data_feed_click2
\
where
from_unixtime(time,'
%
Y-
%
m-
%
d')=date_add(curdate(), interval -{1} day)
\
and device_type{
2
})
\
and device_type{
3}"
.
format
(
ndays
,
ndays
,
platform
.
replace
(
' '
,
''
)
if
platform
[
-
2
]
==
'e'
else
platform
,
platform
)
where
stat_date > '{0}'
\
and device_type{
1
})
\
and device_type{
2}"
.
format
(
get_yesterday_date
()
,
platform
.
replace
(
' '
,
''
)
if
platform
[
-
2
]
==
'e'
else
platform
,
platform
)
imp_count
=
con_sql
(
sql_imp
)[
0
][
0
]
clk_rate
=
round
(
clk_count
/
imp_count
,
4
)
if
platform
==
"='App Store'"
:
...
...
@@ -39,9 +38,8 @@ def get_activate_uid_ctr(platform, ndays=1):
return
[
platform
,
clk_count
,
imp_count
,
clk_rate
]
#获取活跃用户平均每天曝光次数
def
get_activate_uid_imp_times
(
city
,
ndays
=
1
):
def
get_activate_uid_imp_times
(
city
):
"""
ndays : 1;2;3;4.. #The number of days from the current time
city : 'beijing';'all'
rtype : list
"""
...
...
@@ -50,16 +48,16 @@ def get_activate_uid_imp_times(city,ndays=1):
else
:
city
=
" is not null"
sql_uid
=
"select count(distinct(device_id)) from data_feed_click2
\
where
from_unixtime(time,'
%
Y-
%
m-
%
d')=date_add(curdate(), interval -{0} day)
\
and city_id{1}"
.
format
(
ndays
,
city
)
where
stat_date > '{0}'
\
and city_id{1}"
.
format
(
get_yesterday_date
()
,
city
)
sql_uid_count
=
con_sql
(
sql_uid
)[
0
][
0
]
sql_imp
=
"select count(device_id) from data_feed_exposure2
\
where device_id in
\
(select device_id from data_feed_click2
\
where
from_unixtime(time,'
%
Y-
%
m-
%
d')=date_add(curdate(), interval -{0} day)
\
where
stat_date > '{0}'
\
and city_id{1})
\
and
from_unixtime(time,'
%
Y-
%
m-
%
d')=date_add(curdate(), interval -{0} day)
\
and city_id{1}"
.
format
(
ndays
,
city
)
and
stat_date > '{0}'
\
and city_id{1}"
.
format
(
get_yesterday_date
()
,
city
)
sql_imp_times
=
con_sql
(
sql_imp
)[
0
][
0
]
if
city
==
"='beijing'"
:
city
=
"北京"
...
...
eda/recommended_indexs/topFeatures.py
View file @
9fbc4300
...
...
@@ -4,14 +4,12 @@ from config import DIRECTORY_PATH
class
TopFeatures
(
object
):
def
__init__
(
self
,
platform
,
cid_type
,
top_n
=-
1
,
ndays
=
1
):
def
__init__
(
self
,
platform
,
cid_type
,
top_n
=-
1
):
"""
ndays : 1;2;3;4.. #The number of days from the current time
platform : 'all';'ios';'android'
cid_type : 'diary';'answer';'question'...
top_n : the top rows of the result
"""
self
.
ndays
=
ndays
if
platform
==
"ios"
:
self
.
platform
=
"='App Store'"
elif
platform
==
"android"
:
...
...
@@ -25,10 +23,10 @@ class TopFeatures(object):
def
get_click_times
(
self
):
# rtype : dict
sql
=
"select cid,count(cid) from data_feed_click2
\
where
from_unixtime(time,'
%
Y-
%
m-
%
d')=date_add(curdate(), interval -{0} day)
\
where
stat_date > '{0}'
\
and device_type{1} and cid_type='{2}'
\
group by cid
\
order by count(cid) desc"
.
format
(
self
.
ndays
,
self
.
platform
.
replace
(
' '
,
''
)
if
self
.
platform
[
-
2
]
==
'e'
else
self
.
platform
,
self
.
cid_type
)
order by count(cid) desc"
.
format
(
get_yesterday_date
()
,
self
.
platform
.
replace
(
' '
,
''
)
if
self
.
platform
[
-
2
]
==
'e'
else
self
.
platform
,
self
.
cid_type
)
clk_times
=
tuple2dict
(
con_sql
(
sql
))
return
clk_times
...
...
@@ -36,9 +34,9 @@ class TopFeatures(object):
def
get_impression_times
(
self
):
# rtype : dict
sql
=
"select cid,count(cid) from data_feed_exposure2
\
where
from_unixtime(time,'
%
Y-
%
m-
%
d')=date_add(curdate(), interval -{0} day)
\
where
stat_date > '{0}'
\
and device_type{1} and cid_type='{2}'
\
group by cid order by count(cid) desc"
.
format
(
self
.
ndays
,
self
.
platform
,
self
.
cid_type
)
group by cid order by count(cid) desc"
.
format
(
get_yesterday_date
()
,
self
.
platform
,
self
.
cid_type
)
imp_times
=
tuple2dict
(
con_sql
(
sql
))
return
imp_times
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment