Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in
Toggle navigation
F
ffm-baseline
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
ML
ffm-baseline
Commits
2c37208a
Commit
2c37208a
authored
Oct 15, 2018
by
赵晨
Browse files
Options
Browse Files
Download
Plain Diff
Merge remote-tracking branch 'origin/master'
parents
ed2cb21f
4fd9eed9
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
133 additions
and
63 deletions
+133
-63
node2vec_ctr.py
eda/gray_stat/node2vec_ctr.py
+133
-63
No files found.
eda/gray_stat/node2vec_ctr.py
View file @
2c37208a
# -*- coding: UTF-8 -*-
import
pymysql
import
datetime
import
pandas
as
pd
# Directory prefix for this script's CSV output; rate2file() appends
# "node2vec_ctr.csv" to it to form the output path.
DIRECTORY_PATH = "/data2/ffm/"
def get_yesterday_date(today=None):
    """Return the day before *today* formatted as ``YYYY-MM-DD``.

    The formatted date (e.g. ``"2018-08-08"``) is also printed to stdout.

    :param today: reference date; when omitted, ``datetime.date.today()``
        is used, so the function returns yesterday's date.
    :type today: datetime.date or None
    :rtype: str
    """
    if today is None:
        today = datetime.date.today()
    yesterday = (today - datetime.timedelta(days=1)).strftime("%Y-%m-%d")
    print(yesterday)
    return yesterday
def get_data():
    """Fetch the device ids selected by the trailing-character filter.

    Runs a single query against ``nd_device_cid_similarity_matrix`` in the
    ``jerry_prod`` database and returns the first column of every row.

    :return: the matching ``device_id`` values; an empty tuple when the
        query returns no rows.
    :rtype: tuple
    """
    # NOTE(review): connection credentials are hard-coded in source; they
    # should be moved to configuration / environment.
    conn2db = pymysql.connect(host='10.66.157.22', port=4000, user='root',
                              passwd='3SYz54LS9#^9sBvC', db='jerry_prod')
    try:
        cursor = conn2db.cursor()
        try:
            # NOTE(review): inside a character class '|' is a literal, so
            # '[3|4]$' matches ids ending in '3', '4' or '|'. Kept as-is so
            # the selected population does not change.
            sql = ("select device_id from nd_device_cid_similarity_matrix "
                   "where device_id regexp '[3|4]$'")
            cursor.execute(sql)
            result = cursor.fetchall()
        finally:
            cursor.close()
    finally:
        # The connection was previously left open; always release it.
        conn2db.close()
    # Take the first column directly: no pandas round-trip, and an empty
    # result set yields () instead of raising KeyError.
    return tuple(row[0] for row in result)
def ctr(date):
    """Compute diary clicks, exposures and click-through rate for one day.

    Counts ``cid`` rows with ``cid_type = 'diary'`` in ``data_feed_click``
    and ``data_feed_exposure`` for the devices returned by ``get_data()``,
    printing each figure as it is obtained.

    :param date: value compared against the ``stat_date`` column,
        e.g. ``"2018-08-08"``.
    :return: ``(click, exp, rate)`` where ``rate`` is ``click / exp``, or
        ``0.0`` when there are no exposures.
    :rtype: tuple
    """
    device_id = get_data()
    click = 0
    exp = 0
    if device_id:
        # One placeholder per id: the driver escapes the values, and the
        # statement stays valid for a single id (a formatted Python tuple
        # would render as "('x',)", which is not valid SQL).
        placeholders = ", ".join(["%s"] * len(device_id))
        sql_click = ("select count(cid) from data_feed_click "
                     "where cid_type = 'diary' "
                     "and stat_date = %s and device_id in ({});"
                     .format(placeholders))
        sql_exp = ("select count(cid) from data_feed_exposure "
                   "where cid_type = 'diary' and stat_date = %s and "
                   "device_id in ({})".format(placeholders))
        params = (date,) + tuple(device_id)

        # NOTE(review): connection credentials are hard-coded in source;
        # they should be moved to configuration / environment.
        db = pymysql.connect(host='10.66.157.22', port=4000, user='root',
                             passwd='3SYz54LS9#^9sBvC', db='jerry_prod')
        try:
            cursor = db.cursor()
            try:
                cursor.execute(sql_click, params)
                click = cursor.fetchone()[0]
                print("点击数:" + str(click))
                cursor.execute(sql_exp, params)
                exp = cursor.fetchone()[0]
                print("曝光数:" + str(exp))
            finally:
                cursor.close()
        finally:
            db.close()
    else:
        # No devices selected: "in ()" is not valid SQL, so skip the
        # queries and report zero counts.
        print("点击数:" + str(click))
        print("曝光数:" + str(exp))

    if exp != 0:
        rate = click / exp
        print("点击率:" + str(rate))
    else:
        # Avoid dividing by zero; callers always get a 3-tuple back.
        rate = 0.0
    return click, exp, rate
def rate2file(data=None):
    """Append one ``date,click,exposure,rate`` line to ``node2vec_ctr.csv``.

    The date column is ``get_yesterday_date()`` with the dashes removed
    (``YYYYMMDD``). The file lives under ``DIRECTORY_PATH`` and is opened in
    append mode, so it is created when missing.

    :param data: sequence whose first three items are written as the click,
        exposure and rate columns (the tuple returned by ``ctr``). When
        omitted, the module-level ``temp_data`` set by the script's
        ``__main__`` section is used, which is what this function always
        read before the parameter existed.
    """
    if data is None:
        # Backward-compatible fallback to the global set by the entry point.
        data = temp_data
    output_path = DIRECTORY_PATH + "node2vec_ctr.csv"
    # Build the row before opening the file so a failure here does not
    # leave behind a newly created empty file.
    fields = [
        get_yesterday_date().replace('-', ''),
        str(data[0]),
        str(data[1]),
        str(data[2]),
    ]
    line = ','.join(fields) + '\n'
    with open(output_path, 'a+') as f:
        f.write(line)
# Script entry point: compute the CTR figures for yesterday and append them
# to the CSV report.
if __name__ == "__main__":
    # Usage: ctr(date)
    date = get_yesterday_date()
    # rate2file() reads the module-level name temp_data, so it must be
    # assigned before the call below.
    temp_data = ctr(date)
    rate2file()
from
utils
import
con_sql
,
get_yesterday_date
,
get_between_day
import
time
# Directory prefix for the report written by main(), which appends
# "ctr.csv" to it to form the output path.
OUTPUT_PATH = "/data2/models/eda/node2vec/"
class GrayStat(object):
    """Daily click / exposure counts for devices in the similarity matrix.

    Every query is restricted to devices that

    * match ``device_id regexp '[<uid_type>]$'``,
    * appear in ``nd_device_cid_similarity_matrix_tmp`` for the same
      ``stat_date``, and
    * are absent from ``jerry_test.bl_device_list`` and
      ``jerry_prod.blacklist``.

    NOTE(review): values are interpolated into the SQL text with
    ``str.format``; only pass trusted ``cid_type`` / ``uid_type`` / ``ndays``.
    """

    def __init__(self, cid_type, uid_type, ndays=None):
        """
        cid_type : diary, answer, question
        uid_type : body of the character class applied to the end of
                   device_id -- "8": ends with 8; "6": ends with 6;
                   "6|8": ends with 6 or 8; "^68": does not end with 6 or 8
        ndays    : stat_date to query, e.g. '2018-08-30'. Defaults to
                   get_yesterday_date(), resolved when the instance is
                   created rather than once at class-definition time.
        """
        self.cid_type = cid_type
        self.uid_type = uid_type
        self.ndays = get_yesterday_date() if ndays is None else ndays

    def _build_sql(self, counted, table, with_diary_video):
        """Return the count query for *table* with all shared filters.

        counted          : expression placed inside ``count(...)``
        table            : table to read from
        with_diary_video : also accept rows whose cid_type is 'diary_video'
        """
        if with_diary_video:
            cid_filter = "and (cid_type='{1}' or cid_type='diary_video')"
        else:
            cid_filter = "and cid_type='{1}'"
        template = " ".join([
            "select count(" + counted + ") from " + table,
            "where stat_date='{0}'",
            cid_filter,
            "and device_id regexp '[{2}]$'",
            "and device_id in",
            "(select device_id",
            "from nd_device_cid_similarity_matrix_tmp",
            "where stat_date='{0}')",
            "and device_id not in (select distinct(device_id) from jerry_test.bl_device_list)",
            "and device_id not in (select distinct(device_id) from jerry_prod.blacklist)",
        ])
        return template.format(self.ndays, self.cid_type, self.uid_type)

    def get_uid_count(self):
        """Return the number of distinct devices with at least one click."""
        sql = self._build_sql("distinct(device_id)", "data_feed_click", True)
        uid_count = con_sql(sql)[0][0]
        return uid_count

    def get_uid_clk_times(self):
        """Return the number of click rows for the selected devices."""
        sql = self._build_sql("device_id", "data_feed_click", True)
        uid_clk_times = con_sql(sql)[0][0]
        return uid_clk_times

    def get_uid_imp_times(self):
        """Return the number of exposure rows for the selected devices."""
        # NOTE(review): unlike the click queries, exposures do not include
        # cid_type='diary_video' -- confirm this asymmetry is intended.
        sql = self._build_sql("device_id", "data_feed_exposure", False)
        uid_imp_times = con_sql(sql)[0][0]
        return uid_imp_times
class AllStat(object):
    """Daily click / exposure counts for all non-blacklisted devices.

    Every query is restricted to devices that match
    ``device_id regexp '[<uid_type>]$'`` and are absent from
    ``jerry_test.bl_device_list`` and ``jerry_prod.blacklist``.

    NOTE(review): values are interpolated into the SQL text with
    ``str.format``; only pass trusted ``cid_type`` / ``uid_type`` / ``ndays``.
    """

    def __init__(self, cid_type, uid_type, ndays=None):
        """
        cid_type : diary, answer, question
        uid_type : body of the character class applied to the end of
                   device_id -- "8": ends with 8; "6": ends with 6;
                   "6|8": ends with 6 or 8; "^68": does not end with 6 or 8
        ndays    : stat_date to query, e.g. '2018-08-30'. Defaults to
                   get_yesterday_date(), resolved when the instance is
                   created rather than once at class-definition time.
        """
        self.cid_type = cid_type
        self.uid_type = uid_type
        self.ndays = get_yesterday_date() if ndays is None else ndays

    def _build_sql(self, counted, table, with_diary_video):
        """Return the count query for *table* with all shared filters.

        counted          : expression placed inside ``count(...)``
        table            : table to read from
        with_diary_video : also accept rows whose cid_type is 'diary_video'
        """
        if with_diary_video:
            cid_filter = "and (cid_type='{1}' or cid_type='diary_video')"
        else:
            cid_filter = "and cid_type='{1}'"
        template = " ".join([
            "select count(" + counted + ") from " + table,
            "where stat_date='{0}'",
            cid_filter,
            "and device_id regexp '[{2}]$'",
            "and device_id not in (select distinct(device_id) from jerry_test.bl_device_list)",
            "and device_id not in (select distinct(device_id) from jerry_prod.blacklist)",
        ])
        return template.format(self.ndays, self.cid_type, self.uid_type)

    def get_uid_count(self):
        """Return the number of distinct devices with at least one click."""
        sql = self._build_sql("distinct(device_id)", "data_feed_click", True)
        uid_count = con_sql(sql)[0][0]
        return uid_count

    def get_uid_clk_times(self):
        """Return the number of click rows for the selected devices."""
        sql = self._build_sql("device_id", "data_feed_click", True)
        uid_clk_times = con_sql(sql)[0][0]
        return uid_clk_times

    def get_uid_imp_times(self):
        """Return the number of exposure rows for the selected devices."""
        # NOTE(review): unlike the click queries, exposures do not include
        # cid_type='diary_video' -- confirm this asymmetry is intended.
        sql = self._build_sql("device_id", "data_feed_exposure", False)
        uid_imp_times = con_sql(sql)[0][0]
        return uid_imp_times
def _ratio(numerator, denominator):
    """Return numerator / denominator, or 0.0 when the denominator is zero."""
    if not denominator:
        return 0.0
    return numerator / denominator


def _percent(value):
    """Format a fraction as a percentage string rounded to two decimals."""
    return str(round(value * 100, 2)) + '%'


def main(start_date='2018-10-11', end_date='2018-10-14'):
    """Append one tab-separated CTR comparison row per day to ``ctr.csv``.

    For every date produced by ``get_between_day(start_date, end_date)`` the
    diary statistics of ``GrayStat`` and ``AllStat`` (uid_type ``"3|4"``)
    are queried and written as::

        date, gray CTR, all CTR, growth rate,
        gray uid count, gray impressions, gray clicks,
        all uid count, all impressions, all clicks

    CTR is clicks / impressions and growth rate is
    ``(gray CTR - all CTR) / all CTR``; a ratio whose denominator is zero is
    reported as 0.0 instead of raising ``ZeroDivisionError``.

    :param start_date: first argument passed to ``get_between_day``.
    :param end_date: second argument passed to ``get_between_day``.
    """
    date_list = get_between_day(start_date, end_date)
    output = OUTPUT_PATH + "ctr.csv"
    result = []
    for my_date in date_list:
        print("stat" + " " + my_date)
        g_class = GrayStat("diary", "3|4", my_date)
        a_class = AllStat("diary", "3|4", my_date)

        # Run each query exactly once and reuse the figures: previously the
        # click and impression queries were executed twice per class.
        g_uid = g_class.get_uid_count()
        g_imp = g_class.get_uid_imp_times()
        g_clk = g_class.get_uid_clk_times()
        a_uid = a_class.get_uid_count()
        a_imp = a_class.get_uid_imp_times()
        a_clk = a_class.get_uid_clk_times()

        g_ctr = _ratio(g_clk, g_imp)
        a_ctr = _ratio(a_clk, a_imp)
        growth_rate = _ratio(g_ctr - a_ctr, a_ctr)

        fields = [
            my_date,
            _percent(g_ctr),
            _percent(a_ctr),
            _percent(growth_rate),
            str(g_uid), str(g_imp), str(g_clk),
            str(a_uid), str(a_imp), str(a_clk),
        ]
        result.append("\t".join(fields) + "\n")

    # Written only after every day has been computed, so a failing query
    # does not leave a partial report behind.
    with open(output, "a+") as f:
        f.writelines(result)
# Run the report only when the file is executed as a script, not on import.
if __name__ == '__main__':
    main()
\ No newline at end of file
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment