Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in
Toggle navigation
F
ffm-baseline
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
ML
ffm-baseline
Commits
245ea49d
Commit
245ea49d
authored
Oct 25, 2018
by
王志伟
Browse files
Options
Browse Files
Download
Plain Diff
Merge branch 'master' of
http://git.wanmeizhensuo.com/ML/ffm-baseline
add video count
parents
d2fb13ea
ffefbc57
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
103 additions
and
15 deletions
+103
-15
ctr-56.py
ctr-56.py
+5
-15
test_supervisor.py
local/test_supervisor.py
+1
-0
rank01_ctr.py
rank01_ctr.py
+97
-0
No files found.
ctr-56.py
View file @
245ea49d
...
...
@@ -34,23 +34,13 @@ def ctr_all():
result
=
cursor
.
fetchall
()
tail56
=
pd
.
DataFrame
(
list
(
result
))[
0
]
.
values
.
tolist
()
start
=
"2018-09-05"
sql_all
=
"select distinct device_id from data_feed_click "
\
"where stat_date >= '{}' and stat_date <= '{}' and cid_type = 'diary'"
.
format
(
start
,
date
)
cursor
.
execute
(
sql_all
)
result_all
=
cursor
.
fetchall
()
df
=
pd
.
DataFrame
(
list
(
result_all
))
all_user
=
df
[
0
]
.
values
.
tolist
()
print
(
"老用户总数:"
)
print
(
len
(
all_user
))
day_all
=
set
(
all_user
)
&
set
(
tail56
)
print
(
"当天老用户总数:"
)
print
(
len
(
day_all
))
cover
=
len
(
day_all
&
set
(
device_id
))
print
(
"当天尾号5或6活跃用户总数:"
)
print
(
len
(
tail56
))
cover
=
len
(
set
(
tail56
)
&
set
(
device_id
))
print
(
"当天尾号5或6活跃用户覆盖数:"
)
print
(
cover
)
cover_percent
=
format
(
cover
/
len
(
day_all
),
".3
f"
)
cover_percent
=
format
(
cover
/
len
(
tail56
),
".6
f"
)
print
(
"当天尾号5或6活跃用户覆盖率:"
)
print
(
cover_percent
)
sql_click
=
"select count(cid) from data_feed_click "
\
...
...
@@ -66,7 +56,7 @@ def ctr_all():
exp
=
cursor
.
fetchone
()[
0
]
print
(
"曝光数:"
+
str
(
exp
))
print
(
"点击率:"
+
str
(
click
/
exp
))
return
len
(
day_all
),
cover
,
cover_percent
,
click
,
exp
,
format
(
click
/
exp
,
".6f"
)
return
len
(
tail56
),
cover
,
cover_percent
,
click
,
exp
,
format
(
click
/
exp
,
".6f"
)
def
ctr
():
...
...
local/test_supervisor.py
View file @
245ea49d
...
...
@@ -196,6 +196,7 @@ def router(device_id):
predict
(
user_profile
)
if
__name__
==
"__main__"
:
sql
=
"delete from data_feed_click where stat_date = '2018-10-17'"
while
True
:
start
=
time
.
time
()
empty
,
device_id_list
=
get_active_users
()
...
...
rank01_ctr.py
0 → 100644
View file @
245ea49d
# -*- coding: UTF-8 -*-
import
pymysql
import
datetime
import
pandas
as
pd
def get_yesterday_date():
    """Return yesterday's local date formatted as ``YYYY-MM-DD``.

    The formatted date is also printed to stdout before it is returned.
    """
    one_day = datetime.timedelta(days=1)
    stamp = (datetime.date.today() - one_day).strftime("%Y-%m-%d")
    print(stamp)
    return stamp
def get_data():
    """Fetch the device ids in ``ffm_diary_queue_temp`` that end in 5 or 6.

    Queries the ``eagle`` database, prints the first two ids to stdout and
    returns all of them.

    Returns:
        tuple: the ``device_id`` values in the order the query returned
        them; an empty tuple when the query matches no rows.
    """
    # NOTE(review): connection credentials are hard-coded in source; consider
    # loading them from configuration or the environment instead.
    conn2db = pymysql.connect(host='10.66.157.22', port=4000, user='root',
                              passwd='3SYz54LS9#^9sBvC', db='eagle')
    try:
        cursor = conn2db.cursor()
        try:
            # Inside a character class '|' is a literal, so the former
            # '[5|6]$' also matched ids ending in '|'. '[56]$' is the intent.
            sql = "select device_id from ffm_diary_queue_temp where device_id regexp '[56]$'"
            cursor.execute(sql)
            result = cursor.fetchall()
        finally:
            cursor.close()
    finally:
        # The connection was previously never closed.
        conn2db.close()

    # Each row is a 1-column sequence; taking column 0 directly also copes
    # with an empty result, which the DataFrame round-trip did not.
    device = tuple(row[0] for row in result)
    print(device[0:2])
    return device
def ctr_all():
    """Compute coverage and click-through figures for all tail-5/6 devices.

    Reads the module-level ``date`` (stat date string) and ``device_id``
    (collection of experiment device ids), queries the ``jerry_prod``
    database and prints each figure as it is computed.

    Returns:
        tuple: ``(active_count, cover, cover_percent, click, exp, ctr)``
        where ``active_count`` is the number of distinct devices ending in
        5 or 6 with a 'diary' exposure on ``date``, ``cover`` is how many of
        those are also in ``device_id``, ``cover_percent`` is
        ``cover / active_count`` formatted with 3 decimals, ``click`` and
        ``exp`` are the click / exposure row counts, and ``ctr`` is
        ``click / exp`` formatted with 6 decimals. A ratio whose denominator
        is zero is reported as 0 instead of raising ``ZeroDivisionError``.
    """
    # NOTE(review): connection credentials are hard-coded in source; consider
    # loading them from configuration or the environment instead.
    db = pymysql.connect(host='10.66.157.22', port=4000, user='root',
                         passwd='3SYz54LS9#^9sBvC', db='jerry_prod')
    try:
        cursor = db.cursor()
        try:
            # '[56]$' rather than '[5|6]$': '|' is a literal inside a
            # character class. The date is passed as a bound parameter
            # instead of being formatted into the statement.
            sql_active = ("select distinct device_id from data_feed_exposure "
                          "where cid_type = 'diary' and device_id regexp '[56]$' "
                          "and stat_date = %s")
            cursor.execute(sql_active, (date,))
            # Column 0 of every row; an empty result yields an empty list.
            tail56 = [row[0] for row in cursor.fetchall()]
            print("当天尾号5或6活跃用户总数:")
            print(len(tail56))

            cover = len(set(tail56) & set(device_id))
            print("当天尾号5或6活跃用户覆盖数:")
            print(cover)

            # Guard against a day with no active tail-5/6 devices.
            cover_ratio = cover / len(tail56) if tail56 else 0.0
            cover_percent = format(cover_ratio, ".3f")
            print("当天尾号5或6活跃用户覆盖率:")
            print(cover_percent)

            sql_click = ("select count(cid) from data_feed_click "
                         "where (cid_type = 'diary' or cid_type = 'diary_video') "
                         "and stat_date = %s and device_id regexp '[56]$'")
            cursor.execute(sql_click, (date,))
            click = cursor.fetchone()[0]
            print("点击数:" + str(click))

            sql_exp = ("select count(cid) from data_feed_exposure "
                       "where (cid_type = 'diary' or cid_type = 'diary_video') "
                       "and stat_date = %s and device_id regexp '[56]$'")
            cursor.execute(sql_exp, (date,))
            exp = cursor.fetchone()[0]
            print("曝光数:" + str(exp))
        finally:
            cursor.close()
    finally:
        # The connection was previously never closed.
        db.close()

    # Guard against a day with no exposures at all.
    rate = click / exp if exp else 0.0
    print("点击率:" + str(rate))
    return len(tail56), cover, cover_percent, click, exp, format(rate, ".6f")
def ctr():
    """Compute click, exposure and click-through rate for experiment devices.

    Reads the module-level ``date`` (stat date string) and ``device_id``
    (collection of experiment device ids), counts rows in
    ``data_feed_click`` and ``data_feed_exposure`` of the ``jerry_prod``
    database for those devices on that date, and prints the figures.

    Returns:
        tuple: ``(click, exp, ctr)`` where ``ctr`` is ``click / exp``
        formatted with 6 decimals. When ``device_id`` is empty no query is
        run and both counts are 0; a zero exposure count yields a rate of 0
        instead of raising ``ZeroDivisionError``.
    """
    ids = list(device_id)
    click = 0
    exp = 0

    if ids:
        # One bound placeholder per id. Formatting the Python tuple into the
        # statement produced invalid SQL for a single id ("('x',)") and
        # relied on repr() quoting for everything else.
        placeholders = ", ".join(["%s"] * len(ids))
        params = [date] + ids

        sql_click = ("select count(cid) from data_feed_click "
                     "where (cid_type = 'diary' or cid_type = 'diary_video') "
                     "and stat_date = %s and device_id in ({})").format(placeholders)
        sql_exp = ("select count(cid) from data_feed_exposure "
                   "where (cid_type = 'diary' or cid_type = 'diary_video') "
                   "and stat_date = %s and device_id in ({})").format(placeholders)

        # NOTE(review): connection credentials are hard-coded in source;
        # consider loading them from configuration or the environment.
        db = pymysql.connect(host='10.66.157.22', port=4000, user='root',
                             passwd='3SYz54LS9#^9sBvC', db='jerry_prod')
        try:
            cursor = db.cursor()
            try:
                cursor.execute(sql_click, params)
                click = cursor.fetchone()[0]
                cursor.execute(sql_exp, params)
                exp = cursor.fetchone()[0]
            finally:
                cursor.close()
        finally:
            # The connection was previously never closed.
            db.close()

    print("实验用户点击数:" + str(click))
    print("实验用户曝光数:" + str(exp))
    # Guard against a day with no exposures for the experiment devices.
    rate = click / exp if exp else 0.0
    print("实验用户点击率:" + str(rate))
    return click, exp, format(rate, ".6f")
def rate2file():
    """Append one CSV row of the day's figures to ``56ctr.csv``.

    The row is built from module-level values: ``date`` with its dashes
    removed, the first three entries of ``temp_data`` and the first six
    entries of ``data``, comma-separated and newline-terminated. The file
    lives under ``DIRECTORY_PATH`` and is opened in append mode.
    """
    target = DIRECTORY_PATH + "56ctr.csv"
    with open(target, 'a+') as out:
        cells = [date.replace('-', '')]
        # Index explicitly so a short sequence still raises IndexError.
        cells.extend(str(temp_data[i]) for i in range(3))
        cells.extend(str(data[i]) for i in range(6))
        out.write(','.join(cells) + '\n')
# Script entry point. The names assigned here are module-level globals that
# the functions above read directly, so the order of these statements matters.
if __name__ == "__main__":
    # Directory that rate2file() prefixes to "56ctr.csv".
    DIRECTORY_PATH = "/data2/ffm/"
    # Stat date string used by ctr(), ctr_all() and rate2file().
    date = get_yesterday_date()
    # Device ids used by ctr() and ctr_all(); must be set before either runs.
    device_id = get_data()
    # (click, exp, ctr) for the devices in device_id.
    temp_data = ctr()
    # (active_count, cover, cover_percent, click, exp, ctr) for all tail-5/6 devices.
    data = ctr_all()
    # Appends one row built from date, temp_data and data.
    rate2file()
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment