Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in
Toggle navigation
F
ffm-baseline
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
ML
ffm-baseline
Commits
ca4d19f0
Commit
ca4d19f0
authored
Nov 12, 2018
by
王志伟
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
chang
parent
7a53f531
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
107 additions
and
0 deletions
+107
-0
Recommendation_strategy_all.py
eda/gray_stat/Recommendation_strategy_all.py
+107
-0
No files found.
eda/gray_stat/Recommendation_strategy_all.py
View file @
ca4d19f0
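# This file computes the CTR of the full user population after the three strategies
# (ffm, node2vec, search recommendation) are merged.
# Users whose device_id ends in 1 are the control group.
# New users of each day could not be excluded from these statistics.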
# -*- coding: UTF-8 -*-
from
utils
import
con_sql
,
get_yesterday_date
,
get_between_day
import
time
import
sys
OUTPUT_PATH
=
"/data2/models/eda/gray_stat/"
# Get the exposure count after filtering out institution users and blacklisted users
def get_imp_count_all(stragety, sta_date):
    """Count diary exposures for one device-id suffix pattern on one day.

    Runs a ``count(cid_id)`` query on ``jerry_prod.data_feed_exposure``
    limited to rows with ``cid_type = 'diary'``, a ``device_id`` matching the
    regular expression ``<stragety>$`` and ``stat_date`` equal to
    ``sta_date``. Devices present in ``jerry_test.bl_device_list`` or
    ``jerry_prod.blacklist`` are excluded.

    Args:
        stragety: Regex fragment placed directly in front of ``$`` in the
            ``device_id regexp`` clause (e.g. ``'1'``).
        sta_date: Value compared against the ``stat_date`` column.

    Returns:
        The first column of the first row returned by ``con_sql``.
    """
    # NOTE: both arguments are interpolated into the SQL text without
    # escaping, so callers must only pass trusted values.
    template = "".join((
        "select count(cid_id) from jerry_prod.data_feed_exposure where ",
        "cid_type = 'diary' ",
        "and device_id regexp'{}$' ",
        "and device_id not in (select distinct(device_id) from jerry_test.bl_device_list) ",
        "and device_id not in (select device_id from jerry_prod.blacklist) and stat_date='{}'",
    ))
    rows = con_sql(template.format(stragety, sta_date))
    first_row = rows[0]
    return first_row[0]
def get_clk_count_all(stragety, sta_date):
    """Count diary clicks for one device-id suffix pattern on one day.

    Runs a ``count(cid_id)`` query on ``jerry_prod.data_feed_click`` limited
    to rows whose ``cid_type`` is ``'diary'`` or ``'diary_video'``, whose
    ``device_id`` matches the regular expression ``<stragety>$`` and whose
    ``stat_date`` equals ``sta_date``. Devices present in
    ``jerry_test.bl_device_list`` or ``jerry_prod.blacklist`` are excluded.

    Args:
        stragety: Regex fragment placed directly in front of ``$`` in the
            ``device_id regexp`` clause (e.g. ``'1'``).
        sta_date: Value compared against the ``stat_date`` column.

    Returns:
        The first column of the first row returned by ``con_sql``.
    """
    # NOTE: both arguments are interpolated into the SQL text without
    # escaping, so callers must only pass trusted values.
    template = "".join((
        "select count(cid_id) from jerry_prod.data_feed_click ",
        "where (cid_type = 'diary' or cid_type = 'diary_video') ",
        "and device_id regexp'{}$' ",
        "and device_id not in (select device_id from jerry_test.bl_device_list) ",
        "and device_id not in (select device_id from jerry_prod.blacklist) ",
        "and stat_date='{}'",
    ))
    rows = con_sql(template.format(stragety, sta_date))
    first_row = rows[0]
    return first_row[0]
def safe_ctr(clicks, impressions, ndigits=6):
    """Return ``clicks / impressions`` rounded to ``ndigits`` decimals.

    A bucket with no impressions yields ``0.0`` instead of raising
    ``ZeroDivisionError``, so one empty bucket does not abort the whole run.
    """
    if not impressions:
        return 0.0
    return round(clicks / impressions, ndigits)


def build_stat_line(stat_date, pair_clk, pair_imp, single_clk, single_imp):
    """Build one CSV row (newline-terminated) for a single day.

    Args:
        stat_date: Day as a string; any ``-`` characters are stripped.
        pair_clk: Click counts for the device-id suffix pairs 1/2, 3/4, 5/6, 7/8.
        pair_imp: Exposure counts for the same four pairs.
        single_clk: Click counts for the odd suffixes 1, 3, 5, 7.
        single_imp: Exposure counts for the odd suffixes 1, 3, 5, 7.

    Returns:
        The comma-joined row, in this column order: date, pair clicks,
        pair exposures, odd-suffix clicks, odd-suffix exposures, even-suffix
        clicks, even-suffix exposures, pair CTRs, then the CTRs of suffixes
        1 through 8.
    """
    # Even-suffix counts are derived: (pair total) - (odd-suffix count).
    even_clk = [both - odd for both, odd in zip(pair_clk, single_clk)]
    even_imp = [both - odd for both, odd in zip(pair_imp, single_imp)]

    pair_ctr = [safe_ctr(c, i) for c, i in zip(pair_clk, pair_imp)]
    odd_ctr = [safe_ctr(c, i) for c, i in zip(single_clk, single_imp)]
    even_ctr = [safe_ctr(c, i) for c, i in zip(even_clk, even_imp)]

    # Interleave so the columns read ctr_1, ctr_2, ctr_3, ... ctr_8.
    suffix_ctr = []
    for odd, even in zip(odd_ctr, even_ctr):
        suffix_ctr.extend((odd, even))

    fields = [stat_date.replace('-', '')]
    for group in (pair_clk, pair_imp, single_clk, single_imp,
                  even_clk, even_imp, pair_ctr, suffix_ctr):
        fields.extend(group)
    return ','.join(str(value) for value in fields) + '\n'


if __name__ == '__main__':
    yesterday = get_yesterday_date()
    if len(sys.argv) != 2:
        print("usage: python {} date".format(sys.argv[0]))
        # Stop here: without the date argument sys.argv[1] would raise IndexError.
        sys.exit(1)
    date1 = sys.argv[1]
    date_list = get_between_day(date1, yesterday)

    # Inside a regex bracket expression '|' is a literal character, so
    # '[12]' (not '[1|2]') is the pattern for "ends in 1 or 2".
    pair_patterns = ['[12]', '[34]', '[56]', '[78]']
    single_patterns = ['1', '3', '5', '7']
    output_path = OUTPUT_PATH + "recommendation.csv"

    start_time = time.time()
    for my_date in date_list:
        pair_imp = []
        single_imp = []
        pair_clk = []
        single_clk = []
        print("开始获取{}数据".format(my_date))
        for pattern in pair_patterns:
            pair_clk.append(get_clk_count_all(pattern, my_date))
            pair_imp.append(get_imp_count_all(pattern, my_date))
        for pattern in single_patterns:
            # The file only defines the *_all query helpers; they accept a
            # single-digit suffix just as well as a bracket expression.
            single_clk.append(get_clk_count_all(pattern, my_date))
            single_imp.append(get_imp_count_all(pattern, my_date))

        # Progress output for the first two suffix pairs.
        print(pair_clk[0])
        print(pair_imp[0])
        print(safe_ctr(pair_clk[0], pair_imp[0]))
        print(safe_ctr(pair_clk[1], pair_imp[1]))

        with open(output_path, 'a') as f:
            f.write(build_stat_line(my_date, pair_clk, pair_imp,
                                    single_clk, single_imp))
    end_time = time.time()
    print("程序执行时间:{}s".format(end_time - start_time))
\ No newline at end of file
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment