Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in
Toggle navigation
F
ffm-baseline
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
ML
ffm-baseline
Commits
a781ec96
Commit
a781ec96
authored
Jan 08, 2019
by
王志伟
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
new files
parent
3e25470a
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
231 additions
and
24 deletions
+231
-24
Recommendation_strategy_all.scala
...src/main/scala/com/gmei/Recommendation_strategy_all.scala
+6
-7
find_bug.scala
eda/feededa/src/main/scala/com/gmei/find_bug.scala
+225
-17
No files found.
eda/feededa/src/main/scala/com/gmei/Recommendation_strategy_all.scala
View file @
a781ec96
...
...
@@ -47,7 +47,6 @@ object Recommendation_strategy_all {
ti
.
tidbMapTable
(
dbName
=
"jerry_prod"
,
tableName
=
"diary_video"
)
ti
.
tidbMapTable
(
dbName
=
"jerry_prod"
,
tableName
=
"data_feed_click"
)
ti
.
tidbMapTable
(
dbName
=
"jerry_prod"
,
tableName
=
"blacklist"
)
ti
.
tidbMapTable
(
dbName
=
"jerry_test"
,
tableName
=
"bl_device_list"
)
ti
.
tidbMapTable
(
dbName
=
"jerry_prod"
,
tableName
=
"data_feed_exposure"
)
ti
.
tidbMapTable
(
dbName
=
"jerry_prod"
,
tableName
=
"merge_queue_table"
)
...
...
@@ -62,12 +61,12 @@ object Recommendation_strategy_all {
|from online.ml_device_day_active_status
|where active_type = '4'
|and first_channel_source_type not in ('yqxiu1','yqxiu2','yqxiu3','yqxiu4','yqxiu5','mxyc1','mxyc2','mxyc3'
|
,'wanpu','jinshan','jx','maimai','zhuoyi','huatian','suopingjingling','mocha','mizhe','meika','lamabang'
|
,'js-az1','js-az2','js-az3','js-az4','js-az5','jfq-az1','jfq-az2','jfq-az3','jfq-az4','jfq-az5','toufang1'
|
,'toufang2','toufang3','toufang4','toufang5','toufang6','TF-toufang1','TF-toufang2','TF-toufang3','TF-toufang4'
|
,'TF-toufang5','tf-toufang1','tf-toufang2','tf-toufang3','tf-toufang4','tf-toufang5','benzhan','promotion_aso100'
|
,'promotion_qianka','promotion_xiaoyu','promotion_dianru','promotion_malioaso','promotion_malioaso-shequ'
|
,'promotion_shike','promotion_julang_jl03
')
| ,'wanpu','jinshan','jx','maimai','zhuoyi','huatian','suopingjingling','mocha','mizhe','meika','lamabang'
| ,'js-az1','js-az2','js-az3','js-az4','js-az5','jfq-az1','jfq-az2','jfq-az3','jfq-az4','jfq-az5','toufang1'
| ,'toufang2','toufang3','toufang4','toufang5','toufang6','TF-toufang1','TF-toufang2','TF-toufang3','TF-toufang4'
| ,'TF-toufang5','tf-toufang1','tf-toufang2','tf-toufang3','tf-toufang4','tf-toufang5','benzhan','promotion_aso100'
| ,'promotion_qianka','promotion_xiaoyu','promotion_dianru','promotion_malioaso','promotion_malioaso-shequ'
|
,'promotion_shike','promotion_julang_jl03','','unknown
')
|and partition_date ='${partition_date}'
"""
.
stripMargin
)
...
...
eda/feededa/src/main/scala/com/gmei/find_bug.scala
View file @
a781ec96
...
...
@@ -47,7 +47,6 @@ object find_bug {
ti
.
tidbMapTable
(
dbName
=
"jerry_prod"
,
tableName
=
"diary_video"
)
ti
.
tidbMapTable
(
dbName
=
"jerry_prod"
,
tableName
=
"data_feed_click"
)
ti
.
tidbMapTable
(
dbName
=
"jerry_prod"
,
tableName
=
"blacklist"
)
ti
.
tidbMapTable
(
dbName
=
"jerry_test"
,
tableName
=
"bl_device_list"
)
ti
.
tidbMapTable
(
dbName
=
"jerry_prod"
,
tableName
=
"data_feed_exposure"
)
ti
.
tidbMapTable
(
dbName
=
"jerry_prod"
,
tableName
=
"merge_queue_table"
)
...
...
@@ -56,38 +55,246 @@ object find_bug {
val
stat_date
=
param
.
date
//println(param.date)
val
partition_date
=
stat_date
.
replace
(
"-"
,
""
)
val
decive_id_oldUser
=
sc
.
sql
(
s
"""
|select distinct(device_id) as device_id
|from online.ml_device_day_active_status
|where active_type = '4'
|and first_channel_source_type not in ('yqxiu1','yqxiu2','yqxiu3','yqxiu4','yqxiu5','mxyc1','mxyc2','mxyc3'
| ,'wanpu','jinshan','jx','maimai','zhuoyi','huatian','suopingjingling','mocha','mizhe','meika','lamabang'
| ,'js-az1','js-az2','js-az3','js-az4','js-az5','jfq-az1','jfq-az2','jfq-az3','jfq-az4','jfq-az5','toufang1'
| ,'toufang2','toufang3','toufang4','toufang5','toufang6','TF-toufang1','TF-toufang2','TF-toufang3','TF-toufang4'
| ,'TF-toufang5','tf-toufang1','tf-toufang2','tf-toufang3','tf-toufang4','tf-toufang5','benzhan','promotion_aso100'
| ,'promotion_qianka','promotion_xiaoyu','promotion_dianru','promotion_malioaso','promotion_malioaso-shequ'
| ,'promotion_shike','promotion_julang_jl03','','unknown')
|and partition_date ='${partition_date}'
"""
.
stripMargin
)
decive_id_oldUser
.
createOrReplaceTempView
(
"device_id_old"
)
val
clk_count_oldUser_Contrast
=
sc
.
sql
(
s
"""
|select '${stat_date}' as stat_date, count(cid_id) as clk_count_oldUser_Contrast
|from data_feed_click jd inner join device_id_old
|on jd.device_id = device_id_old.device_id
|where (jd.cid_type = 'diary' or jd.cid_type = 'diary_video')
|and jd.device_id regexp'1$$'
|and jd.device_id not in (select device_id from blacklist)
|and jd.stat_date ='${stat_date}'
"""
.
stripMargin
)
val
clk_count_all
=
sc
.
sql
(
val
imp_count_oldUser_Contrast
=
sc
.
sql
(
s
"""
|select '${stat_date}' as stat_date,count(cid_id) as clk_count_all
|from data_feed_click
|where (cid_type = 'diary' or cid_type = 'diary_video')
|and device_id not in (select device_id from blacklist)
|and stat_date ='${stat_date}'
|select '${stat_date}' as stat_date, count(cid_id) as imp_count_oldUser_Contrast
|from data_feed_exposure je inner join device_id_old
|on je.device_id = device_id_old.device_id
|where je.cid_type = 'diary'
|and je.device_id regexp'1$$'
|and je.device_id not in (select device_id from blacklist)
|and je.stat_date ='${stat_date}'
"""
.
stripMargin
)
val
imp_count
_all
=
sc
.
sql
(
val
clk_count_oldUser
_all
=
sc
.
sql
(
s
"""
|select '${stat_date}' as stat_date,count(cid_id) as imp_count_all
|from data_feed_exposure
|where cid_type = 'diary'
|and device_id not in (select device_id from blacklist)
|and stat_date ='${stat_date}'
|select '${stat_date}' as stat_date, count(cid_id) as clk_count_oldUser_all
|from data_feed_click jd inner join device_id_old
|on jd.device_id = device_id_old.device_id
|where (jd.cid_type = 'diary' or jd.cid_type = 'diary_video')
|and jd.device_id not in (select device_id from blacklist)
|and jd.stat_date ='${stat_date}'
"""
.
stripMargin
)
val
imp_count_oldUser_all
=
sc
.
sql
(
s
"""
|select '${stat_date}' as stat_date, count(cid_id) as imp_count_oldUser_all
|from data_feed_exposure je inner join device_id_old
|on je.device_id = device_id_old.device_id
|where je.cid_type = 'diary'
|and je.device_id not in (select device_id from blacklist)
|and je.stat_date ='${stat_date}'
"""
.
stripMargin
)
val
result1
=
clk_count_all
.
join
(
imp_count_all
,
"stat_date"
)
val
result1
=
clk_count_oldUser_Contrast
.
join
(
imp_count_oldUser_Contrast
,
"stat_date"
)
.
join
(
clk_count_oldUser_all
,
"stat_date"
)
.
join
(
imp_count_oldUser_all
,
"stat_date"
)
result1
.
show
()
GmeiConfig
.
writeToJDBCTable
(
result1
,
"
find_bug
"
,
SaveMode
.
Append
)
GmeiConfig
.
writeToJDBCTable
(
result1
,
"
bug_Recommendation_strategy_temp
"
,
SaveMode
.
Append
)
// Diary click count for devices whose device_id ends in "1".
// Counts jd.cid_id over data_feed_click rows with cid_type 'diary' or 'diary_video' on
// stat_date, inner-joined to the device_id_old temp view and excluding devices listed in
// blacklist. The aggregate has no GROUP BY, so the result is one row:
// (stat_date, clk_active_1).
// `$$` is the s-interpolator escape for a literal `$`, so the SQL pattern is '1$'.
// NOTE(review): `not in (select ...)` filters out every row if blacklist.device_id
// contains a NULL — confirm that column is never NULL.
val
clk_active_1
=
sc
.
sql
(
s
"""
|select '${stat_date}' as stat_date, count(jd.cid_id) as clk_active_1
|from data_feed_click jd inner join device_id_old
|on jd.device_id = device_id_old.device_id
|where (jd.cid_type = 'diary' or jd.cid_type = 'diary_video')
|and jd.device_id regexp'1$$'
|and jd.device_id not in (select device_id from blacklist)
|and jd.stat_date ='${stat_date}'
"""
.
stripMargin
)
// Diary exposure (impression) count for devices ending in "1" that clicked on stat_date.
// Counts je.cid_id over data_feed_exposure rows with cid_type 'diary' on stat_date,
// inner-joined to device_id_old, excluding blacklist devices, and restricted to devices
// that appear in data_feed_click on stat_date with a device_id matching '1$'
// (`$$` is the interpolator escape for `$`). Result is one row: (stat_date, imp_active_1).
// Note: the click subquery has no cid_type filter, so a click on any content type
// qualifies a device; exposures here cover 'diary' only, not 'diary_video'.
val
imp_active_1
=
sc
.
sql
(
s
"""
|select '${stat_date}' as stat_date, count(je.cid_id) as imp_active_1
|from data_feed_exposure je inner join device_id_old
|on je.device_id = device_id_old.device_id
|where je.cid_type = 'diary'
|and je.device_id in (select distinct(device_id) from data_feed_click where device_id regexp '1$$' and stat_date = '${stat_date}')
|and je.device_id not in (select device_id from blacklist)
|and je.stat_date ='${stat_date}'
"""
.
stripMargin
)
// Number of distinct devices ending in "1" that clicked a diary on stat_date.
// Same source and filters as the diary click count (data_feed_click joined to
// device_id_old, cid_type 'diary' or 'diary_video', device_id matching '1$', not in
// blacklist), but counts distinct jd.device_id instead of click rows.
// Result is one row: (stat_date, clk_diary_device).
val
clk_diary_device
=
sc
.
sql
(
s
"""
|select '${stat_date}' as stat_date, count(distinct(jd.device_id)) as clk_diary_device
|from data_feed_click jd inner join device_id_old
|on jd.device_id = device_id_old.device_id
|where (jd.cid_type = 'diary' or jd.cid_type = 'diary_video')
|and jd.device_id regexp'1$$'
|and jd.device_id not in (select device_id from blacklist)
|and jd.stat_date ='${stat_date}'
"""
.
stripMargin
)
// Diary click count for all devices in device_id_old (no device_id suffix filter).
// Counts jd.cid_id over data_feed_click rows with cid_type 'diary' or 'diary_video' on
// stat_date, inner-joined to device_id_old and excluding blacklist devices.
// Result is one row: (stat_date, clk_active_all).
val
clk_active_all
=
sc
.
sql
(
s
"""
|select '${stat_date}' as stat_date, count(jd.cid_id) as clk_active_all
|from data_feed_click jd inner join device_id_old
|on jd.device_id = device_id_old.device_id
|where (jd.cid_type = 'diary' or jd.cid_type = 'diary_video')
|and jd.device_id not in (select device_id from blacklist)
|and jd.stat_date ='${stat_date}'
"""
.
stripMargin
)
// Diary exposure (impression) count for all devices that clicked on stat_date.
// Counts je.cid_id over data_feed_exposure rows with cid_type 'diary' on stat_date,
// inner-joined to device_id_old, excluding blacklist devices, and restricted to devices
// present in data_feed_click on stat_date. Result is one row: (stat_date, imp_active_all).
// Note: the click subquery has no cid_type filter, so any click qualifies a device.
val
imp_active_all
=
sc
.
sql
(
s
"""
|select '${stat_date}' as stat_date, count(je.cid_id) as imp_active_all
|from data_feed_exposure je inner join device_id_old
|on je.device_id = device_id_old.device_id
|where je.cid_type = 'diary'
|and je.device_id in (select distinct(device_id) from data_feed_click where stat_date = '${stat_date}')
|and je.device_id not in (select device_id from blacklist)
|and je.stat_date ='${stat_date}'
"""
.
stripMargin
)
// Number of strategy-hit devices that clicked on stat_date.
// Counts distinct device_id in merge_queue_table whose device_id also appears in
// data_feed_click on stat_date. Result is one row: (stat_date, clk_diary_device_cover).
// presumably merge_queue_table lists the devices covered by the strategy — TODO confirm.
// NOTE(review): the original comment says "clicked a diary", but the subquery has no
// cid_type filter, and no blacklist or device_id_old restriction is applied here —
// confirm this is intended.
val
clk_diary_device_cover
=
sc
.
sql
(
s
"""
|select '${stat_date}' as stat_date,count(distinct(device_id)) as clk_diary_device_cover
|from merge_queue_table
|where device_id in (select distinct(device_id) from data_feed_click where stat_date = '${stat_date}')
"""
.
stripMargin
)
// Total number of strategy-hit devices.
// Counts distinct device_id over the whole of merge_queue_table; stat_date is only a
// literal label on the output row and does not filter the table.
// Result is one row: (stat_date, device_all_cover).
val
device_all_cover
=
sc
.
sql
(
s
"""
|select '${stat_date}' as stat_date,count(distinct(device_id)) as device_all_cover
|from merge_queue_table
"""
.
stripMargin
)
// Combine the seven single-row metric frames into one row keyed by stat_date.
// Each input carries the same literal stat_date, so the chained joins on "stat_date"
// line the metric columns up side by side.
val
result2
=
clk_active_1
.
join
(
imp_active_1
,
"stat_date"
)
.
join
(
clk_active_all
,
"stat_date"
)
.
join
(
imp_active_all
,
"stat_date"
)
.
join
(
clk_diary_device
,
"stat_date"
)
.
join
(
clk_diary_device_cover
,
"stat_date"
)
.
join
(
device_all_cover
,
"stat_date"
)
// Print the combined row to stdout for inspection.
result2
.
show
()
// Append the row to the "bug_strategy_other" table through the project's JDBC helper.
// NOTE(review): GmeiConfig.writeToJDBCTable is defined elsewhere; target database and
// failure behaviour are not visible here.
GmeiConfig
.
writeToJDBCTable
(
result2
,
"bug_strategy_other"
,
SaveMode
.
Append
)
// New-user click-through statistics start here.
// Selects the distinct device_id values from online.ml_device_day_active_status for
// partition_date (stat_date with the dashes removed) where active_type != '4' and
// first_channel_source_type is not one of the listed channels (the list also excludes
// '' and 'unknown'). The old-user query earlier in this file uses active_type = '4'.
// presumably active_type '4' marks returning users, making != '4' the new users —
// TODO confirm against the table definition.
val
devicee_id_newUser
=
sc
.
sql
(
s
"""
|select distinct(device_id) as device_id
|from online.ml_device_day_active_status
|where active_type != '4'
|and first_channel_source_type not in ('yqxiu1','yqxiu2','yqxiu3','yqxiu4','yqxiu5','mxyc1','mxyc2','mxyc3'
| ,'wanpu','jinshan','jx','maimai','zhuoyi','huatian','suopingjingling','mocha','mizhe','meika','lamabang'
| ,'js-az1','js-az2','js-az3','js-az4','js-az5','jfq-az1','jfq-az2','jfq-az3','jfq-az4','jfq-az5','toufang1'
| ,'toufang2','toufang3','toufang4','toufang5','toufang6','TF-toufang1','TF-toufang2','TF-toufang3','TF-toufang4'
| ,'TF-toufang5','tf-toufang1','tf-toufang2','tf-toufang3','tf-toufang4','tf-toufang5','benzhan','promotion_aso100'
| ,'promotion_qianka','promotion_xiaoyu','promotion_dianru','promotion_malioaso','promotion_malioaso-shequ'
| ,'promotion_shike','promotion_julang_jl03','','unknown')
|and partition_date ='${partition_date}'
"""
.
stripMargin
)
// Print a sample of the selected device ids (show() triggers a Spark job).
devicee_id_newUser
.
show
()
// Expose the device list to the SQL queries below under the name device_id_new.
devicee_id_newUser
.
createOrReplaceTempView
(
"device_id_new"
)
// Diary click count for device_id_new devices whose device_id ends in "1".
// Counts cid_id over data_feed_click rows with cid_type 'diary' or 'diary_video' on
// stat_date, inner-joined to the device_id_new temp view and excluding blacklist
// devices. Result is one row: (stat_date, clk_count_newUser_Contrast).
// `$$` is the s-interpolator escape for a literal `$`, so the SQL pattern is '1$'.
// presumably devices ending in "1" form the contrast (control) bucket — TODO confirm.
// NOTE(review): `not in (select ...)` filters out every row if blacklist.device_id
// contains a NULL — confirm that column is never NULL.
val
clk_count_newUser_Contrast
=
sc
.
sql
(
s
"""
|select '${stat_date}' as stat_date, count(cid_id) as clk_count_newUser_Contrast
|from data_feed_click jd inner join device_id_new
|on jd.device_id = device_id_new.device_id
|where (jd.cid_type = 'diary' or jd.cid_type = 'diary_video')
|and jd.device_id regexp'1$$'
|and jd.device_id not in (select device_id from blacklist)
|and jd.stat_date ='${stat_date}'
"""
.
stripMargin
)
// Diary exposure (impression) count for device_id_new devices whose device_id ends in "1".
// Counts cid_id over data_feed_exposure rows with cid_type 'diary' on stat_date,
// inner-joined to device_id_new, with device_id matching '1$' (`$$` escapes `$`) and
// excluding blacklist devices. Result is one row: (stat_date, imp_count_newUser_Contrast).
// Note: exposures cover 'diary' only, while the matching click metric also counts
// 'diary_video'.
val
imp_count_newUser_Contrast
=
sc
.
sql
(
s
"""
|select '${stat_date}' as stat_date, count(cid_id) as imp_count_newUser_Contrast
|from data_feed_exposure je inner join device_id_new
|on je.device_id = device_id_new.device_id
|where je.cid_type = 'diary'
|and je.device_id regexp'1$$'
|and je.device_id not in (select device_id from blacklist)
|and je.stat_date ='${stat_date}'
"""
.
stripMargin
)
// Diary click count for all device_id_new devices (no device_id suffix filter).
// Counts cid_id over data_feed_click rows with cid_type 'diary' or 'diary_video' on
// stat_date, inner-joined to device_id_new and excluding blacklist devices.
// Result is one row: (stat_date, clk_count_newUser_all).
val
clk_count_newUser_all
=
sc
.
sql
(
s
"""
|select '${stat_date}' as stat_date, count(cid_id) as clk_count_newUser_all
|from data_feed_click jd inner join device_id_new
|on jd.device_id = device_id_new.device_id
|where (jd.cid_type = 'diary' or jd.cid_type = 'diary_video')
|and jd.device_id not in (select device_id from blacklist)
|and jd.stat_date ='${stat_date}'
"""
.
stripMargin
)
// Diary exposure (impression) count for all device_id_new devices (no suffix filter).
// Counts cid_id over data_feed_exposure rows with cid_type 'diary' on stat_date,
// inner-joined to device_id_new and excluding blacklist devices.
// Result is one row: (stat_date, imp_count_newUser_all).
val
imp_count_newUser_all
=
sc
.
sql
(
s
"""
|select '${stat_date}' as stat_date, count(cid_id) as imp_count_newUser_all
|from data_feed_exposure je inner join device_id_new
|on je.device_id = device_id_new.device_id
|where je.cid_type = 'diary'
|and je.device_id not in (select device_id from blacklist)
|and je.stat_date ='${stat_date}'
"""
.
stripMargin
)
// Combine the four single-row new-user metric frames into one row keyed by stat_date.
// Each input carries the same literal stat_date, so the chained joins on "stat_date"
// line the click and exposure counts up side by side.
val
result3
=
clk_count_newUser_Contrast
.
join
(
imp_count_newUser_Contrast
,
"stat_date"
)
.
join
(
clk_count_newUser_all
,
"stat_date"
)
.
join
(
imp_count_newUser_all
,
"stat_date"
)
// Print the combined row to stdout for inspection.
result3
.
show
()
// Append the row to "bug_Recommendation_strategy_newUser" through the project's JDBC
// helper. NOTE(review): GmeiConfig.writeToJDBCTable is defined elsewhere; target
// database and failure behaviour are not visible here.
GmeiConfig
.
writeToJDBCTable
(
result3
,
"bug_Recommendation_strategy_newUser"
,
SaveMode
.
Append
)
}
}
}
}
\ No newline at end of file
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment