Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in
Toggle navigation
F
ffm-baseline
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
ML
ffm-baseline
Commits
ba9ae360
Commit
ba9ae360
authored
Apr 28, 2019
by
张彦钊
Browse files
Options
Browse Files
Download
Plain Diff
Merge branch 'master' of git.wanmeizhensuo.com:ML/ffm-baseline
change test file
parents
a11ec230
9f72344a
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
104 additions
and
133 deletions
+104
-133
temp_analysis.scala
eda/feededa/src/main/scala/com/gmei/temp_analysis.scala
+34
-66
temp_count.scala
eda/feededa/src/main/scala/com/gmei/temp_count.scala
+70
-67
No files found.
eda/feededa/src/main/scala/com/gmei/temp_analysis.scala
View file @
ba9ae360
...
...
@@ -50,17 +50,10 @@ object temp_analysis {
// val ti = new TiContext(sc)
sc
.
sql
(
"use jerry_prod"
)
// ti.tidbMapTable(dbName = "jerry_prod", tableName = "diary_video")
// ti.tidbMapTable(dbName = "jerry_prod", tableName = "data_feed_click")
// ti.tidbMapTable(dbName = "jerry_prod", tableName = "blacklist")
// ti.tidbMapTable(dbName = "jerry_test", tableName = "bl_device_list")
// ti.tidbMapTable(dbName = "jerry_prod", tableName = "data_feed_exposure")
// ti.tidbMapTable(dbName = "jerry_prod", tableName = "merge_queue_table")
import
sc.implicits._
val
stat_date
=
GmeiConfig
.
getMinusNDate
(
1
)
//
val stat_date=param.date
//
val stat_date = GmeiConfig.getMinusNDate(1)
val
stat_date
=
param
.
date
//println(param.date)
val
partition_date
=
stat_date
.
replace
(
"-"
,
""
)
...
...
@@ -81,25 +74,6 @@ object temp_analysis {
)
agency_id
.
createOrReplaceTempView
(
"agency_id"
)
// //每日新用户
// val device_id_newUser = sc.sql(
// s"""
// |select distinct(device_id) as device_id
// |from online.ml_device_day_active_status
// |where active_type != '4'
// |and first_channel_source_type not in ('yqxiu1','yqxiu2','yqxiu3','yqxiu4','yqxiu5','mxyc1','mxyc2','mxyc3'
// | ,'wanpu','jinshan','jx','maimai','zhuoyi','huatian','suopingjingling','mocha','mizhe','meika','lamabang'
// | ,'js-az1','js-az2','js-az3','js-az4','js-az5','jfq-az1','jfq-az2','jfq-az3','jfq-az4','jfq-az5','toufang1'
// | ,'toufang2','toufang3','toufang4','toufang5','toufang6','TF-toufang1','TF-toufang2','TF-toufang3','TF-toufang4'
// | ,'TF-toufang5','tf-toufang1','tf-toufang2','tf-toufang3','tf-toufang4','tf-toufang5','benzhan','promotion_aso100'
// | ,'promotion_qianka','promotion_xiaoyu','promotion_dianru','promotion_malioaso','promotion_malioaso-shequ'
// | ,'promotion_shike','promotion_julang_jl03')
// |and partition_date ='${partition_date}'
// """.stripMargin
// )
// device_id_newUser.createOrReplaceTempView("device_id_new")
val
blacklist_id
=
sc
.
sql
(
s
"""
|SELECT device_id
...
...
@@ -120,48 +94,44 @@ object temp_analysis {
final_id
.
createOrReplaceTempView
(
"final_id"
)
val
diary_clk_all
=
sc
.
sql
(
// //每日新用户
val
device_id_newUser
=
sc
.
sql
(
s
"""
|select ov.partition_date,count(ov.cl_id) as clk_num,count(distinct(ov.cl_id)),count(ov.cl_id)/count(distinct(ov.cl_id))
|from online.tl_hdfs_maidian_view ov left join final_id
|on ov.cl_id = final_id.device_id
|where ov.action = "page_view"
|and params['page_name']="diary_detail"
|and ov.cl_id != "NULL"
|and ov.partition_date >='20181201'
|select distinct(oms.device_id) as device_id
|from online.ml_device_day_active_status oms left join final_id
|on oms.device_id=final_id.device_id
|where oms.active_type != '4'
|and oms.first_channel_source_type not in ('yqxiu1','yqxiu2','yqxiu3','yqxiu4','yqxiu5','mxyc1','mxyc2','mxyc3'
| ,'wanpu','jinshan','jx','maimai','zhuoyi','huatian','suopingjingling','mocha','mizhe','meika','lamabang'
| ,'js-az1','js-az2','js-az3','js-az4','js-az5','jfq-az1','jfq-az2','jfq-az3','jfq-az4','jfq-az5','toufang1'
| ,'toufang2','toufang3','toufang4','toufang5','toufang6','TF-toufang1','TF-toufang2','TF-toufang3','TF-toufang4'
| ,'TF-toufang5','tf-toufang1','tf-toufang2','tf-toufang3','tf-toufang4','tf-toufang5','benzhan','promotion_aso100'
| ,'promotion_qianka','promotion_xiaoyu','promotion_dianru','promotion_malioaso','promotion_malioaso-shequ'
| ,'promotion_shike','promotion_julang_jl03')
|and oms.partition_date ='${partition_date}'
|and final_id.device_id is null
|group by ov.partition_date
|order by ov.partition_date
"""
.
stripMargin
"""
.
stripMargin
)
diary_clk_all
.
show
(
80
)
device_id_newUser
.
createOrReplaceTempView
(
"device_id_new"
)
//日记本点击
val
referrer
=
List
(
"about_me_message_list"
,
"all_case_service_comment"
,
"all_cases"
,
"diary_detail"
,
"diary_list"
,
"diary_listof_related_service"
,
"answer_detail"
,
"community_home"
,
"conversation_detail"
,
"create_diary_title"
,
"diary_listof_related_service"
,
"doctor_all_cases"
,
"hospital_all_cases"
,
"my_favor"
,
"my_order"
,
"order_detail"
,
"personal_store_diary_list"
,
"received_votes"
,
"topic_detail"
,
"welfare_detail"
,
"welfare_list"
,
"welfare_special"
,
"wiki_detail"
,
"zone_detail"
,
"expert_detail"
,
"free_activity_detail"
,
"home"
,
"message_home"
,
"my_diary"
,
"organization_detail"
,
"other_homepage"
,
"question_detail"
,
"search_result_diary"
,
"search_result_more"
,
"welfare_detail"
,
"zone_v3"
)
for
(
a
<-
referrer
){
val
diary_clk_temp
=
sc
.
sql
(
s
"""
|select ov.partition_date,count(ov.cl_id) as clk_num,count(distinct(ov.cl_id)),count(ov.cl_id)/count(distinct(ov.cl_id))
|from online.tl_hdfs_maidian_view ov left join final_id
|on ov.cl_id = final_id.device_id
|where ov.action = "page_view"
|and params['page_name']="diary_detail"
|and params['referrer']='${a}'
|and ov.cl_id != "NULL"
|and ov.partition_date >='20181201'
|and final_id.device_id is null
|group by ov.partition_date
|order by ov.partition_date
val
diary_clk_new
=
sc
.
sql
(
s
"""
|select ov.partition_date,ov.cl_id as device_id,ov.params['diary_id'] as diary_id
|from online.tl_hdfs_maidian_view ov inner join device_id_new
|on ov.cl_id = device_id_new.device_id
|where ov.action = 'on_click_diary_card'
|and ov.params['tab_name'] = '精选'
|and ov.params['page_name'] = 'home'
|and ov.partition_date='${partition_date}'
"""
.
stripMargin
)
println
(
"来源:"
,
a
)
diary_clk_temp
.
show
(
80
)
)
diary_clk_new
.
show
(
80
)
GmeiConfig
.
writeToJDBCTable
(
"jdbc:mysql://172.16.40.158:4000/jerry_prod?user=root&password=3SYz54LS9#^9sBvC&rewriteBatchedStatements=true"
,
diary_clk_new
,
table
=
"temp"
,
SaveMode
.
Append
)
println
(
"写入完成"
)
}
...
...
@@ -169,8 +139,6 @@ object temp_analysis {
}
}
}
...
...
eda/feededa/src/main/scala/com/gmei/temp_count.scala
View file @
ba9ae360
...
...
@@ -957,75 +957,78 @@ object find_reason {
//2.当天新用户中的点击用户数
//
val new_clk_count = sc.sql(
//
s"""
//
|select '${stat_date}' as stat_date,count(distinct(oc.device_id)) as new_clk_count
//
|from all_clk_diary_card oc inner join device_id_new
//
|on oc.device_id = device_id_new.device_id
//
""".stripMargin
//
)
//
//
2.1 有点击的新用户
//
val new_clk_device = sc.sql(
//
s"""
//
|select distinct(oc.device_id) as device_id
//
|from all_clk_diary_card oc inner join device_id_new
//
|on oc.device_id = device_id_new.device_id
//
""".stripMargin
//
)
//
new_clk_device.createOrReplaceTempView("new_clk_device")
//
//
//
//3.当天老用户数
//
//
val old_count = sc.sql(
//
s"""
//
|select '${stat_date}' as stat_date,count(distinct(dio.device_id)) as old_count
//
|from device_id_old dio left join agency_id
//
|on dio.device_id = agency_id.device_id
//
|where agency_id.device_id is null
//
""".stripMargin
//
)
//
//
//4.当天新用户数
//
val new_count = sc.sql(
//
s"""
//
|select '${stat_date}' as stat_date,count(distinct(din.device_id)) as new_count
//
|from device_id_new din left join agency_id
//
|on din.device_id = agency_id.device_id
//
|where agency_id.device_id is null
//
""".stripMargin
//
)
//
//
//5.有点击老用户的曝光数
//
val exp_clkold_count = sc.sql(
//
s"""
//
|select '${stat_date}' as stat_date,count(dp.device_id) as imp_clkold_count
//
|from data_feed_exposure_precise dp inner join old_clk_device
//
|on dp.device_id = old_clk_device.device_id
//
|where stat_date='${stat_date}'
//
|group by stat_date
//
""".stripMargin
//
)
//
//
//6.有点击新用户的曝光数
//
val exp_clknew_count = sc.sql(
//
s"""
//
|select '${stat_date}' as stat_date,count(dp.device_id) as imp_clknew_count
//
|from data_feed_exposure_precise dp inner join new_clk_device
//
|on dp.device_id = new_clk_device.device_id
//
|where stat_date='${stat_date}'
//
|group by stat_date
//
""".stripMargin
//
)
//
//
val result = old_clk_count.join(new_clk_count,"stat_date")
//
.join(old_count,"stat_date")
//
.join(new_count,"stat_date")
//
.join(exp_clkold_count,"stat_date")
//
.join(exp_clknew_count,"stat_date")
//
val
new_clk_count
=
sc
.
sql
(
s
"""
|select '${stat_date}' as stat_date,count(distinct(oc.device_id)) as new_clk_count
|from all_clk_diary_card oc inner join device_id_new
|on oc.device_id = device_id_new.device_id
"""
.
stripMargin
)
//2.1 有点击的新用户
val
new_clk_device
=
sc
.
sql
(
s
"""
|select distinct(oc.device_id) as device_id
|from all_clk_diary_card oc inner join device_id_new
|on oc.device_id = device_id_new.device_id
"""
.
stripMargin
)
new_clk_device
.
createOrReplaceTempView
(
"new_clk_device"
)
//3.当天老用户数
val
old_count
=
sc
.
sql
(
s
"""
|select '${stat_date}' as stat_date,count(distinct(dio.device_id)) as old_count
|from device_id_old dio left join agency_id
|on dio.device_id = agency_id.device_id
|where agency_id.device_id is null
"""
.
stripMargin
)
//4.当天新用户数
val
new_count
=
sc
.
sql
(
s
"""
|select '${stat_date}' as stat_date,count(distinct(din.device_id)) as new_count
|from device_id_new din left join agency_id
|on din.device_id = agency_id.device_id
|where agency_id.device_id is null
"""
.
stripMargin
)
//5.有点击老用户的曝光数
val
exp_clkold_count
=
sc
.
sql
(
s
"""
|select '${stat_date}' as stat_date,count(dp.device_id) as imp_clkold_count
|from data_feed_exposure_precise dp inner join old_clk_device
|on dp.device_id = old_clk_device.device_id
|where stat_date='${stat_date}'
|group by stat_date
"""
.
stripMargin
)
//6.有点击新用户的曝光数
val
exp_clknew_count
=
sc
.
sql
(
s
"""
|select '${stat_date}' as stat_date,count(dp.device_id) as imp_clknew_count
|from data_feed_exposure_precise dp inner join new_clk_device
|on dp.device_id = new_clk_device.device_id
|where stat_date='${stat_date}'
|group by stat_date
"""
.
stripMargin
)
val
result
=
old_clk_count
.
join
(
new_clk_count
,
"stat_date"
)
.
join
(
old_count
,
"stat_date"
)
.
join
(
new_count
,
"stat_date"
)
.
join
(
exp_clkold_count
,
"stat_date"
)
.
join
(
exp_clknew_count
,
"stat_date"
)
// GmeiConfig.writeToJDBCTable(result, "device_clk_imp_reason", SaveMode.Append)
GmeiConfig
.
writeToJDBCTable
(
"jdbc:mysql://172.16.40.158:4000/jerry_prod?user=root&password=3SYz54LS9#^9sBvC&rewriteBatchedStatements=true"
,
result
,
table
=
"device_clk_imp_reason"
,
SaveMode
.
Append
)
println
(
"写入完成"
)
}
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment