Commit ba9ae360 authored by 张彦钊

Merge branch 'master' of git.wanmeizhensuo.com:ML/ffm-baseline

change test file
parents a11ec230 9f72344a
......@@ -50,17 +50,10 @@ object temp_analysis {
// val ti = new TiContext(sc)
sc.sql("use jerry_prod")
// ti.tidbMapTable(dbName = "jerry_prod", tableName = "diary_video")
// ti.tidbMapTable(dbName = "jerry_prod", tableName = "data_feed_click")
// ti.tidbMapTable(dbName = "jerry_prod", tableName = "blacklist")
// ti.tidbMapTable(dbName = "jerry_test", tableName = "bl_device_list")
// ti.tidbMapTable(dbName = "jerry_prod", tableName = "data_feed_exposure")
// ti.tidbMapTable(dbName = "jerry_prod", tableName = "merge_queue_table")
import sc.implicits._
val stat_date = GmeiConfig.getMinusNDate(1)
// val stat_date=param.date
// val stat_date = GmeiConfig.getMinusNDate(1)
val stat_date=param.date
//println(param.date)
val partition_date = stat_date.replace("-","")
......@@ -81,25 +74,6 @@ object temp_analysis {
)
agency_id.createOrReplaceTempView("agency_id")
// //每日新用户
// val device_id_newUser = sc.sql(
// s"""
// |select distinct(device_id) as device_id
// |from online.ml_device_day_active_status
// |where active_type != '4'
// |and first_channel_source_type not in ('yqxiu1','yqxiu2','yqxiu3','yqxiu4','yqxiu5','mxyc1','mxyc2','mxyc3'
// | ,'wanpu','jinshan','jx','maimai','zhuoyi','huatian','suopingjingling','mocha','mizhe','meika','lamabang'
// | ,'js-az1','js-az2','js-az3','js-az4','js-az5','jfq-az1','jfq-az2','jfq-az3','jfq-az4','jfq-az5','toufang1'
// | ,'toufang2','toufang3','toufang4','toufang5','toufang6','TF-toufang1','TF-toufang2','TF-toufang3','TF-toufang4'
// | ,'TF-toufang5','tf-toufang1','tf-toufang2','tf-toufang3','tf-toufang4','tf-toufang5','benzhan','promotion_aso100'
// | ,'promotion_qianka','promotion_xiaoyu','promotion_dianru','promotion_malioaso','promotion_malioaso-shequ'
// | ,'promotion_shike','promotion_julang_jl03')
// |and partition_date ='${partition_date}'
// """.stripMargin
// )
// device_id_newUser.createOrReplaceTempView("device_id_new")
val blacklist_id = sc.sql(
s"""
|SELECT device_id
......@@ -120,48 +94,44 @@ object temp_analysis {
final_id.createOrReplaceTempView("final_id")
val diary_clk_all = sc.sql(
// //每日新用户
val device_id_newUser = sc.sql(
s"""
|select ov.partition_date,count(ov.cl_id) as clk_num,count(distinct(ov.cl_id)),count(ov.cl_id)/count(distinct(ov.cl_id))
|from online.tl_hdfs_maidian_view ov left join final_id
|on ov.cl_id = final_id.device_id
|where ov.action = "page_view"
|and params['page_name']="diary_detail"
|and ov.cl_id != "NULL"
|and ov.partition_date >='20181201'
|select distinct(oms.device_id) as device_id
|from online.ml_device_day_active_status oms left join final_id
|on oms.device_id=final_id.device_id
|where oms.active_type != '4'
|and oms.first_channel_source_type not in ('yqxiu1','yqxiu2','yqxiu3','yqxiu4','yqxiu5','mxyc1','mxyc2','mxyc3'
| ,'wanpu','jinshan','jx','maimai','zhuoyi','huatian','suopingjingling','mocha','mizhe','meika','lamabang'
| ,'js-az1','js-az2','js-az3','js-az4','js-az5','jfq-az1','jfq-az2','jfq-az3','jfq-az4','jfq-az5','toufang1'
| ,'toufang2','toufang3','toufang4','toufang5','toufang6','TF-toufang1','TF-toufang2','TF-toufang3','TF-toufang4'
| ,'TF-toufang5','tf-toufang1','tf-toufang2','tf-toufang3','tf-toufang4','tf-toufang5','benzhan','promotion_aso100'
| ,'promotion_qianka','promotion_xiaoyu','promotion_dianru','promotion_malioaso','promotion_malioaso-shequ'
| ,'promotion_shike','promotion_julang_jl03')
|and oms.partition_date ='${partition_date}'
|and final_id.device_id is null
|group by ov.partition_date
|order by ov.partition_date
""".stripMargin
""".stripMargin
)
diary_clk_all.show(80)
device_id_newUser.createOrReplaceTempView("device_id_new")
//日记本点击
val referrer=List("about_me_message_list","all_case_service_comment","all_cases","diary_detail","diary_list"
,"diary_listof_related_service","answer_detail","community_home","conversation_detail","create_diary_title","diary_listof_related_service",
"doctor_all_cases","hospital_all_cases","my_favor","my_order","order_detail","personal_store_diary_list","received_votes",
"topic_detail","welfare_detail","welfare_list","welfare_special","wiki_detail","zone_detail",
"expert_detail","free_activity_detail","home","message_home","my_diary","organization_detail","other_homepage","question_detail",
"search_result_diary","search_result_more","welfare_detail","zone_v3")
for( a <- referrer ){
val diary_clk_temp = sc.sql(
s"""
|select ov.partition_date,count(ov.cl_id) as clk_num,count(distinct(ov.cl_id)),count(ov.cl_id)/count(distinct(ov.cl_id))
|from online.tl_hdfs_maidian_view ov left join final_id
|on ov.cl_id = final_id.device_id
|where ov.action = "page_view"
|and params['page_name']="diary_detail"
|and params['referrer']='${a}'
|and ov.cl_id != "NULL"
|and ov.partition_date >='20181201'
|and final_id.device_id is null
|group by ov.partition_date
|order by ov.partition_date
val diary_clk_new = sc.sql(
s"""
|select ov.partition_date,ov.cl_id as device_id,ov.params['diary_id'] as diary_id
|from online.tl_hdfs_maidian_view ov inner join device_id_new
|on ov.cl_id = device_id_new.device_id
|where ov.action = 'on_click_diary_card'
|and ov.params['tab_name'] = '精选'
|and ov.params['page_name'] = 'home'
|and ov.partition_date='${partition_date}'
""".stripMargin
)
println("来源:",a)
diary_clk_temp.show(80)
)
diary_clk_new.show(80)
GmeiConfig.writeToJDBCTable("jdbc:mysql://172.16.40.158:4000/jerry_prod?user=root&password=3SYz54LS9#^9sBvC&rewriteBatchedStatements=true",diary_clk_new, table="temp",SaveMode.Append)
println("写入完成")
}
......@@ -169,8 +139,6 @@ object temp_analysis {
}
}
}
......
......@@ -957,75 +957,78 @@ object find_reason {
//2.当天新用户中的点击用户数
// val new_clk_count = sc.sql(
// s"""
// |select '${stat_date}' as stat_date,count(distinct(oc.device_id)) as new_clk_count
// |from all_clk_diary_card oc inner join device_id_new
// |on oc.device_id = device_id_new.device_id
// """.stripMargin
// )
////2.1 有点击的新用户
// val new_clk_device = sc.sql(
// s"""
// |select distinct(oc.device_id) as device_id
// |from all_clk_diary_card oc inner join device_id_new
// |on oc.device_id = device_id_new.device_id
// """.stripMargin
// )
// new_clk_device.createOrReplaceTempView("new_clk_device")
//
//
// //3.当天老用户数
//
// val old_count = sc.sql(
// s"""
// |select '${stat_date}' as stat_date,count(distinct(dio.device_id)) as old_count
// |from device_id_old dio left join agency_id
// |on dio.device_id = agency_id.device_id
// |where agency_id.device_id is null
// """.stripMargin
// )
//
// //4.当天新用户数
// val new_count = sc.sql(
// s"""
// |select '${stat_date}' as stat_date,count(distinct(din.device_id)) as new_count
// |from device_id_new din left join agency_id
// |on din.device_id = agency_id.device_id
// |where agency_id.device_id is null
// """.stripMargin
// )
//
// //5.有点击老用户的曝光数
// val exp_clkold_count = sc.sql(
// s"""
// |select '${stat_date}' as stat_date,count(dp.device_id) as imp_clkold_count
// |from data_feed_exposure_precise dp inner join old_clk_device
// |on dp.device_id = old_clk_device.device_id
// |where stat_date='${stat_date}'
// |group by stat_date
// """.stripMargin
// )
//
// //6.有点击新用户的曝光数
// val exp_clknew_count = sc.sql(
// s"""
// |select '${stat_date}' as stat_date,count(dp.device_id) as imp_clknew_count
// |from data_feed_exposure_precise dp inner join new_clk_device
// |on dp.device_id = new_clk_device.device_id
// |where stat_date='${stat_date}'
// |group by stat_date
// """.stripMargin
// )
//
// val result = old_clk_count.join(new_clk_count,"stat_date")
// .join(old_count,"stat_date")
// .join(new_count,"stat_date")
// .join(exp_clkold_count,"stat_date")
// .join(exp_clknew_count,"stat_date")
//
val new_clk_count = sc.sql(
s"""
|select '${stat_date}' as stat_date,count(distinct(oc.device_id)) as new_clk_count
|from all_clk_diary_card oc inner join device_id_new
|on oc.device_id = device_id_new.device_id
""".stripMargin
)
//2.1 有点击的新用户
val new_clk_device = sc.sql(
s"""
|select distinct(oc.device_id) as device_id
|from all_clk_diary_card oc inner join device_id_new
|on oc.device_id = device_id_new.device_id
""".stripMargin
)
new_clk_device.createOrReplaceTempView("new_clk_device")
//3.当天老用户数
val old_count = sc.sql(
s"""
|select '${stat_date}' as stat_date,count(distinct(dio.device_id)) as old_count
|from device_id_old dio left join agency_id
|on dio.device_id = agency_id.device_id
|where agency_id.device_id is null
""".stripMargin
)
//4.当天新用户数
val new_count = sc.sql(
s"""
|select '${stat_date}' as stat_date,count(distinct(din.device_id)) as new_count
|from device_id_new din left join agency_id
|on din.device_id = agency_id.device_id
|where agency_id.device_id is null
""".stripMargin
)
//5.有点击老用户的曝光数
val exp_clkold_count = sc.sql(
s"""
|select '${stat_date}' as stat_date,count(dp.device_id) as imp_clkold_count
|from data_feed_exposure_precise dp inner join old_clk_device
|on dp.device_id = old_clk_device.device_id
|where stat_date='${stat_date}'
|group by stat_date
""".stripMargin
)
//6.有点击新用户的曝光数
val exp_clknew_count = sc.sql(
s"""
|select '${stat_date}' as stat_date,count(dp.device_id) as imp_clknew_count
|from data_feed_exposure_precise dp inner join new_clk_device
|on dp.device_id = new_clk_device.device_id
|where stat_date='${stat_date}'
|group by stat_date
""".stripMargin
)
val result = old_clk_count.join(new_clk_count,"stat_date")
.join(old_count,"stat_date")
.join(new_count,"stat_date")
.join(exp_clkold_count,"stat_date")
.join(exp_clknew_count,"stat_date")
// GmeiConfig.writeToJDBCTable(result, "device_clk_imp_reason", SaveMode.Append)
GmeiConfig.writeToJDBCTable("jdbc:mysql://172.16.40.158:4000/jerry_prod?user=root&password=3SYz54LS9#^9sBvC&rewriteBatchedStatements=true",result, table="device_clk_imp_reason",SaveMode.Append)
println("写入完成")
}
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment