Commit 91caf4cd authored by 张彦钊

Merge branch 'master' of git.wanmeizhensuo.com:ML/ffm-baseline

Change the start date of the training set
parents 92d02e7f 0fc26443
@@ -499,4 +499,5 @@ object CTR_precise {
  }
}
\ No newline at end of file
@@ -218,34 +218,145 @@ object Repeated_content_recommendation {
ti.tidbMapTable(dbName = "jerry_prod", tableName = "merge_queue_table")
// val stat_date = GmeiConfig.getMinusNDate(1)
val stat_date = param.date
val partition_date = stat_date.replace("-","")
// Spam/agency devices plus the explicit blacklist, used below to exclude abnormal traffic
val agency_id = sc.sql(
s""" s"""
|select concat_ws('|',device_id,cid_id) |SELECT DISTINCT(cl_id) as device_id
|from data_feed_exposure |FROM online.ml_hospital_spam_pv_day
|where cid_type = 'diary' |WHERE partition_date >= '20180402'
|and device_id not in (select device_id from blacklist) |AND partition_date <= '${partition_date}'
|and stat_date ='${stat_date}' |AND pv_ratio >= 0.95
|UNION ALL
|SELECT DISTINCT(cl_id) as device_id
|FROM online.ml_hospital_spam_pv_month
|WHERE partition_date >= '20171101'
|AND partition_date <= '${partition_date}'
|AND pv_ratio >= 0.95
|UNION ALL
|select distinct(device_id)
|from blacklist
""".stripMargin
)
agency_id.createOrReplaceTempView("agency_id")
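// Illustrative sketch (toy data; assumes `sc` above is a SparkSession): createOrReplaceTempView
// registers a DataFrame under a name that later sc.sql(...) calls can reference, which is how
// agency_id, device_id_old and device_id_new are reused by the queries below.
// val demo = sc.createDataFrame(Seq(("d1", 1), ("d2", 2))).toDF("device_id", "n")
// demo.createOrReplaceTempView("demo_view")
// sc.sql("select device_id from demo_view where n > 1").show()   // prints d2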
// Old users: devices whose active_type is '4', excluding promotion channels and the agency/blacklist set
val device_id_oldUser = sc.sql(
s"""
|select distinct(om.device_id) as device_id
|from online.ml_device_day_active_status om left join agency_id
|on om.device_id = agency_id.device_id
|where om.active_type = '4'
|and om.first_channel_source_type not in ('yqxiu1','yqxiu2','yqxiu3','yqxiu4','yqxiu5','mxyc1','mxyc2','mxyc3'
| ,'wanpu','jinshan','jx','maimai','zhuoyi','huatian','suopingjingling','mocha','mizhe','meika','lamabang'
| ,'js-az1','js-az2','js-az3','js-az4','js-az5','jfq-az1','jfq-az2','jfq-az3','jfq-az4','jfq-az5','toufang1'
| ,'toufang2','toufang3','toufang4','toufang5','toufang6','TF-toufang1','TF-toufang2','TF-toufang3','TF-toufang4'
| ,'TF-toufang5','tf-toufang1','tf-toufang2','tf-toufang3','tf-toufang4','tf-toufang5','benzhan','promotion_aso100'
| ,'promotion_qianka','promotion_xiaoyu','promotion_dianru','promotion_malioaso','promotion_malioaso-shequ'
| ,'promotion_shike','promotion_julang_jl03','','unknown')
|and om.partition_date ='${partition_date}'
|and agency_id.device_id is null
""".stripMargin
)
device_id_oldUser.createOrReplaceTempView("device_id_old")
device_id_oldUser.show()
// New users: devices whose active_type is not '4', with the same channel and agency filters
val device_id_newUser = sc.sql(
s"""
|select distinct(om.device_id) as device_id
|from online.ml_device_day_active_status om left join agency_id
|on om.device_id = agency_id.device_id
|where om.active_type != '4'
|and om.first_channel_source_type not in ('yqxiu1','yqxiu2','yqxiu3','yqxiu4','yqxiu5','mxyc1','mxyc2','mxyc3'
| ,'wanpu','jinshan','jx','maimai','zhuoyi','huatian','suopingjingling','mocha','mizhe','meika','lamabang'
| ,'js-az1','js-az2','js-az3','js-az4','js-az5','jfq-az1','jfq-az2','jfq-az3','jfq-az4','jfq-az5','toufang1'
| ,'toufang2','toufang3','toufang4','toufang5','toufang6','TF-toufang1','TF-toufang2','TF-toufang3','TF-toufang4'
| ,'TF-toufang5','tf-toufang1','tf-toufang2','tf-toufang3','tf-toufang4','tf-toufang5','benzhan','promotion_aso100'
| ,'promotion_qianka','promotion_xiaoyu','promotion_dianru','promotion_malioaso','promotion_malioaso-shequ'
| ,'promotion_shike','promotion_julang_jl03','','unknown')
|and om.partition_date ='${partition_date}'
|and agency_id.device_id is null
""".stripMargin """.stripMargin
) )
exp_diary.show() device_id_newUser.createOrReplaceTempView("device_id_new")
val get_result =exp_diary.rdd.map((_, 1)).reduceByKey(_ + _) device_id_newUser.show()
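// Illustrative sketch (made-up data; assumes `sc` above is a SparkSession): the device_id_old /
// device_id_new queries exclude agency and blacklisted devices via "left join ... is null",
// i.e. an anti-join. The same exclusion with the DataFrame API would be a left_anti join:
// import sc.implicits._
// val activeDevices = Seq("d1", "d2", "d3").toDF("device_id")   // stand-in for ml_device_day_active_status
// val agencyDevices = Seq("d2").toDF("device_id")               // stand-in for agency_id
// activeDevices.join(agencyDevices, Seq("device_id"), "left_anti").show()   // keeps d1 and d3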
// Diary exposures for new-user devices on stat_date
val exp_diary_new = sc.sql(
s"""
|select concat_ws('|',de.device_id,de.cid_id)
|from data_feed_exposure de inner join device_id_new
|on de.device_id=device_id_new.device_id
|where de.cid_type = 'diary'
|and de.stat_date ='${stat_date}'
""".stripMargin
)
// Count how many times each (device_id|cid_id) pair was exposed
val get_result_new = exp_diary_new.rdd.map((_, 1)).reduceByKey(_ + _)
.sortBy(_._2, false)
val more_than2_new = get_result_new.filter(_._2 >= 2).map(_._2).reduce((x,y)=>x+y)
println(more_than2_new)
val all_new = get_result_new.map(_._2).reduce((x,y)=>x+y)
println(all_new)
val repeated_rate_new = more_than2_new / all_new.toDouble
println(repeated_rate_new)
val exp_diary_old = sc.sql(
s"""
|select concat_ws('|',de.device_id,de.cid_id)
|from data_feed_exposure de inner join device_id_old
|on de.device_id=device_id_old.device_id
|where de.cid_type = 'diary'
|and de.stat_date ='${stat_date}'
""".stripMargin
)
val get_result_old =exp_diary_old.rdd.map((_, 1)).reduceByKey(_ + _)
.sortBy(_._2,false)
val more_than2_old=get_result_old.filter(_._2 >=2).map(_._2).reduce((x,y)=>x+y)
println(more_than2_old)
val all_old =get_result_old.map(_._2).reduce((x,y)=>x+y)
println(all_old)
val repeated_rate_old= more_than2_old / all_old.toDouble
println(repeated_rate_old)
// One summary row per day: repeated and total impressions for old and new users
val result2 = List((stat_date,more_than2_old,all_old,more_than2_new,all_new))
val df2 = sc.createDataFrame(result2).toDF("stat_date","old_rep_count","old_imp_all","new_rep_count","new_imp_all")
GmeiConfig.writeToJDBCTable(df2, table = "Repeated_evaluation_indicator", SaveMode.Append)
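// Illustrative sketch (toy data; assumes `sc` above is a SparkSession): the repeat-exposure rate
// computed above counts each (device_id|cid_id) pair, sums the impressions of pairs seen at
// least twice, and divides by all impressions.
// val exposures = sc.sparkContext.parallelize(Seq("d1|c1", "d1|c1", "d1|c2", "d2|c1"))
// val counts = exposures.map((_, 1)).reduceByKey(_ + _)      // ("d1|c1",2), ("d1|c2",1), ("d2|c1",1)
// val repeated = counts.filter(_._2 >= 2).map(_._2).sum()    // 2.0 impressions from repeated pairs
// val total = counts.map(_._2).sum()                         // 4.0 impressions in total
// println(repeated / total)                                  // 0.5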
// val exp_diary_old = sc.sql(
// s"""
// |select concat_ws('|',de.device_id,de.cid_id)
// |from data_feed_exposure de inner join device_id_old
// |where de.cid_type = 'diary'
// |and de.stat_date ='${stat_date}'
// """.stripMargin
// )
// val get_result_old =exp_diary_old.rdd.map((_, 1)).reduceByKey(_ + _)
// .sortBy(_._2,false)
//
// val more_than2_old=get_result_old.filter(_._2 >=2).map(_._2).reduce((x,y)=>x+y)
// println(more_than2_old)
// val all_old =get_result_old.map(_._2).reduce((x,y)=>x+y)
// println(all_old)
// val repeated_rate_old= more_than2_old / all_old.toDouble
// println(repeated_rate_old)
//
//
// val result2=List((stat_date,more_than2_old,all_old))
// val df2 = sc.createDataFrame(result2).toDF("stat_date","old_rep_count","old_imp_all")
//
// GmeiConfig.writeToJDBCTable(df2, table = "Repeated_evaluation_indicator_old", SaveMode.Append)
// val temp=get_result.collect()
@@ -260,8 +371,6 @@ object Repeated_content_recommendation {
}
object Repeated_content_recommendation_moreday {
Logger.getLogger("org.apache.spark").setLevel(Level.WARN)
......
@@ -408,6 +408,11 @@ object testt {
object diary_clk_card {
Logger.getLogger("org.apache.spark").setLevel(Level.WARN)
......