Commit 5771eaa1 authored by 王志伟

拆分新老用户重复曝光指标

parent b15d0eea
......@@ -499,4 +499,5 @@ object CTR_precise {
}
}
\ No newline at end of file
}
......@@ -223,27 +223,110 @@ object Repeated_content_recommendation {
val partition_date = stat_date.replace("-","")
val exp_diary = sc.sql(
val agency_id = sc.sql(
s"""
|select concat_ws('|',device_id,cid_id)
|from data_feed_exposure
|where cid_type = 'diary'
|and device_id not in (select device_id from blacklist)
|and stat_date ='${stat_date}'
|SELECT DISTINCT(cl_id) as device_id
|FROM online.ml_hospital_spam_pv_day
|WHERE partition_date >= '20180402'
|AND partition_date <= '${partition_date}'
|AND pv_ratio >= 0.95
|UNION ALL
|SELECT DISTINCT(cl_id) as device_id
|FROM online.ml_hospital_spam_pv_month
|WHERE partition_date >= '20171101'
|AND partition_date <= '${partition_date}'
|AND pv_ratio >= 0.95
|UNION ALL
|select distinct(device_id)
|from blacklist
""".stripMargin
)
agency_id.createOrReplaceTempView("agency_id")
val device_id_oldUser = sc.sql(
s"""
|select distinct(om.device_id) as device_id
|from online.ml_device_day_active_status om left join agency_id
|on om.device_id = agency_id.device_id
|where om.active_type = '4'
|and om.first_channel_source_type not in ('yqxiu1','yqxiu2','yqxiu3','yqxiu4','yqxiu5','mxyc1','mxyc2','mxyc3'
| ,'wanpu','jinshan','jx','maimai','zhuoyi','huatian','suopingjingling','mocha','mizhe','meika','lamabang'
| ,'js-az1','js-az2','js-az3','js-az4','js-az5','jfq-az1','jfq-az2','jfq-az3','jfq-az4','jfq-az5','toufang1'
| ,'toufang2','toufang3','toufang4','toufang5','toufang6','TF-toufang1','TF-toufang2','TF-toufang3','TF-toufang4'
| ,'TF-toufang5','tf-toufang1','tf-toufang2','tf-toufang3','tf-toufang4','tf-toufang5','benzhan','promotion_aso100'
| ,'promotion_qianka','promotion_xiaoyu','promotion_dianru','promotion_malioaso','promotion_malioaso-shequ'
| ,'promotion_shike','promotion_julang_jl03','','unknown')
|and om.partition_date ='${partition_date}'
|and agency_id.device_id is null
""".stripMargin
)
device_id_oldUser.createOrReplaceTempView("device_id_old")
val device_id_newUser = sc.sql(
s"""
|select distinct(om.device_id) as device_id
|from online.ml_device_day_active_status om left join agency_id
|on om.device_id = agency_id.device_id
|where om.active_type != '4'
|and om.first_channel_source_type not in ('yqxiu1','yqxiu2','yqxiu3','yqxiu4','yqxiu5','mxyc1','mxyc2','mxyc3'
| ,'wanpu','jinshan','jx','maimai','zhuoyi','huatian','suopingjingling','mocha','mizhe','meika','lamabang'
| ,'js-az1','js-az2','js-az3','js-az4','js-az5','jfq-az1','jfq-az2','jfq-az3','jfq-az4','jfq-az5','toufang1'
| ,'toufang2','toufang3','toufang4','toufang5','toufang6','TF-toufang1','TF-toufang2','TF-toufang3','TF-toufang4'
| ,'TF-toufang5','tf-toufang1','tf-toufang2','tf-toufang3','tf-toufang4','tf-toufang5','benzhan','promotion_aso100'
| ,'promotion_qianka','promotion_xiaoyu','promotion_dianru','promotion_malioaso','promotion_malioaso-shequ'
| ,'promotion_shike','promotion_julang_jl03','','unknown')
|and om.partition_date ='${partition_date}'
|and agency_id.device_id is null
""".stripMargin
)
device_id_newUser.createOrReplaceTempView("device_id_new")
val exp_diary_new = sc.sql(
s"""
|select concat_ws('|',de.device_id,de.cid_id)
|from data_feed_exposure de inner join device_id_new
|on de.device_id=device_id_new.device_id
|where de.cid_type = 'diary'
|and de.stat_date ='${stat_date}'
""".stripMargin
)
exp_diary.show()
val get_result =exp_diary.rdd.map((_, 1)).reduceByKey(_ + _)
val get_result_new =exp_diary_new.rdd.map((_, 1)).reduceByKey(_ + _)
.sortBy(_._2,false)
val more_than2=get_result.filter(_._2 >=2).map(_._2).reduce((x,y)=>x+y)
println(more_than2)
val all =get_result.map(_._2).reduce((x,y)=>x+y)
println(all)
val repeated_rate= more_than2 / all.toDouble
println(repeated_rate)
val test=List((stat_date,repeated_rate))
val df = sc.createDataFrame(test)
val more_than2_new=get_result_new.filter(_._2 >=2).map(_._2).reduce((x,y)=>x+y)
println(more_than2_new)
val all_new =get_result_new.map(_._2).reduce((x,y)=>x+y)
println(all_new)
val repeated_rate_new= more_than2_new / all_new.toDouble
println(repeated_rate_new)
// Diary exposures on stat_date restricted to devices in the device_id_old view,
// one "device_id|cid_id" key per exposure row (mirrors the exp_diary_new query).
// The join must be keyed on device_id: without an ON condition every exposure row is
// paired with every row of device_id_old (a cartesian product), which multiplies each
// key's count and corrupts the repeated-exposure numbers derived from it; Spark may
// also reject the query outright as an implicit cross join.
val exp_diary_old = sc.sql(
s"""
|select concat_ws('|',de.device_id,de.cid_id)
|from data_feed_exposure de inner join device_id_old
|on de.device_id=device_id_old.device_id
|where de.cid_type = 'diary'
|and de.stat_date ='${stat_date}'
""".stripMargin
)
val get_result_old =exp_diary_old.rdd.map((_, 1)).reduceByKey(_ + _)
.sortBy(_._2,false)
val more_than2_old=get_result_old.filter(_._2 >=2).map(_._2).reduce((x,y)=>x+y)
println(more_than2_old)
val all_old =get_result_old.map(_._2).reduce((x,y)=>x+y)
println(all_old)
val repeated_rate_old= more_than2_old / all_old.toDouble
println(repeated_rate_old)
val result=List((stat_date,more_than2_new,all_new,more_than2_old,all_old))
val df = sc.createDataFrame(result).toDF("stat_date","new_rep_count","new_imp_all","old_rep_count","old_imp_all")
GmeiConfig.writeToJDBCTable(df, table = "Repeated_evaluation_indicator", SaveMode.Append)
......
......@@ -408,6 +408,11 @@ object testt {
object diary_clk_card {
Logger.getLogger("org.apache.spark").setLevel(Level.WARN)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment