Commit eb0339ca authored by 王志伟

合并删除代码

parent 6d279dc3
......@@ -545,6 +545,19 @@ object diary_clk_card {
""".stripMargin
)
// Counts diary exposures in data_feed_exposure_precise for devices present in the
// device_id_old view, restricted to device ids matching the regexp '1$' and not
// listed in blacklist, for the given stat_date.
val imp_count_oldUser_Contrast_precise = {
  val exposureSql =
    s"""
       |select '${stat_date}' as stat_date, count(cid_id) as imp_count_oldUser_Contrast_precise
       |from data_feed_exposure_precise je inner join device_id_old
       |on je.device_id = device_id_old.device_id
       |where je.cid_type = 'diary'
       |and je.device_id regexp'1$$'
       |and je.device_id not in (select device_id from blacklist)
       |and je.stat_date ='${stat_date}'
       |""".stripMargin
  sc.sql(exposureSql)
}
val clk_count_oldUser_all_a = sc.sql(
s"""
|select '${stat_date}' as stat_date, count(ot.cl_id) as clk_count_oldUser_all_a
......@@ -581,6 +594,17 @@ object diary_clk_card {
""".stripMargin
)
// Counts diary exposures in data_feed_exposure_precise for every device in the
// device_id_old view that is not listed in blacklist, for the given stat_date.
val imp_count_oldUser_all_precise = {
  val exposureSql =
    s"""
       |select '${stat_date}' as stat_date, count(cid_id) as imp_count_oldUser_all_precise
       |from data_feed_exposure_precise je inner join device_id_old
       |on je.device_id = device_id_old.device_id
       |where je.cid_type = 'diary'
       |and je.device_id not in (select device_id from blacklist)
       |and je.stat_date ='${stat_date}'
       |""".stripMargin
  sc.sql(exposureSql)
}
//统计新用户ctr
val device_id_newUser = sc.sql(
s"""
......@@ -640,205 +664,12 @@ object diary_clk_card {
""".stripMargin
)
// Click count (type a): 'on_click_diary_card' events on the '精选' tab of the
// 'home' page, for devices in the device_id_new view, on partition_date.
val clk_count_newUser_all_a = {
  val cardClickSql =
    s"""
       |select '${stat_date}' as stat_date, count(ot.cl_id) as clk_count_newUser_all_a
       |from online.tl_hdfs_maidian_view ot inner join device_id_new
       |on ot.cl_id = device_id_new.device_id
       |where ot.action='on_click_diary_card'
       |and ot.params['tab_name'] = '精选'
       |and ot.params['page_name'] = 'home'
       |and ot.partition_date ='${partition_date}'
       |""".stripMargin
  sc.sql(cardClickSql)
}
// Click count (type b): 'full_stack_click_video_card_full_screen_play' events on the
// '精选' tab with card_type "diary", for devices in device_id_new, on partition_date.
val clk_count_newUser_all_b = {
  val videoClickSql =
    s"""
       |select '${stat_date}' as stat_date, count(ot.cl_id) as clk_count_newUser_all_b
       |from online.tl_hdfs_maidian_view ot inner join device_id_new
       |on ot.cl_id = device_id_new.device_id
       |where ot.action='full_stack_click_video_card_full_screen_play'
       |and ot.params['tab_name'] = '精选'
       |and ot.params["card_type"]="diary"
       |and ot.partition_date ='${partition_date}'
       |""".stripMargin
  sc.sql(videoClickSql)
}
// NOTE(review): two versions of one statement appear interleaved in this span:
// there are two `val ... = sc.sql(` openers and two select/from pairs, but only a
// single closing parenthesis. It looks like diff residue (old lines reading
// data_feed_exposure, new lines reading data_feed_exposure_precise) — confirm
// which version is intended; the span does not parse as Scala as written.
val imp_count_newUser_all = sc.sql(
val imp_count_newUser_Contrast_precise = sc.sql(
s"""
|select '${stat_date}' as stat_date, count(cid_id) as imp_count_newUser_all
|from data_feed_exposure je inner join device_id_new
|select '${stat_date}' as stat_date, count(cid_id) as imp_count_newUser_Contrast_precise
|from data_feed_exposure_precise je inner join device_id_new
|on je.device_id = device_id_new.device_id
|where je.cid_type = 'diary'
|and je.device_id not in (select device_id from blacklist)
|and je.stat_date ='${stat_date}'
""".stripMargin
)
// Joins the twelve metric DataFrames on "stat_date", left to right in the order
// listed (starting from clk_count_oldUser_Contrast_a), prints the result and
// appends it to the "on_click_diary_card" JDBC table.
val result1 = Seq(
  clk_count_oldUser_Contrast_b,
  imp_count_oldUser_Contrast,
  clk_count_oldUser_all_a,
  clk_count_oldUser_all_b,
  imp_count_oldUser_all,
  clk_count_newUser_Contrast_a,
  clk_count_newUser_Contrast_b,
  imp_count_newUser_Contrast,
  clk_count_newUser_all_a,
  clk_count_newUser_all_b,
  imp_count_newUser_all
).foldLeft(clk_count_oldUser_Contrast_a) { (joined, metric) =>
  joined.join(metric, "stat_date")
}
result1.show()
GmeiConfig.writeToJDBCTable(result1, "on_click_diary_card", SaveMode.Append)
}
}
}
object jingxuan_liebiao_CTR_Precise {
// Reduce log noise: Spark loggers are limited to WARN and above.
Logger.getLogger("org.apache.spark").setLevel(Level.WARN)
// NOTE(review): Jetty's own package is usually "org.eclipse.jetty"; this logger name
// may not match any real logger, in which case the call has no effect — confirm.
Logger.getLogger("org.apache.eclipse.jetty.server").setLevel(Level.OFF)
/**
 * Command-line parameters for this job.
 *
 * @param env  database environment name, passed to GmeiConfig.setup (default "dev")
 * @param date date string supplied on the command line (default "2018-08-01")
 */
case class Params(env: String = "dev",
date: String = "2018-08-01"
) extends AbstractParams[Params] with Serializable
// Default parameter values; used as the initial value when parsing arguments in main.
val defaultParams = Params()
/**
 * Command-line parser for [[Params]].
 *
 * Recognised options:
 *  - `--env`  database environment to use
 *  - `--date` date to use
 *
 * NOTE(review): the program name ("Feed_EDA"), the header ("WeafareStat") and the
 * class named in the usage example do not match the enclosing object's name; they
 * look copied from another job. Left untouched here — confirm the intended names.
 */
val parser = new OptionParser[Params]("Feed_EDA") {
  head("WeafareStat")
  opt[String]("env")
    .text("the databases environment you used")
    .action((x, c) => c.copy(env = x))
  opt[String]("date")
    .text("the date you used")
    .action((x, c) => c.copy(date = x))
  // `.stripMargin` applies only to the literal it is called on, so text appended
  // afterwards must not carry a margin pipe of its own; the previous version
  // printed a stray "|" in front of "--env" in the help output.
  note(
    """
      |For example, the following command runs this app on a tidb dataset:
      |
      | spark-submit --class com.gmei.WeafareStat ./target/scala-2.11/feededa-assembly-0.1.jar \
      |""".stripMargin +
      s" --env ${defaultParams.env}"
  )
}
def main(args: Array[String]): Unit = {
parser.parse(args, defaultParams).map { param =>
GmeiConfig.setup(param.env)
val spark_env = GmeiConfig.getSparkSession()
// The second element of the pair returned by getSparkSession is used for all SQL below.
val sc = spark_env._2
val ti = new TiContext(sc)
// Map the jerry_prod TiDB tables this job reads, in the same order as before.
Seq(
  "data_feed_exposure_precise",
  "data_feed_click",
  "blacklist",
  "data_feed_exposure",
  "merge_queue_table"
).foreach(table => ti.tidbMapTable(dbName = "jerry_prod", tableName = table))
// presumably the date one day back — confirm against GmeiConfig.getMinusNDate
val stat_date = GmeiConfig.getMinusNDate(1)
// val stat_date = param.date
//println(param.date)
// Same date with the hyphens removed, used for partition_date filters.
val partition_date = stat_date.replace("-","")
// Device ids from the blacklist table, re-registered as temp view "blacklist".
val blacklist = {
  val blacklistSql =
    s"""
       |select device_id from blacklist
       |""".stripMargin
  sc.sql(blacklistSql)
}
blacklist.createOrReplaceTempView("blacklist")
// Device ids with pv_ratio >= 0.95 in either the daily or the monthly
// ml_hospital_spam_pv table, up to partition_date; registered as "agency_id".
val agency_id = {
  val agencySql =
    s"""
       |SELECT DISTINCT(cl_id) as device_id
       |FROM online.ml_hospital_spam_pv_day
       |WHERE partition_date >= '20180402'
       |AND partition_date <= '${partition_date}'
       |AND pv_ratio >= 0.95
       |UNION ALL
       |SELECT DISTINCT(cl_id) as device_id
       |FROM online.ml_hospital_spam_pv_month
       |WHERE partition_date >= '20171101'
       |AND partition_date <= '${partition_date}'
       |AND pv_ratio >= 0.95
       |""".stripMargin
  sc.sql(agencySql)
}
agency_id.createOrReplaceTempView("agency_id")
// Concatenation (UNION ALL) of blacklist and agency_id device ids;
// registered as temp view "blacklist_all".
val blacklist_all = {
  val combinedSql =
    s"""
       |SELECT device_id
       |FROM blacklist
       |UNION ALL
       |SELECT device_id
       |FROM agency_id
       |""".stripMargin
  sc.sql(combinedSql)
}
blacklist_all.createOrReplaceTempView("blacklist_all")
// Distinct device ids from ml_device_day_active_status with active_type '4' on
// partition_date, excluding the listed first_channel_source_type values and any
// device present in blacklist_all (left join + "is null" keeps only unmatched rows).
// Registered as temp view "device_id_old".
val device_id_oldUser = {
  val oldUserSql =
    s"""
       |select distinct(om.device_id) as device_id
       |from online.ml_device_day_active_status om left join blacklist_all
       |on om.device_id = blacklist_all.device_id
       |where om.active_type = '4'
       |and om.first_channel_source_type not in ('yqxiu1','yqxiu2','yqxiu3','yqxiu4','yqxiu5','mxyc1','mxyc2','mxyc3'
       | ,'wanpu','jinshan','jx','maimai','zhuoyi','huatian','suopingjingling','mocha','mizhe','meika','lamabang'
       | ,'js-az1','js-az2','js-az3','js-az4','js-az5','jfq-az1','jfq-az2','jfq-az3','jfq-az4','jfq-az5','toufang1'
       | ,'toufang2','toufang3','toufang4','toufang5','toufang6','TF-toufang1','TF-toufang2','TF-toufang3','TF-toufang4'
       | ,'TF-toufang5','tf-toufang1','tf-toufang2','tf-toufang3','tf-toufang4','tf-toufang5','benzhan','promotion_aso100'
       | ,'promotion_qianka','promotion_xiaoyu','promotion_dianru','promotion_malioaso','promotion_malioaso-shequ'
       | ,'promotion_shike','promotion_julang_jl03','','unknown')
       |and om.partition_date ='${partition_date}'
       |and blacklist_all.device_id is null
       |""".stripMargin
  sc.sql(oldUserSql)
}
device_id_oldUser.createOrReplaceTempView("device_id_old")
// Click count (type a) for devices in device_id_old whose cl_id matches regexp '1$':
// 'on_click_diary_card' events on the '精选' tab of the 'home' page, on partition_date.
val clk_count_oldUser_Contrast_a = {
  val cardClickSql =
    s"""
       |select '${stat_date}' as stat_date, count(ot.cl_id) as clk_count_oldUser_Contrast_a
       |from online.tl_hdfs_maidian_view ot inner join device_id_old
       |on ot.cl_id = device_id_old.device_id
       |where ot.action='on_click_diary_card'
       |and ot.params['tab_name'] = '精选'
       |and ot.params['page_name'] = 'home'
       |and ot.cl_id regexp'1$$'
       |and ot.partition_date ='${partition_date}'
       |""".stripMargin
  sc.sql(cardClickSql)
}
// Click count (type b) for the same device subset:
// 'full_stack_click_video_card_full_screen_play' events on the '精选' tab with
// card_type "diary", on partition_date.
val clk_count_oldUser_Contrast_b = {
  val videoClickSql =
    s"""
       |select '${stat_date}' as stat_date, count(ot.cl_id) as clk_count_oldUser_Contrast_b
       |from online.tl_hdfs_maidian_view ot inner join device_id_old
       |on ot.cl_id = device_id_old.device_id
       |where ot.action='full_stack_click_video_card_full_screen_play'
       |and ot.params['tab_name'] = '精选'
       |and ot.params["card_type"]="diary"
       |and ot.cl_id regexp'1$$'
       |and ot.partition_date ='${partition_date}'
       |""".stripMargin
  sc.sql(videoClickSql)
}
val imp_count_oldUser_Contrast = sc.sql(
s"""
|select '${stat_date}' as stat_date, count(cid_id) as imp_count_oldUser_Contrast
|from data_feed_exposure_precise je inner join device_id_old
|on je.device_id = device_id_old.device_id
|where je.cid_type = 'diary'
|and je.device_id regexp'1$$'
|and je.device_id not in (select device_id from blacklist)
|and je.stat_date ='${stat_date}'
......@@ -846,130 +677,45 @@ object jingxuan_liebiao_CTR_Precise {
)
// Click count (type a) for all devices in device_id_old: 'on_click_diary_card'
// events on the '精选' tab of the 'home' page, on partition_date.
val clk_count_oldUser_all_a = {
  val cardClickSql =
    s"""
       |select '${stat_date}' as stat_date, count(ot.cl_id) as clk_count_oldUser_all_a
       |from online.tl_hdfs_maidian_view ot inner join device_id_old
       |on ot.cl_id = device_id_old.device_id
       |where ot.action='on_click_diary_card'
       |and ot.params['tab_name'] = '精选'
       |and ot.params['page_name'] = 'home'
       |and ot.partition_date ='${partition_date}'
       |""".stripMargin
  sc.sql(cardClickSql)
}
// Click count (type b) for all devices in device_id_old:
// 'full_stack_click_video_card_full_screen_play' events on the '精选' tab with
// card_type "diary", on partition_date.
val clk_count_oldUser_all_b = {
  val videoClickSql =
    s"""
       |select '${stat_date}' as stat_date, count(ot.cl_id) as clk_count_oldUser_all_b
       |from online.tl_hdfs_maidian_view ot inner join device_id_old
       |on ot.cl_id = device_id_old.device_id
       |where ot.action='full_stack_click_video_card_full_screen_play'
       |and ot.params['tab_name'] = '精选'
       |and ot.params["card_type"]="diary"
       |and ot.partition_date ='${partition_date}'
       |""".stripMargin
  sc.sql(videoClickSql)
}
// Diary exposure count from data_feed_exposure_precise for devices in
// device_id_old that are not listed in blacklist, for stat_date.
val imp_count_oldUser_all = {
  val exposureSql =
    s"""
       |select '${stat_date}' as stat_date, count(cid_id) as imp_count_oldUser_all
       |from data_feed_exposure_precise je inner join device_id_old
       |on je.device_id = device_id_old.device_id
       |where je.cid_type = 'diary'
       |and je.device_id not in (select device_id from blacklist)
       |and je.stat_date ='${stat_date}'
       |""".stripMargin
  sc.sql(exposureSql)
}
// Click-through-rate statistics for new users.
// Distinct device ids from ml_device_day_active_status whose active_type is not '4'
// on partition_date, excluding the listed first_channel_source_type values.
// Registered as temp view "device_id_new".
val device_id_newUser = {
  val newUserSql =
    s"""
       |select distinct(device_id) as device_id
       |from online.ml_device_day_active_status
       |where active_type != '4'
       |and first_channel_source_type not in ('yqxiu1','yqxiu2','yqxiu3','yqxiu4','yqxiu5','mxyc1','mxyc2','mxyc3'
       | ,'wanpu','jinshan','jx','maimai','zhuoyi','huatian','suopingjingling','mocha','mizhe','meika','lamabang'
       | ,'js-az1','js-az2','js-az3','js-az4','js-az5','jfq-az1','jfq-az2','jfq-az3','jfq-az4','jfq-az5','toufang1'
       | ,'toufang2','toufang3','toufang4','toufang5','toufang6','TF-toufang1','TF-toufang2','TF-toufang3','TF-toufang4'
       | ,'TF-toufang5','tf-toufang1','tf-toufang2','tf-toufang3','tf-toufang4','tf-toufang5','benzhan','promotion_aso100'
       | ,'promotion_qianka','promotion_xiaoyu','promotion_dianru','promotion_malioaso','promotion_malioaso-shequ'
       | ,'promotion_shike','promotion_julang_jl03','','unknown')
       |and partition_date ='${partition_date}'
       |""".stripMargin
  sc.sql(newUserSql)
}
device_id_newUser.createOrReplaceTempView("device_id_new")
// NOTE(review): each of the three statements in this span carries two `val` openers
// and two select lines but a single closing parenthesis, i.e. two versions
// (`*_Contrast_*` and `*_all_*`) are interleaved. It looks like diff residue —
// confirm which version is intended; the span does not parse as Scala as written.
val clk_count_newUser_Contrast_a = sc.sql(
val clk_count_newUser_all_a = sc.sql(
s"""
|select '${stat_date}' as stat_date, count(ot.cl_id) as clk_count_newUser_Contrast_a
|select '${stat_date}' as stat_date, count(ot.cl_id) as clk_count_newUser_all_a
|from online.tl_hdfs_maidian_view ot inner join device_id_new
|on ot.cl_id = device_id_new.device_id
|where ot.action='on_click_diary_card'
|and ot.params['tab_name'] = '精选'
|and ot.params['page_name'] = 'home'
|and ot.cl_id regexp'1$$'
|and ot.partition_date ='${partition_date}'
""".stripMargin
)
// NOTE(review): same interleaving as above, for the full-screen video-card click count.
val clk_count_newUser_Contrast_b = sc.sql(
val clk_count_newUser_all_b = sc.sql(
s"""
|select '${stat_date}' as stat_date, count(ot.cl_id) as clk_count_newUser_Contrast_b
|select '${stat_date}' as stat_date, count(ot.cl_id) as clk_count_newUser_all_b
|from online.tl_hdfs_maidian_view ot inner join device_id_new
|on ot.cl_id = device_id_new.device_id
|where ot.action='full_stack_click_video_card_full_screen_play'
|and ot.params['tab_name'] = '精选'
|and ot.params["card_type"]="diary"
|and ot.cl_id regexp'1$$'
|and ot.partition_date ='${partition_date}'
""".stripMargin
)
// NOTE(review): same interleaving as above, for the exposure count; the two versions
// also read different tables (data_feed_exposure_precise vs data_feed_exposure).
val imp_count_newUser_Contrast = sc.sql(
val imp_count_newUser_all = sc.sql(
s"""
|select '${stat_date}' as stat_date, count(cid_id) as imp_count_newUser_Contrast
|from data_feed_exposure_precise je inner join device_id_new
|select '${stat_date}' as stat_date, count(cid_id) as imp_count_newUser_all
|from data_feed_exposure je inner join device_id_new
|on je.device_id = device_id_new.device_id
|where je.cid_type = 'diary'
|and je.device_id regexp'1$$'
|and je.device_id not in (select device_id from blacklist)
|and je.stat_date ='${stat_date}'
""".stripMargin
)
// Click count (type a) for all devices in device_id_new: 'on_click_diary_card'
// events on the '精选' tab of the 'home' page, on partition_date.
val clk_count_newUser_all_a = {
  val cardClickSql =
    s"""
       |select '${stat_date}' as stat_date, count(ot.cl_id) as clk_count_newUser_all_a
       |from online.tl_hdfs_maidian_view ot inner join device_id_new
       |on ot.cl_id = device_id_new.device_id
       |where ot.action='on_click_diary_card'
       |and ot.params['tab_name'] = '精选'
       |and ot.params['page_name'] = 'home'
       |and ot.partition_date ='${partition_date}'
       |""".stripMargin
  sc.sql(cardClickSql)
}
// Click count (type b) for all devices in device_id_new:
// 'full_stack_click_video_card_full_screen_play' events on the '精选' tab with
// card_type "diary", on partition_date.
val clk_count_newUser_all_b = {
  val videoClickSql =
    s"""
       |select '${stat_date}' as stat_date, count(ot.cl_id) as clk_count_newUser_all_b
       |from online.tl_hdfs_maidian_view ot inner join device_id_new
       |on ot.cl_id = device_id_new.device_id
       |where ot.action='full_stack_click_video_card_full_screen_play'
       |and ot.params['tab_name'] = '精选'
       |and ot.params["card_type"]="diary"
       |and ot.partition_date ='${partition_date}'
       |""".stripMargin
  sc.sql(videoClickSql)
}
val imp_count_newUser_all = sc.sql(
val imp_count_newUser_all_precise = sc.sql(
s"""
|select '${stat_date}' as stat_date, count(cid_id) as imp_count_newUser_all
|select '${stat_date}' as stat_date, count(cid_id) as imp_count_newUser_all_precise
|from data_feed_exposure_precise je inner join device_id_new
|on je.device_id = device_id_new.device_id
|where je.cid_type = 'diary'
......@@ -978,6 +724,7 @@ object jingxuan_liebiao_CTR_Precise {
""".stripMargin
)
val result1 = clk_count_oldUser_Contrast_a.join(clk_count_oldUser_Contrast_b,"stat_date")
.join(imp_count_oldUser_Contrast,"stat_date")
.join(clk_count_oldUser_all_a,"stat_date")
......@@ -989,19 +736,14 @@ object jingxuan_liebiao_CTR_Precise {
.join(clk_count_newUser_all_a,"stat_date")
.join(clk_count_newUser_all_b,"stat_date")
.join(imp_count_newUser_all,"stat_date")
result1.show()
GmeiConfig.writeToJDBCTable(result1, "on_click_diary_card", SaveMode.Append)
GmeiConfig.writeToJDBCTable(result1, "jingxuan_liebiao_CTR_Precise", SaveMode.Append)
}
}
}
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment