Commit e7c2d6a6 authored by 王志伟

合并统计文件

parent 9e5e6dc8
...@@ -188,10 +188,6 @@ object Recommendation_strategy_all { ...@@ -188,10 +188,6 @@ object Recommendation_strategy_all {
|and je.stat_date ='${stat_date}' |and je.stat_date ='${stat_date}'
""".stripMargin """.stripMargin
) )
val result1 = clk_count_oldUser_Contrast.join(imp_count_oldUser_Contrast,"stat_date") val result1 = clk_count_oldUser_Contrast.join(imp_count_oldUser_Contrast,"stat_date")
.join(clk_count_oldUser_all,"stat_date") .join(clk_count_oldUser_all,"stat_date")
.join(imp_count_oldUser_all,"stat_date") .join(imp_count_oldUser_all,"stat_date")
...@@ -291,8 +287,6 @@ object Recommendation_strategy_all { ...@@ -291,8 +287,6 @@ object Recommendation_strategy_all {
""".stripMargin """.stripMargin
) )
val result2 = clk_active_1.join(imp_active_1,"stat_date") val result2 = clk_active_1.join(imp_active_1,"stat_date")
.join(clk_active_all,"stat_date") .join(clk_active_all,"stat_date")
.join(imp_active_all,"stat_date") .join(imp_active_all,"stat_date")
...@@ -308,6 +302,83 @@ object Recommendation_strategy_all { ...@@ -308,6 +302,83 @@ object Recommendation_strategy_all {
// Compute click-through statistics for new users.
// Builds four single-row counts (clicks / exposures, for the "contrast" device
// bucket and for all devices), joins them on stat_date and appends the row to
// the "Recommendation_strategy_newUser" table.

// Distinct device ids from online.ml_device_day_active_status for the given
// partition_date, keeping rows whose active_type is not '4' and whose
// first_channel_source_type is not one of the listed channel codes.
// NOTE(review): presumably active_type '4' marks returning users and the list
// enumerates promotion/spam channels — confirm against the table definition.
val devicee_id_newUser = sc.sql(
s"""
|select distinct(device_id) as device_id
|from online.ml_device_day_active_status
|where active_type != '4'
|and first_channel_source_type not in ('yqxiu1','yqxiu2','yqxiu3','yqxiu4','yqxiu5','mxyc1','mxyc2','mxyc3'
| ,'wanpu','jinshan','jx','maimai','zhuoyi','huatian','suopingjingling','mocha','mizhe','meika','lamabang'
| ,'js-az1','js-az2','js-az3','js-az4','js-az5','jfq-az1','jfq-az2','jfq-az3','jfq-az4','jfq-az5','toufang1'
| ,'toufang2','toufang3','toufang4','toufang5','toufang6','TF-toufang1','TF-toufang2','TF-toufang3','TF-toufang4'
| ,'TF-toufang5','tf-toufang1','tf-toufang2','tf-toufang3','tf-toufang4','tf-toufang5','benzhan','promotion_aso100'
| ,'promotion_qianka','promotion_xiaoyu','promotion_dianru','promotion_malioaso','promotion_malioaso-shequ'
| ,'promotion_shike','promotion_julang_jl03')
|and partition_date ='${partition_date}'
""".stripMargin
)
// Debug output: prints the first rows of the device list to stdout.
devicee_id_newUser.show()
// Register the device list under the name the SQL below joins against.
devicee_id_newUser.createOrReplaceTempView("device_id_new")
// Click count on stat_date for 'diary' / 'diary_video' items, limited to the
// registered devices whose id ends in '1' and that are not in `blacklist`.
// `$$` is the s-interpolator escape for a literal `$`, so the SQL sees regexp '1$'.
val clk_count_newUser_Contrast = sc.sql(
s"""
|select '${stat_date}' as stat_date, count(cid_id) as clk_count_newUser_Contrast
|from data_feed_click jd inner join device_id_new
|on jd.device_id = device_id_new.device_id
|where (jd.cid_type = 'diary' or jd.cid_type = 'diary_video')
|and jd.device_id regexp'1$$'
|and jd.device_id not in (select device_id from blacklist)
|and jd.stat_date ='${stat_date}'
""".stripMargin
)
// Exposure count for the same device bucket (ids ending in '1').
// NOTE(review): exposures filter on cid_type = 'diary' only, while the click
// queries also accept 'diary_video' — confirm this asymmetry is intended.
val imp_count_newUser_Contrast = sc.sql(
s"""
|select '${stat_date}' as stat_date, count(cid_id) as imp_count_newUser_Contrast
|from data_feed_exposure je inner join device_id_new
|on je.device_id = device_id_new.device_id
|where je.cid_type = 'diary'
|and je.device_id regexp'1$$'
|and je.device_id not in (select device_id from blacklist)
|and je.stat_date ='${stat_date}'
""".stripMargin
)
// Click count on stat_date across all registered, non-blacklisted devices
// (no device-id suffix filter).
val clk_count_newUser_all = sc.sql(
s"""
|select '${stat_date}' as stat_date, count(cid_id) as clk_count_newUser_all
|from data_feed_click jd inner join device_id_new
|on jd.device_id = device_id_new.device_id
|where (jd.cid_type = 'diary' or jd.cid_type = 'diary_video')
|and jd.device_id not in (select device_id from blacklist)
|and jd.stat_date ='${stat_date}'
""".stripMargin
)
// Exposure count on stat_date across all registered, non-blacklisted devices.
val imp_count_newUser_all = sc.sql(
s"""
|select '${stat_date}' as stat_date, count(cid_id) as imp_count_newUser_all
|from data_feed_exposure je inner join device_id_new
|on je.device_id = device_id_new.device_id
|where je.cid_type = 'diary'
|and je.device_id not in (select device_id from blacklist)
|and je.stat_date ='${stat_date}'
""".stripMargin
)
// Each query above is a global aggregate with a constant stat_date column, so
// it yields exactly one row; joining on stat_date lines the four counts up
// side by side in a single row.
val result3 = clk_count_newUser_Contrast.join(imp_count_newUser_Contrast,"stat_date")
.join(clk_count_newUser_all,"stat_date")
.join(imp_count_newUser_all,"stat_date")
result3.show()
// Append the row to "Recommendation_strategy_newUser" through the project's JDBC helper.
GmeiConfig.writeToJDBCTable(result3, "Recommendation_strategy_newUser", SaveMode.Append)
} }
......
...@@ -139,10 +139,8 @@ object strategy_other { ...@@ -139,10 +139,8 @@ object strategy_other {
//下边内容开始分析统计推荐系统评价指标 //下边内容开始分析统计推荐系统评价指标
//使用信息熵描述推荐系统对长尾优质物品(日记本)的挖掘能力
//使用基尼系数描述推荐系统对日记本推荐是否具有马太效应
object evaluation_indicator { object diary_exposure {
Logger.getLogger("org.apache.spark").setLevel(Level.WARN) Logger.getLogger("org.apache.spark").setLevel(Level.WARN)
Logger.getLogger("org.apache.eclipse.jetty.server").setLevel(Level.OFF) Logger.getLogger("org.apache.eclipse.jetty.server").setLevel(Level.OFF)
...@@ -176,21 +174,67 @@ object evaluation_indicator { ...@@ -176,21 +174,67 @@ object evaluation_indicator {
GmeiConfig.setup(param.env) GmeiConfig.setup(param.env)
val spark_env = GmeiConfig.getSparkSession() val spark_env = GmeiConfig.getSparkSession()
val sc = spark_env._2 val sc = spark_env._2
val ti = new TiContext(sc)
ti.tidbMapTable(dbName = "mimas_prod", tableName = "api_diary_tags")
ti.tidbMapTable(dbName = "jerry_prod", tableName = "data_feed_click")
ti.tidbMapTable(dbName = "jerry_prod", tableName = "data_feed_exposure")
ti.tidbMapTable(dbName = "zhengxing", tableName = "api_tag")
import sc.implicits._ val stat_date = GmeiConfig.getMinusNDate(1)
//val stat_date = GmeiConfig.getMinusNDate(1)
//println(param.date) val temp1 = sc.sql(
val partition_date = param.date.replace("-","") s"""
val diary_id = sc.sql( |select diary_id,tag_id
|from api_diary_tags
""".stripMargin
)
temp1.createOrReplaceTempView("temp1")
val temp2 = sc.sql(
s""" s"""
|select native_queue as diary_id |select distinct(temp1.diary_id) as diary_id,a.name as city
|from doris_prod.device_diary_queue |from api_tag a left join temp1
|on a.id = temp1.tag_id
|where a.tag_type = 4
""".stripMargin """.stripMargin
) )
diary_id.show() temp2.createOrReplaceTempView("temp2")
// Exposure count per (diary, city) on stat_date.
// The left join combined with `temp2.diary_id is not null` keeps only exposures
// whose cid_id matches a diary present in temp2.
// The GROUP BY is required: without it Spark rejects the query because
// diary_id and city are selected next to the aggregate count(a.cid_id).
val exposure_city = sc.sql(
s"""
|select temp2.diary_id as diary_id,count(a.cid_id) as imp_num,temp2.city as city
|from data_feed_exposure a left join temp2
|on a.cid_id = temp2.diary_id
|where a.stat_date='${stat_date}'
|and temp2.diary_id is not null
|group by temp2.diary_id,temp2.city
""".stripMargin
)
exposure_city.createOrReplaceTempView("exposure_city")
// One row per city, ordered ascending by num.
// The original text had an empty `where` in front of `group by`, which does
// not parse; it carried no predicate, so it is simply removed.
// NOTE(review): count(imp_num) counts the exposed diaries per city; if the
// total number of exposures per city was intended, this should be sum(imp_num).
val city_exposure = sc.sql(
s"""
|select count(imp_num) as num,city
|from exposure_city
|group by city order by num
""".stripMargin
)
city_exposure.show(20)
city_exposure.createOrReplaceTempView("city_exposure")
// Divide each city's num by the number of rows in city_exposure.
// `count(num)` must be a window aggregate (`over ()`) to be evaluated across
// all rows while still returning one row per city; as a plain aggregate the
// query fails analysis because city and num are not grouped.
// NOTE(review): if a per-city share was intended, the divisor should be
// sum(num) over () instead of count(num) over ().
val final_count = sc.sql(
s"""
|select city,num/count(num) over ()
|from city_exposure
""".stripMargin
)
final_count.show()
// Pull the (small, one-row-per-city) result to the driver and print every row.
val temp = final_count.collect()
temp.foreach(println)
GmeiConfig.writeToJDBCTable(diary_id, "dairy_id_queue", SaveMode.Append) //GmeiConfig.writeToJDBCTable(diary_id, "dairy_id_queue", SaveMode.Append)
} }
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment