Commit fa68f71c authored by 高雅喆

esmm pre data on activate user

parents bc59c4dc fd5abfad
...@@ -55,7 +55,7 @@ object Recommendation_strategy_all { ...@@ -55,7 +55,7 @@ object Recommendation_strategy_all {
val partition_date = stat_date.replace("-","") val partition_date = stat_date.replace("-","")
val decive_id_oldUser = sc.sql( val decive_id_oldUser = sc.sql(
s""" s"""
|select distinct(device_id) as decive_id |select distinct(device_id) as device_id
|from online.ml_device_day_active_status |from online.ml_device_day_active_status
|where active_type = '4' |where active_type = '4'
|and first_channel_source_type not in ('yqxiu1','yqxiu2','yqxiu3','yqxiu4','yqxiu5','mxyc1','mxyc2','mxyc3' |and first_channel_source_type not in ('yqxiu1','yqxiu2','yqxiu3','yqxiu4','yqxiu5','mxyc1','mxyc2','mxyc3'
...@@ -75,7 +75,7 @@ object Recommendation_strategy_all { ...@@ -75,7 +75,7 @@ object Recommendation_strategy_all {
s""" s"""
|select '${stat_date}' as stat_date, count(cid_id) as clk_count_oldUser_Contrast |select '${stat_date}' as stat_date, count(cid_id) as clk_count_oldUser_Contrast
|from data_feed_click jd inner join device_id_old |from data_feed_click jd inner join device_id_old
|on jd.device_id = device_id_old.decive_id |on jd.device_id = device_id_old.device_id
|where (jd.cid_type = 'diary' or jd.cid_type = 'diary_video') |where (jd.cid_type = 'diary' or jd.cid_type = 'diary_video')
|and jd.device_id regexp'1$$' |and jd.device_id regexp'1$$'
|and jd.device_id not in (select device_id from bl_device_list) |and jd.device_id not in (select device_id from bl_device_list)
...@@ -88,7 +88,7 @@ object Recommendation_strategy_all { ...@@ -88,7 +88,7 @@ object Recommendation_strategy_all {
s""" s"""
|select '${stat_date}' as stat_date, count(cid_id) as imp_count_oldUser_Contrast |select '${stat_date}' as stat_date, count(cid_id) as imp_count_oldUser_Contrast
|from data_feed_exposure je inner join device_id_old |from data_feed_exposure je inner join device_id_old
|on je.device_id = device_id_old.decive_id |on je.device_id = device_id_old.device_id
|where je.cid_type = 'diary' |where je.cid_type = 'diary'
|and je.device_id regexp'1$$' |and je.device_id regexp'1$$'
|and je.device_id not in (select device_id from bl_device_list) |and je.device_id not in (select device_id from bl_device_list)
...@@ -101,7 +101,7 @@ object Recommendation_strategy_all { ...@@ -101,7 +101,7 @@ object Recommendation_strategy_all {
s""" s"""
|select '${stat_date}' as stat_date, count(cid_id) as clk_count_oldUser_all |select '${stat_date}' as stat_date, count(cid_id) as clk_count_oldUser_all
|from data_feed_click jd inner join device_id_old |from data_feed_click jd inner join device_id_old
|on jd.device_id = device_id_old.decive_id |on jd.device_id = device_id_old.device_id
|where (jd.cid_type = 'diary' or jd.cid_type = 'diary_video') |where (jd.cid_type = 'diary' or jd.cid_type = 'diary_video')
|and jd.device_id not in (select device_id from bl_device_list) |and jd.device_id not in (select device_id from bl_device_list)
|and jd.device_id not in (select device_id from blacklist) |and jd.device_id not in (select device_id from blacklist)
...@@ -113,7 +113,7 @@ object Recommendation_strategy_all { ...@@ -113,7 +113,7 @@ object Recommendation_strategy_all {
s""" s"""
|select '${stat_date}' as stat_date, count(cid_id) as imp_count_oldUser_all |select '${stat_date}' as stat_date, count(cid_id) as imp_count_oldUser_all
|from data_feed_exposure je inner join device_id_old |from data_feed_exposure je inner join device_id_old
|on je.device_id = device_id_old.decive_id |on je.device_id = device_id_old.device_id
|where je.cid_type = 'diary' |where je.cid_type = 'diary'
|and je.device_id not in (select device_id from bl_device_list) |and je.device_id not in (select device_id from bl_device_list)
|and je.device_id not in (select device_id from blacklist) |and je.device_id not in (select device_id from blacklist)
...@@ -167,7 +167,7 @@ object Recommendation_strategy_all { ...@@ -167,7 +167,7 @@ object Recommendation_strategy_all {
s""" s"""
|select '${stat_date}' as stat_date, count(distinct(je.device_id)) as device_num_1_hit |select '${stat_date}' as stat_date, count(distinct(je.device_id)) as device_num_1_hit
|from data_feed_exposure je inner join device_id_old |from data_feed_exposure je inner join device_id_old
|on je.device_id = device_id_old.decive_id |on je.device_id = device_id_old.device_id
|where je.cid_type = 'diary' |where je.cid_type = 'diary'
|and je.device_id regexp'1$$' |and je.device_id regexp'1$$'
|and je.device_id not in (select device_id from bl_device_list) |and je.device_id not in (select device_id from bl_device_list)
...@@ -253,7 +253,7 @@ object Recommendation_strategy_all { ...@@ -253,7 +253,7 @@ object Recommendation_strategy_all {
|and jd.device_id not in (select device_id from bl_device_list) |and jd.device_id not in (select device_id from bl_device_list)
|and jd.device_id not in (select device_id from blacklist) |and jd.device_id not in (select device_id from blacklist)
|and jd.stat_date ='${stat_date}' |and jd.stat_date ='${stat_date}'
""".stripMargin """.stripMargin
) )
//所有有点击用户日记本曝光数 //所有有点击用户日记本曝光数
...@@ -267,7 +267,7 @@ object Recommendation_strategy_all { ...@@ -267,7 +267,7 @@ object Recommendation_strategy_all {
|and je.device_id not in (select device_id from bl_device_list) |and je.device_id not in (select device_id from bl_device_list)
|and je.device_id not in (select device_id from blacklist) |and je.device_id not in (select device_id from blacklist)
|and je.stat_date ='${stat_date}' |and je.stat_date ='${stat_date}'
""".stripMargin """.stripMargin
) )
//策略命中用户点击日记本用户数 //策略命中用户点击日记本用户数
...@@ -276,7 +276,7 @@ object Recommendation_strategy_all { ...@@ -276,7 +276,7 @@ object Recommendation_strategy_all {
|select '${stat_date}' as stat_date,count(distinct(device_id)) as clk_diary_device_cover |select '${stat_date}' as stat_date,count(distinct(device_id)) as clk_diary_device_cover
|from merge_queue_table |from merge_queue_table
|where device_id in (select distinct(device_id) from data_feed_click where stat_date = '${stat_date}') |where device_id in (select distinct(device_id) from data_feed_click where stat_date = '${stat_date}')
""".stripMargin """.stripMargin
) )
//策略命中用户总数 //策略命中用户总数
...@@ -284,7 +284,7 @@ object Recommendation_strategy_all { ...@@ -284,7 +284,7 @@ object Recommendation_strategy_all {
s""" s"""
|select '${stat_date}' as stat_date,count(distinct(device_id)) as device_all_cover |select '${stat_date}' as stat_date,count(distinct(device_id)) as device_all_cover
|from merge_queue_table |from merge_queue_table
""".stripMargin """.stripMargin
) )
val result2 = clk_active_1.join(imp_active_1,"stat_date") val result2 = clk_active_1.join(imp_active_1,"stat_date")
...@@ -298,10 +298,6 @@ object Recommendation_strategy_all { ...@@ -298,10 +298,6 @@ object Recommendation_strategy_all {
GmeiConfig.writeToJDBCTable(result2, "strategy_other", SaveMode.Append) GmeiConfig.writeToJDBCTable(result2, "strategy_other", SaveMode.Append)
//统计新用户点击率 //统计新用户点击率
val devicee_id_newUser = sc.sql( val devicee_id_newUser = sc.sql(
s""" s"""
......
...@@ -279,8 +279,6 @@ object diary_exposure { ...@@ -279,8 +279,6 @@ object diary_exposure {
val final_cid_city = diary_id_temp.join(df_cid_city,Seq("diary_id"),"left_outer") val final_cid_city = diary_id_temp.join(df_cid_city,Seq("diary_id"),"left_outer")
final_cid_city.show() final_cid_city.show()
val df1=final_cid_city.groupBy("name").count().orderBy(desc("count")) val df1=final_cid_city.groupBy("name").count().orderBy(desc("count"))
df1.show(400)
//3.5星以上日记本的id //3.5星以上日记本的id
val diary_id_temp2 = sc.sql( val diary_id_temp2 = sc.sql(
...@@ -289,7 +287,7 @@ object diary_exposure { ...@@ -289,7 +287,7 @@ object diary_exposure {
|from src_mimas_prod_api_diary |from src_mimas_prod_api_diary
|where content_level >=3.5 |where content_level >=3.5
|and doctor_id is not null |and doctor_id is not null
""".stripMargin """.stripMargin
) )
diary_id_temp2.createOrReplaceTempView("diary_id_temp2") diary_id_temp2.createOrReplaceTempView("diary_id_temp2")
...@@ -315,7 +313,6 @@ object diary_exposure { ...@@ -315,7 +313,6 @@ object diary_exposure {
val final_cid_city2 = diary_id_temp2.join(df_cid_city2,Seq("diary_id"),"left_outer") val final_cid_city2 = diary_id_temp2.join(df_cid_city2,Seq("diary_id"),"left_outer")
final_cid_city2.show() final_cid_city2.show()
val df2 =final_cid_city2.groupBy("name").count().orderBy(desc("count")) val df2 =final_cid_city2.groupBy("name").count().orderBy(desc("count"))
df2.show(400)
val df3 =df1.join(df2,Seq("name"),"left_outer") val df3 =df1.join(df2,Seq("name"),"left_outer")
df3.show(400) df3.show(400)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment