change result format and output path

049f0087 · 高雅喆 · a0e73dd2 · 049f0087
Commit 049f0087 authored Oct 18, 2018 by 高雅喆
Show whitespace changes
Inline Side-by-side

Showing with 41 additions and 13 deletions

WeafareStat.scala eda/feededa/src/main/scala/com/gmei/WeafareStat.scala +41 -13

No files found.
--- a/eda/feededa/src/main/scala/com/gmei/WeafareStat.scala
+++ b/eda/feededa/src/main/scala/com/gmei/WeafareStat.scala
@@ -44,6 +44,7 @@ object WeafareStat {
      ti.tidbMapTable(dbName = "jerry_prod",tableName = "data_feed_click")
+      import sc.implicits._
      val stat_date = GmeiConfig.getMinusNDate(1)
      println(stat_date)
      val video_cids = sc.sql(
@@ -72,9 +73,9 @@ object WeafareStat {
      val partition_date = stat_date.replace("-","")
      println(partition_date)
-      val video_count = sc.sql(
+      val video_meigou_count = sc.sql(
        s"""
-           |select count(page_name) as vd_count
+           |select '${stat_date}' as stat_date, count(page_name) as video_meigou_count
           |from online.bl_hdfs_page_view_updates pv inner join tmp1
           |on pv.referrer_id = tmp1.cid_id
           |where pv.partition_date = '${partition_date}'
@@ -82,15 +83,11 @@ object WeafareStat {
           |and pv.referrer='diary_detail'
         """.stripMargin
      )
-      video_count.show()
+      video_meigou_count.show()
-      val output1 = "./test_vd_cids.csv"
-      video_count.repartition(1)
-        .write.format("com.databricks.spark.csv")
-        .option("header","true").save(output1)
-      val txt_count = sc.sql(
+      val txt_meigou_count = sc.sql(
        s"""
-           |select count(page_name) as txt_count
+           |select '${stat_date}' as stat_date, count(page_name) as txt_meigou_count
           |from online.bl_hdfs_page_view_updates pv inner join tmp2
           |on pv.referrer_id = tmp2.cid_id
           |where pv.partition_date = '${partition_date}'
@@ -98,11 +95,42 @@ object WeafareStat {
           |and pv.referrer='diary_detail'
         """.stripMargin
      )
-      txt_count.show()
+      txt_meigou_count.show()
-      val output2 = "./test_txt_cids.csv"
-      txt_count.repartition(1)
+      // Count feed clicks (cid_type = 'diary') whose cid_id appears in diary_video for stat_date.
+      // The literal stat_date is selected as a column so this single-row result can be joined on it.
+      val video_clk_count = sc.sql(
+        s"""
+           |select '${stat_date}' as stat_date, count(cid_id) as video_clk_count
+           |from data_feed_click
+           |where cid_type = 'diary'
+           |and  cid_id  in (select cid from diary_video where stat_date='${stat_date}')
+           |and stat_date='${stat_date}'
+         """.stripMargin
+      )
+      video_clk_count.show()
+      // Count feed clicks (cid_type = 'diary') whose cid_id does NOT appear in diary_video for stat_date.
+      // NOTE(review): SQL `not in` matches no rows if the subquery yields a NULL cid — confirm cid is never NULL.
+      val txt_clk_count = sc.sql(
+        s"""
+           |select '${stat_date}' as stat_date, count(cid_id) as txt_clk_count
+           |from data_feed_click
+           |where cid_type = 'diary'
+           |and  cid_id not in (select cid from diary_video where stat_date='${stat_date}')
+           |and stat_date='${stat_date}'
+         """.stripMargin
+      )
+      txt_clk_count.show()
+      // Join the four single-row count results on their shared stat_date column.
+      val joined = video_clk_count.join(video_meigou_count,"stat_date")
+        .join(txt_clk_count,"stat_date")
+        .join(txt_meigou_count,"stat_date")
+      // DataFrames are immutable: withColumn returns a new DataFrame, so the rate
+      // columns must be captured in `result`, otherwise they are silently dropped.
+      val result = joined
+        .withColumn("video_rate",joined.col("video_meigou_count")/joined.col("video_clk_count"))
+        .withColumn("txt_rate",joined.col("txt_meigou_count")/joined.col("txt_clk_count"))
+      result.show()
+      // Write `result` as CSV with a header row; repartition(1) collapses it to a single partition first.
+      // NOTE(review): no save mode is set, so presumably the write fails if this path already exists — confirm for reruns.
+      val output = "/data2/model/eda/gray_stat/welfare_detail_count_fromvideo.csv"
+      result.repartition(1)
        .write.format("com.databricks.spark.csv")
-        .option("header","true").save(output2)
+        .option("header","true").save(output)
      sc.stop()