Commit a11d9c7e authored by 王志伟's avatar 王志伟

统计话题pv

parent 68f52789
...@@ -730,7 +730,81 @@ object smart_rank_count { ...@@ -730,7 +730,81 @@ object smart_rank_count {
//话题相关问题统计
object question_count {
  Logger.getLogger("org.apache.spark").setLevel(Level.WARN)
  Logger.getLogger("org.apache.eclipse.jetty.server").setLevel(Level.OFF)

  /** Command-line parameters: database environment and target date. */
  case class Params(env: String = "dev",
                    date: String = "2018-08-01"
                   ) extends AbstractParams[Params] with Serializable

  val defaultParams = Params()

  // CLI parser: --env selects the database environment, --date the stat date.
  val parser = new OptionParser[Params]("Feed_EDA") {
    head("WeafareStat")
    opt[String]("env")
      .text(s"the databases environment you used")
      .action((x, c) => c.copy(env = x))
    opt[String]("date")
      .text(s"the date you used")
      .action((x, c) => c.copy(date = x))
    note(
      """
        |For example, the following command runs this app on a tidb dataset:
        |
        | spark-submit --class com.gmei.WeafareStat ./target/scala-2.11/feededa-assembly-0.1.jar \
      """.stripMargin +
        s"| --env ${defaultParams.env}"
    )
  }

  /**
   * Entry point: for each topic-related question id, report the daily click
   * PV (count of cl_id) on its feed card since 2019-01-01.
   */
  def main(args: Array[String]): Unit = {
    parser.parse(args, defaultParams).map { param =>
      GmeiConfig.setup(param.env)
      val spark_env = GmeiConfig.getSparkSession()
      val sc = spark_env._2
      val ti = new TiContext(sc)
      ti.tidbMapTable(dbName = "jerry_prod", tableName = "diary_video")
      ti.tidbMapTable(dbName = "jerry_prod", tableName = "data_feed_click")
      ti.tidbMapTable(dbName = "jerry_prod", tableName = "blacklist")
      ti.tidbMapTable(dbName = "jerry_test", tableName = "bl_device_list")
      ti.tidbMapTable(dbName = "jerry_prod", tableName = "data_feed_exposure")
      ti.tidbMapTable(dbName = "jerry_prod", tableName = "merge_queue_table")
      import sc.implicits._

      // NOTE(review): stat_date/partition_date are computed but the SQL below
      // hardcodes '20190101'; kept in case getMinusNDate has required setup
      // side effects — confirm and either use partition_date or drop both.
      val stat_date = GmeiConfig.getMinusNDate(1)
      //println(param.date)
      val partition_date = stat_date.replace("-","")

      // Topic-related question ids to report PV for.
      val question_id=List(212264,212266,212272,212281,212287,212436,212439,212437,212505,212506,212507,212522,212523,212526,212532,212783,212787,212789,212793,212796,213202,213199,213216,213219,213297,213224,213226,213239,213300,213302,213316,213307,213308,213370,213377,213349,213358,213368,213392,213393,213435,213453,213445,213448,213458,213466,213471,213478,213485,213638,213642,213644,213727,213729,213775,213776,213810,213817,213805,213821,213884,213885,213892,213834,213879,214043,214050,214062,214055,214056,214058,214064,214159,214182,214149,214184,214190,214206,214227,214243,214242,214288,214289,214293,214295,214541,214544,214546,214614,214618,214619,214620,214682,214683,214684,214848,214850,214854,214856,214857,214903,214908,214913,214918,214919,214980,214981,214988,214985,215031,215034,215036,215039,215094,215098,215104,215107,215112,215222,215225,215233,215237,215265,215366,215347,215346,215343)

      for( a <- question_id ){
        // Daily click PV for this question's feed card.
        val question_pv = sc.sql(
          s"""
             |SELECT partition_date,count(cl_id)
             |FROM online.tl_hdfs_maidian_view
             |WHERE partition_date >= '20190101'
             |and action='community_home_click_feed_card'
             |and params["card_type"]="问题"
             |and params['business_id']='${a}'
             |group by partition_date
             |order by partition_date
           """.stripMargin
        )
        println(s"question_id: ${a}")
        // BUG FIX: the original never invoked an action on the lazy DataFrame,
        // so the query never ran and no PV was ever reported. show() forces
        // execution and prints the daily counts.
        question_pv.show(1000, truncate = false)
      }
    }
  }
}
...@@ -3,6 +3,7 @@ package com.gmei ...@@ -3,6 +3,7 @@ package com.gmei
import java.io.Serializable import java.io.Serializable
import java.text.SimpleDateFormat import java.text.SimpleDateFormat
import breeze.linalg.split
import com.gmei.WeafareStat.{defaultParams, parser} import com.gmei.WeafareStat.{defaultParams, parser}
import org.apache.spark.sql.{SaveMode, TiContext} import org.apache.spark.sql.{SaveMode, TiContext}
import org.apache.log4j.{Level, Logger} import org.apache.log4j.{Level, Logger}
...@@ -320,7 +321,13 @@ object Repeated_content_recommendation_moreday { ...@@ -320,7 +321,13 @@ object Repeated_content_recommendation_moreday {
""".stripMargin """.stripMargin
).rdd.map(row=>(row(0).toString,row(1).toString,row(2).toString)).map(row=>(row._2,row._3)).groupByKey() ).rdd.map(row=>(row(0).toString,row(1).toString,row(2).toString)).map(row=>(row._2,row._3)).groupByKey()
.filter(x => x._2.size >1) .filter(x => x._2.size >1)
//打印结果
val temp=exp_diary.take(10).foreach(println) val temp=exp_diary.take(10).foreach(println)
//统计每个用户重复日记个数
val count_imp=exp_diary.map(_._2).map(row=>row.map((_, 1)).reduceByKey(_ + _))
// exp_diary.show() // exp_diary.show()
// exp_diary.createOrReplaceTempView("exp_diary") // exp_diary.createOrReplaceTempView("exp_diary")
// GmeiConfig.writeToJDBCTable(df, table = "Repeated_evaluation_indicator_moreday", SaveMode.Append) // GmeiConfig.writeToJDBCTable(df, table = "Repeated_evaluation_indicator_moreday", SaveMode.Append)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment