Commit a11d9c7e authored by 王志伟's avatar 王志伟

统计话题pv

parent 68f52789
...@@ -730,7 +730,81 @@ object smart_rank_count { ...@@ -730,7 +730,81 @@ object smart_rank_count {
//话题相关问题统计
object question_count {
  Logger.getLogger("org.apache.spark").setLevel(Level.WARN)
  Logger.getLogger("org.apache.eclipse.jetty.server").setLevel(Level.OFF)

  /** Command-line parameters: database environment and target date. */
  case class Params(env: String = "dev",
                    date: String = "2018-08-01"
                   ) extends AbstractParams[Params] with Serializable

  val defaultParams = Params()

  // CLI parser: --env selects the database environment, --date the stat date.
  val parser = new OptionParser[Params]("Feed_EDA") {
    head("WeafareStat")
    opt[String]("env")
      .text(s"the databases environment you used")
      .action((x, c) => c.copy(env = x))
    opt[String]("date")
      .text(s"the date you used")
      .action((x, c) => c.copy(date = x))
    note(
      """
        |For example, the following command runs this app on a tidb dataset:
        |
        | spark-submit --class com.gmei.WeafareStat ./target/scala-2.11/feededa-assembly-0.1.jar \
      """.stripMargin +
        s"| --env ${defaultParams.env}"
    )
  }

  /**
   * Entry point: for each topic-related question id, report the daily click
   * PV (count of cl_id) on its feed card since 2019-01-01.
   */
  def main(args: Array[String]): Unit = {
    parser.parse(args, defaultParams).map { param =>
      GmeiConfig.setup(param.env)
      val spark_env = GmeiConfig.getSparkSession()
      val sc = spark_env._2
      val ti = new TiContext(sc)
      ti.tidbMapTable(dbName = "jerry_prod", tableName = "diary_video")
      ti.tidbMapTable(dbName = "jerry_prod", tableName = "data_feed_click")
      ti.tidbMapTable(dbName = "jerry_prod", tableName = "blacklist")
      ti.tidbMapTable(dbName = "jerry_test", tableName = "bl_device_list")
      ti.tidbMapTable(dbName = "jerry_prod", tableName = "data_feed_exposure")
      ti.tidbMapTable(dbName = "jerry_prod", tableName = "merge_queue_table")
      import sc.implicits._

      // NOTE(review): stat_date/partition_date are computed but the SQL below
      // hardcodes '20190101'; kept in case getMinusNDate has required setup
      // side effects — confirm and either use partition_date or drop both.
      val stat_date = GmeiConfig.getMinusNDate(1)
      //println(param.date)
      val partition_date = stat_date.replace("-","")

      // Topic-related question ids to report PV for.
      val question_id=List(212264,212266,212272,212281,212287,212436,212439,212437,212505,212506,212507,212522,212523,212526,212532,212783,212787,212789,212793,212796,213202,213199,213216,213219,213297,213224,213226,213239,213300,213302,213316,213307,213308,213370,213377,213349,213358,213368,213392,213393,213435,213453,213445,213448,213458,213466,213471,213478,213485,213638,213642,213644,213727,213729,213775,213776,213810,213817,213805,213821,213884,213885,213892,213834,213879,214043,214050,214062,214055,214056,214058,214064,214159,214182,214149,214184,214190,214206,214227,214243,214242,214288,214289,214293,214295,214541,214544,214546,214614,214618,214619,214620,214682,214683,214684,214848,214850,214854,214856,214857,214903,214908,214913,214918,214919,214980,214981,214988,214985,215031,215034,215036,215039,215094,215098,215104,215107,215112,215222,215225,215233,215237,215265,215366,215347,215346,215343)

      for( a <- question_id ){
        // Daily click PV for this question's feed card.
        val question_pv = sc.sql(
          s"""
             |SELECT partition_date,count(cl_id)
             |FROM online.tl_hdfs_maidian_view
             |WHERE partition_date >= '20190101'
             |and action='community_home_click_feed_card'
             |and params["card_type"]="问题"
             |and params['business_id']='${a}'
             |group by partition_date
             |order by partition_date
           """.stripMargin
        )
        println(s"question_id: ${a}")
        // BUG FIX: the original never invoked an action on the lazy DataFrame,
        // so the query never ran and no PV was ever reported. show() forces
        // execution and prints the daily counts.
        question_pv.show(1000, truncate = false)
      }
    }
  }
}
...@@ -3,6 +3,7 @@ package com.gmei ...@@ -3,6 +3,7 @@ package com.gmei
import java.io.Serializable import java.io.Serializable
import java.text.SimpleDateFormat import java.text.SimpleDateFormat
import breeze.linalg.split
import com.gmei.WeafareStat.{defaultParams, parser} import com.gmei.WeafareStat.{defaultParams, parser}
import org.apache.spark.sql.{SaveMode, TiContext} import org.apache.spark.sql.{SaveMode, TiContext}
import org.apache.log4j.{Level, Logger} import org.apache.log4j.{Level, Logger}
...@@ -320,7 +321,13 @@ object Repeated_content_recommendation_moreday { ...@@ -320,7 +321,13 @@ object Repeated_content_recommendation_moreday {
""".stripMargin """.stripMargin
).rdd.map(row=>(row(0).toString,row(1).toString,row(2).toString)).map(row=>(row._2,row._3)).groupByKey() ).rdd.map(row=>(row(0).toString,row(1).toString,row(2).toString)).map(row=>(row._2,row._3)).groupByKey()
.filter(x => x._2.size >1) .filter(x => x._2.size >1)
//打印结果
val temp=exp_diary.take(10).foreach(println) val temp=exp_diary.take(10).foreach(println)
//统计每个用户重复日记个数
val count_imp=exp_diary.map(_._2).map(row=>row.map((_, 1)).reduceByKey(_ + _))
// exp_diary.show() // exp_diary.show()
// exp_diary.createOrReplaceTempView("exp_diary") // exp_diary.createOrReplaceTempView("exp_diary")
// GmeiConfig.writeToJDBCTable(df, table = "Repeated_evaluation_indicator_moreday", SaveMode.Append) // GmeiConfig.writeToJDBCTable(df, table = "Repeated_evaluation_indicator_moreday", SaveMode.Append)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment