Commit 0c62cccd authored by 王志伟

new files

No related merge requests found
package com.gmei
import java.io.Serializable
import com.gmei.WeafareStat.{defaultParams, parser}
import org.apache.spark.sql.{SaveMode, TiContext}
import org.apache.log4j.{Level, Logger}
import scopt.OptionParser
import com.gmei.lib.AbstractParams
/**
 * Spark job that, for one `stat_date`, counts diary feed clicks and diary feed
 * exposures (ignoring devices listed in `blacklist`), joins the two counts on
 * `stat_date`, prints the result and appends it to the `find_bug` JDBC table.
 */
object find_bug {

  // Reduce Spark's own log output to warnings and above.
  Logger.getLogger("org.apache.spark").setLevel(Level.WARN)
  // NOTE(review): Jetty loggers normally live under "org.eclipse.jetty"; this
  // logger name probably matches nothing — confirm before changing it.
  Logger.getLogger("org.apache.eclipse.jetty.server").setLevel(Level.OFF)

  /**
   * Command-line parameters of the job.
   *
   * @param env  database environment name, passed to `GmeiConfig.setup`
   * @param date statistics date, compared as a string against the `stat_date` column
   */
  case class Params(env: String = "dev",
                    date: String = "2018-08-01"
                   ) extends AbstractParams[Params] with Serializable

  /** Parameter values used when an option is not given on the command line. */
  val defaultParams: Params = Params()

  /** scopt parser for the `--env` and `--date` options. */
  val parser = new OptionParser[Params]("Feed_EDA") {
    head("find_bug")
    opt[String]("env")
      .text(s"the databases environment you used")
      .action((x, c) => c.copy(env = x))
    opt[String]("date")
      .text(s"the date you used")
      .action((x, c) => c.copy(date = x))
    // Only the triple-quoted part goes through stripMargin, so the appended
    // "--env" line must not carry a margin character of its own.
    note(
      """
        |For example, the following command runs this app on a tidb dataset:
        |
        | spark-submit --class com.gmei.find_bug ./target/scala-2.11/feededa-assembly-0.1.jar \
        |""".stripMargin +
        s"  --env ${defaultParams.env}"
    )
  }

  /**
   * Entry point: parses the arguments and runs the job.
   *
   * Exits with status 1 when the arguments cannot be parsed, so that callers
   * and schedulers can tell a bad invocation from a successful run.
   *
   * @param args raw command-line arguments (`--env`, `--date`)
   */
  def main(args: Array[String]): Unit = {
    parser.parse(args, defaultParams) match {
      case Some(param) => run(param)
      case None =>
        // scopt has already reported the problem on the console.
        sys.exit(1)
    }
  }

  /** Maps the TiDB tables, computes the click/exposure counts and stores them. */
  private def run(param: Params): Unit = {
    GmeiConfig.setup(param.env)
    // Second element of the tuple returned by getSparkSession is the handle
    // used both for TiContext and for running SQL.
    val session = GmeiConfig.getSparkSession()._2
    val ti = new TiContext(session)

    // Same tables, in the same order, as before; not all of them are queried below.
    Seq(
      "jerry_prod" -> "diary_video",
      "jerry_prod" -> "data_feed_click",
      "jerry_prod" -> "blacklist",
      "jerry_test" -> "bl_device_list",
      "jerry_prod" -> "data_feed_exposure",
      "jerry_prod" -> "merge_queue_table"
    ).foreach { case (db, table) =>
      ti.tidbMapTable(dbName = db, tableName = table)
    }

    // val stat_date = GmeiConfig.getMinusNDate(1)
    val statDate = param.date

    // `stat_date` is selected next to an aggregate, so it has to be grouped:
    // Spark SQL rejects the query at analysis time otherwise.
    // NOTE(review): `not in (subquery)` yields no rows if blacklist.device_id
    // contains NULL — confirm the column is non-null.
    val clickCounts = session.sql(
      s"""
         |select stat_date,count(cid_id) as clk_count_all
         |from data_feed_click
         |where (cid_type = 'diary' or cid_type = 'diary_video')
         |and device_id not in (select device_id from blacklist)
         |and stat_date ='${statDate}'
         |group by stat_date
       """.stripMargin
    )

    val exposureCounts = session.sql(
      s"""
         |select stat_date,count(cid_id) as imp_count_all
         |from data_feed_exposure
         |where cid_type = 'diary'
         |and device_id not in (select device_id from blacklist)
         |and stat_date ='${statDate}'
         |group by stat_date
       """.stripMargin
    )

    // One row per stat_date carrying both counters.
    val result1 = clickCounts.join(exposureCounts, "stat_date")
    result1.show()
    GmeiConfig.writeToJDBCTable(result1, "find_bug", SaveMode.Append)
  }
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment