Commit 84a07c97 authored by 王志伟

new Demand

parent 109a6a45
package com.gmei
import java.io.Serializable
import java.text.SimpleDateFormat
import java.util.Calendar
import com.gmei.WeafareStat.{defaultParams, parser}
import org.apache.spark.sql.{SaveMode, TiContext}
@@ -10,30 +8,30 @@ import org.apache.log4j.{Level, Logger}
import scopt.OptionParser
import com.gmei.lib.AbstractParams
import scala.util.parsing.json.JSON
object testt {
Logger.getLogger("org.apache.spark").setLevel(Level.WARN)
Logger.getLogger("org.apache.eclipse.jetty.server").setLevel(Level.OFF)
case class Params(env: String = "dev") extends AbstractParams[Params] with Serializable
case class Params(env: String = "dev",
date: String = "2018-08-01"
) extends AbstractParams[Params] with Serializable
val defaultParams = Params()
val parser = new OptionParser[Params]("Feed_EDA") {
head("testt")
head("WeafareStat")
opt[String]("env")
.text(s"the database environment you used")
.action((x, c) => c.copy(env = x))
//opt[String] ("date")
// .text(s"the date you used")
// .action((x,c) => c.copy(date = x))
opt[String] ("date")
.text(s"the date you used")
.action((x,c) => c.copy(date = x))
note(
"""
|For example, the following command runs this app on a tidb dataset:
|
| spark-submit --class com.gmei.testt ./target/scala-2.11/feededa-assembly-0.1.jar \
| spark-submit --class com.gmei.WeafareStat ./target/scala-2.11/feededa-assembly-0.1.jar \
""".stripMargin +
s"| --env ${defaultParams.env}"
)
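// Hedged note, not part of this diff's visible hunks: a scopt parser like this one is
// normally consumed as parser.parse(args, defaultParams).map { param => ... }, which is
// presumably where the `param` referenced further down comes from; the actual main
// method sits in the lines elided between the two hunks.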
@@ -52,45 +50,43 @@ object testt {
ti.tidbMapTable(dbName = "jerry_test", tableName = "bl_device_list")
ti.tidbMapTable(dbName = "jerry_prod", tableName = "data_feed_exposure")
val strategies = Seq("3$","4$","5$","6$","7$","8$","c$","d$","e$","A$","B$","C$","D$")
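// Each entry is a regular expression anchored at the end of the string ("$"), matched
// against cl_id in the query below, i.e. the devices are bucketed by the last character
// of their device id.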
for (strategy <- strategies){
println(strategy)
for(i <- 1 to 10){
val cal = Calendar.getInstance
cal.add(Calendar.DATE, -i)
// set up the date format
val time = cal.getTime
// apply the format to the timestamp obtained above
val newtime: String = new SimpleDateFormat("yyyyMMdd").format(time)
println(newtime)
val get_data_dura = sc.sql(
s"""
|select partition_date,params["duration"]
|from online.tl_hdfs_maidian_view
|where action="on_app_session_over"
|and cl_id regexp '${strategy}'
|and partition_date = ${newtime}
""".stripMargin
)
import sc.implicits._
// val stat_date = GmeiConfig.getMinusNDate(1)
println(param.date)
val partition_date = param.date.replace("-","")
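// Drops the dashes so the default "2018-08-01" becomes "20180801", matching the
// warehouse partition_date format; note that the clk_count query below still hard-codes
// its own partition range ('20180801' to '20181107') instead of using this value.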
val diary_id = sc.sql(
s"""
|select distinct(cid_id) as diary_id
|from data_feed_click
""".stripMargin
)
get_data_dura.printSchema
get_data_dura.show(5)
diary_id.show()
diary_id.createOrReplaceTempView("diary_id")
val clk_count = sc.sql(
s"""
|select tl.params["business_id"] as diary_id, sum(params["out"]-params["in"]) as dur_time, count(cl_id) as num_clk, sum(params["out"]-params["in"])/count(cl_id) as avg_dur
|from online.tl_hdfs_maidian_view tl inner join diary_id
|on tl.params["business_id"] = diary_id.diary_id
|where action="page_view"
|and partition_date >='20180801' and partition_date <'20181107'
|group by tl.params["business_id"] order by avg_dur desc
""".stripMargin
)
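// Per diary (tl.params["business_id"]), clk_count sums the time spent on the page
// (params["out"] - params["in"]) over page_view events, counts the clicks, and sorts
// diaries by average view duration in descending order.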
val tst = get_data_dura.filter("params['duration'] > 0")
val tst2 = tst.filter("params['duration'] > 0")
val result = clk_count
result.show()
// println(rows(1)) would throw an error here
}
}
}
}
}
}