Commit 231711b1 authored by 王志伟

get new data 通过testt文件

parent 5edec3f4
......@@ -14,7 +14,7 @@ object testt {
Logger.getLogger("org.apache.eclipse.jetty.server").setLevel(Level.OFF)
case class Params(env: String = "dev",
date: String = "2018-08-01"
//date: String = "2018-08-01"
) extends AbstractParams[Params] with Serializable
val defaultParams = Params()
......@@ -24,9 +24,9 @@ object testt {
opt[String]("env")
.text(s"the databases environment you used")
.action((x, c) => c.copy(env = x))
opt[String] ("date")
.text(s"the date you used")
.action((x,c) => c.copy(date = x))
//opt[String] ("date")
// .text(s"the date you used")
// .action((x,c) => c.copy(date = x))
note(
"""
|For example, the following command runs this app on a tidb dataset:
......@@ -53,91 +53,20 @@ object testt {
import sc.implicits._
// val stat_date = GmeiConfig.getMinusNDate(1)
println(param.date)
val partition_date = param.date.replace("-","")
val decive_id_oldUser = sc.sql(
s"""
|select distinct(device_id) as decive_id
|from online.ml_device_day_active_status
|where active_type = '4'
|and partition_date ='${partition_date}'
""".stripMargin
)
decive_id_oldUser.show()
decive_id_oldUser.createOrReplaceTempView("device_id_old")
val decive_id_newUser = sc.sql(
s"""
|select distinct(device_id) as decive_id
|from online.ml_device_day_active_status
|where active_type != '4'
|and partition_date ='${partition_date}'
""".stripMargin
)
decive_id_newUser.show()
decive_id_newUser.createOrReplaceTempView("device_id_newUser")
val strategies = Seq("[1|2]$","[3|4]$","[5|6]$","[7|8]$")
val strategies = Seq("3$","4$","5$","6$","7$","8$","a$","b$","c$","d$","e$","A$","B$","C$","D$")
for (strategy <- strategies){
val clk_count_oldUser = sc.sql(
s"""
|select '${param.date}' as stat_date, count(cid_id) as get_clk_count_old
|from data_feed_click jd inner join device_id_old
|on jd.device_id = device_id_old.decive_id
|where (jd.cid_type = 'diary' or jd.cid_type = 'diary_video')
|and jd.device_id regexp'${strategy}'
|and jd.device_id not in (select device_id from bl_device_list)
|and jd.device_id not in (select device_id from blacklist)
|and jd.stat_date ='${param.date}'
""".stripMargin
)
val imp_count_oldUser = sc.sql(
s"""
|select '${param.date}' as stat_date, count(cid_id) as get_imp_count_old
|from data_feed_exposure je inner join device_id_old
|on je.device_id = device_id_old.decive_id
|where je.cid_type = 'diary'
|and je.device_id regexp'${strategy}'
|and je.device_id not in (select device_id from bl_device_list)
|and je.device_id not in (select device_id from blacklist)
|and je.stat_date ='${param.date}'
""".stripMargin
)
val clk_count_newUser = sc.sql(
val get_data_dura = sc.sql(
s"""
|select '${param.date}' as stat_date, count(cid_id) as get_clk_count_newUser
|from data_feed_click jd inner join device_id_newUser
|on jd.device_id = device_id_newUser.decive_id
|where (jd.cid_type = 'diary' or jd.cid_type = 'diary_video')
|and jd.device_id regexp'${strategy}'
|and jd.device_id not in (select device_id from bl_device_list)
|and jd.device_id not in (select device_id from blacklist)
|and jd.stat_date ='${param.date}'
|select partition_date, sum(params['duration']) as total_dur,count(distinct(cl_id)) as num
|from online.tl_hdfs_maidian_view
|where action="on_app_session_over"
|and cl_id regexp '${strategy}'
|and params['duration']>=0 and params['duration']<86400
|and partition_date >='20180801'
|group by partition_date order by partition_date desc
""".stripMargin
)
val imp_count_newUser = sc.sql(
s"""
|select '${param.date}' as stat_date, count(cid_id) as get_imp_count_newUser
|from data_feed_exposure je inner join device_id_newUser
|on je.device_id = device_id_newUser.decive_id
|where je.cid_type = 'diary'
|and je.device_id regexp'${strategy}'
|and je.device_id not in (select device_id from bl_device_list)
|and je.device_id not in (select device_id from blacklist)
|and je.stat_date ='${param.date}'
""".stripMargin
)
imp_count_newUser.show()
val result = clk_count_oldUser.join(imp_count_oldUser,"stat_date")
.join(clk_count_newUser,"stat_date")
.join(imp_count_newUser,"stat_date")
result.show()
get_data_dura.show()
}
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment