Commit 58ad143d authored by 高雅喆

add esmm data

parent e249d07a
package com.gmei
import java.io.Serializable
import org.apache.spark.sql.{SaveMode, TiContext}
import org.apache.log4j.{Level, Logger}
import scopt.OptionParser
import com.gmei.lib.AbstractParams
/**
  * Spark job that collects the raw inputs for the ESMM data set: diary feed
  * exposures, diary feed clicks and welfare-detail page views reached from a
  * diary detail page, each restricted to the last 14 days and registered as a
  * temporary view (`imp_data`, `clk_data`, `cvr_data`).
  *
  * Run with `--env <name>`; the value is handed to `GmeiConfig.setup`.
  */
object EsmmData {

  // Keep Spark's own logging at WARN so the job output stays readable.
  Logger.getLogger("org.apache.spark").setLevel(Level.WARN)
  // Jetty lives under `org.eclipse.jetty`; the previous logger name
  // ("org.apache.eclipse.jetty.server") matched no real package, so the
  // OFF level never took effect.
  Logger.getLogger("org.eclipse.jetty.server").setLevel(Level.OFF)

  /** Number of days of history pulled from every source table. */
  private val LookbackDays = 14

  /**
    * Command-line parameters.
    *
    * @param env name of the database environment, passed to `GmeiConfig.setup`
    */
  case class Params(env: String = "dev") extends AbstractParams[Params] with Serializable

  /** Parameters used when no command-line option overrides them. */
  val defaultParams: Params = Params()

  /** scopt parser for the command line of this job. */
  val parser: OptionParser[Params] = new OptionParser[Params]("Feed_EDA") {
    head("EsmmData")
    opt[String]("env")
      .text(s"the databases environment you used")
      .action((x, c) => c.copy(env = x))
    // The margin bar of the last line is part of the literal so that
    // `stripMargin` removes it too. A plain (non-interpolated) literal is used
    // because the text contains a trailing backslash, which the `s`
    // interpolator would reject as an invalid escape.
    note(
      """
        |For example, the following command runs this app on a tidb dataset:
        |
        | spark-submit --class com.gmei.EsmmData ./target/scala-2.11/feededa-assembly-0.1.jar \
        | --env """.stripMargin + defaultParams.env
    )
  }

  /**
    * Entry point: parses `args` and runs the job.
    *
    * Exits with status 1 when the arguments cannot be parsed (scopt has
    * already printed the error), so a scheduler does not record a job that
    * never ran as a success.
    */
  def main(args: Array[String]): Unit = {
    parser.parse(args, defaultParams) match {
      case Some(param) => run(param)
      case None        => sys.exit(1)
    }
  }

  /**
    * Builds the query shared by the exposure and click tables: diary rows
    * with `stat_date` strictly after `sinceDate`.
    */
  private def diaryFeedSql(table: String, sinceDate: String): String =
    s"""
       |select stat_date,device_id,city_id as ucity_id,
       | cid_id,diary_service_id
       |from ${table}
       |where cid_type = 'diary'
       |and stat_date >'${sinceDate}'
    """.stripMargin

  /** Runs the three extraction queries for the given parameters. */
  private def run(param: Params): Unit = {
    GmeiConfig.setup(param.env)
    val spark_env = GmeiConfig.getSparkSession()
    // Second element of the tuple is the session used for all SQL below.
    val sc = spark_env._2

    try {
      val ti = new TiContext(sc)
      ti.tidbMapTable(dbName = "eagle", tableName = "src_mimas_prod_api_diary_tags")
      ti.tidbMapTable(dbName = "jerry_prod", tableName = "data_feed_click")
      ti.tidbMapTable(dbName = "jerry_prod", tableName = "data_feed_exposure")

      import sc.implicits._

      // Compute the cutoff once so every query uses the same day even if the
      // job happens to run across midnight.
      val stat_date = GmeiConfig.getMinusNDate(LookbackDays)
      println(stat_date)

      // Impressions: diary cards exposed in the feed.
      val imp_data = sc.sql(diaryFeedSql("data_feed_exposure", stat_date))
      imp_data.show()
      imp_data.createOrReplaceTempView("imp_data")

      // Clicks: diary cards clicked in the feed.
      val clk_data = sc.sql(diaryFeedSql("data_feed_click", stat_date))
      clk_data.show()
      clk_data.createOrReplaceTempView("clk_data")

      // Same cutoff with the dashes removed, for comparison with
      // `partition_date` (parsed below with the 'yyyyMMdd' pattern).
      val stat_date_not = stat_date.replace("-", "")

      // Conversions: welfare-detail page views that came from a diary detail
      // page. `stat_date` is derived from each row's own `partition_date`;
      // previously the constant cutoff date was used, which stamped every row
      // with the same day.
      val cvr_data = sc.sql(
        s"""
           |select
           | from_unixtime(unix_timestamp(partition_date ,'yyyyMMdd'), 'yyyy-MM-dd') as stat_date,
           | cl_id as device_id,city_id as ucity_id,
           | params["referrer_id"] as cid_id,params["business_id"] as diary_service_id
           |from online.tl_hdfs_maidian_view
           |where action='page_view'
           |and partition_date >'${stat_date_not}'
           |and params['page_name'] = 'welfare_detail'
           |and params['referrer'] = 'diary_detail'
        """.stripMargin
      )
      cvr_data.show()
      cvr_data.createOrReplaceTempView("cvr_data")
    } finally {
      // Always release the session, including when a query above fails.
      sc.stop()
    }
  }
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment