Commit 58644fb0 authored by 高雅喆

add GetPortrait

parent 651f26a2
...@@ -226,7 +226,6 @@ object EsmmData { ...@@ -226,7 +226,6 @@ object EsmmData {
object EsmmPredData { object EsmmPredData {
Logger.getLogger("org.apache.spark").setLevel(Level.WARN) Logger.getLogger("org.apache.spark").setLevel(Level.WARN)
...@@ -425,6 +424,77 @@ object EsmmPredData { ...@@ -425,6 +424,77 @@ object EsmmPredData {
sc.stop()
}
}
}
object GetPortrait {
// Logger levels applied when this object is initialised: Spark logging is capped at WARN
// and the second logger is switched off entirely.
// NOTE(review): Jetty's own packages live under "org.eclipse.jetty"; confirm that the
// "org.apache.eclipse.jetty.server" name below really matches a logger in this runtime.
Seq(
  "org.apache.spark" -> Level.WARN,
  "org.apache.eclipse.jetty.server" -> Level.OFF
).foreach { case (loggerName, level) =>
  Logger.getLogger(loggerName).setLevel(level)
}
/**
 * Command-line options accepted by this job.
 *
 * @param env  value of the `--env` option; `main` hands it to `GmeiConfig.setup`
 * @param date value of the `--date` option; defaults to `GmeiConfig.getMinusNDate(1)`
 */
case class Params(
  env: String = "dev",
  date: String = GmeiConfig.getMinusNDate(1)
) extends AbstractParams[Params] with Serializable

/** Baseline options the parser starts from before applying command-line overrides. */
val defaultParams: Params = Params()
/**
 * Command-line parser for this job.
 *
 * Recognised options:
 *  - `--env`  copied into `Params.env`
 *  - `--date` copied into `Params.date`
 *
 * The header and the usage example name this object (`GetPortrait`); they previously
 * pointed at `EsmmData`, so the printed help told users to launch a different job.
 */
val parser = new OptionParser[Params]("Feed_EDA") {
  head("GetPortrait")
  opt[String]("env")
    .text("the databases environment you used")
    .action((x, c) => c.copy(env = x))
  opt[String]("date")
    .text("the date you used")
    .action((x, c) => c.copy(date = x))
  note(
    // The `--env` line is concatenated BEFORE stripMargin so its margin bar is removed
    // too (it used to be appended afterwards, leaving a literal '|' in the help text).
    // The first literal is deliberately not an s-interpolated string: it contains a raw
    // backslash, which the s interpolator would try to process as an escape sequence.
    ("""
      |For example, the following command runs this app on a tidb dataset:
      |
      | spark-submit --class com.gmei.GetPortrait ./target/scala-2.11/feededa-assembly-0.1.jar \
      |""" + s" --env ${defaultParams.env}").stripMargin
  )
}
def main(args: Array[String]): Unit = {
parser.parse(args, defaultParams).map { param =>
// Apply the --env option before anything else is created.
GmeiConfig.setup(param.env)
// Only the `_2` component of what getSparkSession() returns is needed below.
val sc = GmeiConfig.getSparkSession()._2
val ti = new TiContext(sc)
// NOTE(review): data_feed_click is mapped here, but the query that follows only reads
// tl_hdfs_diary_tags_view and bl_tag_hierarchy_detail; confirm this mapping is required.
ti.tidbMapTable(dbName = "jerry_prod", tableName = "data_feed_click")
// Builds one row per diary_id with three comma-separated id lists (level1/level2/level3).
//
// Inner query  c: joins each diary tag to its hierarchy row for the fixed partition date.
// Middle query d: one row per (diary_id, tag_type) with the ids of each level concatenated.
// Outer query   : folds the per-tag_type rows into a single row per diary, keeping the
//                 level-N list only from the row whose tag_type is N.
//
// Fixes relative to the previous statement:
//  - the select list of `d` was missing the commas after `level1_ids` and `level2_ids`;
//  - the outer select grouped by diary_id but projected non-aggregated CASE expressions,
//    which Spark rejects; they are now wrapped in max() ("" sorts below any non-empty list);
//  - group_concat is not a Spark SQL built-in, so the lists are built with
//    concat_ws(',', collect_list(...)); the cast keeps concat_ws happy for numeric ids.
//
// NOTE(review): the partition date is hard-coded to '20181218' while `param.date` is parsed
// but unused here; confirm the expected date format before wiring it in.
// NOTE(review): filtering on b.partition_date in the WHERE clause drops unmatched rows of
// the left join; confirm that is intended.
val diary_tag = sc.sql(
  """
    |select d.diary_id,
    |       max(case when d.tag_type = '1' then d.level1_ids else "" end) as level1_ids,
    |       max(case when d.tag_type = '2' then d.level2_ids else "" end) as level2_ids,
    |       max(case when d.tag_type = '3' then d.level3_ids else "" end) as level3_ids
    |from
    |  (select c.diary_id, c.tag_type,
    |          concat_ws(',', collect_list(cast(c.level1_id as string))) as level1_ids,
    |          concat_ws(',', collect_list(cast(c.level2_id as string))) as level2_ids,
    |          concat_ws(',', collect_list(cast(c.level3_id as string))) as level3_ids
    |   from
    |     (select a.diary_id, a.tag_id, b.tag_type, b.level1_id, b.level2_id, b.level3_id
    |      from tl_hdfs_diary_tags_view a
    |      left join bl_tag_hierarchy_detail b
    |        on a.tag_id = b.id
    |      where a.partition_date = '20181218'
    |        and b.partition_date = '20181218') c
    |   group by c.diary_id, c.tag_type) d
    |group by d.diary_id
  """.stripMargin
)
// Print the first rows to stdout as a quick sanity check.
diary_tag.show()
sc.stop() sc.stop()
} }
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment