Commit 02fc3652 authored by 张彦钊's avatar 张彦钊

Merge branch 'master' of git.wanmeizhensuo.com:ML/ffm-baseline

add app list
parents 84f2c70e 4aa034f8
......@@ -433,7 +433,7 @@ object EsmmPredData {
}
object GetPortrait {
object GetDiaryPortrait {
Logger.getLogger("org.apache.spark").setLevel(Level.WARN)
Logger.getLogger("org.apache.eclipse.jetty.server").setLevel(Level.OFF)
......@@ -496,6 +496,80 @@ object GetPortrait {
GmeiConfig.writeToJDBCTable(diary_tag,"diary_feat",SaveMode.Overwrite)
sc.stop()
}
}
}
object GetDevicePortrait {
// Set the log levels for framework loggers as part of this object's initialisation.
// NOTE(review): "org.apache.eclipse.jetty.server" does not look like a real Jetty package
// (Jetty classes live under org.eclipse.jetty) — confirm the intended logger name.
Seq(
  "org.apache.spark" -> Level.WARN,
  "org.apache.eclipse.jetty.server" -> Level.OFF
).foreach { case (loggerName, level) =>
  Logger.getLogger(loggerName).setLevel(level)
}
/**
 * Command-line options accepted by this job.
 *
 * @param env  database environment name; passed to `GmeiConfig.setup` in `main`. Defaults to "dev".
 * @param date date string for the run. Defaults to `GmeiConfig.getMinusNDate(1)`.
 */
case class Params(
  env: String = "dev",
  date: String = GmeiConfig.getMinusNDate(1)
) extends AbstractParams[Params]
  with Serializable

// All-defaults instance: used as the parser's starting value and in the usage example text.
val defaultParams = Params()
/**
 * Command-line parser that fills a [[Params]] from the program arguments.
 *
 * Recognised options:
 *  - `--env`  the database environment to use
 *  - `--date` the date to use
 */
val parser = new OptionParser[Params]("Feed_EDA") {
  head("EsmmData")

  opt[String]("env")
    .text("the databases environment you used")
    .action((value, params) => params.copy(env = value))

  opt[String]("date")
    .text("the date you used")
    .action((value, params) => params.copy(date = value))

  // `.stripMargin` applies only to the triple-quoted literal, so the appended `--env` line
  // must not carry a margin character of its own (it used to be printed as a literal '|').
  // The literal stays non-interpolated so the trailing backslash is kept verbatim.
  // NOTE(review): the example still names com.gmei.EsmmData — confirm which class to submit.
  note(
    """
      |For example, the following command runs this app on a tidb dataset:
      |
      | spark-submit --class com.gmei.EsmmData ./target/scala-2.11/feededa-assembly-0.1.jar \
      |""".stripMargin +
      s" --env ${defaultParams.env}"
  )
}
def main(args: Array[String]): Unit = {
parser.parse(args, defaultParams).map { param =>
GmeiConfig.setup(param.env)
val spark_env = GmeiConfig.getSparkSession()
val sc = spark_env._2
val ti = new TiContext(sc)
ti.tidbMapTable(dbName = "jerry_prod",tableName = "data_feed_click")
ti.tidbMapTable(dbName = "jerry_prod",tableName = "diary_feat")
val stat_date = param.date.replace("-","")
// For each event row in online.tl_hdfs_maidian_view, look up the diary it refers to
// (params['diary_id'], falling back to params['business_id'], then 0) in diary_feat and keep
// the first comma-separated entry of that diary's level1_ids.
//
// Fix: the business_id filter previously read `b.params[...]`, but subquery `b` only projects
// device_id and stat_date, so that column could not be resolved. The params map belongs to `a`.
//
// NOTE(review): `b` is LEFT-joined and none of its columns is filtered on, so it does not
// restrict rows to each device's latest search-click day — confirm whether an inner join
// (or a `b.device_id is not null` filter) was intended.
val device_search_tag = sc.sql(
  s"""
     |select a.cl_id as device_id,COALESCE(a.params['diary_id'], a.params['business_id'], 0) as cid_id,split(c.level1_ids,',')[0] as level1_id
     |from online.tl_hdfs_maidian_view as a
     |left join
     | (select cl_id as device_id,max(partition_date) as stat_date
     | from online.tl_hdfs_maidian_view
     | where action = 'on_click_diary_card'
     | and (params['page_name']='search_result_diary' or params['page_name']='search_result_more')
     | and partition_date > '20180601' group by cl_id) b
     |on a.cl_id = b.device_id and a.partition_date=b.stat_date
     |left join diary_feat c
     |on COALESCE(a.params['diary_id'], a.params['business_id'], 0) = c.diary_id
     |where a.partition_date > '20180601'
     |and a.params['business_id'] != 0
     |and c.level1_ids != ""
     |""".stripMargin
)
device_search_tag.show()
println(device_search_tag.count())
// GmeiConfig.writeToJDBCTable(diary_tag,"diary_feat",SaveMode.Overwrite)
sc.stop()
}
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment