Commit 344c26bf authored by 张彦钊

Merge branch 'master' of git.wanmeizhensuo.com:ML/ffm-baseline

修改测试文件
parents 10ae88e2 d79b8092
package com.gmei
import java.io.Serializable
import com.gmei.WeafareStat.{defaultParams, parser}
import org.apache.spark.sql.{SaveMode, TiContext}
import org.apache.log4j.{Level, Logger}
import scopt.OptionParser
import com.gmei.lib.AbstractParams
/**
 * Spark job that reports search-keyword statistics for a single day.
 *
 * It runs two queries against `online.tl_hdfs_maidian_view`, restricted to the
 * `search_result_click_search` action and one `partition_date`:
 *
 *  - the total number of searches, shown with `DataFrame.show()`;
 *  - the number of occurrences of every search keyword, printed to stdout in
 *    descending order of frequency.
 */
object Search_keywords_count {

  // Silence the noisy framework loggers so the job output stays readable.
  Logger.getLogger("org.apache.spark").setLevel(Level.WARN)
  Logger.getLogger("org.apache.eclipse.jetty.server").setLevel(Level.OFF)

  /**
   * Command-line parameters of the job.
   *
   * @param env  database environment passed to `GmeiConfig.setup`
   * @param date date supplied on the command line; NOTE(review): it is parsed
   *             but not used by `main`, which always reports on yesterday
   */
  case class Params(env: String = "dev",
                    date: String = "2018-08-01"
                   ) extends AbstractParams[Params] with Serializable

  val defaultParams = Params()

  val parser = new OptionParser[Params]("Search_keywords_count") {
    head("Search_keywords_count")
    opt[String]("env")
      .text("the databases environment you used")
      .action((x, c) => c.copy(env = x))
    opt[String]("date")
      .text("the date you used")
      .action((x, c) => c.copy(date = x))
    // stripMargin must be applied to the whole concatenated text; otherwise the
    // margin character of the last line is printed literally. The first part is
    // deliberately NOT an s-interpolated string because it contains a trailing
    // backslash, which the s-interpolator would treat as an escape sequence.
    note(
      ("""
         |For example, the following command runs this app on a tidb dataset:
         |
         | spark-submit --class com.gmei.Search_keywords_count ./target/scala-2.11/feededa-assembly-0.1.jar \
         """ + s"| --env ${defaultParams.env}").stripMargin
    )
  }

  /**
   * Entry point: parses the arguments and runs the report.
   *
   * Exits with status 1 when the arguments cannot be parsed (scopt has already
   * printed the error and usage text at that point).
   *
   * @param args raw command-line arguments
   */
  def main(args: Array[String]): Unit = {
    parser.parse(args, defaultParams) match {
      case Some(param) =>
        GmeiConfig.setup(param.env)
        val sparkEnv = GmeiConfig.getSparkSession()
        val spark = sparkEnv._2

        // Yesterday's date; the partition key is the same date without dashes.
        val statDate = GmeiConfig.getMinusNDate(1)
        val partitionDate = statDate.replace("-", "")

        // Extract the search keywords.
        val searchKeywords = spark.sql(
          s"""
             |select params['query'] as search_keywords
             |from online.tl_hdfs_maidian_view
             |where action = 'search_result_click_search'
             |and partition_date ='${partitionDate}'
          """.stripMargin
        )

        // Count the total number of searches.
        val searchCount = spark.sql(
          s"""
             |select '${statDate}' as stat_date,count(params['query']) as search_num
             |from online.tl_hdfs_maidian_view
             |where action = 'search_result_click_search'
             |and partition_date ='${partitionDate}'
          """.stripMargin
        )
        searchCount.show()

        // Occurrences per keyword, most frequent first.
        val keywordCounts = searchKeywords.rdd
          .map((_, 1))
          .reduceByKey(_ + _)
          .sortBy(_._2, ascending = false)

        // NOTE: collect() brings every distinct keyword to the driver.
        keywordCounts.collect().foreach(println)

      case None =>
        // Invalid arguments: report failure to the caller instead of exiting 0.
        sys.exit(1)
    }
  }
}
\ No newline at end of file
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment