Commit 22def35a authored by 王志伟

过滤异常数据

parent 8f5c89a0
......@@ -7,6 +7,9 @@ import org.apache.spark.sql.{SaveMode, TiContext}
import org.apache.log4j.{Level, Logger}
import scopt.OptionParser
import com.gmei.lib.AbstractParams
import org.dmg.pmml.True
import scala.util.parsing.json.JSON
object Search_keywords_count {
......@@ -37,6 +40,16 @@ object Search_keywords_count {
)
}
/**
 * Exception guard: runs `x` through `JSON.parseFull` and reports a failure
 * on stdout instead of letting it propagate to the caller.
 *
 * The parsed value is discarded; the method evaluates to `Unit` whether or
 * not parsing succeeds.
 * NOTE(review): the visible call site maps each record through this method
 * and then counts by the result, so every key there is `()`. Confirm whether
 * the parsed value (or the original string) was meant to be returned.
 *
 * @param x text to hand to `JSON.parseFull`
 */
def catch_error(x: String): Unit = {
  import scala.util.control.NonFatal
  try {
    // Only the "does this blow up?" outcome matters here; the result is dropped.
    JSON.parseFull(x)
    ()
  } catch {
    // The previous handler matched ArithmeticException only, which parsing a
    // string does not raise, so real failures (e.g. a null input) escaped.
    // NonFatal covers ordinary exceptions while still letting fatal JVM
    // errors such as OutOfMemoryError propagate.
    case NonFatal(e) => println(e)
  }
}
def main(args: Array[String]): Unit = {
parser.parse(args, defaultParams).map { param =>
GmeiConfig.setup(param.env)
......@@ -45,6 +58,8 @@ object Search_keywords_count {
val stat_date = GmeiConfig.getMinusNDate(1) //获取昨日日期
//println(param.date)
val partition_date = stat_date.replace("-","")
//搜索关键词提取
val search_keywords = sc.sql(
s"""
......@@ -65,7 +80,7 @@ object Search_keywords_count {
)
search_count.show()
val get_result =search_keywords.rdd.map((_, 1)).reduceByKey(_ + _)
val get_result =search_keywords.rdd.map(x=>catch_error(x.toString())).map((_, 1)).reduceByKey(_ + _)
.sortBy(_._2,false)
val temp=get_result.collect()
for (i <- 0 until temp.length ) {
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment