package com.gmei

import java.io.Serializable

import com.gmei.WeafareStat.{defaultParams, parser}
import org.apache.spark.sql.{SaveMode, TiContext}
import org.apache.log4j.{Level, Logger}
import scopt.OptionParser
import com.gmei.lib.AbstractParams
import com.gmei.GmeiConfig.{writeToJDBCTable, getMinusNDate}

/**
  * Ad-hoc exploratory job: for each community-exposure event source, prints the
  * daily diary exposure PV (count of `cl_id`) since 2018-11-20 from
  * `online.ml_community_exposure_detail`.
  *
  * NOTE(review): the local `defaultParams`/`parser` below shadow the ones
  * imported from `WeafareStat` — confirm that import is still needed.
  */
object testt {

  // Quiet Spark / Jetty logging so the printed query results are readable.
  Logger.getLogger("org.apache.spark").setLevel(Level.WARN)
  Logger.getLogger("org.apache.eclipse.jetty.server").setLevel(Level.OFF)

  /**
    * Command-line parameters.
    *
    * @param env database environment to run against (e.g. "dev"); consumed by
    *            [[GmeiConfig.setup]]
    */
  case class Params(env: String = "dev") extends AbstractParams[Params] with Serializable

  val defaultParams = Params()

  // Command-line parser. NOTE(review): program name is "Feed_EDA" but the head
  // line says "WeafareStat" — presumably copied from another job; verify.
  val parser = new OptionParser[Params]("Feed_EDA") {
    head("WeafareStat")
    opt[String]("env")
      .text(s"the databases environment you used")
      .action((x, c) => c.copy(env = x))
    note(
      """
        |For example, the following command runs this app on a tidb dataset:
        |
        |  spark-submit --class com.gmei.WeafareStat ./target/scala-2.11/feededa-assembly-0.1.jar \
      """.stripMargin + s"|  --env ${defaultParams.env}"
    )
  }

  def main(args: Array[String]): Unit = {
    // `foreach` (not `map`): we run purely for side effects and discard the
    // result; if parsing fails, scopt already printed usage and we do nothing.
    parser.parse(args, defaultParams).foreach { param =>
      GmeiConfig.setup(param.env)
      val spark_env = GmeiConfig.getSparkSession()
      val sc = spark_env._2

      // Map the TiDB tables we may query into Spark's catalog.
      val ti = new TiContext(sc)
      ti.tidbMapTable(dbName = "jerry_prod", tableName = "diary_video")
      ti.tidbMapTable(dbName = "jerry_prod", tableName = "data_feed_click")
      ti.tidbMapTable(dbName = "jerry_prod", tableName = "blacklist")
      ti.tidbMapTable(dbName = "jerry_prod", tableName = "data_feed_exposure")

      // Agency (institution) device ids — kept for reference, currently unused.
      //      val agency_id = sc.sql(
      //        s"""
      //           |SELECT DISTINCT(cl_id) as device_id
      //           |FROM online.ml_hospital_spam_pv_day
      //           |WHERE partition_date >= '20180402'
      //           |AND partition_date <= '20181203'
      //           |AND pv_ratio >= 0.95
      //           |UNION ALL
      //           |SELECT DISTINCT(cl_id) as device_id
      //           |FROM online.ml_hospital_spam_pv_month
      //           |WHERE partition_date >= '20171101'
      //           |AND partition_date <= '20181203'
      //           |AND pv_ratio >= 0.95
      //         """.stripMargin
      //      )
      //      agency_id.show()
      //      agency_id.createOrReplaceTempView("agency_id")

      // Exposure event sources to report on, one query per source.
      val sources = Array(
        "answer_detail", "article_detail", "diary_detail", "home_page",
        "question_detail", "search_content", "search_diary"
      )

      // Iterate elements directly instead of indexing with `0 until length`.
      for (source <- sources) {
        val diary_pv = sc.sql(
          s"""
             |select partition_date,count(cl_id)
             |from online.ml_community_exposure_detail
             |where business_type ='diary'
             |and partition_date >='20181120'
             |and event='${source}'
             |GROUP BY partition_date
             |order by partition_date
           """.stripMargin
        )
        // "Data for this source:" (user-facing string kept as-is)
        println("该来源的数据为:" + source)
        diary_pv.show()
      }

      // Alternate PV query over click events — kept for reference.
      //      val diary_pv = sc.sql(
      //        s"""
      //           |select partition_date,count(cl_id)
      //           |from online.tl_hdfs_maidian_view
      //           |where action ='on_click_diary_card'
      //           |and params["page_name"]="home"
      //           |and params["tab_name"]="精选"
      //           |and partition_date >='20181120'
      //           |GROUP BY partition_date
      //           |order by partition_date
      //         """.stripMargin
      //      )
      //      diary_pv.show()

      //      GmeiConfig.writeToJDBCTable(view_count, "avg", SaveMode.Overwrite)
    }
  }
}