package com.gmei

import java.io.Serializable

import breeze.linalg.split
import com.gmei.WeafareStat.{defaultParams, parser}
import org.apache.spark.sql.{Row, SaveMode}
import org.apache.log4j.{Level, Logger}
import scopt.OptionParser
import com.gmei.lib.AbstractParams
import org.apache.spark
import org.apache.spark.sql.expressions.Window
import org.apache.spark.sql.functions.row_number
import org.apache.spark.sql.types.{DoubleType, StructField, StructType}


/**
  * Spark batch job that assembles precise home-feed exposure records and
  * appends them to the `data_feed_exposure_precise` table over JDBC.
  *
  * Steps (each intermediate result is registered as a temp view for the next):
  *   1. read the exposures of the "精选" tab on the `home` page for the stat date
  *      from `online.ml_community_precise_exposure_detail`;
  *   2. drop an explicit list of channels and every device listed in the
  *      hospital spam tables;
  *   3. left-join, one content type at a time, the per-content statistics of
  *      diaries, answers, articles, questions and lives;
  *   4. append the final rows to the target table.
  */
object data_feed_exposure_precise {

  Logger.getLogger("org.apache.spark").setLevel(Level.WARN)
  // Jetty's classes live under "org.eclipse.jetty"; the previous logger name
  // ("org.apache.eclipse.jetty.server") matched nothing, so it silenced nothing.
  Logger.getLogger("org.eclipse.jetty.server").setLevel(Level.OFF)

  // NOTE(review): database credentials are embedded in source control. They are
  // kept here only to preserve behaviour; move them to external configuration.
  private val jdbcUrl =
    "jdbc:mysql://172.16.40.158:4000/jerry_prod?user=root&password=3SYz54LS9#^9sBvC&rewriteBatchedStatements=true"

  /**
    * Command-line parameters.
    *
    * @param env  name of the database environment handed to `GmeiConfig.setup`
    * @param date accepted on the command line but currently not read by the job
    */
  case class Params(env: String = "dev",
                    date: String = "2018-08-01"
                   ) extends AbstractParams[Params] with Serializable

  val defaultParams = Params()

  val parser = new OptionParser[Params]("Feed_EDA") {
    head("data_feed_exposure_precise")
    opt[String]("env")
      .text(s"the databases environment you used")
      .action((x, c) => c.copy(env = x))
    opt[String]("date")
      .text(s"the date you used")
      .action((x, c) => c.copy(date = x))
    // The whole example goes through stripMargin before the interpolated tail
    // is appended, so no literal '|' leaks into the printed usage text.
    note(
      """
        |For example, the following command runs this app on a tidb dataset:
        |
        | spark-submit --class com.gmei.data_feed_exposure_precise ./target/scala-2.11/feededa-assembly-0.1.jar \
        |""".stripMargin +
        s"   --env ${defaultParams.env}"
    )
  }

  /**
    * Entry point: parses the arguments and runs the job.
    *
    * @param args raw command-line arguments (`--env`, `--date`)
    */
  def main(args: Array[String]): Unit = {
    parser.parse(args, defaultParams) match {
      case Some(param) => run(param)
      // scopt has already printed the error and usage text; exit non-zero so a
      // scheduler does not record a run with bad arguments as a success.
      case None => sys.exit(1)
    }
  }

  /** Executes the whole pipeline for the parsed parameters. */
  private def run(param: Params): Unit = {
    GmeiConfig.setup(param.env)
    // The second element of the returned pair is the handle used for SQL.
    val spark = GmeiConfig.getSparkSession()._2

    spark.sql("use jerry_prod")

    // The job always processes the previous day; `param.date` is not used.
    // To backfill a specific day, replace this with `param.date`.
    val stat_date = GmeiConfig.getMinusNDate(1)
    // Partitions are addressed without dashes (used below with the 'yyyyMMdd' pattern).
    val partition_date = stat_date.replace("-", "")

    // Step 1: raw exposures of the selected content types, with a "type|id" key.
    val exposures = spark.sql(
      s"""
         |select
         |    from_unixtime(unix_timestamp('${partition_date}' ,'yyyyMMdd'), 'yyyy-MM-dd') as stat_date,
         |    time_stamp as time,
         |    cl_id as device_id,
         |    channel as device_type,
         |    card_content_type  as card_content_type ,
         |    card_id as business_id,
         |    if(card_content_type="diary", concat("diary|", card_id),
         |    if(card_content_type="live", concat("live|", card_id),
         |    if(card_content_type="question", concat("question|", card_id),
         |    if(card_content_type="answer", concat("answer|", card_id),
         |    if(card_content_type="article", concat("article|", card_id), null
         |    ))))) as cid,
         |    city_id as city_id
         |from online.ml_community_precise_exposure_detail
         |where card_content_type in ("article", "diary", "live", "answer", "question")
         |    and page_name='home'
         |    and tab_name="精选"
         |    and cl_id != "NULL"
         |    and partition_date='${partition_date}'
       """.stripMargin
    )
    // Preview for the driver log; note that this triggers a Spark job of its own.
    exposures.show()
    exposures.createOrReplaceTempView("temp_result")

    // Step 2: remove listed channels and spam devices.
    // NOTE(review): `NOT IN (subquery)` yields no rows at all if the subquery
    // returns a NULL cl_id — confirm the spam tables never contain NULL there.
    spark.sql(
      s"""
         |select * from temp_result
         |where device_type not in ('yqxiu1','yqxiu2','yqxiu3','yqxiu4','yqxiu5','mxyc1','mxyc2','mxyc3'
         |    ,'wanpu','jinshan','jx','maimai','zhuoyi','huatian','suopingjingling','mocha','mizhe','meika','lamabang'
         |    ,'js-az1','js-az2','js-az3','js-az4','js-az5','jfq-az1','jfq-az2','jfq-az3','jfq-az4','jfq-az5','toufang1'
         |    ,'toufang2','toufang3','toufang4','toufang5','toufang6','TF-toufang1','TF-toufang2','TF-toufang3','TF-toufang4'
         |    ,'TF-toufang5','tf-toufang1','tf-toufang2','tf-toufang3','tf-toufang4','tf-toufang5','benzhan','promotion_aso100'
         |    ,'promotion_qianka','promotion_xiaoyu','promotion_dianru','promotion_malioaso','promotion_malioaso-shequ'
         |    ,'promotion_shike','promotion_julang_jl03','','unknown','promotion_zuimei')
         |and device_id not in
         |       (SELECT cl_id
         |        FROM online.ml_hospital_spam_pv_day
         |        WHERE partition_date>='20180402' AND partition_date<'${partition_date}'
         |              AND pv_ratio>=0.95
         |        UNION ALL
         |        SELECT cl_id
         |        FROM online.ml_hospital_spam_pv_month
         |        WHERE partition_date>='20171101' AND partition_date<'${partition_date}'
         |            AND pv_ratio>=0.95
         |        )
       """.stripMargin
    ).createOrReplaceTempView("temp_result02")

    // Step 3a: attach diary statistics (only rows whose content type is "diary" match).
    spark.sql(
      s"""
         |select
         |    re.stat_date as stat_date,
         |    re.time as time,
         |    re.device_id as device_id,
         |    re.device_type as device_type,
         |    re.card_content_type as cid_type,
         |    re.business_id as cid_id,
         |    re.cid as cid,
         |    re.city_id as city_id,
         |    da.content_level as diary_content_level,
         |    da.created_time as diary_created_time,
         |    da.updated_time as diary_updated_time,
         |    da.service_id as diary_service_id,
         |    da.doctor_id as diary_doctor_id,
         |    da.new_topics as diary_new_topics,
         |    da.new_votes as diary_new_votes,
         |    da.new_topic_votes as diary_new_topic_votes,
         |    da.new_replies as diary_new_replies,
         |    da.new_topic_replies as diary_new_topic_replies,
         |    da.new_favor as diary_new_favor,
         |    da.show_count_choice as diary_show_count_choice,
         |    da.show_count as diary_show_count,
         |    da.click_count_choice as diary_click_count_choice,
         |    da.page_view as diary_page_view,
         |    da.user_view as diary_user_view,
         |    da.device_view as diary_device_view
         |from temp_result02 re
         |left outer join
         |(
         |    select
         |        diary_id,
         |        content_level,
         |        unix_timestamp(created_time) as created_time,
         |        unix_timestamp(last_topic_add_time) as updated_time,
         |        service_id as service_id,
         |        doctor_id as doctor_id,
         |        new_topics as new_topics,
         |        new_votes as new_votes,
         |        new_topic_votes as new_topic_votes,
         |        new_replies as new_replies,
         |        new_topic_replies as new_topic_replies,
         |        new_favor as new_favor,
         |        show_count_choice as show_count_choice,
         |        show_count as show_count,
         |        click_count_choice as click_count_choice,
         |        page_view as page_view,
         |        user_view as user_view,
         |        device_view
         |    from online.ml_community_diary_updates
         |    where partition_date='${partition_date}'
         |)da
         |on re.business_id = da.diary_id and re.card_content_type = "diary"
       """.stripMargin
    ).createOrReplaceTempView("result_dairy")

    // Step 3b: attach answer statistics; shared columns fall back to the answer
    // value only when the diary value is null.
    spark.sql(
      s"""
         |select
         |    rd.stat_date as stat_date,
         |    rd.time as time,
         |    rd.device_id as device_id,
         |    rd.device_type as device_type,
         |    rd.cid_type as cid_type,
         |    rd.cid_id as cid_id,
         |    rd.cid as cid,
         |    rd.city_id as city_id,
         |    an.is_recommend as is_recommend,
         |    if(rd.diary_new_votes is Null, an.new_votes, rd.diary_new_votes) as new_votes,
         |    if(rd.diary_new_replies is Null, an.reply_num, rd.diary_new_replies) as reply_num,
         |    if(rd.diary_content_level is Null, an.content_level, rd.diary_content_level) as content_level,
         |    if(rd.diary_created_time is Null, an.created_time, rd.diary_created_time) as created_time,
         |    rd.diary_updated_time as diary_updated_time,
         |    rd.diary_service_id as diary_service_id,
         |    rd.diary_doctor_id as diary_doctor_id,
         |    rd.diary_new_topics as diary_new_topics,
         |    rd.diary_new_topic_votes as diary_new_topic_votes,
         |    rd.diary_new_replies as diary_new_replies,
         |    rd.diary_new_topic_replies as diary_new_topic_replies,
         |    rd.diary_new_favor as diary_new_favor,
         |
         |    an.reply_vote_num as answer_reply_vote_num,
         |
         |    if(rd.diary_show_count_choice is Null, an.show_count_choice, rd.diary_show_count_choice ) as show_count_choice,
         |    if(rd.diary_show_count is Null, an.show_count, rd.diary_show_count ) as show_count,
         |    if(rd.diary_click_count_choice is Null, an.click_count_choice, rd.diary_click_count_choice) as click_count_choice,
         |    if(rd.diary_page_view is Null, an.page_view, rd.diary_page_view ) as page_view,
         |    if(rd.diary_user_view is Null, an.user_view, rd.diary_user_view ) as user_view,
         |    if(rd.diary_device_view is Null, an.device_view, rd.diary_device_view) as device_view
         |
         |from result_dairy rd
         |left outer join
         |(
         |    select
         |        answer_id,
         |        is_recommend,
         |        unix_timestamp(created_time) as created_time,
         |        content_level as content_level,
         |        vote_num as new_votes,
         |        reply_vote_num as reply_vote_num,
         |        reply_num as reply_num,
         |        show_count_choice as show_count_choice,
         |        show_count as show_count,
         |        click_count_choice as click_count_choice,
         |        page_view as page_view,
         |        user_view as user_view,
         |        device_view as device_view
         |    from online.ml_community_answer_updates
         |    where partition_date='${partition_date}'
         |)an
         |on rd.cid_id = an.answer_id and rd.cid_type = "answer"
       """.stripMargin
    ).createOrReplaceTempView("result_answer")

    // Step 3c: attach article statistics with the same null-fallback rule.
    spark.sql(
      s"""
         |select
         |    ra.stat_date as stat_date,
         |    ra.time as time,
         |    ra.device_id as device_id,
         |    ra.device_type as device_type,
         |    ra.cid_type as cid_type,
         |    ra.cid_id as cid_id,
         |    ra.cid as cid,
         |    ra.city_id as city_id,
         |    if(ra.is_recommend is Null, ar.is_push, ra.is_recommend) as is_recommend,
         |    ar.article_type as article_type,
         |    if(ra.new_votes is Null, ar.vote_num, ra.new_votes) as new_votes,
         |    if(ra.reply_num is Null, ar.reply_num, ra.reply_num) as reply_num,
         |    ra.content_level as content_level,
         |    if(ra.created_time is Null, ar.created_time, ra.created_time) as created_time,
         |    ra.diary_updated_time as diary_updated_time,
         |    ra.diary_service_id as diary_service_id,
         |    ra.diary_doctor_id as diary_doctor_id,
         |    ra.diary_new_topics as diary_new_topics,
         |    ra.diary_new_replies as diary_new_replies,
         |    ra.diary_new_topic_votes as diary_new_topic_votes,
         |    ra.diary_new_topic_replies as diary_new_topic_replies,
         |    ra.diary_new_favor as diary_new_favor,
         |
         |    if(ra.answer_reply_vote_num is Null, ar.reply_vote_num, ra.answer_reply_vote_num) as reply_vote_num,
         |
         |    if(ra.show_count_choice is Null, ar.show_count_choice, ra.show_count_choice ) as show_count_choice,
         |    if(ra.show_count is Null, ar.show_count, ra.show_count ) as show_count,
         |    if(ra.click_count_choice is Null, ar.click_count_choice, ra.click_count_choice) as click_count_choice,
         |    if(ra.page_view is Null, ar.page_view, ra.page_view ) as page_view,
         |    if(ra.user_view is Null, ar.user_view, ra.user_view ) as user_view,
         |    if(ra.device_view is Null, ar.device_view, ra.device_view) as device_view
         |from result_answer ra
         |left outer join
         |(
         |    select
         |        article_id,
         |        unix_timestamp(created_time) as created_time,
         |        article_type as article_type,
         |        is_push,
         |        vote_num,
         |        reply_vote_num,
         |        reply_num,
         |        show_count_choice,
         |        show_count,
         |        click_count_choice,
         |        page_view,
         |        user_view,
         |        device_view
         |    from online.ml_community_article_updates
         |    where partition_date='${partition_date}'
         |)ar
         |on ra.cid_id = ar.article_id and ra.cid_type="article"
       """.stripMargin
    ).createOrReplaceTempView("result_article")

    // Step 3d: attach question statistics with the same null-fallback rule.
    spark.sql(
      s"""
         |select
         |    ra.stat_date as stat_date,
         |    ra.time as time,
         |    ra.device_id as device_id,
         |    ra.device_type as device_type,
         |    ra.cid_type as cid_type,
         |    ra.cid_id as cid_id,
         |    ra.cid as cid,
         |    ra.city_id as city_id,
         |    if(ra.is_recommend is Null, qu.is_recommend, ra.is_recommend) as is_recommend,
         |    ra.article_type as article_type,
         |    if(ra.new_votes is Null, qu.vote_num, ra.new_votes) as new_votes,
         |    if(ra.reply_num is Null, qu.reply_num, ra.reply_num) as reply_num,
         |    ra.content_level as content_level,
         |    if(ra.created_time is Null, qu.created_time, ra.created_time) as created_time,
         |    ra.diary_updated_time as diary_updated_time,
         |    ra.diary_service_id as diary_service_id,
         |    ra.diary_doctor_id as diary_doctor_id,
         |    ra.diary_new_topics as diary_new_topics,
         |    ra.diary_new_replies as diary_new_replies,
         |    ra.diary_new_topic_votes as diary_new_topic_votes,
         |    ra.diary_new_topic_replies as diary_new_topic_replies,
         |    ra.diary_new_favor as diary_new_favor,
         |
         |    ra.reply_vote_num as reply_vote_num,
         |    qu.answer_reply_num as question_answer_reply_num,
         |
         |    if(ra.show_count_choice is Null, qu.show_count_choice, ra.show_count_choice ) as show_count_choice,
         |    if(ra.show_count is Null, qu.show_count, ra.show_count ) as show_count,
         |    if(ra.click_count_choice is Null, qu.click_count_choice, ra.click_count_choice) as click_count_choice,
         |    if(ra.page_view is Null, qu.page_view, ra.page_view ) as page_view,
         |    if(ra.user_view is Null, qu.user_view, ra.user_view ) as user_view,
         |    if(ra.device_view is Null, qu.device_view, ra.device_view) as device_view
         |from result_article ra
         |left outer join
         |(
         |    select
         |        question_id,
         |        unix_timestamp(created_time) as created_time,
         |        is_recommend,
         |        answer_num as reply_num,
         |        vote_num,
         |        reply_num as answer_reply_num,
         |        show_count_choice,
         |        show_count,
         |        click_count_choice,
         |        page_view,
         |        user_view,
         |        device_view
         |    from online.ml_community_question_updates
         |    where partition_date='${partition_date}'
         |)qu
         |on ra.cid_id = qu.question_id and ra.cid_type="question"
       """.stripMargin
    ).createOrReplaceTempView("result_question")

    // Step 3e: attach live-stream statistics; this is the final result set.
    val result = spark.sql(
      s"""
         |select
         |    rq.stat_date as stat_date,
         |    rq.time as time,
         |    rq.device_id as device_id,
         |    rq.device_type as device_type,
         |    rq.cid_type as cid_type,
         |    rq.cid_id as cid_id,
         |    rq.cid as cid,
         |    rq.city_id as city_id,
         |    rq.is_recommend,
         |    rq.article_type,
         |    rq.new_votes,
         |    rq.reply_num,
         |    rq.content_level,
         |    if(rq.created_time is Null, li.created_time, rq.created_time) as created_time,
         |    if(rq.diary_updated_time is Null, li.updated_time, rq.diary_updated_time) as updated_time,
         |    rq.diary_service_id as diary_service_id,
         |    rq.diary_doctor_id as diary_doctor_id,
         |    rq.diary_new_topics as diary_new_topics,
         |    rq.diary_new_replies as diary_new_replies,
         |    rq.diary_new_topic_votes as diary_new_topic_votes,
         |    rq.diary_new_topic_replies as diary_new_topic_replies,
         |    rq.diary_new_favor as diary_new_favor,
         |
         |    rq.reply_vote_num as reply_vote_num,
         |    rq.question_answer_reply_num,
         |
         |    rq.show_count_choice,
         |    if(rq.show_count is Null, li.show_count, rq.show_count ) as show_count,
         |    rq.click_count_choice,
         |    rq.page_view,
         |    rq.user_view,
         |    rq.device_view,
         |
         |    li.fake_max_num as live_fake_max_num,
         |    li.topic_id as live_topic_id,
         |    li.max_view_num as live_max_view_num,
         |    li.is_finish as live_is_finish
         |from result_question rq
         |left outer join
         |(
         |    select
         |        channel_id,
         |        unix_timestamp(created_time) as created_time,
         |        unix_timestamp(updated_time) as updated_time,
         |        pv as show_count,
         |        fake_max_num,
         |        topic_id,
         |        max_view_num,
         |        replay_danmu,
         |        is_finish
         |    from online.ml_community_live_updates
         |    where partition_date='${partition_date}'
         |)li
         |on rq.cid_id=li.channel_id and rq.cid_type="live"
       """.stripMargin
    )

    // Step 4: append to the target table. SaveMode.Append means a re-run for
    // the same day adds the rows again.
    println("写入开始")
    GmeiConfig.writeToJDBCTable(jdbcUrl, result, table = "data_feed_exposure_precise", SaveMode.Append)
    println("写入完成")
  }

}




/**
  * Spark batch job that collects `(stat_date, device_id, tag_id)` rows from
  * the previous day's user activity and appends them to the `icon_train_data`
  * table over JDBC.
  *
  * Three sources are unioned:
  *   - diary card clicks, mapped to the tags of the clicked diary;
  *   - service ("meigou") interactions — detail views, orders, shopping-cart
  *     additions and consultation clicks — mapped to the tags of the service;
  *   - wiki clicks from search results, mapped to the tags of the wiki item.
  */
object icon_train_data {

  Logger.getLogger("org.apache.spark").setLevel(Level.WARN)
  // Jetty's classes live under "org.eclipse.jetty"; the previous logger name
  // ("org.apache.eclipse.jetty.server") matched nothing, so it silenced nothing.
  Logger.getLogger("org.eclipse.jetty.server").setLevel(Level.OFF)

  // NOTE(review): database credentials are embedded in source control. They are
  // kept here only to preserve behaviour; move them to external configuration.
  private val jdbcUrl =
    "jdbc:mysql://172.16.40.158:4000/jerry_prod?user=root&password=3SYz54LS9#^9sBvC&rewriteBatchedStatements=true"

  /**
    * Command-line parameters.
    *
    * @param env  name of the database environment handed to `GmeiConfig.setup`
    * @param date accepted on the command line but currently not read by the job
    */
  case class Params(env: String = "dev",
                    date: String = "2018-08-01"
                   ) extends AbstractParams[Params] with Serializable

  val defaultParams = Params()

  val parser = new OptionParser[Params]("Feed_EDA") {
    head("icon_train_data")
    opt[String]("env")
      .text(s"the databases environment you used")
      .action((x, c) => c.copy(env = x))
    opt[String]("date")
      .text(s"the date you used")
      .action((x, c) => c.copy(date = x))
    // The whole example goes through stripMargin before the interpolated tail
    // is appended, so no literal '|' leaks into the printed usage text.
    note(
      """
        |For example, the following command runs this app on a tidb dataset:
        |
        | spark-submit --class com.gmei.icon_train_data ./target/scala-2.11/feededa-assembly-0.1.jar \
        |""".stripMargin +
        s"   --env ${defaultParams.env}"
    )
  }

  /**
    * Entry point: parses the arguments and runs the job.
    *
    * @param args raw command-line arguments (`--env`, `--date`)
    */
  def main(args: Array[String]): Unit = {
    parser.parse(args, defaultParams) match {
      case Some(param) => run(param)
      // scopt has already printed the error and usage text; exit non-zero so a
      // scheduler does not record a run with bad arguments as a success.
      case None => sys.exit(1)
    }
  }

  /** Executes the whole pipeline for the parsed parameters. */
  private def run(param: Params): Unit = {
    GmeiConfig.setup(param.env)
    // The second element of the returned pair is the handle used for SQL.
    val spark = GmeiConfig.getSparkSession()._2

    spark.sql("use jerry_prod")

    // The job always processes the previous day; `param.date` is not used.
    val stat_date = GmeiConfig.getMinusNDate(1) // yesterday
    val qian_date = GmeiConfig.getMinusNDate(2) // the day before yesterday
    // Partitions are addressed without dashes.
    val partition_date = stat_date.replace("-", "")

    // ---- Diary click data -------------------------------------------------
    val diary_click = spark.sql(
      s"""
         |select cl_id as device_id, params['diary_id'] as diary_id
         |from online.tl_hdfs_maidian_view
         |where action = 'on_click_diary_card'
         |and partition_date='${partition_date}'
         |and params['diary_id'] is not null
       """.stripMargin
    )
    // Preview for the driver log; note that this triggers a Spark job of its own.
    diary_click.show()
    diary_click.createOrReplaceTempView("diary_click")

    spark.sql(
      s"""
         |select * from online.tl_hdfs_diary_tags_view
         |where partition_date = '${partition_date}'
       """.stripMargin
    ).createOrReplaceTempView("diary_tags")

    // NOTE(review): this is a plain left join, so clicks on diaries without a
    // tag survive with a null tag_id (the wiki branch below filters those out).
    // Confirm whether null tag_id rows are wanted in the output table.
    spark.sql(
      s"""
         |select a.*,b.tag_id
         |from diary_click a
         |left join diary_tags b
         |on a.diary_id=b.diary_id
       """.stripMargin
    ).createOrReplaceTempView("diary_tag_detail")

    val temp_diary = spark.sql(
      s"""
         |select device_id,tag_id
         |from diary_tag_detail
       """.stripMargin
    )
    temp_diary.show()
    temp_diary.createOrReplaceTempView("temp_diary")

    // ---- Service ("meigou") interaction data -------------------------------
    // Detail-page views.
    val meigou_click = spark.sql(
      s"""
         |select cl_id as device_id, params['service_id'] as service_id
         |from online.tl_hdfs_maidian_view
         |where action = 'goto_welfare_detail'
         |and partition_date='${partition_date}'
         |and params['service_id'] is not null
       """.stripMargin
    )
    meigou_click.show()
    meigou_click.createOrReplaceTempView("meigou_click")

    // Orders.
    // NOTE(review): validate_time is compared against bare date strings. If it
    // is a timestamp, `<= '${stat_date}'` excludes everything after midnight of
    // the stat date — confirm the column type and the intended window.
    spark.sql(
      s"""
         |SELECT device_id,service_id
         |from online.ml_meigou_order_detail
         |where partition_date='${partition_date}'
         |and device_id is not null
         |and validate_time >'${qian_date}'
         |and validate_time <='${stat_date}'
       """.stripMargin
    ).createOrReplaceTempView("megou_order")

    // Shopping-cart additions made on the stat date, expanded to one row per
    // device of the person who added the service.
    spark.sql(
      s"""
         |SELECT T5_2.device_id AS device_id,
         |       T5_1.service_id AS service_id
         |  FROM (SELECT DISTINCT T.person_id,
         |                        T.service_id AS service_id
         |          FROM online.tl_meigou_shopcart_view T
         |         WHERE T.partition_date = '${partition_date}'
         |           AND from_unixtime(unix_timestamp(to_date(T.added_at), 'yyyy-MM-dd'), 'yyyyMMdd') = '${partition_date}') T5_1 --加入购物车的时间
         |  LEFT JOIN (SELECT T.person_id    AS person_id,
         |                    T5_3.device_id AS device_id
         |               FROM online.ml_user_updates T
         |            LATERAL VIEW explode(T.device_list) T5_3 AS device_id
         |              WHERE T.partition_date = '${partition_date}') T5_2
         |    ON T5_1.person_id = T5_2.person_id
       """.stripMargin
    ).createOrReplaceTempView("meigou_jiache")

    // Consultation clicks (phone / consult / message / contact) on a service page.
    spark.sql(
      s"""
         |SELECT cl_id as device_id,params['service_id'] as service_id
         |from online.tl_hdfs_maidian_view
         |where partition_date='${partition_date}'
         |and (action='welfare_detail_click_phone' or action='welfare_detail_click_consult' or action='welfare_detail_click_message' or action='welfare_detail_click_curearea_contact')
       """.stripMargin
    ).createOrReplaceTempView("zixun_meigou")

    // All four service sources share the (device_id, service_id) column order,
    // which is what the positional `union all` relies on.
    spark.sql(
      s"""
         |SELECT * from meigou_click
         |union all
         |select * from megou_order
         |union all
         |select * from meigou_jiache
         |union all
         |select * from zixun_meigou
       """.stripMargin
    ).createOrReplaceTempView("meigou_all")

    spark.sql(
      s"""
         |select * from online.tl_meigou_servicetag_view
         |where partition_date = '${partition_date}'
       """.stripMargin
    ).createOrReplaceTempView("meigou_tags")

    spark.sql(
      s"""
         |select a.*,b.tag_id
         |from meigou_all a
         |left join meigou_tags b
         |on a.service_id=b.service_id
       """.stripMargin
    ).createOrReplaceTempView("meigou_tag_detail")

    spark.sql(
      s"""
         |select device_id,tag_id
         |from meigou_tag_detail
       """.stripMargin
    ).createOrReplaceTempView("temp_meigou")

    // ---- Wiki click data ----------------------------------------------------
    spark.sql(
      s"""
         |SELECT cl_id as device_id,params['business_id'] as wiki_id
         |from online.tl_hdfs_maidian_view
         |where partition_date='${partition_date}'
         |and (action='Search_result_wiki_click_item' or action='search_result_click_infomation_item' or action='search_result_wiki_click_recommend_wiki')
       """.stripMargin
    ).createOrReplaceTempView("wiki_click")

    spark.sql(
      s"""
         |SELECT item_id,tag_id
         |from online.tl_hdfs_wiki_item_tag_view
         |where partition_date='${partition_date}'
       """.stripMargin
    ).createOrReplaceTempView("wiki_tag")

    spark.sql(
      s"""
         |SELECT a.device_id,b.tag_id
         |from wiki_click a
         |left join wiki_tag b
         |on a.wiki_id=b.item_id
         |where b.tag_id is not null
       """.stripMargin
    ).createOrReplaceTempView("wiki_device_tag")

    // ---- Union of the three sources, stamped with the stat date -------------
    val final_train = spark.sql(
      s"""
         |select '${stat_date}' as stat_date,*
         |from temp_diary
         |union all
         |select '${stat_date}' as stat_date,*
         |from temp_meigou
         |union all
         |select '${stat_date}' as stat_date,*
         |from wiki_device_tag
       """.stripMargin
    )
    final_train.show()

    // SaveMode.Append means a re-run for the same day adds the rows again.
    println("开始写入")
    GmeiConfig.writeToJDBCTable(jdbcUrl, final_train, table = "icon_train_data", SaveMode.Append)
    println("写入完成")
  }

}



/**
  * Spark batch job that
  *   1. assigns a fixed weight to every tag id according to its `tag_type`
  *      (1, 2 or 3) and overwrites the `tagId_value` table with the result;
  *   2. joins the previous day's `jerry_prod.icon_train_data` rows with those
  *      weights and with `jerry_prod.tag_level_index`, and overwrites the
  *      `icon_today_train_data_test` table with `(device_id, action, value)`.
  */
object tag_value {

  Logger.getLogger("org.apache.spark").setLevel(Level.WARN)
  // Jetty's classes live under "org.eclipse.jetty"; the previous logger name
  // ("org.apache.eclipse.jetty.server") matched nothing, so it silenced nothing.
  Logger.getLogger("org.eclipse.jetty.server").setLevel(Level.OFF)

  // NOTE(review): database credentials are embedded in source control. They are
  // kept here only to preserve behaviour; move them to external configuration.
  private val jdbcUrl =
    "jdbc:mysql://172.16.40.158:4000/jerry_prod?user=root&password=3SYz54LS9#^9sBvC&rewriteBatchedStatements=true"

  /**
    * Command-line parameters.
    *
    * @param env  name of the database environment handed to `GmeiConfig.setup`
    * @param date accepted on the command line but currently not read by the job
    */
  case class Params(env: String = "dev",
                    date: String = "2018-08-01"
                   ) extends AbstractParams[Params] with Serializable

  val defaultParams = Params()

  val parser = new OptionParser[Params]("Feed_EDA") {
    head("tag_value")
    opt[String]("env")
      .text(s"the databases environment you used")
      .action((x, c) => c.copy(env = x))
    opt[String]("date")
      .text(s"the date you used")
      .action((x, c) => c.copy(date = x))
    // The whole example goes through stripMargin before the interpolated tail
    // is appended, so no literal '|' leaks into the printed usage text.
    note(
      """
        |For example, the following command runs this app on a tidb dataset:
        |
        | spark-submit --class com.gmei.tag_value ./target/scala-2.11/feededa-assembly-0.1.jar \
        |""".stripMargin +
        s"   --env ${defaultParams.env}"
    )
  }

  /**
    * Entry point: parses the arguments and runs the job.
    *
    * @param args raw command-line arguments (`--env`, `--date`)
    */
  def main(args: Array[String]): Unit = {
    parser.parse(args, defaultParams) match {
      case Some(param) => run(param)
      // scopt has already printed the error and usage text; exit non-zero so a
      // scheduler does not record a run with bad arguments as a success.
      case None => sys.exit(1)
    }
  }

  /** Executes the whole pipeline for the parsed parameters. */
  private def run(param: Params): Unit = {
    GmeiConfig.setup(param.env)
    // The second element of the returned pair is the handle used for SQL.
    val spark = GmeiConfig.getSparkSession()._2

    spark.sql("use jerry_prod")

    // The job always processes the previous day; `param.date` is not used.
    val stat_date = GmeiConfig.getMinusNDate(1)
    // Partitions are addressed without dashes.
    val partition_date = stat_date.replace("-", "")

    // Weight given to a tag of type 1, 2 and 3 respectively.
    val t1 = 2.5
    val t2 = 5.0
    val t3 = 10.0

    // Registers the view `level<tagType>_id` holding every non-zero tag id of
    // the given type together with its weight.
    // NOTE(review): the weight is emitted as a quoted SQL literal, so the
    // `value` column is a string, not a number — confirm that is intended.
    def registerLevelWeights(tagType: Int, weight: Double): Unit =
      spark.sql(
        s"""
           |select distinct(id) as level_id,'${weight}' as value
           |from online.bl_tag_hierarchy_detail
           |where partition_date = '${partition_date}'
           |and id !=0
           |and tag_type='${tagType}'
         """.stripMargin
      ).createOrReplaceTempView(s"level${tagType}_id")

    // Mapping from level_id to weight.
    registerLevelWeights(1, t1)
    registerLevelWeights(2, t2)
    registerLevelWeights(3, t3)

    val level_value = spark.sql(
      s"""
         |select *
         |from level1_id
         |union all
         |select *
         |from level2_id
         |union all
         |select *
         |from level3_id
       """.stripMargin
    )
    level_value.createOrReplaceTempView("tagId_value")

    println("开始写入")
    GmeiConfig.writeToJDBCTable(jdbcUrl, level_value, table = "tagId_value", SaveMode.Overwrite)
    println("写入完成")

    // Mapping from level_id to index.
    // This view is only consumed by the disabled code below, which used to
    // rebuild `tag_level_index`; the live query further down reads
    // `jerry_prod.tag_level_index` instead.
    spark.sql(
      s"""
         |select id as tag_id
         |from online.bl_tag_hierarchy_detail
         |where partition_date = '${partition_date}'
         |and id !=0
       """.stripMargin
    ).createOrReplaceTempView("level_index_temp")

    // Disabled: regenerate tag_level_index (requires `import spark.implicits._`
    // and `level_index_temp` bound to the DataFrame registered above).
    //      val result = level_index_temp.select("tag_id").distinct().rdd.map{x => x.toString().substring(1,x.toString().length - 1)}.zipWithIndex().toDF("level_id","index_id")
    //      val test=result.select(result.col("level_id").cast(DoubleType).as("level_id"),result.col("index_id").cast(DoubleType).as("index_id"))
    //      test.createOrReplaceTempView("tag_level_index")
    //      GmeiConfig.writeToJDBCTable(jdbcUrl, test, table="tag_level_index", SaveMode.Overwrite)

    // Data from the zhengxing database is synchronised into jerry_prod.
    // Rows without an index or without a weight are dropped, which also
    // removes rows whose tag_id is null.
    val icon_today_train_data = spark.sql(
      s"""
         |select a.device_id,c.index_id as action,b.value from jerry_prod.icon_train_data a
         |left join tagId_value b on a.tag_id=b.level_id
         |left join jerry_prod.tag_level_index c on a.tag_id=c.level_id
         |where c.index_id is not null and b.value is not null
         |and a.stat_date='${stat_date}'
       """.stripMargin
    )

    println("开始写入")
    GmeiConfig.writeToJDBCTable(jdbcUrl, icon_today_train_data, table = "icon_today_train_data_test", SaveMode.Overwrite)
    println("写入完成")
  }

}




/**
 * Spark job that exports the latest installed-app list for a filtered set of Android devices.
 *
 * Steps performed by `main`:
 *  1. Registers temp view `device_id_new`: distinct `device_id` values from
 *     `online.ml_device_month_active_status` (partition `20190531`) whose `active_type`
 *     is not `'4'` and whose `first_channel_source_type` is not in the excluded list.
 *  2. Registers temp view `app_list`: for each `cl_id`, the row with the greatest
 *     `time_stamp` among Android `user_installed_all_app_info` events found in
 *     `online.tl_hdfs_maidian_view` from partition `20190501` onwards.
 *  3. Inner-joins both views on `cl_id = device_id`, prints the first rows with `show()`
 *     and overwrites the JDBC table `app_list_yunying` with the join result.
 *
 * NOTE(review): both partition filters are hard-coded in the SQL and the `--date`
 * option is never read by this job -- confirm that this is intended.
 */
object app_list_yunying {

  Logger.getLogger("org.apache.spark").setLevel(Level.WARN)
  Logger.getLogger("org.apache.eclipse.jetty.server").setLevel(Level.OFF)

  /**
   * Command-line parameters.
   *
   * @param env  environment name handed to `GmeiConfig.setup`
   * @param date date option; accepted by the parser but not used by `main`
   */
  case class Params(env: String = "dev",
                    date: String = "2018-08-01"
                   ) extends AbstractParams[Params] with Serializable

  val defaultParams = Params()

  // NOTE(review): the header and the usage example below still name `WeafareStat`;
  // this looks copy-pasted from another job -- confirm before changing the text.
  val parser = new OptionParser[Params]("Feed_EDA") {
    head("WeafareStat")
    opt[String]("env")
      .text("the databases environment you used")
      .action((x, c) => c.copy(env = x))
    opt[String] ("date")
      .text("the date you used")
      .action((x,c) => c.copy(date = x))
    note(
      """
        |For example, the following command runs this app on a tidb dataset:
        |
        | spark-submit --class com.gmei.WeafareStat ./target/scala-2.11/feededa-assembly-0.1.jar \
      """.stripMargin +
        s"|   --env ${defaultParams.env}"
    )
  }

  /**
   * Entry point. Parses `args`; when parsing succeeds the export described on this
   * object is executed, otherwise nothing is run.
   *
   * @param args command-line arguments (`--env`, `--date`)
   */
  def main(args: Array[String]): Unit = {
    // foreach rather than map: the body is executed purely for its side effects.
    parser.parse(args, defaultParams).foreach { param =>
      GmeiConfig.setup(param.env)
      val sc = GmeiConfig.getSparkSession()._2

      sc.sql("use jerry_prod")

      // Devices to keep: filtered by active_type and acquisition channel.
      val device_id_newUser = sc.sql(
        """
           |select distinct(device_id) as device_id
           |from online.ml_device_month_active_status
           |where active_type != '4'
           |and first_channel_source_type not in ('yqxiu1','yqxiu2','yqxiu3','yqxiu4','yqxiu5','mxyc1','mxyc2','mxyc3'
           |    ,'wanpu','jinshan','jx','maimai','zhuoyi','huatian','suopingjingling','mocha','mizhe','meika','lamabang'
           |    ,'js-az1','js-az2','js-az3','js-az4','js-az5','jfq-az1','jfq-az2','jfq-az3','jfq-az4','jfq-az5','toufang1'
           |    ,'toufang2','toufang3','toufang4','toufang5','toufang6','TF-toufang1','TF-toufang2','TF-toufang3','TF-toufang4'
           |    ,'TF-toufang5','tf-toufang1','tf-toufang2','tf-toufang3','tf-toufang4','tf-toufang5','benzhan','promotion_aso100'
           |    ,'promotion_qianka','promotion_xiaoyu','promotion_dianru','promotion_malioaso','promotion_malioaso-shequ'
           |    ,'promotion_shike','promotion_julang_jl03','','unknown','promotion_zuimei')
           |and partition_date ='20190531'
         """.stripMargin
      )
      device_id_newUser.createOrReplaceTempView("device_id_new")

      // row_number() ... = 1 keeps only the newest report per cl_id.
      val app_list = sc.sql(
        """
           |select *
           |from
           |(select cl_id,
           |    time_stamp,
           |    params['installed_app_info'] as app_list,
           |        row_number() over (partition by cl_id order by time_stamp desc) as pk
           | from online.tl_hdfs_maidian_view
           |where partition_date >='20190501'
           |  and action = 'user_installed_all_app_info'
           |  and cl_type='android'
           | ) a
           |where a.pk = 1
         """.stripMargin
      )
      app_list.createOrReplaceTempView("app_list")

      val final_app_list = sc.sql(
        """
           |select *
           |from
           |app_list a
           |inner join device_id_new b
           |on a.cl_id=b.device_id
         """.stripMargin
      )

      final_app_list.show()

      // NOTE(review): database credentials are embedded in source control here;
      // consider loading the JDBC URL from configuration instead.
      val jdbcUrl = "jdbc:mysql://172.16.40.158:4000/jerry_prod?user=root&password=3SYz54LS9#^9sBvC&rewriteBatchedStatements=true"

      println("开始写入")
      GmeiConfig.writeToJDBCTable(jdbcUrl, final_app_list, table="app_list_yunying", SaveMode.Overwrite)
      println("写入完成")
    }
  }

}










/**
 * Spark job that registers three temporary views describing wiki clicks:
 *
 *  - `wiki_click`: `cl_id` (as `device_id`) and `params['business_id']` (as `wiki_id`)
 *    for three wiki-related actions in `online.tl_hdfs_maidian_view`;
 *  - `wiki_tag`: `item_id` / `tag_id` pairs from `online.tl_hdfs_wiki_item_tag_view`;
 *  - `wiki_device_tag`: `wiki_click` left-joined to `wiki_tag`, keeping only rows
 *    that have a `tag_id`.
 *
 * All source tables are read for the partition derived from
 * `GmeiConfig.getMinusNDate(1)` with the dashes removed. The views are only
 * registered; nothing is written out by this object.
 */
object test_data {

  Logger.getLogger("org.apache.spark").setLevel(Level.WARN)
  Logger.getLogger("org.apache.eclipse.jetty.server").setLevel(Level.OFF)

  /**
   * Command-line parameters.
   *
   * @param env  environment name handed to `GmeiConfig.setup`
   * @param date date option; accepted by the parser but not read by `main`
   */
  case class Params(env: String = "dev",
                    date: String = "2018-08-01"
                   ) extends AbstractParams[Params] with Serializable

  val defaultParams = Params()

  val parser = new OptionParser[Params]("Feed_EDA") {
    head("WeafareStat")
    opt[String]("env")
      .text(s"the databases environment you used")
      .action((value, params) => params.copy(env = value))
    opt[String]("date")
      .text(s"the date you used")
      .action((value, params) => params.copy(date = value))
    note(
      """
        |For example, the following command runs this app on a tidb dataset:
        |
        | spark-submit --class com.gmei.WeafareStat ./target/scala-2.11/feededa-assembly-0.1.jar \
      """.stripMargin +
        s"|   --env ${defaultParams.env}"
    )
  }

  /**
   * Entry point. Runs the view registration when `args` parse successfully and
   * does nothing otherwise.
   *
   * @param args command-line arguments (`--env`, `--date`)
   */
  def main(args: Array[String]): Unit =
    parser.parse(args, defaultParams) match {
      case Some(param) =>
        GmeiConfig.setup(param.env)
        val session = GmeiConfig.getSparkSession()._2

        session.sql("use jerry_prod")

        // Partition key: the date string from getMinusNDate(1) without dashes.
        val dayPartition = GmeiConfig.getMinusNDate(1).replace("-", "")

        // Wiki click events.
        session.sql(
          s"""
             |SELECT cl_id as device_id,params['business_id'] as wiki_id
             |from online.tl_hdfs_maidian_view
             |where partition_date='${dayPartition}'
             |and (action='Search_result_wiki_click_item' or action='search_result_click_infomation_item' or action='search_result_wiki_click_recommend_wiki')
       """.stripMargin
        ).createOrReplaceTempView("wiki_click")

        // Wiki item -> tag mapping.
        session.sql(
          s"""
             |SELECT item_id,tag_id
             |from online.tl_hdfs_wiki_item_tag_view
             |where partition_date='${dayPartition}'
       """.stripMargin
        ).createOrReplaceTempView("wiki_tag")

        // Device -> tag pairs; must come after the two views it reads from.
        session.sql(
          s"""
             |SELECT a.device_id,b.tag_id
             |from wiki_click a
             |left join wiki_tag b
             |on a.wiki_id=b.item_id
             |where b.tag_id is not null
       """.stripMargin
        ).createOrReplaceTempView("wiki_device_tag")

      case None => ()
    }

}
