ML / ffm-baseline · Commits

Commit 5edec3f4 authored Nov 01, 2018 by 王志伟
Parent: 6bcae79b

    chang run.sh

Showing 2 changed files with 153 additions and 40 deletions (+153 / -40):

    eda/feededa/run.sh                                   +12  -11
    eda/feededa/src/main/scala/com/gmei/testt.scala      +141 -29

eda/feededa/run.sh  (view file @ 5edec3f4)
#!/bin/bash
# Run the daily click/impression jobs for every date in [<startdate>, <enddate>).

if [[ $# -ne 2 ]]; then
    echo 'Usage:' $0 ' <startdate> <enddate>'
    exit 1
fi

startdate=`date -d "$1" +%Y-%m-%d`
enddate=`date -d "$2" +%Y-%m-%d`

# First pass: com.gmei.jerry.strategy_clk_imp_oldUser with 20 cores, logging to ctr1.log.
while [[ $startdate < $enddate ]]
do
    /opt/spark/bin/spark-submit \
        --master spark://10.31.242.83:7077 \
        --total-executor-cores 20 \
        --executor-memory 3g \
        --executor-cores 2 \
        --driver-memory 8g \
        --conf spark.default.parallelism=200 \
        --conf spark.storage.memoryFraction=0.5 \
        --conf spark.shuffle.memoryFraction=0.3 \
        --class com.gmei.jerry.strategy_clk_imp_oldUser \
        /srv/apps/ffm-baseline/eda/feededa/target/scala-2.11/feededa-assembly-0.1.jar \
        --env prod --date $startdate >> ctr1.log
    startdate=`date -d "+1 day $startdate" +%Y-%m-%d`
done

# Second pass: same date range, com.gmei.strategy_clk_imp_oldUser with 10 cores, logging to ctr2.log.
startdate=`date -d "$1" +%Y-%m-%d`
enddate=`date -d "$2" +%Y-%m-%d`

while [[ $startdate < $enddate ]]
do
    /opt/spark/bin/spark-submit \
        --master spark://10.31.242.83:7077 \
        --total-executor-cores 10 \
        --executor-memory 3g \
        --executor-cores 2 \
        --driver-memory 8g \
        --class com.gmei.strategy_clk_imp_oldUser \
        /srv/apps/ffm-baseline/eda/feededa/target/scala-2.11/feededa-assembly-0.1.jar \
        --env prod --date $startdate >> ctr2.log
    startdate=`date -d "+1 day $startdate" +%Y-%m-%d`
done
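For reference, a minimal invocation of the script above (the dates are illustrative; the script assumes GNU date and the assembly jar built at the hard-coded path):

    # Example: compute the daily stats for 2018-10-01 through 2018-10-07
    # (the loop uses a strict "<", so the end date itself is not processed)
    bash eda/feededa/run.sh 2018-10-01 2018-10-08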
eda/feededa/src/main/scala/com/gmei/testt.scala  (view file @ 5edec3f4)
package com.gmei

import java.text.SimpleDateFormat
import java.util.{Calendar, Date}
import java.io.Serializable
import scala.collection.mutable.ArrayBuffer

import org.apache.log4j.{Level, Logger}
import org.apache.spark.sql.{SaveMode, TiContext}
import scopt.OptionParser

import com.gmei.lib.AbstractParams

object testt {

  Logger.getLogger("org.apache.spark").setLevel(Level.WARN)
  Logger.getLogger("org.apache.eclipse.jetty.server").setLevel(Level.OFF)

  case class Params(env: String = "dev",
                    date: String = "2018-08-01")
    extends AbstractParams[Params] with Serializable

  val defaultParams = Params()

  val parser = new OptionParser[Params]("Feed_EDA") {
    head("WeafareStat")
    opt[String]("env")
      .text(s"the databases environment you used")
      .action((x, c) => c.copy(env = x))
    opt[String]("date")
      .text(s"the date you used")
      .action((x, c) => c.copy(date = x))
    note(
      """
        |For example, the following command runs this app on a tidb dataset:
        |
        | spark-submit --class com.gmei.WeafareStat ./target/scala-2.11/feededa-assembly-0.1.jar \
      """.stripMargin + s"| --env ${defaultParams.env}"
    )
  }

  // Returns every date from startTime to endTime (inclusive) as "yyyy-MM-dd" strings.
  def get_date(): ArrayBuffer[String] = {
    val startTime = "2017-12-01"
    val endTime = "2017-12-10"
    val dateFormat = new SimpleDateFormat("yyyy-MM-dd")
    val dateFiled = Calendar.DAY_OF_MONTH
    var beginDate = dateFormat.parse(startTime)
    val endDate = dateFormat.parse(endTime)
    val calendar = Calendar.getInstance()
    calendar.setTime(beginDate)
    val dateArray: ArrayBuffer[String] = ArrayBuffer()
    while (beginDate.compareTo(endDate) <= 0) {
      dateArray += dateFormat.format(beginDate)
      calendar.add(dateFiled, 1)
      beginDate = calendar.getTime
    }
    //println(dateArray)
    dateArray
  }

  def main(args: Array[String]): Unit = {
    // Smoke-test the date helper.
    val dateArray2 = get_date()
    println(dateArray2(0))
    for (elem <- dateArray2) {
      println(elem)
    }

    parser.parse(args, defaultParams).map { param =>
      GmeiConfig.setup(param.env)
      val spark_env = GmeiConfig.getSparkSession()
      val sc = spark_env._2

      // Expose the TiDB tables used below through TiSpark.
      val ti = new TiContext(sc)
      ti.tidbMapTable(dbName = "jerry_prod", tableName = "diary_video")
      ti.tidbMapTable(dbName = "jerry_prod", tableName = "data_feed_click")
      ti.tidbMapTable(dbName = "jerry_prod", tableName = "blacklist")
      ti.tidbMapTable(dbName = "jerry_test", tableName = "bl_device_list")
      ti.tidbMapTable(dbName = "jerry_prod", tableName = "data_feed_exposure")

      import sc.implicits._

      // val stat_date = GmeiConfig.getMinusNDate(1)
      println(param.date)
      val partition_date = param.date.replace("-", "")

      // Devices whose active_type is '4' on the partition date (old users).
      val decive_id_oldUser = sc.sql(
        s"""
           |select distinct(device_id) as decive_id
           |from online.ml_device_day_active_status
           |where active_type = '4'
           |and partition_date ='${partition_date}'
         """.stripMargin
      )
      decive_id_oldUser.show()
      decive_id_oldUser.createOrReplaceTempView("device_id_old")

      // Devices whose active_type is not '4' on the partition date (new users).
      val decive_id_newUser = sc.sql(
        s"""
           |select distinct(device_id) as decive_id
           |from online.ml_device_day_active_status
           |where active_type != '4'
           |and partition_date ='${partition_date}'
         """.stripMargin
      )
      decive_id_newUser.show()
      decive_id_newUser.createOrReplaceTempView("device_id_newUser")

      // device_id suffix patterns used to split devices into strategy buckets.
      val strategies = Seq("[1|2]$", "[3|4]$", "[5|6]$", "[7|8]$")
      for (strategy <- strategies) {
        // Diary clicks by old users in this bucket, excluding blacklisted devices.
        val clk_count_oldUser = sc.sql(
          s"""
             |select '${param.date}' as stat_date, count(cid_id) as get_clk_count_old
             |from data_feed_click jd inner join device_id_old
             |on jd.device_id = device_id_old.decive_id
             |where (jd.cid_type = 'diary' or jd.cid_type = 'diary_video')
             |and jd.device_id regexp'${strategy}'
             |and jd.device_id not in (select device_id from bl_device_list)
             |and jd.device_id not in (select device_id from blacklist)
             |and jd.stat_date ='${param.date}'
           """.stripMargin
        )

        // Diary exposures by old users in this bucket.
        val imp_count_oldUser = sc.sql(
          s"""
             |select '${param.date}' as stat_date, count(cid_id) as get_imp_count_old
             |from data_feed_exposure je inner join device_id_old
             |on je.device_id = device_id_old.decive_id
             |where je.cid_type = 'diary'
             |and je.device_id regexp'${strategy}'
             |and je.device_id not in (select device_id from bl_device_list)
             |and je.device_id not in (select device_id from blacklist)
             |and je.stat_date ='${param.date}'
           """.stripMargin
        )

        // Diary clicks by new users in this bucket.
        val clk_count_newUser = sc.sql(
          s"""
             |select '${param.date}' as stat_date, count(cid_id) as get_clk_count_newUser
             |from data_feed_click jd inner join device_id_newUser
             |on jd.device_id = device_id_newUser.decive_id
             |where (jd.cid_type = 'diary' or jd.cid_type = 'diary_video')
             |and jd.device_id regexp'${strategy}'
             |and jd.device_id not in (select device_id from bl_device_list)
             |and jd.device_id not in (select device_id from blacklist)
             |and jd.stat_date ='${param.date}'
           """.stripMargin
        )

        // Diary exposures by new users in this bucket.
        val imp_count_newUser = sc.sql(
          s"""
             |select '${param.date}' as stat_date, count(cid_id) as get_imp_count_newUser
             |from data_feed_exposure je inner join device_id_newUser
             |on je.device_id = device_id_newUser.decive_id
             |where je.cid_type = 'diary'
             |and je.device_id regexp'${strategy}'
             |and je.device_id not in (select device_id from bl_device_list)
             |and je.device_id not in (select device_id from blacklist)
             |and je.stat_date ='${param.date}'
           """.stripMargin
        )
        imp_count_newUser.show()

        // One row per stat_date with the four counts side by side.
        val result = clk_count_oldUser
          .join(imp_count_oldUser, "stat_date")
          .join(clk_count_newUser, "stat_date")
          .join(imp_count_newUser, "stat_date")
        result.show()
      }
    }
  }
}
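The testt object defines its own main and command-line parser, so by analogy with run.sh above it would presumably be submitted the same way; a sketch, reusing the master URL, resource settings, and jar path from run.sh (the date is illustrative):

    /opt/spark/bin/spark-submit \
        --master spark://10.31.242.83:7077 \
        --total-executor-cores 10 \
        --executor-memory 3g \
        --executor-cores 2 \
        --driver-memory 8g \
        --class com.gmei.testt \
        /srv/apps/ffm-baseline/eda/feededa/target/scala-2.11/feededa-assembly-0.1.jar \
        --env prod --date 2018-10-01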