Commit b86483fe authored by 张彦钊's avatar 张彦钊

Merge branch 'master' of git.wanmeizhensuo.com:ML/ffm-baseline

增加曝光量统计指标
parents dc617a5d 2af16a4a
#!/bin/bash
# Run the strategy click/impression stats job once per day over [startdate, enddate).
# Usage: run_ctr.sh <startdate> <enddate>   (dates in any format `date -d` accepts)

if [[ $# -ne 2 ]]; then
    echo 'Usage: '$0' <startdate> <enddate>'
    exit 1
fi

# Normalize both bounds to yyyy-mm-dd so the lexicographic `<` comparison below
# is also a correct chronological comparison.
startdate=`date -d "$1" +%Y-%m-%d`
enddate=`date -d "$2" +%Y-%m-%d`

# NOTE: was `[[ $startdate < $enddate]]` — missing space before `]]` is a bash syntax error.
while [[ $startdate < $enddate ]]
do
    # Submit one Spark job per day; append driver output to ctr1.log for later inspection.
    /opt/spark/bin/spark-submit \
        --master spark://10.31.242.83:7077 \
        --total-executor-cores 20 \
        --executor-memory 3g \
        --executor-cores 2 \
        --driver-memory 8g \
        --conf spark.default.parallelism=200 \
        --conf spark.storage.memoryFraction=0.5 \
        --conf spark.shuffle.memoryFraction=0.3 \
        --class com.gmei.jerry.strategy_clk_imp_oldUser \
        /srv/apps/ffm-baseline/eda/feededa/target/scala-2.11/feededa-assembly-0.1.jar \
        --env prod --date "$startdate" >> ctr1.log
    # Advance to the next day.
    startdate=`date -d "+1 day $startdate" +%Y-%m-%d`
done
package com.gmei

import java.io.Serializable
import java.text.SimpleDateFormat
import java.util.Calendar

import org.apache.log4j.{Level, Logger}
import org.apache.spark.sql.{SaveMode, TiContext}
import scopt.OptionParser

import com.gmei.lib.AbstractParams

/**
 * Ad-hoc EDA driver: for each device-id suffix "strategy" bucket, queries the
 * maidian view for daily total session duration and distinct-device counts,
 * and prints the result. Run via spark-submit with `--env <dev|prod>`.
 */
object testt {

  // Keep Spark's own logging quiet so the printed query results are readable.
  Logger.getLogger("org.apache.spark").setLevel(Level.WARN)
  Logger.getLogger("org.apache.eclipse.jetty.server").setLevel(Level.OFF)

  /** Command-line parameters. `env` selects the database environment (default "dev"). */
  case class Params(env: String = "dev") extends AbstractParams[Params] with Serializable

  val defaultParams = Params()

  // scopt parser for the single supported option, `--env`.
  val parser = new OptionParser[Params]("Feed_EDA") {
    head("WeafareStat")
    opt[String]("env")
      .text(s"the databases environment you used")
      .action((x, c) => c.copy(env = x))
    note(
      """
        |For example, the following command runs this app on a tidb dataset:
        |
        |  spark-submit --class com.gmei.testt ./target/scala-2.11/feededa-assembly-0.1.jar \
      """.stripMargin +
        s"|  --env ${defaultParams.env}"
    )
  }

  def main(args: Array[String]): Unit = {
    parser.parse(args, defaultParams).map { param =>
      GmeiConfig.setup(param.env)
      val spark_env = GmeiConfig.getSparkSession()
      val sc = spark_env._2

      // Map the TiDB tables this session may read into Spark's catalog.
      val ti = new TiContext(sc)
      ti.tidbMapTable(dbName = "jerry_prod", tableName = "diary_video")
      ti.tidbMapTable(dbName = "jerry_prod", tableName = "data_feed_click")
      ti.tidbMapTable(dbName = "jerry_prod", tableName = "blacklist")
      ti.tidbMapTable(dbName = "jerry_test", tableName = "bl_device_list")
      ti.tidbMapTable(dbName = "jerry_prod", tableName = "data_feed_exposure")

      // Each entry is a regex anchored at end-of-string: devices whose cl_id ends
      // with this character belong to that A/B strategy bucket.
      val strategies = Seq("3$", "4$", "5$", "6$", "7$", "8$", "a$", "b$", "c$", "d$", "e$", "A$", "B$", "C$", "D$")
      for (strategy <- strategies) {
        println(strategy)
        // Daily total session duration and distinct device count for this bucket.
        // NOTE: original SQL had a duplicated keyword ("where where"), which
        // Spark SQL would reject; fixed here.
        val get_data_dura = sc.sql(
          s"""
             |select partition_date, sum(params['duration']) as total_dur,count(distinct(cl_id)) as num
             |from online.tl_hdfs_maidian_view
             |where action="on_app_session_over"
             |and cl_id regexp '${strategy}'
             |and params['duration']>=0 and params['duration']<86400
             |and partition_date >='20180801'
             |group by partition_date order by partition_date desc
          """.stripMargin
        )
        get_data_dura.show()
      }
    }
  }
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment