Commit b86483fe authored by 张彦钊

Merge branch 'master' of git.wanmeizhensuo.com:ML/ffm-baseline

Add exposure volume statistics metrics
parents dc617a5d 2af16a4a
if [[ $# -ne 2 ]]; then
echo "Usage: $0 <startdate> <enddate>"
exit 1
fi
startdate=`date -d "$1" +%Y-%m-%d`
enddate=`date -d "$2" +%Y-%m-%d`
# First pass: run strategy_clk_imp_oldUser for each day in the range, logging to ctr1.log.
while [[ "$startdate" < "$enddate" ]]
do
/opt/spark/bin/spark-submit --master spark://10.31.242.83:7077 --total-executor-cores 20 --executor-memory 3g --executor-cores 2 --driver-memory 8g --conf spark.default.parallelism=200 --conf spark.storage.memoryFraction=0.5 --conf spark.shuffle.memoryFraction=0.3 --class com.gmei.jerry.strategy_clk_imp_oldUser /srv/apps/ffm-baseline/eda/feededa/target/scala-2.11/feededa-assembly-0.1.jar --env prod --date $startdate >>ctr1.log
startdate=`date -d "+1 day $startdate" +%Y-%m-%d`
done
# Second pass: reset the range and rerun with fewer cores, logging to ctr2.log.
startdate=`date -d "$1" +%Y-%m-%d`
enddate=`date -d "$2" +%Y-%m-%d`
while [[ "$startdate" < "$enddate" ]]
do
/opt/spark/bin/spark-submit --master spark://10.31.242.83:7077 --total-executor-cores 10 --executor-memory 3g --executor-cores 2 --driver-memory 8g --class com.gmei.strategy_clk_imp_oldUser /srv/apps/ffm-baseline/eda/feededa/target/scala-2.11/feededa-assembly-0.1.jar --env prod --date $startdate >> ctr2.log
startdate=`date -d "+1 day $startdate" +%Y-%m-%d`
done
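For reference, a minimal usage sketch, assuming the script above is saved as run_ctr.sh (a placeholder name, not part of this commit). Note that enddate is exclusive: each loop stops once startdate is no longer lexicographically before enddate.

# Hypothetical invocation of the backfill script above.
bash run_ctr.sh 2018-08-01 2018-08-10   # submits one Spark job per day, 2018-08-01 through 2018-08-09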
package com.gmei
import java.text.SimpleDateFormat
import java.util.Calendar
import scala.collection.mutable.ArrayBuffer
import java.io.Serializable
import org.apache.spark.sql.TiContext
import org.apache.log4j.{Level, Logger}
import scopt.OptionParser
import com.gmei.lib.AbstractParams
object testt {

  case class Params(env: String = "dev") extends AbstractParams[Params] with Serializable

  val defaultParams = Params()

  val parser = new OptionParser[Params]("Feed_EDA") {
    head("WeafareStat")
    opt[String]("env")
      .text(s"the databases environment you used")
      .action((x, c) => c.copy(env = x))
    //opt[String]("date")
    //  .text(s"the date you used")
    //  .action((x, c) => c.copy(date = x))
    note(
      """
        |For example, the following command runs this app on a tidb dataset:
        |
        | spark-submit --class com.gmei.testt ./target/scala-2.11/feededa-assembly-0.1.jar \
      """.stripMargin +
        s"| --env ${defaultParams.env}"
    )
  }

  // Enumerate every date from startTime to endTime (inclusive) as yyyy-MM-dd strings.
  def get_date(): ArrayBuffer[String] = {
    val startTime = "2017-12-01"
    val endTime = "2017-12-10"
    val dateFormat = new SimpleDateFormat("yyyy-MM-dd")
    val dateFiled = Calendar.DAY_OF_MONTH
    var beginDate = dateFormat.parse(startTime)
    val endDate = dateFormat.parse(endTime)
    val calendar = Calendar.getInstance()
    calendar.setTime(beginDate)
    val dateArray: ArrayBuffer[String] = ArrayBuffer()
    while (beginDate.compareTo(endDate) <= 0) {
      dateArray += dateFormat.format(beginDate)
      calendar.add(dateFiled, 1)
      beginDate = calendar.getTime
    }
    //println(dateArray)
    dateArray
  }

  def main(args: Array[String]): Unit = {
    Logger.getLogger("org.apache.spark").setLevel(Level.WARN)
    Logger.getLogger("org.apache.eclipse.jetty.server").setLevel(Level.OFF)

    val dateArray2 = get_date()
    println(dateArray2(0))
    for (elem <- dateArray2) {
      println(elem)
    }

    parser.parse(args, defaultParams).map { param =>
      GmeiConfig.setup(param.env)
      val spark_env = GmeiConfig.getSparkSession()
      val sc = spark_env._2
      val ti = new TiContext(sc)
      ti.tidbMapTable(dbName = "jerry_prod", tableName = "diary_video")
      ti.tidbMapTable(dbName = "jerry_prod", tableName = "data_feed_click")
      ti.tidbMapTable(dbName = "jerry_prod", tableName = "blacklist")
      ti.tidbMapTable(dbName = "jerry_test", tableName = "bl_device_list")
      ti.tidbMapTable(dbName = "jerry_prod", tableName = "data_feed_exposure")

      // Device ids end with one of these characters; each regexp suffix defines one strategy bucket.
      val strategies = Seq("3$","4$","5$","6$","7$","8$","a$","b$","c$","d$","e$","A$","B$","C$","D$")
      for (strategy <- strategies) {
        println(strategy)
        // Daily total session duration and distinct device count for this bucket.
        val get_data_dura = sc.sql(
          s"""
             |select partition_date, sum(params['duration']) as total_dur, count(distinct(cl_id)) as num
             |from online.tl_hdfs_maidian_view
             |where action = "on_app_session_over"
             |and cl_id regexp '${strategy}'
             |and params['duration'] >= 0 and params['duration'] < 86400
             |and partition_date >= '20180801'
             |group by partition_date order by partition_date desc
           """.stripMargin
        )
        get_data_dura.show()
      }
    }
  }
}
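As a side note, get_date hard-codes its range and leans on the mutable Calendar API. A minimal sketch of the same day-enumeration idea using java.time (Java 8+) is below; DateRangeSketch and dateRange are hypothetical names for illustration, not part of this repo:

import java.time.LocalDate
import java.time.format.DateTimeFormatter

object DateRangeSketch {
  // Hypothetical helper: every date in [start, end], formatted as yyyy-MM-dd.
  def dateRange(start: String, end: String): Seq[String] = {
    val fmt = DateTimeFormatter.ofPattern("yyyy-MM-dd")
    val last = LocalDate.parse(end, fmt)
    Iterator.iterate(LocalDate.parse(start, fmt))(_.plusDays(1))
      .takeWhile(!_.isAfter(last))
      .map(_.format(fmt))
      .toSeq
  }

  def main(args: Array[String]): Unit = {
    dateRange("2017-12-01", "2017-12-10").foreach(println)  // same range as get_date()
  }
}

Because LocalDate is immutable, this avoids the shared-Calendar mutation that makes the original loop easy to get wrong.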