Commit 00b83d34 authored by 高雅喆's avatar 高雅喆

Merge branch 'master' of git.wanmeizhensuo.com:ML/ffm-baseline

test
parents 35c4c15f 54e81fa9
...@@ -15,7 +15,7 @@ def get_yesterday_date(): ...@@ -15,7 +15,7 @@ def get_yesterday_date():
def get_data(): def get_data():
conn2db = pymysql.connect(host='10.66.157.22', port=4000, user='root', passwd='3SYz54LS9#^9sBvC', db='eagle') conn2db = pymysql.connect(host='10.66.157.22', port=4000, user='root', passwd='3SYz54LS9#^9sBvC', db='eagle')
cursor = conn2db.cursor() cursor = conn2db.cursor()
sql = "select device_id from ffm_diary_queue_temp where device_id regexp '[5|6]$'" sql = "select distinct device_id from ffm_diary_queue_temp where device_id regexp '[5|6]$'"
cursor.execute(sql) cursor.execute(sql)
result = cursor.fetchall() result = cursor.fetchall()
device = tuple(pd.DataFrame(list(result))[0].values.tolist()) device = tuple(pd.DataFrame(list(result))[0].values.tolist())
...@@ -70,8 +70,8 @@ def ctr(): ...@@ -70,8 +70,8 @@ def ctr():
click = cursor.fetchone()[0] click = cursor.fetchone()[0]
print("实验用户点击数:"+str(click)) print("实验用户点击数:"+str(click))
sql_exp = "select count(cid) from data_feed_exposure " \ sql_exp = "select count(cid) from data_feed_exposure " \
"where (cid_type = 'diary' or cid_type = 'diary_video') and stat_date = '{}' and " \ "where (cid_type = 'diary' or cid_type = 'diary_video') " \
"device_id in {}".format(date,device_id) "and stat_date = '{}' and device_id in {}".format(date,device_id)
cursor.execute(sql_exp) cursor.execute(sql_exp)
exp = cursor.fetchone()[0] exp = cursor.fetchone()[0]
print("实验用户曝光数:"+str(exp)) print("实验用户曝光数:"+str(exp))
......
# Backfill driver: submits the strategy_clk_imp_oldUser Spark job once per
# calendar day, starting at <startdate> and stopping before <enddate>
# (the end date itself is NOT processed). Job output is appended to ctr1.log.
#
# Usage: <script> <startdate> <enddate>   (any format GNU `date -d` accepts)
if [[ $# -ne 2 ]]; then
    echo "Usage:$0 <startdate> <enddate>"
    # Exit non-zero so callers/cron can tell that nothing was run.
    exit 1
fi

# Normalise both arguments to YYYY-MM-DD; abort on unparsable input instead of
# looping with an empty date string.
startdate=$(date -d "$1" +%Y-%m-%d) || exit 1
enddate=$(date -d "$2" +%Y-%m-%d) || exit 1

# Inside [[ ]], `<` compares strings lexicographically, which matches
# chronological order for zero-padded ISO dates. The space before `]]` is
# required; without it the script fails to parse.
while [[ "$startdate" < "$enddate" ]]
do
    /opt/spark/bin/spark-submit --master spark://10.31.242.83:7077 --total-executor-cores 20 --executor-memory 3g --executor-cores 2 --driver-memory 8g --conf spark.default.parallelism=200 --conf spark.storage.memoryFraction=0.5 --conf spark.shuffle.memoryFraction=0.3 --class com.gmei.jerry.strategy_clk_imp_oldUser /srv/apps/ffm-baseline/eda/feededa/target/scala-2.11/feededa-assembly-0.1.jar --env prod --date "$startdate" >>ctr1.log
    # Advance one day; stop if `date` fails so the loop cannot spin forever.
    startdate=$(date -d "+1 day $startdate" +%Y-%m-%d) || exit 1
done
package com.gmei package com.gmei
import java.io.Serializable import java.io.Serializable
import java.text.SimpleDateFormat
import java.util.Calendar
import com.gmei.WeafareStat.{defaultParams, parser} import com.gmei.WeafareStat.{defaultParams, parser}
import org.apache.spark.sql.{SaveMode, TiContext} import org.apache.spark.sql.{SaveMode, TiContext}
...@@ -10,15 +8,13 @@ import org.apache.log4j.{Level, Logger} ...@@ -10,15 +8,13 @@ import org.apache.log4j.{Level, Logger}
import scopt.OptionParser import scopt.OptionParser
import com.gmei.lib.AbstractParams import com.gmei.lib.AbstractParams
import scala.collection.mutable.ArrayBuffer
object strategy_clk_imp_oldUser { object strategy_clk_imp_oldUser {
Logger.getLogger("org.apache.spark").setLevel(Level.WARN) Logger.getLogger("org.apache.spark").setLevel(Level.WARN)
Logger.getLogger("org.apache.eclipse.jetty.server").setLevel(Level.OFF) Logger.getLogger("org.apache.eclipse.jetty.server").setLevel(Level.OFF)
case class Params(env: String = "dev" case class Params(env: String = "dev",
//date: String = "2018-08-01" date: String = "2018-08-01"
) extends AbstractParams[Params] with Serializable ) extends AbstractParams[Params] with Serializable
val defaultParams = Params() val defaultParams = Params()
...@@ -28,9 +24,9 @@ object strategy_clk_imp_oldUser { ...@@ -28,9 +24,9 @@ object strategy_clk_imp_oldUser {
opt[String]("env") opt[String]("env")
.text(s"the databases environment you used") .text(s"the databases environment you used")
.action((x, c) => c.copy(env = x)) .action((x, c) => c.copy(env = x))
// opt[String] ("date") opt[String] ("date")
// .text(s"the date you used") .text(s"the date you used")
//.action((x,c) => c.copy(date = x)) .action((x,c) => c.copy(date = x))
note( note(
""" """
|For example, the following command runs this app on a tidb dataset: |For example, the following command runs this app on a tidb dataset:
...@@ -41,28 +37,6 @@ object strategy_clk_imp_oldUser { ...@@ -41,28 +37,6 @@ object strategy_clk_imp_oldUser {
) )
} }
/**
 * Builds the list of calendar days from `startTime` to `endTime`, both inclusive.
 *
 * The previous hard-coded end date ("2017-10-10") was earlier than the start
 * date ("2018-10-07"), so the result was always empty; the default end date is
 * now in the same year as the start date.
 *
 * @param startTime first day of the range, formatted as yyyy-MM-dd
 * @param endTime   last day of the range (inclusive), formatted as yyyy-MM-dd
 * @return the days in ascending order, formatted as yyyy-MM-dd; empty when
 *         `startTime` is after `endTime`
 * @throws java.text.ParseException if either argument is not a valid yyyy-MM-dd date
 */
def get_date(startTime: String = "2018-10-07",
             endTime: String = "2018-10-10"): ArrayBuffer[String] = {
  val dateFormat = new SimpleDateFormat("yyyy-MM-dd")
  // Reject impossible dates such as 2018-13-40 instead of silently rolling them over.
  dateFormat.setLenient(false)

  val endDate = dateFormat.parse(endTime)
  val calendar = Calendar.getInstance()
  calendar.setTime(dateFormat.parse(startTime))

  val dateArray = ArrayBuffer.empty[String]
  // The calendar is the single cursor over the range; step one day at a time.
  while (!calendar.getTime.after(endDate)) {
    dateArray += dateFormat.format(calendar.getTime)
    calendar.add(Calendar.DAY_OF_MONTH, 1)
  }
  dateArray
}
def main(args: Array[String]): Unit = { def main(args: Array[String]): Unit = {
parser.parse(args, defaultParams).map { param => parser.parse(args, defaultParams).map { param =>
GmeiConfig.setup(param.env) GmeiConfig.setup(param.env)
...@@ -78,13 +52,9 @@ object strategy_clk_imp_oldUser { ...@@ -78,13 +52,9 @@ object strategy_clk_imp_oldUser {
import sc.implicits._ import sc.implicits._
// val stat_date = GmeiConfig.getMinusNDate(1) // val stat_date = GmeiConfig.getMinusNDate(1)
//println(param.date) println(param.date)
val date_array = get_date() val partition_date = param.date.replace("-","")
date_array.foreach(println)
for (elem <- date_array) {
val partition_date = elem.replace("-","")
println(partition_date)
val decive_id_oldUser = sc.sql( val decive_id_oldUser = sc.sql(
s""" s"""
|select distinct(device_id) as decive_id |select distinct(device_id) as decive_id
...@@ -109,57 +79,56 @@ object strategy_clk_imp_oldUser { ...@@ -109,57 +79,56 @@ object strategy_clk_imp_oldUser {
val strategies = Seq("[1|2]$","[3|4]$","[5|6]$","[7|8]$") val strategies = Seq("[1|2]$","[3|4]$","[5|6]$","[7|8]$")
for (strategy <- strategies){ for (strategy <- strategies){
println(strategy)
val clk_count_oldUser = sc.sql( val clk_count_oldUser = sc.sql(
s""" s"""
|select '${elem}' as stat_date, count(cid_id) as get_clk_count_old |select '${param.date}' as stat_date, count(cid_id) as get_clk_count_old
|from data_feed_click jd inner join device_id_old |from data_feed_click jd inner join device_id_old
|on jd.device_id = device_id_old.decive_id |on jd.device_id = device_id_old.decive_id
|where (jd.cid_type = 'diary' or jd.cid_type = 'diary_video') |where (jd.cid_type = 'diary' or jd.cid_type = 'diary_video')
|and jd.device_id regexp'${strategy}' |and jd.device_id regexp'${strategy}'
|and jd.device_id not in (select device_id from bl_device_list) |and jd.device_id not in (select device_id from bl_device_list)
|and jd.device_id not in (select device_id from blacklist) |and jd.device_id not in (select device_id from blacklist)
|and jd.stat_date ='${elem}' |and jd.stat_date ='${param.date}'
""".stripMargin """.stripMargin
) )
val imp_count_oldUser = sc.sql( val imp_count_oldUser = sc.sql(
s""" s"""
|select '${elem}' as stat_date, count(cid_id) as get_imp_count_old |select '${param.date}' as stat_date, count(cid_id) as get_imp_count_old
|from data_feed_exposure je inner join device_id_old |from data_feed_exposure je inner join device_id_old
|on je.device_id = device_id_old.decive_id |on je.device_id = device_id_old.decive_id
|where je.cid_type = 'diary' |where je.cid_type = 'diary'
|and je.device_id regexp'${strategy}' |and je.device_id regexp'${strategy}'
|and je.device_id not in (select device_id from bl_device_list) |and je.device_id not in (select device_id from bl_device_list)
|and je.device_id not in (select device_id from blacklist) |and je.device_id not in (select device_id from blacklist)
|and je.stat_date ='${elem}' |and je.stat_date ='${param.date}'
""".stripMargin """.stripMargin
) )
val clk_count_newUser = sc.sql( val clk_count_newUser = sc.sql(
s""" s"""
|select '${elem}' as stat_date, count(cid_id) as get_clk_count_newUser |select '${param.date}' as stat_date, count(cid_id) as get_clk_count_newUser
|from data_feed_click jd inner join device_id_newUser |from data_feed_click jd inner join device_id_newUser
|on jd.device_id = device_id_newUser.decive_id |on jd.device_id = device_id_newUser.decive_id
|where (jd.cid_type = 'diary' or jd.cid_type = 'diary_video') |where (jd.cid_type = 'diary' or jd.cid_type = 'diary_video')
|and jd.device_id regexp'${strategy}' |and jd.device_id regexp'${strategy}'
|and jd.device_id not in (select device_id from bl_device_list) |and jd.device_id not in (select device_id from bl_device_list)
|and jd.device_id not in (select device_id from blacklist) |and jd.device_id not in (select device_id from blacklist)
|and jd.stat_date ='${elem}' |and jd.stat_date ='${param.date}'
""".stripMargin """.stripMargin
) )
val imp_count_newUser = sc.sql( val imp_count_newUser = sc.sql(
s""" s"""
|select '${elem}' as stat_date, count(cid_id) as get_imp_count_newUser |select '${param.date}' as stat_date, count(cid_id) as get_imp_count_newUser
|from data_feed_exposure je inner join device_id_newUser |from data_feed_exposure je inner join device_id_newUser
|on je.device_id = device_id_newUser.decive_id |on je.device_id = device_id_newUser.decive_id
|where je.cid_type = 'diary' |where je.cid_type = 'diary'
|and je.device_id regexp'${strategy}' |and je.device_id regexp'${strategy}'
|and je.device_id not in (select device_id from bl_device_list) |and je.device_id not in (select device_id from bl_device_list)
|and je.device_id not in (select device_id from blacklist) |and je.device_id not in (select device_id from blacklist)
|and je.stat_date ='${elem}' |and je.stat_date ='${param.date}'
""".stripMargin """.stripMargin
) )
imp_count_newUser.show() imp_count_newUser.show()
...@@ -172,7 +141,8 @@ object strategy_clk_imp_oldUser { ...@@ -172,7 +141,8 @@ object strategy_clk_imp_oldUser {
} }
}
} }
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment