Commit d75dbdb2 authored by 张彦钊

Merge branch 'master' of git.wanmeizhensuo.com:ML/ffm-baseline

把esmm训练集改成train_data
parents 64803aa2 15fe67a5
...@@ -312,7 +312,7 @@ object Repeated_content_recommendation_moreday { ...@@ -312,7 +312,7 @@ object Repeated_content_recommendation_moreday {
val now= new Date() val now= new Date()
val dateFormat = new SimpleDateFormat("yyyy-MM-dd") val dateFormat = new SimpleDateFormat("yyyy-MM-dd")
val date = dateFormat.format(now.getTime - 86400000L * 15) val date = dateFormat.format(now.getTime - 86400000L * 8)
val yesterday=dateFormat.format(now.getTime- 86400000L) val yesterday=dateFormat.format(now.getTime- 86400000L)
...@@ -342,6 +342,7 @@ object Repeated_content_recommendation_moreday { ...@@ -342,6 +342,7 @@ object Repeated_content_recommendation_moreday {
val repeated_rate= fenmu / fenzi.toDouble val repeated_rate= fenmu / fenzi.toDouble
val result=List((yesterday,repeated_rate)) val result=List((yesterday,repeated_rate))
println(result)
val df_result = sc.createDataFrame(result) val df_result = sc.createDataFrame(result)
GmeiConfig.writeToJDBCTable(df_result, table = "Repeated_content_recommendation_moreday", SaveMode.Append) GmeiConfig.writeToJDBCTable(df_result, table = "Repeated_content_recommendation_moreday", SaveMode.Append)
...@@ -351,10 +352,7 @@ object Repeated_content_recommendation_moreday { ...@@ -351,10 +352,7 @@ object Repeated_content_recommendation_moreday {
// GmeiConfig.writeToJDBCTable(df, table = "Repeated_evaluation_indicator_moreday", SaveMode.Append) // GmeiConfig.writeToJDBCTable(df, table = "Repeated_evaluation_indicator_moreday", SaveMode.Append)
} }
} }
} }
...@@ -640,3 +638,231 @@ object GetHiveSearchData { ...@@ -640,3 +638,231 @@ object GetHiveSearchData {
} }
/**
 * Spark job that, for one statistics date, relates diary-card clicks and feed
 * exposures to the day's new and old devices and appends the joined figures to
 * the JDBC table `device_clk_imp_reason`.
 *
 * Command-line options (see [[find_reason.parser]]):
 *  - `--env`  database environment handed to `GmeiConfig.setup`
 *  - `--date` statistics date in `yyyy-MM-dd` form; the Hive partition key is
 *             derived from it by removing the dashes
 */
object find_reason {

  Logger.getLogger("org.apache.spark").setLevel(Level.WARN)
  // Jetty's classes live under `org.eclipse.jetty`; the previous logger name
  // ("org.apache.eclipse.jetty.server") matched no package, so it silenced nothing.
  // NOTE(review): Spark builds that shade Jetty use a different package prefix —
  // confirm which logger name actually applies to the deployed Spark version.
  Logger.getLogger("org.eclipse.jetty.server").setLevel(Level.OFF)

  /**
   * Command-line parameters.
   *
   * @param env  database environment passed to `GmeiConfig.setup`
   * @param date statistics date, `yyyy-MM-dd`
   */
  case class Params(env: String = "dev",
                    date: String = "2018-08-01"
                   ) extends AbstractParams[Params] with Serializable

  val defaultParams = Params()

  // NOTE(review): the program name, the head line and the example class below
  // ("Feed_EDA", "WeafareStat", "com.gmei.WeafareStat") look copied from another job;
  // they are left untouched because the intended names cannot be confirmed from here.
  val parser = new OptionParser[Params]("Feed_EDA") {
    head("WeafareStat")
    opt[String]("env")
      .text(s"the databases environment you used")
      .action((x, c) => c.copy(env = x))
    opt[String]("date")
      .text(s"the date you used")
      .action((x, c) => c.copy(date = x))
    // stripMargin must be applied to the concatenated text: applied to the first
    // literal only, the '|' of the appended "--env" line was printed verbatim.
    note(
      ("""
         |For example, the following command runs this app on a tidb dataset:
         |
         | spark-submit --class com.gmei.WeafareStat ./target/scala-2.11/feededa-assembly-0.1.jar \
         """ + s"| --env ${defaultParams.env}").stripMargin
    )
  }

  /** Values of `first_channel_source_type` that are excluded from both user lists. */
  private val ExcludedChannelSources: Seq[String] = Seq(
    "yqxiu1", "yqxiu2", "yqxiu3", "yqxiu4", "yqxiu5", "mxyc1", "mxyc2", "mxyc3",
    "wanpu", "jinshan", "jx", "maimai", "zhuoyi", "huatian", "suopingjingling", "mocha", "mizhe", "meika", "lamabang",
    "js-az1", "js-az2", "js-az3", "js-az4", "js-az5", "jfq-az1", "jfq-az2", "jfq-az3", "jfq-az4", "jfq-az5", "toufang1",
    "toufang2", "toufang3", "toufang4", "toufang5", "toufang6", "TF-toufang1", "TF-toufang2", "TF-toufang3", "TF-toufang4",
    "TF-toufang5", "tf-toufang1", "tf-toufang2", "tf-toufang3", "tf-toufang4", "tf-toufang5", "benzhan", "promotion_aso100",
    "promotion_qianka", "promotion_xiaoyu", "promotion_dianru", "promotion_malioaso", "promotion_malioaso-shequ",
    "promotion_shike", "promotion_julang_jl03", "", "unknown"
  )

  /** The exclusion list rendered as a single-line, comma-separated SQL literal list. */
  private val ExcludedChannelSql: String =
    ExcludedChannelSources.map(source => s"'$source'").mkString(",")

  /**
   * Builds the query selecting the distinct, non-blacklisted devices of one partition.
   *
   * @param partitionDate       partition key (`yyyyMMdd`)
   * @param activeTypePredicate SQL predicate applied to `os.active_type`,
   *                            e.g. `= '4'` or `!= '4'`
   * @return the SQL text; it expects a `blacklist` view to be registered
   */
  private def activeDevicesSql(partitionDate: String, activeTypePredicate: String): String =
    s"""
       |select distinct(os.device_id) as device_id
       |from online.ml_device_day_active_status os left join blacklist
       |on os.device_id = blacklist.device_id
       |where os.active_type ${activeTypePredicate}
       |and os.first_channel_source_type not in (${ExcludedChannelSql})
       |and os.partition_date ='${partitionDate}'
       |and blacklist.device_id is null
     """.stripMargin

  /**
   * Entry point. Parses the arguments and runs the job; exits with status 1 when
   * the arguments cannot be parsed, so a mis-configured submission is reported as
   * a failure instead of finishing silently without writing anything.
   */
  def main(args: Array[String]): Unit = {
    parser.parse(args, defaultParams) match {
      case Some(param) => run(param)
      case None => sys.exit(1)
    }
  }

  /** Runs every query for `param.date` and appends the joined result over JDBC. */
  private def run(param: Params): Unit = {
    GmeiConfig.setup(param.env)
    val spark_env = GmeiConfig.getSparkSession()
    val sc = spark_env._2

    val ti = new TiContext(sc)
    ti.tidbMapTable(dbName = "jerry_prod", tableName = "diary_video")
    ti.tidbMapTable(dbName = "jerry_prod", tableName = "data_feed_click")
    ti.tidbMapTable(dbName = "jerry_prod", tableName = "blacklist")
    ti.tidbMapTable(dbName = "jerry_prod", tableName = "data_feed_exposure_precise")

    // val stat_date = GmeiConfig.getMinusNDate(1)
    val stat_date = param.date
    val partition_date = stat_date.replace("-", "")

    val blacklist = sc.sql(
      s"""
         |select device_id from blacklist
       """.stripMargin
    )
    blacklist.createOrReplaceTempView("blacklist")

    // Agency (institution) device ids: devices with pv_ratio >= 0.95 in the daily
    // or monthly hospital-spam tables.
    val agency_id = sc.sql(
      s"""
         |SELECT DISTINCT(cl_id) as device_id
         |FROM online.ml_hospital_spam_pv_day
         |WHERE partition_date >= '20180402'
         |AND partition_date <= '${partition_date}'
         |AND pv_ratio >= 0.95
         |UNION ALL
         |SELECT DISTINCT(cl_id) as device_id
         |FROM online.ml_hospital_spam_pv_month
         |WHERE partition_date >= '20171101'
         |AND partition_date <= '${partition_date}'
         |AND pv_ratio >= 0.95
       """.stripMargin
    )
    agency_id.show()
    agency_id.createOrReplaceTempView("agency_id")

    // Daily new users (active_type != '4').
    val device_id_newUser = sc.sql(activeDevicesSql(partition_date, "!= '4'"))
    device_id_newUser.show()
    device_id_newUser.createOrReplaceTempView("device_id_new")

    // Daily old users (active_type = '4').
    val device_id_oldUser = sc.sql(activeDevicesSql(partition_date, "= '4'"))
    device_id_oldUser.show()
    device_id_oldUser.createOrReplaceTempView("device_id_old")

    // Every diary-card click of the day, with agency devices removed (anti-join).
    val all_clk = sc.sql(
      s"""
         |select '${stat_date}' as stat_date,ov.cl_id as device_id
         |from online.tl_hdfs_maidian_view ov left join agency_id
         |on ov.cl_id = agency_id.device_id
         |where ov.action = 'on_click_diary_card'
         |and ov.cl_id != "NULL"
         |and ov.partition_date='${partition_date}'
         |and agency_id.device_id is null
       """.stripMargin
    )
    all_clk.show()
    all_clk.createOrReplaceTempView("all_clk_diary_card")

    // 1. Clicks made by the day's old users. count(device_id) counts click rows,
    //    not distinct devices.
    val old_clk_count = sc.sql(
      s"""
         |select '${stat_date}' as stat_date,count(oc.device_id) as old_clk_count
         |from all_clk_diary_card oc inner join device_id_old
         |on oc.device_id = device_id_old.device_id
         |group by stat_date
       """.stripMargin
    )
    old_clk_count.show()

    // 1.1 Old users that clicked at least once.
    val old_clk_device = sc.sql(
      s"""
         |select distinct(oc.device_id) as device_id
         |from all_clk_diary_card oc inner join device_id_old
         |on oc.device_id = device_id_old.device_id
       """.stripMargin
    )
    old_clk_device.createOrReplaceTempView("old_clk_device")

    // 2. Clicks made by the day's new users (click rows, as in step 1).
    val new_clk_count = sc.sql(
      s"""
         |select '${stat_date}' as stat_date,count(oc.device_id) as new_clk_count
         |from all_clk_diary_card oc inner join device_id_new
         |on oc.device_id = device_id_new.device_id
         |group by stat_date
       """.stripMargin
    )

    // 2.1 New users that clicked at least once.
    val new_clk_device = sc.sql(
      s"""
         |select distinct(oc.device_id) as device_id
         |from all_clk_diary_card oc inner join device_id_new
         |on oc.device_id = device_id_new.device_id
       """.stripMargin
    )
    new_clk_device.createOrReplaceTempView("new_clk_device")

    // 3. Number of old users of the day. Agency devices are excluded with the same
    //    anti-join used for the clicks; the previous inner join counted ONLY agency
    //    devices, so the user counts and click counts described different populations.
    val old_count = sc.sql(
      s"""
         |select '${stat_date}' as stat_date,count(distinct(dio.device_id)) as old_count
         |from device_id_old dio left join agency_id
         |on dio.device_id = agency_id.device_id
         |where agency_id.device_id is null
       """.stripMargin
    )

    // 4. Number of new users of the day (agency devices excluded, as in step 3).
    val new_count = sc.sql(
      s"""
         |select '${stat_date}' as stat_date,count(distinct(din.device_id)) as new_count
         |from device_id_new din left join agency_id
         |on din.device_id = agency_id.device_id
         |where agency_id.device_id is null
       """.stripMargin
    )

    // 5. Exposure rows of the old users that clicked.
    val exp_clkold_count = sc.sql(
      s"""
         |select '${stat_date}' as stat_date,count(dp.device_id) as imp_clkold_count
         |from data_feed_exposure_precise dp inner join old_clk_device
         |on dp.device_id = old_clk_device.device_id
         |where stat_date='${stat_date}'
         |group by stat_date
       """.stripMargin
    )

    // 6. Exposure rows of the new users that clicked.
    val exp_clknew_count = sc.sql(
      s"""
         |select '${stat_date}' as stat_date,count(dp.device_id) as imp_clknew_count
         |from data_feed_exposure_precise dp inner join new_clk_device
         |on dp.device_id = new_clk_device.device_id
         |where stat_date='${stat_date}'
         |group by stat_date
       """.stripMargin
    )

    // Inner joins on stat_date: if any grouped query yields no row for the day
    // (e.g. no clicks at all), the joined result is empty and nothing is appended.
    val result = old_clk_count.join(new_clk_count, "stat_date")
      .join(old_count, "stat_date")
      .join(new_count, "stat_date")
      .join(exp_clkold_count, "stat_date")
      .join(exp_clknew_count, "stat_date")

    GmeiConfig.writeToJDBCTable(result, "device_clk_imp_reason", SaveMode.Append)
  }
}
...@@ -76,22 +76,24 @@ object testt { ...@@ -76,22 +76,24 @@ object testt {
|AND pv_ratio >= 0.95 |AND pv_ratio >= 0.95
""".stripMargin """.stripMargin
) )
agency_id.show() // agency_id.show()
agency_id.createOrReplaceTempView("agency_id") agency_id.createOrReplaceTempView("agency_id")
//每日新用户 //每日新用户
val device_id_newUser = sc.sql( val device_id_newUser = sc.sql(
s""" s"""
|select distinct(device_id) as device_id |select distinct(os.device_id) as device_id
|from online.ml_device_day_active_status |from online.ml_device_day_active_status os left join blacklist
|where active_type != '4' |on os.device_id=blacklist.device_id
|and first_channel_source_type not in ('yqxiu1','yqxiu2','yqxiu3','yqxiu4','yqxiu5','mxyc1','mxyc2','mxyc3' |where os.active_type != '4'
|and os.first_channel_source_type not in ('yqxiu1','yqxiu2','yqxiu3','yqxiu4','yqxiu5','mxyc1','mxyc2','mxyc3'
| ,'wanpu','jinshan','jx','maimai','zhuoyi','huatian','suopingjingling','mocha','mizhe','meika','lamabang' | ,'wanpu','jinshan','jx','maimai','zhuoyi','huatian','suopingjingling','mocha','mizhe','meika','lamabang'
| ,'js-az1','js-az2','js-az3','js-az4','js-az5','jfq-az1','jfq-az2','jfq-az3','jfq-az4','jfq-az5','toufang1' | ,'js-az1','js-az2','js-az3','js-az4','js-az5','jfq-az1','jfq-az2','jfq-az3','jfq-az4','jfq-az5','toufang1'
| ,'toufang2','toufang3','toufang4','toufang5','toufang6','TF-toufang1','TF-toufang2','TF-toufang3','TF-toufang4' | ,'toufang2','toufang3','toufang4','toufang5','toufang6','TF-toufang1','TF-toufang2','TF-toufang3','TF-toufang4'
| ,'TF-toufang5','tf-toufang1','tf-toufang2','tf-toufang3','tf-toufang4','tf-toufang5','benzhan','promotion_aso100' | ,'TF-toufang5','tf-toufang1','tf-toufang2','tf-toufang3','tf-toufang4','tf-toufang5','benzhan','promotion_aso100'
| ,'promotion_qianka','promotion_xiaoyu','promotion_dianru','promotion_malioaso','promotion_malioaso-shequ' | ,'promotion_qianka','promotion_xiaoyu','promotion_dianru','promotion_malioaso','promotion_malioaso-shequ'
| ,'promotion_shike','promotion_julang_jl03','','unknown') | ,'promotion_shike','promotion_julang_jl03','','unknown')
|and partition_date ='${partition_date}' |and os.partition_date ='${partition_date}'
|and blacklist.device_id is null
""".stripMargin """.stripMargin
) )
device_id_newUser.show() device_id_newUser.show()
...@@ -103,19 +105,19 @@ object testt { ...@@ -103,19 +105,19 @@ object testt {
|select distinct(os.device_id) as device_id |select distinct(os.device_id) as device_id
|from online.ml_device_day_active_status os left join blacklist |from online.ml_device_day_active_status os left join blacklist
|on os.device_id=blacklist.device_id |on os.device_id=blacklist.device_id
|where active_type = '4' |where os.active_type = '4'
|and first_channel_source_type not in ('yqxiu1','yqxiu2','yqxiu3','yqxiu4','yqxiu5','mxyc1','mxyc2','mxyc3' |and os.first_channel_source_type not in ('yqxiu1','yqxiu2','yqxiu3','yqxiu4','yqxiu5','mxyc1','mxyc2','mxyc3'
| ,'wanpu','jinshan','jx','maimai','zhuoyi','huatian','suopingjingling','mocha','mizhe','meika','lamabang' | ,'wanpu','jinshan','jx','maimai','zhuoyi','huatian','suopingjingling','mocha','mizhe','meika','lamabang'
| ,'js-az1','js-az2','js-az3','js-az4','js-az5','jfq-az1','jfq-az2','jfq-az3','jfq-az4','jfq-az5','toufang1' | ,'js-az1','js-az2','js-az3','js-az4','js-az5','jfq-az1','jfq-az2','jfq-az3','jfq-az4','jfq-az5','toufang1'
| ,'toufang2','toufang3','toufang4','toufang5','toufang6','TF-toufang1','TF-toufang2','TF-toufang3','TF-toufang4' | ,'toufang2','toufang3','toufang4','toufang5','toufang6','TF-toufang1','TF-toufang2','TF-toufang3','TF-toufang4'
| ,'TF-toufang5','tf-toufang1','tf-toufang2','tf-toufang3','tf-toufang4','tf-toufang5','benzhan','promotion_aso100' | ,'TF-toufang5','tf-toufang1','tf-toufang2','tf-toufang3','tf-toufang4','tf-toufang5','benzhan','promotion_aso100'
| ,'promotion_qianka','promotion_xiaoyu','promotion_dianru','promotion_malioaso','promotion_malioaso-shequ' | ,'promotion_qianka','promotion_xiaoyu','promotion_dianru','promotion_malioaso','promotion_malioaso-shequ'
| ,'promotion_shike','promotion_julang_jl03','','unknown') | ,'promotion_shike','promotion_julang_jl03','','unknown')
|and partition_date ='${partition_date}' |and os.partition_date ='${partition_date}'
|and blacklist.device_id is null |and blacklist.device_id is null
""".stripMargin """.stripMargin
) )
device_id_oldUser.show() // device_id_oldUser.show()
device_id_oldUser.createOrReplaceTempView("device_id_old") device_id_oldUser.createOrReplaceTempView("device_id_old")
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment