Commit 257f49a2 authored by 张彦钊

Merge branch 'master' of git.wanmeizhensuo.com:ML/ffm-baseline

add app list
parents 93f7a4b9 de59ad73
...@@ -558,7 +558,7 @@ object GetDevicePortrait { ...@@ -558,7 +558,7 @@ object GetDevicePortrait {
| on COALESCE(a.params['diary_id'], a.params['business_id'], 0) = b.diary_id | on COALESCE(a.params['diary_id'], a.params['business_id'], 0) = b.diary_id
| where | where
| b.level1_ids is not null and | b.level1_ids is not null and
| a.partition_date = '20181224' | a.partition_date = '${stat_date}'
| and (a.action = 'on_click_diary_card' or (a.action="full_stack_click_video_card_full_screen_play" and a.params["card_type"]="diary"))) c | and (a.action = 'on_click_diary_card' or (a.action="full_stack_click_video_card_full_screen_play" and a.params["card_type"]="diary"))) c
|group by c.device_id,c.level1_id,c.stat_date |group by c.device_id,c.level1_id,c.stat_date
""".stripMargin """.stripMargin
......
...@@ -3,11 +3,12 @@ package com.gmei ...@@ -3,11 +3,12 @@ package com.gmei
import java.util.Properties import java.util.Properties
import java.io.Serializable import java.io.Serializable
import java.sql.{Connection, DriverManager}
import java.text.SimpleDateFormat import java.text.SimpleDateFormat
import java.util.{Calendar} import java.util.Calendar
import com.typesafe.config._ import com.typesafe.config._
import org.apache.spark.{SparkConf,SparkContext} import org.apache.spark.{SparkConf, SparkContext}
import org.apache.spark.sql.execution.datasources.jdbc.JDBCOptions import org.apache.spark.sql.execution.datasources.jdbc.JDBCOptions
import org.apache.spark.sql.{DataFrame, SaveMode, SparkSession} import org.apache.spark.sql.{DataFrame, SaveMode, SparkSession}
...@@ -77,6 +78,28 @@ object GmeiConfig extends Serializable { ...@@ -77,6 +78,28 @@ object GmeiConfig extends Serializable {
} }
/**
 * Writes every element of `iterator` into the `device_feat` table.
 *
 * Each tuple is bound positionally to the columns
 * (device_id, stat_date, level1_id, level1_count) and written with its own
 * `replace into` statement, so an existing row with the same key is overwritten.
 *
 * Failures while opening the connection or preparing the statement propagate
 * to the caller. Non-fatal failures while writing rows are reported on stdout
 * (best-effort semantics) and stop the remaining rows from being written.
 * The statement and the connection are always closed before returning.
 *
 * @param iterator rows to upsert; consumed exactly once
 */
def updateDeviceFeat(iterator: Iterator[(String,String,String,String)]): Unit = {
  import scala.util.control.NonFatal

  val sql = "replace into device_feat(device_id,stat_date,level1_id,level1_count) values(?,?,?,?)"

  // NOTE(review): the JDBC URL, user and password are hard-coded in source control.
  // They should be loaded from external configuration and the password rotated.
  val conn: Connection =
    DriverManager.getConnection("jdbc:mysql://10.66.157.22:4000/jerry_prod","root","3SYz54LS9#^9sBvC")
  try {
    val ps: java.sql.PreparedStatement = conn.prepareStatement(sql)
    try {
      iterator.foreach { row =>
        ps.setString(1, row._1)
        ps.setString(2, row._2)
        ps.setString(3, row._3)
        ps.setString(4, row._4)
        ps.executeUpdate()
      }
      println("update device feat done")
    } catch {
      // Only non-fatal errors are swallowed; OutOfMemoryError, InterruptedException
      // and similar must keep propagating.
      case NonFatal(e) =>
        println("update failed")
        // Surface the cause instead of discarding it.
        e.printStackTrace()
    } finally {
      ps.close()
    }
  } finally {
    // Closed even when prepareStatement or a row write fails, so no connection leaks.
    conn.close()
  }
}
def getMinusNDate(n: Int):String={ def getMinusNDate(n: Int):String={
var dateFormat:SimpleDateFormat = new SimpleDateFormat("yyyy-MM-dd") var dateFormat:SimpleDateFormat = new SimpleDateFormat("yyyy-MM-dd")
var cal:Calendar=Calendar.getInstance() var cal:Calendar=Calendar.getInstance()
......
...@@ -526,3 +526,154 @@ object meigou_xiaofei_renshu { ...@@ -526,3 +526,154 @@ object meigou_xiaofei_renshu {
} }
/**
 * Spark entry point that registers the spam/blacklist device views and prints a
 * small sample of `welfare_detail` page-view events.
 *
 * Steps performed by `main`:
 *  1. parse `--env` / `--date` and initialise `GmeiConfig` for the chosen environment;
 *  2. map the TiDB tables used by this job into the session;
 *  3. register the temp views `agency_id`, `blacklist_id` and `final_id`;
 *  4. query and show a sample of `page_view` events for `welfare_detail`.
 */
object smart_rank_count {

  // Reduce log noise from Spark and the embedded Jetty server.
  Logger.getLogger("org.apache.spark").setLevel(Level.WARN)
  Logger.getLogger("org.apache.eclipse.jetty.server").setLevel(Level.OFF)

  /** Command-line parameters: target environment and a date string. */
  case class Params(
    env: String = "dev",
    date: String = "2018-08-01"
  ) extends AbstractParams[Params] with Serializable

  val defaultParams = Params()

  val parser = new OptionParser[Params]("Feed_EDA") {
    head("WeafareStat")

    opt[String]("env")
      .text("the databases environment you used")
      .action((value, params) => params.copy(env = value))

    opt[String]("date")
      .text("the date you used")
      .action((value, params) => params.copy(date = value))

    note(
      """
        |For example, the following command runs this app on a tidb dataset:
        |
        | spark-submit --class com.gmei.WeafareStat ./target/scala-2.11/feededa-assembly-0.1.jar \
        |""".stripMargin +
        s"| --env ${defaultParams.env}"
    )
  }

  def main(args: Array[String]): Unit = {
    // Nothing runs when argument parsing fails (parse yields None).
    parser.parse(args, defaultParams).foreach { param =>
      GmeiConfig.setup(param.env)
      val sc = GmeiConfig.getSparkSession()._2

      // Expose the TiDB tables to Spark SQL, in the same order as before.
      val ti = new TiContext(sc)
      Seq(
        "jerry_prod" -> "diary_video",
        "jerry_prod" -> "data_feed_click",
        "jerry_prod" -> "blacklist",
        "jerry_test" -> "bl_device_list",
        "jerry_prod" -> "data_feed_exposure",
        "jerry_prod" -> "merge_queue_table"
      ).foreach { case (db, table) =>
        ti.tidbMapTable(dbName = db, tableName = table)
      }

      import sc.implicits._

      // Yesterday as yyyy-MM-dd, and the same day in the yyyyMMdd partition format.
      val stat_date = GmeiConfig.getMinusNDate(1)
      //println(param.date)
      val partition_date = stat_date.replace("-","")

      // Devices whose pv_ratio is >= 0.95 in the daily or monthly hospital spam tables.
      sc.sql(
        s"""
           |SELECT DISTINCT(cl_id) as device_id
           |FROM online.ml_hospital_spam_pv_day
           |WHERE partition_date >= '20180402'
           |AND partition_date <= '${partition_date}'
           |AND pv_ratio >= 0.95
           |UNION ALL
           |SELECT DISTINCT(cl_id) as device_id
           |FROM online.ml_hospital_spam_pv_month
           |WHERE partition_date >= '20171101'
           |AND partition_date <= '${partition_date}'
           |AND pv_ratio >= 0.95
           |""".stripMargin
      ).createOrReplaceTempView("agency_id")

      // Every device listed in the blacklist table.
      sc.sql(
        """
          |SELECT device_id
          |from blacklist
          |""".stripMargin
      ).createOrReplaceTempView("blacklist_id")

      // Concatenation of both device lists above.
      sc.sql(
        """
          |select device_id
          |from agency_id
          |UNION ALL
          |select device_id
          |from blacklist_id
          |""".stripMargin
      ).createOrReplaceTempView("final_id")

      // Sample of welfare_detail page views for a fixed partition; printed to stdout.
      val welfarePageViews = sc.sql(
        """
          |select *
          |from online.tl_hdfs_maidian_view
          |where action = "page_view"
          |and params['page_name']="welfare_detail"
          |and partition_date ='20181201'
          |LIMIT 3
          |""".stripMargin
      )
      welfarePageViews.show(80)

      // Disabled queries, kept as they were for reference.
      // val zixun_pv = sc.sql(
      // s"""
      // |select cl_id,city_id,params['service_id'] as meigou_id
      // |from online.tl_hdfs_maidian_view
      // |where partition_date > '20171118'
      // |and action = 'welfare_detail_click_message'
      // """.stripMargin
      // )
      // zixun_pv.show(80)
      // val meigou_price = sc.sql(
      // s"""
      // |select md.user_id,sum(md.gengmei_price) as pay_all
      // |from online.ml_meigou_order_detail md
      // |left join
      // |(
      // | SELECT
      // | order_id
      // | FROM mining.ml_order_spam_recognize
      // | WHERE partition_date='20181223' AND
      // | self_support=0 AND dayBitsGetW1(predict_result,'20181223')=0
      // |)spam
      // |on md.order_id = spam.order_id
      // |where md.status= 2
      // |and spam.order_id is null
      // |and md.partition_date = '20181223'
      // |and md.pay_time is not null
      // |and md.validate_time>'2017-01-01 00:00:00.0'
      // |group by md.user_id
      // |order by sum(md.gengmei_price)
      // """.stripMargin
      // )
      // GmeiConfig.writeToJDBCTable(meigou_price, "meigou_price", SaveMode.Overwrite)
    }
  }
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment