Commit cb0a1bf4 authored by 王志伟's avatar 王志伟

新统计需求

parent ca97e747
package com.gmei
import java.io.Serializable
import org.apache.spark.sql.functions.udf
import org.apache.spark.sql.functions.udf
import com.gmei.WeafareStat.{defaultParams, parser}
import org.apache.spark.sql.{SaveMode, TiContext}
import org.apache.log4j.{Level, Logger}
import scopt.OptionParser
import com.gmei.lib.AbstractParams
import java.io._
import java.text.SimpleDateFormat
import java.util.Calendar
import org.apache.spark
......@@ -100,8 +102,165 @@ object app_list {
GmeiConfig.writeToJDBCTable(tempp, "device_id_contain", SaveMode.Append)
}
}
}
//以下object进行数据统计分析
object coincidence_xinyang {
// Limit the "org.apache.spark" logger to WARN and above.
Logger.getLogger("org.apache.spark").setLevel(Level.WARN)
// Switch the logger named "org.apache.eclipse.jetty.server" off completely.
// NOTE(review): Jetty's own package is org.eclipse.jetty; confirm that this logger
// name actually matches a logger used at runtime.
Logger.getLogger("org.apache.eclipse.jetty.server").setLevel(Level.OFF)
/**
 * Command-line parameters of the coincidence_xinyang job.
 *
 * @param env  database environment name; main passes it to GmeiConfig.setup
 * @param date value of the --date option; the visible part of main does not read it
 *             (the only uses are commented out), the job works from the system clock
 */
case class Params(env: String = "dev",
date: String = "2018-08-01"
) extends AbstractParams[Params] with Serializable
// Default parameter values; also the starting point handed to parser.parse in main.
val defaultParams = Params()

/**
 * scopt parser for the command-line options of this job.
 *
 *  - `--env`  database environment, stored in Params.env
 *  - `--date` date string, stored in Params.date
 */
val parser = new OptionParser[Params]("Feed_EDA") {
  // Usage header names this job (it used to carry the name of the WeafareStat job).
  head("coincidence_xinyang")
  opt[String]("env")
    .text("the databases environment you used")
    .action((x, c) => c.copy(env = x))
  opt[String]("date")
    .text("the date you used")
    .action((x, c) => c.copy(date = x))
  // The margin-delimited part is a plain (non-interpolated) literal so the trailing
  // backslash of the shell example stays literal. The --env line is appended after
  // stripMargin, so it must not carry a '|' of its own; previously that '|' was
  // printed verbatim in the help output.
  note(
    """
      |For example, the following command runs this app on a tidb dataset:
      |
      | spark-submit --class com.gmei.coincidence_xinyang ./target/scala-2.11/feededa-assembly-0.1.jar \
      |""".stripMargin +
      s"  --env ${defaultParams.env}"
  )
}
def main(args: Array[String]): Unit = {
parser.parse(args, defaultParams).map { param =>
// Configure GmeiConfig for the requested environment and obtain the session:
// the second element of the tuple returned by getSparkSession() is what the
// TiContext is built on.
GmeiConfig.setup(param.env)
val spark_env = GmeiConfig.getSparkSession()
val sc = spark_env._2
val ti = new TiContext(sc)

// Map the TiDB tables (database -> table), in the same order as before.
Seq(
  "jerry_prod" -> "diary_video",
  "jerry_prod" -> "data_feed_click",
  "jerry_prod" -> "blacklist",
  "jerry_test" -> "bl_device_list",
  "jerry_prod" -> "data_feed_exposure",
  "jerry_prod" -> "device_id_contain"
).foreach { case (database, table) =>
  ti.tidbMapTable(dbName = database, tableName = table)
}
//println(param.date)
//val partition_date = param.date.replace("-","")
//println(partition_date)
// All three reference dates are derived from a single clock reading, so they stay
// consistent with each other even when the job starts right around midnight.
val referenceTime = Calendar.getInstance().getTime

// Returns the date `days` days before the reference time, formatted as yyyy-MM-dd.
def daysAgo(days: Int): String = {
  val cal = Calendar.getInstance()
  cal.setTime(referenceTime)
  cal.add(Calendar.DATE, -days)
  new SimpleDateFormat("yyyy-MM-dd").format(cal.getTime)
}

// Yesterday's date.
def getYesterDay(): String = daysAgo(1)
val yesterday = getYesterDay()
println(yesterday)

// The date one week (7 days) ago.
def getWeekDay(): String = daysAgo(7)
val pre_weekday = getWeekDay()
println(pre_weekday)

// The date one month (30 days) ago.
def getMonthDay(): String = daysAgo(30)
val pre_monthday = getMonthDay()
println(pre_monthday)
// Builds one row of overlap statistics (daily / weekly / monthly) and appends it to
// the coincidence_xinyang table. Every query emits a single row keyed by the same
// stat_date literal, which is what the final joins rely on.
// device_id_contain is filtered with dash-less dates (yyyyMMdd), data_feed_exposure
// with dashed dates (yyyy-MM-dd).

// --- Daily: share of active users that overlap with Xinyang users ---
// 1. Number of overlapping devices for yesterday.
val data = yesterday.replace("-", "")
val yesterday_coincidence = sc.sql(
  s"""
     |select '${data}' as stat_date,count(distinct(device_id)) as yesterday_coincidence_num
     |from device_id_contain
     |where stat_date = '${data}'
   """.stripMargin
)
// 2. Total active devices for yesterday (measured through feed exposure).
val yesterday_expoure_num = sc.sql(
  s"""
     |select '${data}' as stat_date,count(distinct(device_id)) as yesterday_expoure_num
     |from data_feed_exposure
     |where stat_date = '${yesterday}'
     |and device_type !='App Store'
   """.stripMargin
)

// --- Weekly: share of active users that overlap with Xinyang users ---
// 1. Number of overlapping devices over the last week.
val week_day = pre_weekday.replace("-", "")
val week_coincidence = sc.sql(
  s"""
     |select '${data}' as stat_date,count(distinct(device_id)) as week_coincidence_num
     |from device_id_contain
     |where stat_date > '${week_day}'
   """.stripMargin
)
// 2. Total active devices over the last week (measured through feed exposure).
val week_expoure_num = sc.sql(
  s"""
     |select '${data}' as stat_date,count(distinct(device_id)) as week_expoure_num
     |from data_feed_exposure
     |where stat_date > '${pre_weekday}'
     |and device_type !='App Store'
   """.stripMargin
)

// --- Monthly: share of active users that overlap with Xinyang users ---
// 1. Number of overlapping devices over the last month.
val month_day = pre_monthday.replace("-", "")
val month_coincidence = sc.sql(
  s"""
     |select '${data}' as stat_date,count(distinct(device_id)) as month_coincidence_num
     |from device_id_contain
     |where stat_date > '${month_day}'
   """.stripMargin
)
// 2. Total active devices over the last month (measured through feed exposure).
// The filter is a range ('>'), matching the weekly query and the monthly coincidence
// query; it used to be '=', which counted only the single day 30 days ago.
val month_expoure_num = sc.sql(
  s"""
     |select '${data}' as stat_date,count(distinct(device_id)) as month_expoure_num
     |from data_feed_exposure
     |where stat_date > '${pre_monthday}'
     |and device_type !='App Store'
   """.stripMargin
)

// Combine the six single-row results on their common stat_date and append the row.
val result = yesterday_coincidence.join(yesterday_expoure_num, "stat_date")
  .join(week_coincidence, "stat_date")
  .join(week_expoure_num, "stat_date")
  .join(month_coincidence, "stat_date")
  .join(month_expoure_num, "stat_date")
GmeiConfig.writeToJDBCTable(result, "coincidence_xinyang", SaveMode.Append)
}
......
......@@ -70,9 +70,6 @@ object testt {
}
}
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment