Commit 9cde0cf4 authored by 王志伟

统计运维想要数据

parent f7f93b81
......@@ -66,111 +66,36 @@ object app_list {
// Run date with every '-' removed; interpolated into the SQL below as the
// partition_date filter value.
val partition_date = param.date.replace("-","")
println(partition_date)
// Custom UDF used to add a column to a DataFrame.
// Returns the partition date as an Int for any non-null input and 0 for a null input.
// The previous body called `arg.getClass` directly, which throws a
// NullPointerException when the input cell is null; a String-typed argument can
// never be an instance of another class, so the class-name comparison only ever
// mattered for null. Wrapping in Option makes the intended "else 0" branch reachable.
val code = (arg: String) => Option(arg).fold(0)(_ => partition_date.toInt)
val addCol = udf(code)
// End of the UDF definition.
// Agency (institution) IDs
// NOTE(review): this region looks like a rendered diff with removed and added
// lines interleaved: two `sc.sql(` calls are opened back to back, and the single
// SQL string below contains a UNION ALL query over the ml_hospital_spam_pv_*
// tables immediately followed by a second, separate `select` over
// ml_device_day_active_status (with an unmatched ')' after the active_type test).
// It does not parse as written — confirm against the real file before editing.
val agency_id = sc.sql(
// Daily new users
val device_id_newUser = sc.sql(
s"""
|SELECT DISTINCT(cl_id) as device_id
|FROM online.ml_hospital_spam_pv_day
|WHERE partition_date >= '20180402'
|AND partition_date <= '20181120'
|AND pv_ratio >= 0.95
|UNION ALL
|SELECT DISTINCT(cl_id) as device_id
|FROM online.ml_hospital_spam_pv_month
|WHERE partition_date >= '20171101'
|AND partition_date <= '20181120'
|AND pv_ratio >= 0.95
|select distinct(device_id) as device_id
|from online.ml_device_day_active_status
|where active_type = '1' or active_type='2')
|and first_channel_source_type not in ('yqxiu1','yqxiu2','yqxiu3','yqxiu4','yqxiu5','mxyc1','mxyc2','mxyc3'
| ,'wanpu','jinshan','jx','maimai','zhuoyi','huatian','suopingjingling','mocha','mizhe','meika','lamabang'
| ,'js-az1','js-az2','js-az3','js-az4','js-az5','jfq-az1','jfq-az2','jfq-az3','jfq-az4','jfq-az5','toufang1'
| ,'toufang2','toufang3','toufang4','toufang5','toufang6','TF-toufang1','TF-toufang2','TF-toufang3','TF-toufang4'
| ,'TF-toufang5','tf-toufang1','tf-toufang2','tf-toufang3','tf-toufang4','tf-toufang5','benzhan','promotion_aso100'
| ,'promotion_qianka','promotion_xiaoyu','promotion_dianru','promotion_malioaso','promotion_malioaso-shequ'
| ,'promotion_shike','promotion_julang_jl03','','unknown','promotion_zuimei')
|and partition_date ='${partition_date}'
""".stripMargin
)
// Print a sample of the agency_id result and expose it to later SQL as the
// temp view "agency_id".
agency_id.show()
agency_id.createOrReplaceTempView("agency_id")
// Expose the new-user device ids to later SQL as the temp view "device_id_new".
device_id_newUser.createOrReplaceTempView("device_id_new")
// Get the device_ids of users who overlap with users of the app whose name is
// matched by the "新氧美容" filter below.
// The query reads the "user_installed_all_app_info" events for the run's partition,
// left-joins the agency_id view and keeps only rows with no agency match
// (agency_id.device_id is null), and also drops any cl_id present in `blacklist`.
// NOTE(review): the `blacklist` view is not created anywhere in the visible code —
// presumably registered earlier in the file; confirm.
val app_list = sc.sql(
s"""
|select distinct(cl_id) as device_id, user_id as user_id, params['installed_app_info'] as app_list,channel
|from online.tl_hdfs_maidian_view ov left join agency_id
|on ov.cl_id = agency_id.device_id
|where ov.action="user_installed_all_app_info"
|and ov.partition_date = '${partition_date}'
|and agency_id.device_id is null
|and ov.cl_id not in (select distinct(device_id) from blacklist)
""".stripMargin
)
//app_list.show()
// Needed for the .toDF call on a local List below.
import sc.implicits._
// Turn each row into a 4-tuple of strings, keep the rows whose app_list text
// contains "新氧美容", pull the survivors back to the driver with collect(), and
// rebuild a DataFrame with the same four column names.
// NOTE(review): `x(i).toString` throws a NullPointerException if any of the four
// columns is null in a row, and collect() materialises every matching row on the
// driver — confirm both are acceptable for the data volume.
val rdd_df = app_list.rdd.map(x =>(x(0).toString,x(1).toString,x(2).toString,x(3).toString))
.filter(x => x._3.contains("新氧美容")).map(x => (x._1,x._2,x._3,x._4)).collect().toList.toDF("device_id","user_id","app_list","channel")
rdd_df.show()
//rdd_df.withColumn("stat_date",addCol(rdd_df("device_id")))
// Expose the filtered rows to later SQL as the temp view "device_id".
rdd_df.createOrReplaceTempView("device_id")
// NOTE(review): the SQL string below contains two statements back to back
// (`select * from device_id` and a second `select ... inner join device_id_new`),
// which looks like removed and added diff lines rendered together; it is not a
// single valid query as written. The second statement also interpolates
// `stat_date`, which is not defined in the visible code — confirm against the
// real file.
val temp = sc.sql(
s"""
|select *
|from device_id
|select '${stat_date}' as stat_date,a.params as app_list
|from online.tl_hdfs_maidian_view a
|inner join device_id_new b
|on a.cl_id=b.device_id
|where a.partition_date ='${partition_date}'
""".stripMargin
)
// Add a "stat_date" column computed by the addCol UDF from the device_id column,
// print a sample, then hand the result to GmeiConfig.writeToJDBCTable for the
// table name "device_id_coincidence" with SaveMode.Append.
// NOTE(review): writeToJDBCTable is a project helper not visible here; presumably
// it appends the rows to that JDBC table — confirm.
val tempp=temp.withColumn("stat_date",addCol(temp("device_id")))
tempp.show()
GmeiConfig.writeToJDBCTable(tempp, "device_id_coincidence", SaveMode.Append)
// device_id of every user for whom an installed-app list was obtained.
// Same source table and filters as the `app_list` query: events with
// action "user_installed_all_app_info" in the run's partition, excluding rows that
// match the agency_id view (left join + `is null`) and any cl_id found in `blacklist`.
// NOTE(review): `blacklist` is not created in the visible code — confirm it is
// registered earlier in the file.
val app_list_all = sc.sql(
s"""
|select distinct(cl_id) as device_id, user_id as user_id,params['installed_app_info'] as app_list,channel
|from online.tl_hdfs_maidian_view ov left join agency_id
|on ov.cl_id = agency_id.device_id
|where action="user_installed_all_app_info"
|and agency_id.device_id is null
|and ov.partition_date = '${partition_date}'
|and ov.cl_id not in (select distinct(device_id) from blacklist)
""".stripMargin
)
// Add a "stat_date" column via the addCol UDF and pass the result to
// GmeiConfig.writeToJDBCTable for the table name "device_id_applist" (SaveMode.Append).
val tempp_list=app_list_all.withColumn("stat_date",addCol(app_list_all("device_id")))
GmeiConfig.writeToJDBCTable(tempp_list, "device_id_applist", SaveMode.Append)
// List of users who have made purchases on Gengmei.
// Selects distinct device ids from the order-detail table that have a non-null
// pay_time on or after '2017-11-18', excluding rows that match the agency_id view
// and any device_id found in `blacklist`.
// NOTE(review): the partition ('20181118') and the pay_time lower bound
// ('2017-11-18') are hard-coded rather than derived from partition_date —
// confirm this is intentional.
val device_id_meigou = sc.sql(
s"""
|select DISTINCT(od.device_id) as device_id
|from online.ml_meigou_order_detail od left join agency_id
|on od.device_id = agency_id.device_id
|where od.partition_date = '20181118'
|and od.pay_time is not null
|and od.pay_time >= '2017-11-18'
|and agency_id.device_id is null
|and od.device_id not in (select distinct(device_id) from blacklist)
""".stripMargin
)
// Expose the paying-user device ids to later SQL as the temp view "device_id_meigou".
device_id_meigou.createOrReplaceTempView("device_id_meigou")
// Installed-app events for the run's partition, restricted to devices present in
// the device_id_meigou view: the left join followed by
// `device_id_meigou.device_id is not null` keeps only rows that found a match.
val app_list_meigou = sc.sql(
s"""
|select distinct(ov.cl_id) as device_id, user_id as user_id,params['installed_app_info'] as app_list,channel
|from online.tl_hdfs_maidian_view ov left join device_id_meigou
|on ov.cl_id = device_id_meigou.device_id
|where ov.action="user_installed_all_app_info"
|and device_id_meigou.device_id is not null
|and ov.partition_date = '${partition_date}'
""".stripMargin
)
// Add a "stat_date" column via the addCol UDF and pass the result to
// GmeiConfig.writeToJDBCTable for the table name "device_id_applist_meigou".
val applist_meigou=app_list_meigou.withColumn("stat_date",addCol(app_list_meigou("device_id")))
GmeiConfig.writeToJDBCTable(applist_meigou, "device_id_applist_meigou", SaveMode.Append)
// Also passes the `app_list` query result (the version before the "新氧美容"
// filter, and without a stat_date column) to the table name "app_list_temp_yunying".
GmeiConfig.writeToJDBCTable(app_list, "app_list_temp_yunying", SaveMode.Append)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment