Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in
Toggle navigation
F
ffm-baseline
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
ML
ffm-baseline
Commits
9cde0cf4
Commit
9cde0cf4
authored
Jun 03, 2019
by
王志伟
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
统计运维想要数据
parent
f7f93b81
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
20 additions
and
95 deletions
+20
-95
app_list.scala
eda/feededa/src/main/scala/com/gmei/app_list.scala
+20
-95
No files found.
eda/feededa/src/main/scala/com/gmei/app_list.scala
View file @
9cde0cf4
...
...
@@ -66,111 +66,36 @@ object app_list {
val
partition_date
=
param
.
date
.
replace
(
"-"
,
""
)
println
(
partition_date
)
//自定义udf函数,增加dataframe 列
val
code
=
(
arg
:
String
)
=>
{
if
(
arg
.
getClass
.
getName
==
"java.lang.String"
)
partition_date
.
toInt
else
0.
toInt
}
val
addCol
=
udf
(
code
)
//以上为udf函数
//机构ID
val
agency_id
=
sc
.
sql
(
//每日新用户
val
device_id_newUser
=
sc
.
sql
(
s
"""
|
SELECT DISTINCT(cl
_id) as device_id
|
FROM online.ml_hospital_spam_pv_day
|
WHERE partition_date >= '20180402'
|
AND partition_date <= '20181120
'
|
AND pv_ratio >= 0.95
|
UNION ALL
|
SELECT DISTINCT(cl_id) as device_id
|
FROM online.ml_hospital_spam_pv_month
|
WHERE partition_date >= '20171101
'
|
AND partition_date <= '20181120'
|
AND pv_ratio >= 0.95
|
select distinct(device
_id) as device_id
|
from online.ml_device_day_active_status
|
where active_type = '1' or active_type='2')
|
and first_channel_source_type not in ('yqxiu1','yqxiu2','yqxiu3','yqxiu4','yqxiu5','mxyc1','mxyc2','mxyc3
'
|
,'wanpu','jinshan','jx','maimai','zhuoyi','huatian','suopingjingling','mocha','mizhe','meika','lamabang'
|
,'js-az1','js-az2','js-az3','js-az4','js-az5','jfq-az1','jfq-az2','jfq-az3','jfq-az4','jfq-az5','toufang1'
|
,'toufang2','toufang3','toufang4','toufang5','toufang6','TF-toufang1','TF-toufang2','TF-toufang3','TF-toufang4'
|
,'TF-toufang5','tf-toufang1','tf-toufang2','tf-toufang3','tf-toufang4','tf-toufang5','benzhan','promotion_aso100'
|
,'promotion_qianka','promotion_xiaoyu','promotion_dianru','promotion_malioaso','promotion_malioaso-shequ
'
|
,'promotion_shike','promotion_julang_jl03','','unknown','promotion_zuimei')
|
and partition_date ='${partition_date}'
"""
.
stripMargin
)
agency_id
.
show
()
agency_id
.
createOrReplaceTempView
(
"agency_id"
)
device_id_newUser
.
createOrReplaceTempView
(
"device_id_new"
)
//获取与新氧用户重合的用户device_id
val
app_list
=
sc
.
sql
(
s
"""
|select distinct(cl_id) as device_id, user_id as user_id, params['installed_app_info'] as app_list,channel
|from online.tl_hdfs_maidian_view ov left join agency_id
|on ov.cl_id = agency_id.device_id
|where ov.action="user_installed_all_app_info"
|and ov.partition_date = '${partition_date}'
|and agency_id.device_id is null
|and ov.cl_id not in (select distinct(device_id) from blacklist)
"""
.
stripMargin
)
//app_list.show()
import
sc.implicits._
val
rdd_df
=
app_list
.
rdd
.
map
(
x
=>(
x
(
0
).
toString
,
x
(
1
).
toString
,
x
(
2
).
toString
,
x
(
3
).
toString
))
.
filter
(
x
=>
x
.
_3
.
contains
(
"新氧美容"
)).
map
(
x
=>
(
x
.
_1
,
x
.
_2
,
x
.
_3
,
x
.
_4
)).
collect
().
toList
.
toDF
(
"device_id"
,
"user_id"
,
"app_list"
,
"channel"
)
rdd_df
.
show
()
//rdd_df.withColumn("stat_date",addCol(rdd_df("device_id")))
rdd_df
.
createOrReplaceTempView
(
"device_id"
)
val
temp
=
sc
.
sql
(
s
"""
|select *
|from device_id
|select '${stat_date}' as stat_date,a.params as app_list
|from online.tl_hdfs_maidian_view a
|inner join device_id_new b
|on a.cl_id=b.device_id
|where a.partition_date ='${partition_date}'
"""
.
stripMargin
)
val
tempp
=
temp
.
withColumn
(
"stat_date"
,
addCol
(
temp
(
"device_id"
)))
tempp
.
show
()
GmeiConfig
.
writeToJDBCTable
(
tempp
,
"device_id_coincidence"
,
SaveMode
.
Append
)
//所有获得应用列表的用户device_id
val
app_list_all
=
sc
.
sql
(
s
"""
|select distinct(cl_id) as device_id, user_id as user_id,params['installed_app_info'] as app_list,channel
|from online.tl_hdfs_maidian_view ov left join agency_id
|on ov.cl_id = agency_id.device_id
|where action="user_installed_all_app_info"
|and agency_id.device_id is null
|and ov.partition_date = '${partition_date}'
|and ov.cl_id not in (select distinct(device_id) from blacklist)
"""
.
stripMargin
)
val
tempp_list
=
app_list_all
.
withColumn
(
"stat_date"
,
addCol
(
app_list_all
(
"device_id"
)))
GmeiConfig
.
writeToJDBCTable
(
tempp_list
,
"device_id_applist"
,
SaveMode
.
Append
)
//在更美有消费的用户列表
val
device_id_meigou
=
sc
.
sql
(
s
"""
|select DISTINCT(od.device_id) as device_id
|from online.ml_meigou_order_detail od left join agency_id
|on od.device_id = agency_id.device_id
|where od.partition_date = '20181118'
|and od.pay_time is not null
|and od.pay_time >= '2017-11-18'
|and agency_id.device_id is null
|and od.device_id not in (select distinct(device_id) from blacklist)
"""
.
stripMargin
)
device_id_meigou
.
createOrReplaceTempView
(
"device_id_meigou"
)
val
app_list_meigou
=
sc
.
sql
(
s
"""
|select distinct(ov.cl_id) as device_id, user_id as user_id,params['installed_app_info'] as app_list,channel
|from online.tl_hdfs_maidian_view ov left join device_id_meigou
|on ov.cl_id = device_id_meigou.device_id
|where ov.action="user_installed_all_app_info"
|and device_id_meigou.device_id is not null
|and ov.partition_date = '${partition_date}'
"""
.
stripMargin
)
val
applist_meigou
=
app_list_meigou
.
withColumn
(
"stat_date"
,
addCol
(
app_list_meigou
(
"device_id"
)))
GmeiConfig
.
writeToJDBCTable
(
applist_meigou
,
"device_id_applist_meigou"
,
SaveMode
.
Append
)
GmeiConfig
.
writeToJDBCTable
(
app_list
,
"app_list_temp_yunying"
,
SaveMode
.
Append
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment