Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in
Toggle navigation
F
ffm-baseline
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
ML
ffm-baseline
Commits
098f5ac3
Commit
098f5ac3
authored
Nov 21, 2018
by
王志伟
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
修改统计参数
parent
416eb3a4
Show whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
236 additions
and
40 deletions
+236
-40
app_list.scala
eda/feededa/src/main/scala/com/gmei/app_list.scala
+56
-40
strategy_other.scala
eda/feededa/src/main/scala/com/gmei/strategy_other.scala
+180
-0
No files found.
eda/feededa/src/main/scala/com/gmei/app_list.scala
View file @
098f5ac3
...
@@ -194,6 +194,18 @@ object coincidence_xinyang {
...
@@ -194,6 +194,18 @@ object coincidence_xinyang {
val
pre_monthday
=
getMonthDay
()
val
pre_monthday
=
getMonthDay
()
println
(
pre_monthday
)
println
(
pre_monthday
)
//获取截止目前获得应用列表的用户的device_id
val
all_device_id
=
sc
.
sql
(
s
"""
|select distinct(cl_id) as device_id
|from online.tl_hdfs_maidian_view
|where action="user_installed_all_app_info"
|and partition_date >= '20181114'
|and cl_id not in (select device_id from bl_device_list)
|and cl_id not in (select device_id from blacklist)
"""
.
stripMargin
)
all_device_id
.
createOrReplaceTempView
(
"all_device_id"
)
//获取每日活跃用户中与新氧重合用户占比
//获取每日活跃用户中与新氧重合用户占比
...
@@ -206,16 +218,19 @@ object coincidence_xinyang {
...
@@ -206,16 +218,19 @@ object coincidence_xinyang {
|where stat_date = '${data}'
|where stat_date = '${data}'
"""
.
stripMargin
"""
.
stripMargin
)
)
//2.
每日活跃总用户数(采用曝光计算)
//2.
获得应用列表的每日活跃总用户数
val
yesterday_expoure_num
=
sc
.
sql
(
val
yesterday_expoure_num
=
sc
.
sql
(
s
"""
s
"""
|select '${data}' as stat_date,count(distinct(device_id)) as yesterday_expoure_num
|select '${data}' as stat_date,count(distinct(cl_id)) as yesterday_expoure_num
|from data_feed_exposure
|from online.tl_hdfs_maidian_view
|where stat_date = '${yesterday}'
|where partition_date = '${data}'
|and device_type !='App Store'
|and action="user_installed_all_app_info"
|and cl_id not in (select device_id from bl_device_list)
|and cl_id not in (select device_id from blacklist)
"""
.
stripMargin
"""
.
stripMargin
)
)
//获取每周活跃用户中与新氧重合用户占比
//获取每周活跃用户中与新氧重合用户占比
//1.每周活跃中与新氧重合用户数
//1.每周活跃中与新氧重合用户数
val
week_day
=
pre_weekday
.
replace
(
"-"
,
""
)
val
week_day
=
pre_weekday
.
replace
(
"-"
,
""
)
...
@@ -223,16 +238,18 @@ object coincidence_xinyang {
...
@@ -223,16 +238,18 @@ object coincidence_xinyang {
s
"""
s
"""
|select '${data}' as stat_date,count(distinct(device_id)) as week_coincidence_num
|select '${data}' as stat_date,count(distinct(device_id)) as week_coincidence_num
|from device_id_coincidence
|from device_id_coincidence
|where stat_date > '${week_day}'
|where stat_date >
=
'${week_day}'
"""
.
stripMargin
"""
.
stripMargin
)
)
//2.
每周活跃总用户(采用曝光计算)
//2.
获得应用列表的每周活跃总用户
val
week_expoure_num
=
sc
.
sql
(
val
week_expoure_num
=
sc
.
sql
(
s
"""
s
"""
|select '${data}' as stat_date,count(distinct(device_id)) as week_expoure_num
|select '${data}' as stat_date,count(distinct(cl_id)) as week_expoure_num
|from data_feed_exposure
|from online.tl_hdfs_maidian_view
|where stat_date > '${pre_weekday}'
|where partition_date >= '${week_day}'
|and device_type !='App Store'
|and action="user_installed_all_app_info"
|and cl_id not in (select device_id from bl_device_list)
|and cl_id not in (select device_id from blacklist)
"""
.
stripMargin
"""
.
stripMargin
)
)
...
@@ -246,12 +263,15 @@ object coincidence_xinyang {
...
@@ -246,12 +263,15 @@ object coincidence_xinyang {
|where stat_date > '${month_day}'
|where stat_date > '${month_day}'
"""
.
stripMargin
"""
.
stripMargin
)
)
//2.0
每月活跃总用户(采用曝光计算)
//2.0
获得应用列表的每月活跃总用户
val
month_expoure_num
=
sc
.
sql
(
val
month_expoure_num
=
sc
.
sql
(
s
"""
s
"""
|select '${data}' as stat_date,count(distinct(device_id)) as month_expoure_num
|select '${data}' as stat_date,count(distinct(cl_id)) as month_expoure_num
|from data_feed_exposure
|from online.tl_hdfs_maidian_view
|where stat_date > '${pre_monthday}'
|where partition_date >= '${month_day}'
|and action="user_installed_all_app_info"
|and cl_id not in (select device_id from bl_device_list)
|and cl_id not in (select device_id from blacklist)
"""
.
stripMargin
"""
.
stripMargin
)
)
...
@@ -263,13 +283,13 @@ object coincidence_xinyang {
...
@@ -263,13 +283,13 @@ object coincidence_xinyang {
GmeiConfig
.
writeToJDBCTable
(
result
,
"coincidence_xinyang"
,
SaveMode
.
Append
)
GmeiConfig
.
writeToJDBCTable
(
result
,
"coincidence_xinyang"
,
SaveMode
.
Append
)
//截止目前获得的与新氧重合的用户数计算美购情况
//
获取到的应用列表用户中
截止目前获得的与新氧重合的用户数计算美购情况
//1.重合用户的美购数
//1.重合用户的美购数
val
meigou_coincidence_num
=
sc
.
sql
(
val
meigou_coincidence_num
=
sc
.
sql
(
s
"""
s
"""
|select count(service_id) as meigou_coincidence_num
|select count(service_id) as meigou_coincidence_num
|from online.ml_meigou_order_detail
|from online.ml_meigou_order_detail
|where partition_date = '201811
18
'
|where partition_date = '201811
20
'
|and pay_time is not null
|and pay_time is not null
|and pay_time >= '2017-11-18'
|and pay_time >= '2017-11-18'
|and device_id in (select distinct(device_id) from device_id_coincidence)
|and device_id in (select distinct(device_id) from device_id_coincidence)
...
@@ -283,37 +303,35 @@ object coincidence_xinyang {
...
@@ -283,37 +303,35 @@ object coincidence_xinyang {
s
"""
s
"""
|select count(DISTINCT(device_id)) as meigou_pay_device
|select count(DISTINCT(device_id)) as meigou_pay_device
|from online.ml_meigou_order_detail
|from online.ml_meigou_order_detail
|where partition_date = '201811
18
'
|where partition_date = '201811
20
'
|and pay_time is not null
|and pay_time is not null
|and pay_time >= '2017-11-18'
|and pay_time >= '2017-11-18'
|and device_id in (select distinct(device_id) from device_id_coincidence)
|and device_id in (select distinct(device_id) from device_id_coincidence)
"""
.
stripMargin
"""
.
stripMargin
)
)
meigou_pay_device
.
show
()
meigou_pay_device
.
show
()
//3.
所有
用户的美购数
//3.
获得应用列表的
用户的美购数
val
meigou_pay_all
=
sc
.
sql
(
val
meigou_pay_all
=
sc
.
sql
(
s
"""
s
"""
|select count(device_id) as meigou_pay_device
|select count(device_id) as meigou_pay_device
|from online.ml_meigou_order_detail
|from online.ml_meigou_order_detail od inner join all_device_id
|where partition_date = '20181118'
|on od.device_id=all_device_id.device_id
|where partition_date = '20181120'
|and pay_time is not null
|and pay_time is not null
|and pay_time >= '2017-11-18'
|and pay_time >= '2017-11-18'
|and device_id not in (select device_id from bl_device_list)
|and device_id not in (select device_id from blacklist)
"""
.
stripMargin
"""
.
stripMargin
)
)
meigou_pay_all
.
show
()
meigou_pay_all
.
show
()
//4.
所有
进行美购的用户数
//4.
获得应用列表用户
进行美购的用户数
val
meigou_pay_device_all
=
sc
.
sql
(
val
meigou_pay_device_all
=
sc
.
sql
(
s
"""
s
"""
|select count(DISTINCT(device_id)) as meigou_pay_device
|select count(distinct(device_id)) as meigou_pay_device
|from online.ml_meigou_order_detail
|from online.ml_meigou_order_detail od inner join all_device_id
|where partition_date = '20181118'
|on od.device_id=all_device_id.device_id
|where partition_date = '20181120'
|and pay_time is not null
|and pay_time is not null
|and pay_time >= '2017-11-18'
|and pay_time >= '2017-11-18'
|and device_id not in (select device_id from bl_device_list)
|and device_id not in (select device_id from blacklist)
"""
.
stripMargin
"""
.
stripMargin
)
)
meigou_pay_device_all
.
show
()
meigou_pay_device_all
.
show
()
...
@@ -322,23 +340,21 @@ object coincidence_xinyang {
...
@@ -322,23 +340,21 @@ object coincidence_xinyang {
val
zixun_num_all
=
sc
.
sql
(
val
zixun_num_all
=
sc
.
sql
(
s
"""
s
"""
|select count(cl_id) as zixun_num_all
|select count(cl_id) as zixun_num_all
|from online.tl_hdfs_maidian_view
|from online.tl_hdfs_maidian_view ov inner join all_device_id
|where partition_date > '20180501'
|on ov.cl_id = all_device_id.device_id
|where partition_date >= '20180501'
|and action = 'welfare_detail_click_message'
|and action = 'welfare_detail_click_message'
|and cl_id not in (select device_id from bl_device_list)
|and cl_id not in (select device_id from blacklist)
"""
.
stripMargin
"""
.
stripMargin
)
)
zixun_num_all
.
show
()
zixun_num_all
.
show
()
val
zixun_device_all
=
sc
.
sql
(
val
zixun_device_all
=
sc
.
sql
(
s
"""
s
"""
|select count(distinct(cl_id)) zixun_device_all
|select count(distinct(cl_id)) as zixun_num_all
|from online.tl_hdfs_maidian_view
|from online.tl_hdfs_maidian_view ov inner join all_device_id
|where partition_date > '20180501'
|on ov.cl_id = all_device_id.device_id
|where partition_date >= '20180501'
|and action = 'welfare_detail_click_message'
|and action = 'welfare_detail_click_message'
|and cl_id not in (select device_id from bl_device_list)
|and cl_id not in (select device_id from blacklist)
"""
.
stripMargin
"""
.
stripMargin
)
)
zixun_device_all
.
show
()
zixun_device_all
.
show
()
...
@@ -347,7 +363,7 @@ object coincidence_xinyang {
...
@@ -347,7 +363,7 @@ object coincidence_xinyang {
s
"""
s
"""
|select count(cl_id) as zixun_num_co
|select count(cl_id) as zixun_num_co
|from online.tl_hdfs_maidian_view
|from online.tl_hdfs_maidian_view
|where partition_date > '20180501'
|where partition_date >
=
'20180501'
|and action = 'welfare_detail_click_message'
|and action = 'welfare_detail_click_message'
|and cl_id in (select distinct(device_id) from device_id_coincidence)
|and cl_id in (select distinct(device_id) from device_id_coincidence)
"""
.
stripMargin
"""
.
stripMargin
...
@@ -358,7 +374,7 @@ object coincidence_xinyang {
...
@@ -358,7 +374,7 @@ object coincidence_xinyang {
s
"""
s
"""
|select count(distinct(cl_id)) as zixun_num_co_dis
|select count(distinct(cl_id)) as zixun_num_co_dis
|from online.tl_hdfs_maidian_view
|from online.tl_hdfs_maidian_view
|where partition_date > '20180501'
|where partition_date >
=
'20180501'
|and action = 'welfare_detail_click_message'
|and action = 'welfare_detail_click_message'
|and cl_id in (select distinct(device_id) from device_id_coincidence)
|and cl_id in (select distinct(device_id) from device_id_coincidence)
"""
.
stripMargin
"""
.
stripMargin
...
...
eda/feededa/src/main/scala/com/gmei/strategy_other.scala
View file @
098f5ac3
...
@@ -179,3 +179,183 @@ object strategy_other {
...
@@ -179,3 +179,183 @@ object strategy_other {
}
}
}
}
/**
 * Spark job that computes diary click / exposure statistics for a single day and
 * appends them, joined on `stat_date`, as one row set to the `strategy_other` JDBC table.
 *
 * The original header comment announced recommender-system evaluation metrics:
 * information entropy (how well long-tail, high-quality diaries are surfaced) and the
 * Gini coefficient (whether diary recommendation shows a Matthew effect).
 * NOTE(review): none of the queries below computes entropy or a Gini coefficient yet;
 * the body only produces click / exposure / device counts. Confirm whether those
 * metrics are still to be added.
 *
 * Command-line options (parsed with scopt):
 *  - `--env`  database environment passed to `GmeiConfig.setup` (default "dev")
 *  - `--date` statistics date in `yyyy-MM-dd` form (default "2018-08-01"); the
 *             dash-less form is used for `partition_date` filters.
 */
object evaluation_indicator_ {

  Logger.getLogger("org.apache.spark").setLevel(Level.WARN)
  // NOTE(review): Jetty's own package is `org.eclipse.jetty`; the logger name below
  // ("org.apache.eclipse...") may not match any real logger. Left unchanged - confirm.
  Logger.getLogger("org.apache.eclipse.jetty.server").setLevel(Level.OFF)

  /** Command-line parameters of the job. */
  case class Params(env: String = "dev",
                    date: String = "2018-08-01"
                   ) extends AbstractParams[Params] with Serializable

  val defaultParams = Params()

  val parser = new OptionParser[Params]("Feed_EDA") {
    // The header and the example below name this object, so that the documented
    // spark-submit command actually launches this job.
    head("evaluation_indicator_")
    opt[String]("env")
      .text("the databases environment you used")
      .action((x, c) => c.copy(env = x))
    opt[String]("date")
      .text("the date you used")
      .action((x, c) => c.copy(date = x))
    // stripMargin only applies to the first literal, so the appended continuation
    // line must not carry a margin character of its own.
    note(
      """
        |For example, the following command runs this app on a tidb dataset:
        |
        | spark-submit --class com.gmei.evaluation_indicator_ ./target/scala-2.11/feededa-assembly-0.1.jar \
        |""".stripMargin +
        s"   --env ${defaultParams.env}"
    )
  }

  /**
   * Parses `args`, runs the daily statistics queries and appends the joined result
   * to the `strategy_other` table. When argument parsing fails nothing is executed.
   *
   * @param args raw command-line arguments (`--env`, `--date`)
   */
  def main(args: Array[String]): Unit = {
    // The block is run only for its side effects, hence foreach rather than map.
    parser.parse(args, defaultParams).foreach { param =>
      GmeiConfig.setup(param.env)
      val sparkEnv = GmeiConfig.getSparkSession()
      val spark = sparkEnv._2

      // Expose the TiDB tables referenced by the SQL below.
      val ti = new TiContext(spark)
      Seq(
        "jerry_prod" -> "diary_video",
        "jerry_prod" -> "data_feed_click",
        "jerry_prod" -> "blacklist",
        "jerry_test" -> "bl_device_list",
        "jerry_prod" -> "data_feed_exposure",
        "jerry_prod" -> "merge_queue_table"
      ).foreach { case (db, table) =>
        ti.tidbMapTable(dbName = db, tableName = table)
      }

      import spark.implicits._

      // `yyyy-MM-dd` -> `yyyyMMdd`, the format used by the partition_date column.
      val partition_date = param.date.replace("-", "")

      // Devices active on the given day with active_type '4' (presumably "old users" -
      // TODO confirm), excluding the listed first_channel_source_type values.
      val oldUserDeviceIds = spark.sql(
        s"""
           |select distinct(device_id) as device_id
           |from online.ml_device_day_active_status
           |where active_type = '4'
           |and first_channel_source_type not in ('yqxiu1','yqxiu2','yqxiu3','yqxiu4','yqxiu5','mxyc1','mxyc2','mxyc3'
           | ,'wanpu','jinshan','jx','maimai','zhuoyi','huatian','suopingjingling','mocha','mizhe','meika','lamabang'
           | ,'js-az1','js-az2','js-az3','js-az4','js-az5','jfq-az1','jfq-az2','jfq-az3','jfq-az4','jfq-az5','toufang1'
           | ,'toufang2','toufang3','toufang4','toufang5','toufang6','TF-toufang1','TF-toufang2','TF-toufang3','TF-toufang4'
           | ,'TF-toufang5','tf-toufang1','tf-toufang2','tf-toufang3','tf-toufang4','tf-toufang5','benzhan','promotion_aso100'
           | ,'promotion_qianka','promotion_xiaoyu','promotion_dianru','promotion_malioaso','promotion_malioaso-shequ'
           | ,'promotion_shike','promotion_julang_jl03')
           |and partition_date ='${partition_date}'
         """.stripMargin
      )
      oldUserDeviceIds.show()
      oldUserDeviceIds.createOrReplaceTempView("device_id_old")

      // Diary clicks by devices whose device_id ends in "1"
      // (`$$` is the interpolator escape for a literal `$`, i.e. the regex is `1$`).
      val clkActive1 = spark.sql(
        s"""
           |select '${param.date}' as stat_date, count(jd.cid_id) as clk_active_1
           |from data_feed_click jd inner join device_id_old
           |on jd.device_id = device_id_old.device_id
           |where (jd.cid_type = 'diary' or jd.cid_type = 'diary_video')
           |and jd.device_id regexp'1$$'
           |and jd.device_id not in (select device_id from bl_device_list)
           |and jd.device_id not in (select device_id from blacklist)
           |and jd.stat_date ='${param.date}'
         """.stripMargin
      )

      // Diary exposures for devices ending in "1" that have at least one click that day.
      val impActive1 = spark.sql(
        s"""
           |select '${param.date}' as stat_date, count(je.cid_id) as imp_active_1
           |from data_feed_exposure je inner join device_id_old
           |on je.device_id = device_id_old.device_id
           |where je.cid_type = 'diary'
           |and je.device_id in (select distinct(device_id) from data_feed_click where device_id regexp '1$$' and stat_date = '${param.date}')
           |and je.device_id not in (select device_id from bl_device_list)
           |and je.device_id not in (select device_id from blacklist)
           |and je.stat_date ='${param.date}'
         """.stripMargin
      )

      // Number of distinct devices ending in "1" that clicked a diary.
      val clkDiaryDevice = spark.sql(
        s"""
           |select '${param.date}' as stat_date, count(distinct(jd.device_id)) as clk_diary_device
           |from data_feed_click jd inner join device_id_old
           |on jd.device_id = device_id_old.device_id
           |where (jd.cid_type = 'diary' or jd.cid_type = 'diary_video')
           |and jd.device_id regexp'1$$'
           |and jd.device_id not in (select device_id from bl_device_list)
           |and jd.device_id not in (select device_id from blacklist)
           |and jd.stat_date ='${param.date}'
         """.stripMargin
      )

      // Diary clicks by all devices that clicked.
      val clkActiveAll = spark.sql(
        s"""
           |select '${param.date}' as stat_date, count(jd.cid_id) as clk_active_all
           |from data_feed_click jd inner join device_id_old
           |on jd.device_id = device_id_old.device_id
           |where (jd.cid_type = 'diary' or jd.cid_type = 'diary_video')
           |and jd.device_id not in (select device_id from bl_device_list)
           |and jd.device_id not in (select device_id from blacklist)
           |and jd.stat_date ='${param.date}'
         """.stripMargin
      )

      // Diary exposures for all devices that have at least one click that day.
      val impActiveAll = spark.sql(
        s"""
           |select '${param.date}' as stat_date, count(je.cid_id) as imp_active_all
           |from data_feed_exposure je inner join device_id_old
           |on je.device_id = device_id_old.device_id
           |where je.cid_type = 'diary'
           |and je.device_id in (select distinct(device_id) from data_feed_click where stat_date = '${param.date}')
           |and je.device_id not in (select device_id from bl_device_list)
           |and je.device_id not in (select device_id from blacklist)
           |and je.stat_date ='${param.date}'
         """.stripMargin
      )

      // Distinct devices in merge_queue_table (strategy-covered devices, per the
      // original comment) that also clicked on the given day.
      val clkDiaryDeviceCover = spark.sql(
        s"""
           |select '${param.date}' as stat_date,count(distinct(device_id)) as clk_diary_device_cover
           |from merge_queue_table
           |where device_id in (select distinct(device_id) from data_feed_click where stat_date = '${param.date}')
         """.stripMargin
      )

      // Total distinct devices in merge_queue_table.
      val deviceAllCover = spark.sql(
        s"""
           |select '${param.date}' as stat_date,count(distinct(device_id)) as device_all_cover
           |from merge_queue_table
         """.stripMargin
      )

      // Every metric frame carries the same stat_date value; joining on it yields one
      // row with the metrics as columns, in the same left-to-right order as before.
      val result = Seq(
        impActive1,
        clkActiveAll,
        impActiveAll,
        clkDiaryDevice,
        clkDiaryDeviceCover,
        deviceAllCover
      ).foldLeft(clkActive1) { (joined, metric) => joined.join(metric, "stat_date") }

      result.show()
      // NOTE(review): the target table shares its name with the sibling
      // `strategy_other` object - confirm both jobs are meant to append to it.
      GmeiConfig.writeToJDBCTable(result, "strategy_other", SaveMode.Append)
    }
  }
}
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment