ML / ffm-baseline · Commits

Commit 98da90ee, authored Nov 22, 2018 by 张彦钊

    Merge branch 'master' of git.wanmeizhensuo.com:ML/ffm-baseline

    Add retrieval of the city list (增加获取城市列表)

Parents: 0027e2d0, 9715983a
Showing 6 changed files, with 663 additions and 11 deletions (+663 / -11):

  GmeiConfig.scala                    eda/feededa/src/main/scala/com/gmei/GmeiConfig.scala                    +1    -1
  Recommendation_strategy_all.scala   eda/feededa/src/main/scala/com/gmei/Recommendation_strategy_all.scala  +2    -5
  WeafareStat.scala                   eda/feededa/src/main/scala/com/gmei/WeafareStat.scala                   +54   -1
  app_list.scala                      eda/feededa/src/main/scala/com/gmei/app_list.scala                      +435  -0
  strategy_other.scala                eda/feededa/src/main/scala/com/gmei/strategy_other.scala                +171  -0
  testt.scala                         eda/feededa/src/main/scala/com/gmei/testt.scala                         +0    -4
eda/feededa/src/main/scala/com/gmei/GmeiConfig.scala

@@ -34,7 +34,7 @@ object GmeiConfig extends Serializable {
     val sparkConf = new SparkConf
     sparkConf.set("spark.sql.crossJoin.enabled", "true")
     sparkConf.set("spark.debug.maxToStringFields", "100")
-    sparkConf.set("spark.sql.broadcastTimeout", "1000")
+    sparkConf.set("spark.sql.broadcastTimeout", "6000")
     if (!sparkConf.contains("spark.master")) {
       sparkConf.setMaster("local[3]")
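The only change in this file raises spark.sql.broadcastTimeout from 1000 to 6000 seconds, giving broadcast joins more time to complete before Spark aborts them. As a minimal sketch (standard Spark API, not part of this commit), a SparkConf configured this way is typically handed to a SparkSession builder:

    import org.apache.spark.SparkConf
    import org.apache.spark.sql.SparkSession

    // Sketch only: the same settings as above, fed into a session builder.
    val conf = new SparkConf()
      .set("spark.sql.crossJoin.enabled", "true")
      .set("spark.debug.maxToStringFields", "100")
      .set("spark.sql.broadcastTimeout", "6000") // seconds; raised from 1000 in this commit

    val spark = SparkSession.builder().config(conf).getOrCreate()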
eda/feededa/src/main/scala/com/gmei/Recommendation_strategy_all.scala

@@ -13,8 +13,7 @@ object Recommendation_strategy_all {
   Logger.getLogger("org.apache.spark").setLevel(Level.WARN)
   Logger.getLogger("org.apache.eclipse.jetty.server").setLevel(Level.OFF)
-  case class Params(env: String = "dev",
-                    date: String = "2018-08-01"
+  case class Params(env: String = "dev"
                    ) extends AbstractParams[Params] with Serializable
   val defaultParams = Params()

@@ -24,9 +23,6 @@ object Recommendation_strategy_all {
     opt[String]("env")
       .text(s"the databases environment you used")
       .action((x, c) => c.copy(env = x))
-    opt[String]("date")
-      .text(s"the date you used")
-      .action((x, c) => c.copy(date = x))
     note(
       """
         |For example, the following command runs this app on a tidb dataset:

@@ -54,6 +50,7 @@ object Recommendation_strategy_all {
     import sc.implicits._
     val stat_date = GmeiConfig.getMinusNDate(1)
+    println(stat_date)
     //println(param.date)
     val partition_date = stat_date.replace("-","")
     val decive_id_oldUser = sc.sql(
eda/feededa/src/main/scala/com/gmei/WeafareStat.scala

@@ -264,9 +264,61 @@ object NdDataInput {
     tidb_input.show()
     println(tidb_input.count())
   }
 }

The rest of the hunk adds a new object, ServiceStat:

object ServiceStat {

  Logger.getLogger("org.apache.spark").setLevel(Level.WARN)
  Logger.getLogger("org.apache.eclipse.jetty.server").setLevel(Level.OFF)

  case class Params(env: String = "dev"
                   ) extends AbstractParams[Params] with Serializable

  val defaultParams = Params()

  val parser = new OptionParser[Params]("Feed_EDA") {
    head("WeafareStat")
    opt[String]("env")
      .text(s"the databases environment you used")
      .action((x, c) => c.copy(env = x))
    note("winter is coming")
  }

  def main(args: Array[String]): Unit = {
    parser.parse(args, defaultParams).map { param =>
      GmeiConfig.setup(param.env)
      val spark_env = GmeiConfig.getSparkSession()
      val sc = spark_env._2

      val ti = new TiContext(sc)
      ti.tidbMapTable(dbName = "jerry_prod", tableName = "nd_data_meigou_cid")
      ti.tidbMapTable(dbName = "jerry_prod", tableName = "data_feed_click")
      ti.tidbMapTable(dbName = "eagle", tableName = "feed_diary_boost")

      val result00 = sc.sql(
        s"""
           |select a.cl_id as device_id,
           |COALESCE(a.params['diary_id'], a.params['business_id'], 0) as diary_id,
           |c.level1_id as level1_id
           |from online.tl_hdfs_maidian_view a
           |left join online.tl_hdfs_diary_tags_view b on COALESCE(a.params['diary_id'], a.params['business_id'], 0)=b.diary_id
           |left join online.bl_tag_hierarchy_detail c on b.tag_id=c.id
           |where a.partition_date > "20181112"
           |and a.action="on_click_diary_card"
           |and a.params["page_name"]="home"
           |and a.cl_id != "NULL"
           |and b.partition_date="20181119"
           |and c.partition_date="20181119"
         """.stripMargin
      )
      result00.collect.foreach(println)
    }
  }
}
\ No newline at end of file
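The join key in the query above is COALESCE(a.params['diary_id'], a.params['business_id'], 0): it uses diary_id when present, falls back to business_id, and finally to 0. A minimal sketch of that fallback in isolation (the sample rows and the events view are made up; sc is assumed to be the same SparkSession used in ServiceStat.main):

    import sc.implicits._

    // Hypothetical rows, only to illustrate the fallback order of the join key.
    Seq((Option("d1"), Option.empty[String]),
        (Option.empty[String], Option("b2")),
        (Option.empty[String], Option.empty[String]))
      .toDF("diary_id", "business_id")
      .createOrReplaceTempView("events")

    sc.sql("select COALESCE(diary_id, business_id, 0) as join_key from events").show()
    // join_key comes out as: d1, b2, 0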
eda/feededa/src/main/scala/com/gmei/app_list.scala  (new file, 0 → 100644)

package com.gmei

import java.io.Serializable
import org.apache.spark.sql.functions.udf
import com.gmei.WeafareStat.{defaultParams, parser}
import org.apache.spark.sql.{SaveMode, TiContext}
import org.apache.log4j.{Level, Logger}
import scopt.OptionParser
import com.gmei.lib.AbstractParams
import java.io._
import java.text.SimpleDateFormat
import java.util.Calendar
import org.apache.spark

object app_list {

  Logger.getLogger("org.apache.spark").setLevel(Level.WARN)
  Logger.getLogger("org.apache.eclipse.jetty.server").setLevel(Level.OFF)

  case class Params(env: String = "dev",
                    date: String = "2018-08-01"
                   ) extends AbstractParams[Params] with Serializable

  val defaultParams = Params()

  val parser = new OptionParser[Params]("Feed_EDA") {
    head("WeafareStat")
    opt[String]("env")
      .text(s"the databases environment you used")
      .action((x, c) => c.copy(env = x))
    opt[String]("date")
      .text(s"the date you used")
      .action((x, c) => c.copy(date = x))
    note(
      """
        |For example, the following command runs this app on a tidb dataset:
        |
        | spark-submit --class com.gmei.WeafareStat ./target/scala-2.11/feededa-assembly-0.1.jar \
      """.stripMargin + s"| --env ${defaultParams.env}"
    )
  }

  def main(args: Array[String]): Unit = {
    parser.parse(args, defaultParams).map { param =>
      GmeiConfig.setup(param.env)
      val spark_env = GmeiConfig.getSparkSession()
      val sc = spark_env._2

      val ti = new TiContext(sc)
      ti.tidbMapTable(dbName = "jerry_prod", tableName = "diary_video")
      ti.tidbMapTable(dbName = "jerry_prod", tableName = "data_feed_click")
      ti.tidbMapTable(dbName = "jerry_prod", tableName = "blacklist")
      ti.tidbMapTable(dbName = "jerry_test", tableName = "bl_device_list")
      ti.tidbMapTable(dbName = "jerry_prod", tableName = "data_feed_exposure")
      ti.tidbMapTable(dbName = "jerry_prod", tableName = "merge_queue_table")

      import sc.implicits._
      val stat_date = GmeiConfig.getMinusNDate(1)
      println(param.date)
      val partition_date = param.date.replace("-", "")
      println(partition_date)

      // Custom UDF used to append a column to the DataFrame
      val code = (arg: String) => {
        if (arg.getClass.getName == "java.lang.String") partition_date.toInt
        else 0.toInt
      }
      val addCol = udf(code)
      // End of UDF definition

      // Agency (hospital) device ids
      val agency_id = sc.sql(
        s"""
           |SELECT DISTINCT(cl_id) as device_id
           |FROM online.ml_hospital_spam_pv_day
           |WHERE partition_date >= '20180402'
           |AND partition_date <= '20181120'
           |AND pv_ratio >= 0.95
           |UNION ALL
           |SELECT DISTINCT(cl_id) as device_id
           |FROM online.ml_hospital_spam_pv_month
           |WHERE partition_date >= '20171101'
           |AND partition_date <= '20181120'
           |AND pv_ratio >= 0.95
         """.stripMargin
      )
      agency_id.createOrReplaceTempView("agency_id")

      // device_ids of users who overlap with SoYoung (新氧) users
      val app_list = sc.sql(
        s"""
           |select distinct(cl_id) as device_id, user_id as user_id, params['installed_app_info'] as app_list,channel
           |from online.tl_hdfs_maidian_view ov left join agency_id
           |on ov.cl_id = agency_id.device_id
           |where ov.action="user_installed_all_app_info"
           |and ov.partition_date = '${partition_date}'
           |and agency_id.device_id is null
           |and ov.cl_id not in (select distinct(device_id) from blacklist)
         """.stripMargin
      )
      //app_list.show()

      import sc.implicits._
      val rdd_df = app_list.rdd
        .map(x => (x(0).toString, x(1).toString, x(2).toString, x(3).toString))
        .filter(x => x._3.contains("新氧美容"))
        .map(x => (x._1, x._2, x._3, x._4))
        .collect().toList.toDF("device_id", "user_id", "app_list", "channel")
      rdd_df.show()
      //rdd_df.withColumn("stat_date",addCol(rdd_df("device_id")))
      rdd_df.createOrReplaceTempView("device_id")

      val temp = sc.sql(
        s"""
           |select *
           |from device_id
         """.stripMargin
      )
      val tempp = temp.withColumn("stat_date", addCol(temp("device_id")))
      tempp.show()
      GmeiConfig.writeToJDBCTable(tempp, "device_id_coincidence", SaveMode.Append)

      // device_ids of all users whose installed-app list was collected
      val app_list_all = sc.sql(
        s"""
           |select distinct(cl_id) as device_id, user_id as user_id,params['installed_app_info'] as app_list,channel
           |from online.tl_hdfs_maidian_view ov left join agency_id
           |on ov.cl_id = agency_id.device_id
           |where action="user_installed_all_app_info"
           |and agency_id.device_id is null
           |and ov.partition_date = '${partition_date}'
           |and ov.cl_id not in (select distinct(device_id) from blacklist)
         """.stripMargin
      )
      val tempp_list = app_list_all.withColumn("stat_date", addCol(app_list_all("device_id")))
      GmeiConfig.writeToJDBCTable(tempp_list, "device_id_applist", SaveMode.Append)
    }
  }
}
// The following object performs the statistical analysis
object coincidence_xinyang {

  Logger.getLogger("org.apache.spark").setLevel(Level.WARN)
  Logger.getLogger("org.apache.eclipse.jetty.server").setLevel(Level.OFF)

  case class Params(env: String = "dev",
                    date: String = "2018-08-01"
                   ) extends AbstractParams[Params] with Serializable

  val defaultParams = Params()

  val parser = new OptionParser[Params]("Feed_EDA") {
    head("WeafareStat")
    opt[String]("env")
      .text(s"the databases environment you used")
      .action((x, c) => c.copy(env = x))
    opt[String]("date")
      .text(s"the date you used")
      .action((x, c) => c.copy(date = x))
    note(
      """
        |For example, the following command runs this app on a tidb dataset:
        |
        | spark-submit --class com.gmei.WeafareStat ./target/scala-2.11/feededa-assembly-0.1.jar \
      """.stripMargin + s"| --env ${defaultParams.env}"
    )
  }

  def main(args: Array[String]): Unit = {
    parser.parse(args, defaultParams).map { param =>
      GmeiConfig.setup(param.env)
      val spark_env = GmeiConfig.getSparkSession()
      val sc = spark_env._2

      val ti = new TiContext(sc)
      ti.tidbMapTable(dbName = "jerry_prod", tableName = "blacklist")
      ti.tidbMapTable(dbName = "jerry_prod", tableName = "device_id_applist")
      ti.tidbMapTable(dbName = "jerry_prod", tableName = "device_id_coincidence")

      //println(param.date)
      //val partition_date = param.date.replace("-","")
      //println(partition_date)

      // Get yesterday's date
      def getYesterDay(): String = {
        var dateFormat: SimpleDateFormat = new SimpleDateFormat("yyyy-MM-dd")
        var cal: Calendar = Calendar.getInstance()
        cal.add(Calendar.DATE, -1)
        var yesterday = dateFormat.format(cal.getTime())
        yesterday
      }
      val yesterday = getYesterDay()
      println(yesterday)

      // Get the date one week ago
      def getWeekDay(): String = {
        var dateFormat: SimpleDateFormat = new SimpleDateFormat("yyyy-MM-dd")
        var cal: Calendar = Calendar.getInstance()
        cal.add(Calendar.DATE, -7)
        var yesterday = dateFormat.format(cal.getTime())
        yesterday
      }
      val pre_weekday = getWeekDay()
      println(pre_weekday)

      // Get the date one month ago
      def getMonthDay(): String = {
        var dateFormat: SimpleDateFormat = new SimpleDateFormat("yyyy-MM-dd")
        var cal: Calendar = Calendar.getInstance()
        cal.add(Calendar.DATE, -30)
        var yesterday = dateFormat.format(cal.getTime())
        yesterday
      }
      val pre_monthday = getMonthDay()
      println(pre_monthday)

      // Share of daily active users who overlap with SoYoung users
      // 1. Daily active users overlapping with SoYoung
      val yesterday_data = yesterday.replace("-", "")
      val yesterday_coincidence = sc.sql(
        s"""
           |select '${yesterday_data}' as stat_date,count(distinct(device_id)) as yesterday_coincidence_num
           |from device_id_coincidence
           |where stat_date = '${yesterday_data}'
         """.stripMargin
      )

      // 2. Total daily active users with an app list
      val yesterday_expoure_num = sc.sql(
        s"""
           |select '${yesterday_data}' as stat_date,count(distinct(device_id)) as yesterday_expoure_num
           |from device_id_applist
           |where stat_date = '${yesterday_data}'
         """.stripMargin
      )

      // Share of weekly active users who overlap with SoYoung users
      // 1. Weekly active users overlapping with SoYoung
      val week_day = pre_weekday.replace("-", "")
      val week_coincidence = sc.sql(
        s"""
           |select '${yesterday_data}' as stat_date,count(distinct(device_id)) as week_coincidence_num
           |from device_id_coincidence
           |where stat_date >= '${week_day}'
         """.stripMargin
      )

      // 2. Total weekly active users with an app list
      val week_expoure_num = sc.sql(
        s"""
           |select '${yesterday_data}' as stat_date,count(distinct(device_id)) as week_expoure_num
           |from device_id_applist
           |where stat_date >= '${week_day}'
         """.stripMargin
      )

      /* // Share of monthly active users who overlap with SoYoung users
      // 1.0 Monthly active users overlapping with SoYoung
      val month_day=pre_monthday.replace("-","")
      val month_coincidence = sc.sql(
        s"""
           |select '${data}' as stat_date,count(distinct(device_id)) as month_coincidence_num
           |from device_id_coincidence
           |where stat_date > '${month_day}'
         """.stripMargin
      )
      // 2.0 Total monthly active users with an app list
      val month_expoure_num = sc.sql(
        s"""
           |select '${data}' as stat_date,count(distinct(cl_id)) as month_expoure_num
           |from online.tl_hdfs_maidian_view
           |where partition_date >= '${month_day}'
           |and action="user_installed_all_app_info"
           |and cl_id not in (select device_id from bl_device_list)
           |and cl_id not in (select device_id from blacklist)
         """.stripMargin
      )
      */

      val result = yesterday_coincidence.join(yesterday_expoure_num, "stat_date")
        .join(week_coincidence, "stat_date")
        .join(week_expoure_num, "stat_date")
      GmeiConfig.writeToJDBCTable(result, "coincidence_xinyang", SaveMode.Append)

      // For users with an app list, compute cosmetic-purchase (美购) stats for those overlapping with SoYoung to date
      // * device_ids of overlapping users
      val coincidence_id = sc.sql(
        s"""
           |select distinct(device_id) as device_id
           |from device_id_coincidence
         """.stripMargin
      )
      coincidence_id.createOrReplaceTempView("coincidence_id")

      // All device_ids with an app list
      val all_id = sc.sql(
        s"""
           |select distinct(device_id) as device_id
           |from device_id_applist
         """.stripMargin
      )
      all_id.createOrReplaceTempView("all_id")

      // * All device_ids with an app list
      // 1. Number of cosmetic purchases by overlapping users
      val meigou_coincidence_num = sc.sql(
        s"""
           |select count(ov.device_id) as meigou_coincidence_num
           |from online.ml_meigou_order_detail ov left join coincidence_id
           |on ov.device_id = coincidence_id.device_id
           |where partition_date = '20181120'
           |and coincidence_id.device_id is not null
           |and ov.pay_time is not null
           |and ov.pay_time >= '2017-11-18'
         """.stripMargin
      )
      meigou_coincidence_num.show()

      // 2. Number of overlapping users who made a cosmetic purchase
      val meigou_pay_device = sc.sql(
        s"""
           |select count(distinct(ov.device_id)) as meigou_coincidence_num
           |from online.ml_meigou_order_detail ov left join coincidence_id
           |on ov.device_id = coincidence_id.device_id
           |where partition_date = '20181120'
           |and coincidence_id.device_id is not null
           |and ov.pay_time is not null
           |and ov.pay_time >= '2017-11-18'
         """.stripMargin
      )
      meigou_pay_device.show()

      // 3. Number of cosmetic purchases by all users with an app list
      val meigou_pay_all = sc.sql(
        s"""
           |select count(od.device_id) as meigou_pay_device
           |from online.ml_meigou_order_detail od inner join all_id
           |on od.device_id = all_id.device_id
           |where partition_date = '20181120'
           |and all_id.device_id is not null
           |and od.pay_time is not null
           |and od.pay_time >= '2017-11-18'
         """.stripMargin
      )
      meigou_pay_all.show()

      // 4. Number of users with an app list who made a cosmetic purchase
      val meigou_pay_device_all = sc.sql(
        s"""
           |select count(distinct(od.device_id)) as meigou_pay_device
           |from online.ml_meigou_order_detail od inner join all_id
           |on od.device_id = all_id.device_id
           |where partition_date = '20181120'
           |and all_id.device_id is not null
           |and od.pay_time is not null
           |and od.pay_time >= '2017-11-18'
         """.stripMargin
      )
      meigou_pay_device_all.show()

      // Consultation stats for users overlapping with SoYoung to date
      /* val zixun_num_all = sc.sql(
        s"""
           |select count(ov.cl_id) as zixun_num_all
           |from online.tl_hdfs_maidian_view ov left join coincidence_id
           |on ov.cl_id = coincidence_id.device_id
           |where partition_date >= '20180501'
           |and coincidence_id.device_id is not null
           |and action = 'welfare_detail_click_message'
         """.stripMargin
      )
      zixun_num_all.show()

      val zixun_device_all = sc.sql(
        s"""
           |select count(distinct(ov.cl_id)) as zixun_num_all
           |from online.tl_hdfs_maidian_view ov left join coincidence_id
           |on ov.cl_id = coincidence_id.device_id
           |where partition_date >= '20180501'
           |and coincidence_id.device_id is not null
           |and action = 'welfare_detail_click_message'
         """.stripMargin
      )
      zixun_device_all.show()
      */

      val zixun_num_co = sc.sql(
        s"""
           |select count(ov.cl_id) as zixun_num_all
           |from online.tl_hdfs_maidian_view ov left join all_id
           |on ov.cl_id = all_id.device_id
           |where partition_date >= '20180501'
           |and all_id.device_id is not null
           |and action = 'welfare_detail_click_message'
         """.stripMargin
      )
      zixun_num_co.show()

      val zixun_num_co_dis = sc.sql(
        s"""
           |select count(distinct(ov.cl_id)) as zixun_num_all
           |from online.tl_hdfs_maidian_view ov left join all_id
           |on ov.cl_id = all_id.device_id
           |where partition_date >= '20180501'
           |and all_id.device_id is not null
           |and action = 'welfare_detail_click_message'
         """.stripMargin
      )
      zixun_num_co_dis.show()
    }
  }
}
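The three date helpers in coincidence_xinyang (getYesterDay, getWeekDay, getMonthDay) differ only in the number of days subtracted; a minimal consolidated sketch of the same logic (the helper name minusNDays is made up for illustration):

    import java.text.SimpleDateFormat
    import java.util.Calendar

    // Hypothetical consolidation of getYesterDay / getWeekDay / getMonthDay:
    // one helper parameterized by the day offset.
    def minusNDays(n: Int): String = {
      val fmt = new SimpleDateFormat("yyyy-MM-dd")
      val cal = Calendar.getInstance()
      cal.add(Calendar.DATE, -n)
      fmt.format(cal.getTime)
    }

    val yesterday    = minusNDays(1)   // same value as getYesterDay()
    val pre_weekday  = minusNDays(7)   // same value as getWeekDay()
    val pre_monthday = minusNDays(30)  // same value as getMonthDay()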
eda/feededa/src/main/scala/com/gmei/strategy_other.scala

@@ -179,3 +179,174 @@ object strategy_other {
   }
 }

The hunk appends a new object, evaluation_indicator_, after the existing strategy_other object:

// The code below computes evaluation metrics for the recommendation system:
// information entropy measures how well it surfaces long-tail, high-quality items (diaries),
// and the Gini coefficient measures whether diary recommendations show a Matthew (rich-get-richer) effect
object evaluation_indicator_ {

  Logger.getLogger("org.apache.spark").setLevel(Level.WARN)
  Logger.getLogger("org.apache.eclipse.jetty.server").setLevel(Level.OFF)

  case class Params(env: String = "dev",
                    date: String = "2018-08-01"
                   ) extends AbstractParams[Params] with Serializable

  val defaultParams = Params()

  val parser = new OptionParser[Params]("Feed_EDA") {
    head("WeafareStat")
    opt[String]("env")
      .text(s"the databases environment you used")
      .action((x, c) => c.copy(env = x))
    opt[String]("date")
      .text(s"the date you used")
      .action((x, c) => c.copy(date = x))
    note(
      """
        |For example, the following command runs this app on a tidb dataset:
        |
        | spark-submit --class com.gmei.WeafareStat ./target/scala-2.11/feededa-assembly-0.1.jar \
      """.stripMargin + s"| --env ${defaultParams.env}"
    )
  }

  def main(args: Array[String]): Unit = {
    parser.parse(args, defaultParams).map { param =>
      GmeiConfig.setup(param.env)
      val spark_env = GmeiConfig.getSparkSession()
      val sc = spark_env._2

      import sc.implicits._
      //val stat_date = GmeiConfig.getMinusNDate(1)
      //println(param.date)
      val partition_date = param.date.replace("-", "")

      val devicee_id_oldUser = sc.sql(
        s"""
           |select distinct(device_id) as device_id
           |from online.ml_device_day_active_status
           |where active_type = '4'
           |and first_channel_source_type not in ('yqxiu1','yqxiu2','yqxiu3','yqxiu4','yqxiu5','mxyc1','mxyc2','mxyc3'
           |    ,'wanpu','jinshan','jx','maimai','zhuoyi','huatian','suopingjingling','mocha','mizhe','meika','lamabang'
           |    ,'js-az1','js-az2','js-az3','js-az4','js-az5','jfq-az1','jfq-az2','jfq-az3','jfq-az4','jfq-az5','toufang1'
           |    ,'toufang2','toufang3','toufang4','toufang5','toufang6','TF-toufang1','TF-toufang2','TF-toufang3','TF-toufang4'
           |    ,'TF-toufang5','tf-toufang1','tf-toufang2','tf-toufang3','tf-toufang4','tf-toufang5','benzhan','promotion_aso100'
           |    ,'promotion_qianka','promotion_xiaoyu','promotion_dianru','promotion_malioaso','promotion_malioaso-shequ'
           |    ,'promotion_shike','promotion_julang_jl03')
           |and partition_date ='${partition_date}'
         """.stripMargin
      )
      devicee_id_oldUser.show()
      devicee_id_oldUser.createOrReplaceTempView("device_id_old")

      // Diary clicks from clicking users whose device_id ends in 1
      val clk_active_1 = sc.sql(
        s"""
           |select '${param.date}' as stat_date, count(jd.cid_id) as clk_active_1
           |from data_feed_click jd inner join device_id_old
           |on jd.device_id = device_id_old.device_id
           |where (jd.cid_type = 'diary' or jd.cid_type = 'diary_video')
           |and jd.device_id regexp'1$$'
           |and jd.device_id not in (select device_id from bl_device_list)
           |and jd.device_id not in (select device_id from blacklist)
           |and jd.stat_date ='${param.date}'
         """.stripMargin
      )

      // Diary exposures for clicking users whose device_id ends in 1
      val imp_active_1 = sc.sql(
        s"""
           |select '${param.date}' as stat_date, count(je.cid_id) as imp_active_1
           |from data_feed_exposure je inner join device_id_old
           |on je.device_id = device_id_old.device_id
           |where je.cid_type = 'diary'
           |and je.device_id in (select distinct(device_id) from data_feed_click where device_id regexp '1$$' and stat_date = '${param.date}')
           |and je.device_id not in (select device_id from bl_device_list)
           |and je.device_id not in (select device_id from blacklist)
           |and je.stat_date ='${param.date}'
         """.stripMargin
      )

      // Number of users with device_id ending in 1 who clicked a diary
      val clk_diary_device = sc.sql(
        s"""
           |select '${param.date}' as stat_date, count(distinct(jd.device_id)) as clk_diary_device
           |from data_feed_click jd inner join device_id_old
           |on jd.device_id = device_id_old.device_id
           |where (jd.cid_type = 'diary' or jd.cid_type = 'diary_video')
           |and jd.device_id regexp'1$$'
           |and jd.device_id not in (select device_id from bl_device_list)
           |and jd.device_id not in (select device_id from blacklist)
           |and jd.stat_date ='${param.date}'
         """.stripMargin
      )

      // Diary clicks from all clicking users
      val clk_active_all = sc.sql(
        s"""
           |select '${param.date}' as stat_date, count(jd.cid_id) as clk_active_all
           |from data_feed_click jd inner join device_id_old
           |on jd.device_id = device_id_old.device_id
           |where (jd.cid_type = 'diary' or jd.cid_type = 'diary_video')
           |and jd.device_id not in (select device_id from bl_device_list)
           |and jd.device_id not in (select device_id from blacklist)
           |and jd.stat_date ='${param.date}'
         """.stripMargin
      )

      // Diary exposures for all clicking users
      val imp_active_all = sc.sql(
        s"""
           |select '${param.date}' as stat_date, count(je.cid_id) as imp_active_all
           |from data_feed_exposure je inner join device_id_old
           |on je.device_id = device_id_old.device_id
           |where je.cid_type = 'diary'
           |and je.device_id in (select distinct(device_id) from data_feed_click where stat_date = '${param.date}')
           |and je.device_id not in (select device_id from bl_device_list)
           |and je.device_id not in (select device_id from blacklist)
           |and je.stat_date ='${param.date}'
         """.stripMargin
      )

      // Number of strategy-covered users who clicked a diary
      val clk_diary_device_cover = sc.sql(
        s"""
           |select '${param.date}' as stat_date,count(distinct(device_id)) as clk_diary_device_cover
           |from merge_queue_table
           |where device_id in (select distinct(device_id) from data_feed_click where stat_date = '${param.date}')
         """.stripMargin
      )

      // Total number of strategy-covered users
      val device_all_cover = sc.sql(
        s"""
           |select '${param.date}' as stat_date,count(distinct(device_id)) as device_all_cover
           |from merge_queue_table
         """.stripMargin
      )

      val result = clk_active_1.join(imp_active_1, "stat_date")
        .join(clk_active_all, "stat_date")
        .join(imp_active_all, "stat_date")
        .join(clk_diary_device, "stat_date")
        .join(clk_diary_device_cover, "stat_date")
        .join(device_all_cover, "stat_date")
      result.show()
      GmeiConfig.writeToJDBCTable(result, "strategy_other", SaveMode.Append)
    }
  }
}
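The joined result written to strategy_other carries raw click and exposure counts; if click-through ratios are wanted as well, a hedged sketch of deriving them from the same DataFrame (the ctr_* column names are hypothetical; the input columns are the ones produced by the queries above):

    import org.apache.spark.sql.functions.col

    // Sketch only: derive CTR-style ratios from the joined counts before writing them out.
    val withCtr = result
      .withColumn("ctr_tail_1", col("clk_active_1") / col("imp_active_1"))   // device_id ending in 1
      .withColumn("ctr_all", col("clk_active_all") / col("imp_active_all"))  // all clicking users

    withCtr.show()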
eda/feededa/src/main/scala/com/gmei/testt.scala

@@ -54,7 +54,6 @@ object testt {
        |from online.tl_hdfs_maidian_view
        |where action="page_view"
        |and params["page_name"]="diary_detail"
-       |and (params["out"]-params["in"])<7200
        |and partition_date >='20180901'
      """.stripMargin
     )

@@ -71,9 +70,6 @@ object testt {
   }
 }