Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in
Toggle navigation
F
ffm-baseline
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
ML
ffm-baseline
Commits
d75dbdb2
Commit
d75dbdb2
authored
Feb 25, 2019
by
张彦钊
Browse files
Options
Browse Files
Download
Plain Diff
Merge branch 'master' of git.wanmeizhensuo.com:ML/ffm-baseline
把esmm训练集改成train_data
parents
64803aa2
15fe67a5
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
242 additions
and
14 deletions
+242
-14
temp_count.scala
eda/feededa/src/main/scala/com/gmei/temp_count.scala
+230
-4
testt.scala
eda/feededa/src/main/scala/com/gmei/testt.scala
+12
-10
No files found.
eda/feededa/src/main/scala/com/gmei/temp_count.scala
View file @
d75dbdb2
...
@@ -312,7 +312,7 @@ object Repeated_content_recommendation_moreday {
...
@@ -312,7 +312,7 @@ object Repeated_content_recommendation_moreday {
val
now
=
new
Date
()
val
now
=
new
Date
()
val
dateFormat
=
new
SimpleDateFormat
(
"yyyy-MM-dd"
)
val
dateFormat
=
new
SimpleDateFormat
(
"yyyy-MM-dd"
)
val
date
=
dateFormat
.
format
(
now
.
getTime
-
86400000L
*
15
)
val
date
=
dateFormat
.
format
(
now
.
getTime
-
86400000L
*
8
)
val
yesterday
=
dateFormat
.
format
(
now
.
getTime
-
86400000L
)
val
yesterday
=
dateFormat
.
format
(
now
.
getTime
-
86400000L
)
...
@@ -342,6 +342,7 @@ object Repeated_content_recommendation_moreday {
...
@@ -342,6 +342,7 @@ object Repeated_content_recommendation_moreday {
val
repeated_rate
=
fenmu
/
fenzi
.
toDouble
val
repeated_rate
=
fenmu
/
fenzi
.
toDouble
val
result
=
List
((
yesterday
,
repeated_rate
))
val
result
=
List
((
yesterday
,
repeated_rate
))
println
(
result
)
val
df_result
=
sc
.
createDataFrame
(
result
)
val
df_result
=
sc
.
createDataFrame
(
result
)
GmeiConfig
.
writeToJDBCTable
(
df_result
,
table
=
"Repeated_content_recommendation_moreday"
,
SaveMode
.
Append
)
GmeiConfig
.
writeToJDBCTable
(
df_result
,
table
=
"Repeated_content_recommendation_moreday"
,
SaveMode
.
Append
)
...
@@ -351,10 +352,7 @@ object Repeated_content_recommendation_moreday {
...
@@ -351,10 +352,7 @@ object Repeated_content_recommendation_moreday {
// GmeiConfig.writeToJDBCTable(df, table = "Repeated_evaluation_indicator_moreday", SaveMode.Append)
// GmeiConfig.writeToJDBCTable(df, table = "Repeated_evaluation_indicator_moreday", SaveMode.Append)
}
}
}
}
}
}
...
@@ -640,3 +638,231 @@ object GetHiveSearchData {
...
@@ -640,3 +638,231 @@ object GetHiveSearchData {
}
}
/**
 * Spark job that, for a single day, compares diary-card clicks and precise feed exposures
 * between the "new" and "old" device groups and appends one summary row to the
 * `device_clk_imp_reason` JDBC table.
 *
 * Command-line options:
 *   --env   database environment handed to `GmeiConfig.setup` (default "dev")
 *   --date  statistics date; it is used verbatim as `stat_date` and, with the dashes
 *           removed, as `partition_date` (default "2018-08-01")
 */
object find_reason {

  Logger.getLogger("org.apache.spark").setLevel(Level.WARN)
  // NOTE(review): Jetty's own package is `org.eclipse.jetty`; this logger name looks like a
  // typo and probably matches nothing. Left unchanged so logging behaviour stays the same.
  Logger.getLogger("org.apache.eclipse.jetty.server").setLevel(Level.OFF)

  case class Params(env: String = "dev",
                    date: String = "2018-08-01"
                   ) extends AbstractParams[Params] with Serializable

  val defaultParams = Params()

  val parser = new OptionParser[Params]("Feed_EDA") {
    // Fix: the banner used to announce "WeafareStat" (copied from another job).
    head("find_reason")
    opt[String]("env")
      .text("the databases environment you used")
      .action((x, c) => c.copy(env = x))
    opt[String]("date")
      .text("the date you used")
      .action((x, c) => c.copy(date = x))
    // Fix: `.stripMargin` binds tighter than `+`, so it used to run before the "--env" line
    // was appended and that line kept its literal '|'. The concatenation is parenthesised so
    // the margin is stripped from the whole message. The example also names this object now.
    note(
      ("""
         |For example, the following command runs this app on a tidb dataset:
         |
         | spark-submit --class com.gmei.find_reason ./target/scala-2.11/feededa-assembly-0.1.jar \
         """ + s"| --env ${defaultParams.env}").stripMargin
    )
  }

  /**
   * Entry point: parses the arguments and runs the job.
   *
   * Fix: a parse failure used to be dropped silently (`Option.map` with the `None` case
   * ignored), so the process ended successfully without doing any work. It now exits with
   * status 1.
   */
  def main(args: Array[String]): Unit = {
    parser.parse(args, defaultParams) match {
      case Some(param) => run(param)
      case None => sys.exit(1)
    }
  }

  /**
   * Builds the query that selects the distinct, non-blacklisted devices of one day whose
   * `active_type` compares to '4' with the given operator, excluding a fixed list of
   * acquisition channels.
   *
   * @param activeTypeOp  SQL comparison operator applied to `os.active_type` ("=" or "!=")
   * @param partitionDate day partition in `yyyyMMdd` form
   */
  private def dailyDevicesSql(activeTypeOp: String, partitionDate: String): String =
    s"""
       |select distinct(os.device_id) as device_id
       |from online.ml_device_day_active_status os left join blacklist
       |on os.device_id = blacklist.device_id
       |where os.active_type ${activeTypeOp} '4'
       |and os.first_channel_source_type not in ('yqxiu1','yqxiu2','yqxiu3','yqxiu4','yqxiu5','mxyc1','mxyc2','mxyc3'
       | ,'wanpu','jinshan','jx','maimai','zhuoyi','huatian','suopingjingling','mocha','mizhe','meika','lamabang'
       | ,'js-az1','js-az2','js-az3','js-az4','js-az5','jfq-az1','jfq-az2','jfq-az3','jfq-az4','jfq-az5','toufang1'
       | ,'toufang2','toufang3','toufang4','toufang5','toufang6','TF-toufang1','TF-toufang2','TF-toufang3','TF-toufang4'
       | ,'TF-toufang5','tf-toufang1','tf-toufang2','tf-toufang3','tf-toufang4','tf-toufang5','benzhan','promotion_aso100'
       | ,'promotion_qianka','promotion_xiaoyu','promotion_dianru','promotion_malioaso','promotion_malioaso-shequ'
       | ,'promotion_shike','promotion_julang_jl03','','unknown')
       |and os.partition_date ='${partitionDate}'
       |and blacklist.device_id is null
     """.stripMargin

  /** Runs all queries for `param.date` and appends the joined result to JDBC. */
  private def run(param: Params): Unit = {
    GmeiConfig.setup(param.env)
    val spark_env = GmeiConfig.getSparkSession()
    val sc = spark_env._2

    // Map the TiDB tables that the queries below reference by their bare names.
    val ti = new TiContext(sc)
    Seq("diary_video", "data_feed_click", "blacklist", "data_feed_exposure_precise")
      .foreach(name => ti.tidbMapTable(dbName = "jerry_prod", tableName = name))

    // val stat_date = GmeiConfig.getMinusNDate(1)
    val stat_date = param.date
    val partition_date = stat_date.replace("-", "")

    // Blacklisted devices, exposed as the temp view "blacklist".
    val blacklist = sc.sql(
      s"""
         |select device_id from blacklist
       """.stripMargin
    )
    blacklist.createOrReplaceTempView("blacklist")

    // Agency (institution) device ids: devices with pv_ratio >= 0.95 in the daily or the
    // monthly hospital-spam table, up to and including the statistics date.
    val agency_id = sc.sql(
      s"""
         |SELECT DISTINCT(cl_id) as device_id
         |FROM online.ml_hospital_spam_pv_day
         |WHERE partition_date >= '20180402'
         |AND partition_date <= '${partition_date}'
         |AND pv_ratio >= 0.95
         |UNION ALL
         |SELECT DISTINCT(cl_id) as device_id
         |FROM online.ml_hospital_spam_pv_month
         |WHERE partition_date >= '20171101'
         |AND partition_date <= '${partition_date}'
         |AND pv_ratio >= 0.95
       """.stripMargin
    )
    agency_id.show() // debug output
    agency_id.createOrReplaceTempView("agency_id")

    // Daily new users (active_type != '4').
    val device_id_newUser = sc.sql(dailyDevicesSql("!=", partition_date))
    device_id_newUser.show() // debug output
    device_id_newUser.createOrReplaceTempView("device_id_new")

    // Daily old users (active_type = '4').
    val device_id_oldUser = sc.sql(dailyDevicesSql("=", partition_date))
    device_id_oldUser.show() // debug output
    device_id_oldUser.createOrReplaceTempView("device_id_old")

    // All diary-card click events of the day, excluding agency devices (anti-join).
    val all_clk = sc.sql(
      s"""
         |select '${stat_date}' as stat_date,ov.cl_id as device_id
         |from online.tl_hdfs_maidian_view ov left join agency_id
         |on ov.cl_id = agency_id.device_id
         |where ov.action = 'on_click_diary_card'
         |and ov.cl_id != "NULL"
         |and ov.partition_date='${partition_date}'
         |and agency_id.device_id is null
       """.stripMargin
    )
    all_clk.show() // debug output
    all_clk.createOrReplaceTempView("all_clk_diary_card")

    // 1. Clicks by old users on that day (click rows are counted, not distinct devices).
    val old_clk_count = sc.sql(
      s"""
         |select '${stat_date}' as stat_date,count(oc.device_id) as old_clk_count
         |from all_clk_diary_card oc inner join device_id_old
         |on oc.device_id = device_id_old.device_id
         |group by stat_date
       """.stripMargin
    )
    old_clk_count.show() // debug output

    // 1.1 Old users with at least one click.
    val old_clk_device = sc.sql(
      s"""
         |select distinct(oc.device_id) as device_id
         |from all_clk_diary_card oc inner join device_id_old
         |on oc.device_id = device_id_old.device_id
       """.stripMargin
    )
    old_clk_device.createOrReplaceTempView("old_clk_device")

    // 2. Clicks by new users on that day (click rows are counted, not distinct devices).
    val new_clk_count = sc.sql(
      s"""
         |select '${stat_date}' as stat_date,count(oc.device_id) as new_clk_count
         |from all_clk_diary_card oc inner join device_id_new
         |on oc.device_id = device_id_new.device_id
         |group by stat_date
       """.stripMargin
    )

    // 2.1 New users with at least one click.
    val new_clk_device = sc.sql(
      s"""
         |select distinct(oc.device_id) as device_id
         |from all_clk_diary_card oc inner join device_id_new
         |on oc.device_id = device_id_new.device_id
       """.stripMargin
    )
    new_clk_device.createOrReplaceTempView("new_clk_device")

    // 3. Number of old users on that day.
    // NOTE(review): the inner join keeps only old users that ARE in agency_id, whereas the
    // click query above excludes agency devices. This looks inverted - confirm the intent.
    val old_count = sc.sql(
      s"""
         |select '${stat_date}' as stat_date,count(distinct(dio.device_id)) as old_count
         |from device_id_old dio inner join agency_id
         |on dio.device_id = agency_id.device_id
       """.stripMargin
    )

    // 4. Number of new users on that day.
    // NOTE(review): same inner join against agency_id as in step 3 - confirm the intent.
    val new_count = sc.sql(
      s"""
         |select '${stat_date}' as stat_date,count(distinct(din.device_id)) as new_count
         |from device_id_new din inner join agency_id
         |on din.device_id = agency_id.device_id
       """.stripMargin
    )

    // 5. Exposures received by old users who clicked.
    val exp_clkold_count = sc.sql(
      s"""
         |select '${stat_date}' as stat_date,count(dp.device_id) as imp_clkold_count
         |from data_feed_exposure_precise dp inner join old_clk_device
         |on dp.device_id = old_clk_device.device_id
         |where stat_date='${stat_date}'
         |group by stat_date
       """.stripMargin
    )

    // 6. Exposures received by new users who clicked.
    val exp_clknew_count = sc.sql(
      s"""
         |select '${stat_date}' as stat_date,count(dp.device_id) as imp_clknew_count
         |from data_feed_exposure_precise dp inner join new_clk_device
         |on dp.device_id = new_clk_device.device_id
         |where stat_date='${stat_date}'
         |group by stat_date
       """.stripMargin
    )

    // Combine the six metrics on stat_date. These are inner joins, so nothing is written
    // when any of the grouped metrics has no row for the day.
    val result = old_clk_count
      .join(new_clk_count, "stat_date")
      .join(old_count, "stat_date")
      .join(new_count, "stat_date")
      .join(exp_clkold_count, "stat_date")
      .join(exp_clknew_count, "stat_date")

    GmeiConfig.writeToJDBCTable(result, "device_clk_imp_reason", SaveMode.Append)
  }
}
eda/feededa/src/main/scala/com/gmei/testt.scala
View file @
d75dbdb2
...
@@ -76,22 +76,24 @@ object testt {
...
@@ -76,22 +76,24 @@ object testt {
|AND pv_ratio >= 0.95
|AND pv_ratio >= 0.95
"""
.
stripMargin
"""
.
stripMargin
)
)
agency_id
.
show
()
//
agency_id.show()
agency_id
.
createOrReplaceTempView
(
"agency_id"
)
agency_id
.
createOrReplaceTempView
(
"agency_id"
)
//每日新用户
//每日新用户
val
device_id_newUser
=
sc
.
sql
(
val
device_id_newUser
=
sc
.
sql
(
s
"""
s
"""
|select distinct(device_id) as device_id
|select distinct(os.device_id) as device_id
|from online.ml_device_day_active_status
|from online.ml_device_day_active_status os left join blacklist
|where active_type != '4'
|on os.device_id=blacklist.device_id
|and first_channel_source_type not in ('yqxiu1','yqxiu2','yqxiu3','yqxiu4','yqxiu5','mxyc1','mxyc2','mxyc3'
|where os.active_type != '4'
|and os.first_channel_source_type not in ('yqxiu1','yqxiu2','yqxiu3','yqxiu4','yqxiu5','mxyc1','mxyc2','mxyc3'
| ,'wanpu','jinshan','jx','maimai','zhuoyi','huatian','suopingjingling','mocha','mizhe','meika','lamabang'
| ,'wanpu','jinshan','jx','maimai','zhuoyi','huatian','suopingjingling','mocha','mizhe','meika','lamabang'
| ,'js-az1','js-az2','js-az3','js-az4','js-az5','jfq-az1','jfq-az2','jfq-az3','jfq-az4','jfq-az5','toufang1'
| ,'js-az1','js-az2','js-az3','js-az4','js-az5','jfq-az1','jfq-az2','jfq-az3','jfq-az4','jfq-az5','toufang1'
| ,'toufang2','toufang3','toufang4','toufang5','toufang6','TF-toufang1','TF-toufang2','TF-toufang3','TF-toufang4'
| ,'toufang2','toufang3','toufang4','toufang5','toufang6','TF-toufang1','TF-toufang2','TF-toufang3','TF-toufang4'
| ,'TF-toufang5','tf-toufang1','tf-toufang2','tf-toufang3','tf-toufang4','tf-toufang5','benzhan','promotion_aso100'
| ,'TF-toufang5','tf-toufang1','tf-toufang2','tf-toufang3','tf-toufang4','tf-toufang5','benzhan','promotion_aso100'
| ,'promotion_qianka','promotion_xiaoyu','promotion_dianru','promotion_malioaso','promotion_malioaso-shequ'
| ,'promotion_qianka','promotion_xiaoyu','promotion_dianru','promotion_malioaso','promotion_malioaso-shequ'
| ,'promotion_shike','promotion_julang_jl03','','unknown')
| ,'promotion_shike','promotion_julang_jl03','','unknown')
|and partition_date ='${partition_date}'
|and os.partition_date ='${partition_date}'
|and blacklist.device_id is null
"""
.
stripMargin
"""
.
stripMargin
)
)
device_id_newUser
.
show
()
device_id_newUser
.
show
()
...
@@ -103,19 +105,19 @@ object testt {
...
@@ -103,19 +105,19 @@ object testt {
|select distinct(os.device_id) as device_id
|select distinct(os.device_id) as device_id
|from online.ml_device_day_active_status os left join blacklist
|from online.ml_device_day_active_status os left join blacklist
|on os.device_id=blacklist.device_id
|on os.device_id=blacklist.device_id
|where active_type = '4'
|where
os.
active_type = '4'
|and first_channel_source_type not in ('yqxiu1','yqxiu2','yqxiu3','yqxiu4','yqxiu5','mxyc1','mxyc2','mxyc3'
|and
os.
first_channel_source_type not in ('yqxiu1','yqxiu2','yqxiu3','yqxiu4','yqxiu5','mxyc1','mxyc2','mxyc3'
| ,'wanpu','jinshan','jx','maimai','zhuoyi','huatian','suopingjingling','mocha','mizhe','meika','lamabang'
| ,'wanpu','jinshan','jx','maimai','zhuoyi','huatian','suopingjingling','mocha','mizhe','meika','lamabang'
| ,'js-az1','js-az2','js-az3','js-az4','js-az5','jfq-az1','jfq-az2','jfq-az3','jfq-az4','jfq-az5','toufang1'
| ,'js-az1','js-az2','js-az3','js-az4','js-az5','jfq-az1','jfq-az2','jfq-az3','jfq-az4','jfq-az5','toufang1'
| ,'toufang2','toufang3','toufang4','toufang5','toufang6','TF-toufang1','TF-toufang2','TF-toufang3','TF-toufang4'
| ,'toufang2','toufang3','toufang4','toufang5','toufang6','TF-toufang1','TF-toufang2','TF-toufang3','TF-toufang4'
| ,'TF-toufang5','tf-toufang1','tf-toufang2','tf-toufang3','tf-toufang4','tf-toufang5','benzhan','promotion_aso100'
| ,'TF-toufang5','tf-toufang1','tf-toufang2','tf-toufang3','tf-toufang4','tf-toufang5','benzhan','promotion_aso100'
| ,'promotion_qianka','promotion_xiaoyu','promotion_dianru','promotion_malioaso','promotion_malioaso-shequ'
| ,'promotion_qianka','promotion_xiaoyu','promotion_dianru','promotion_malioaso','promotion_malioaso-shequ'
| ,'promotion_shike','promotion_julang_jl03','','unknown')
| ,'promotion_shike','promotion_julang_jl03','','unknown')
|and partition_date ='${partition_date}'
|and
os.
partition_date ='${partition_date}'
|and blacklist.device_id is null
|and blacklist.device_id is null
"""
.
stripMargin
"""
.
stripMargin
)
)
device_id_oldUser
.
show
()
//
device_id_oldUser.show()
device_id_oldUser
.
createOrReplaceTempView
(
"device_id_old"
)
device_id_oldUser
.
createOrReplaceTempView
(
"device_id_old"
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment