Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in
Toggle navigation
F
ffm-baseline
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
ML
ffm-baseline
Commits
7ee9d665
Commit
7ee9d665
authored
Apr 19, 2019
by
王志伟
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
重新跑数据
parent
60ab661f
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
46 additions
and
60 deletions
+46
-60
temp_count.scala
eda/feededa/src/main/scala/com/gmei/temp_count.scala
+16
-4
testt.scala
eda/feededa/src/main/scala/com/gmei/testt.scala
+30
-56
No files found.
eda/feededa/src/main/scala/com/gmei/temp_count.scala
View file @
7ee9d665
...
...
@@ -168,7 +168,10 @@ object temp_count {
// GmeiConfig.writeToJDBCTable("jdbc:mysql://152.136.44.138:4000/jerry_prod?user=root&password=3SYz54LS9#^9sBvC&rewriteBatchedStatements=true",result1, table="ffm_diary_ctr",SaveMode.Append)
println
(
"开始写入"
)
GmeiConfig
.
writeToJDBCTable
(
"jerry.jdbcuri"
,
result1
,
table
=
"ffm_diary_ctr"
,
SaveMode
.
Append
)
// GmeiConfig.writeToJDBCTable("jerry.jdbcuri",result1, table="ffm_diary_ctr",SaveMode.Append)
// println("写入完成")
GmeiConfig
.
writeToJDBCTable
(
"jdbc:mysql://172.16.40.158:4000/jerry_prod?user=root&password=3SYz54LS9#^9sBvC&rewriteBatchedStatements=true"
,
result1
,
table
=
"ffm_diary_ctr"
,
SaveMode
.
Append
)
println
(
"写入完成"
)
...
...
@@ -344,7 +347,10 @@ object Repeated_content_recommendation {
// GmeiConfig.writeToJDBCTable(df2, table = "Repeated_evaluation_indicator", SaveMode.Append)
println
(
"开始写入"
)
GmeiConfig
.
writeToJDBCTable
(
"jerry.jdbcuri"
,
df2
,
table
=
"Repeated_evaluation_indicator"
,
SaveMode
.
Append
)
// GmeiConfig.writeToJDBCTable("jerry.jdbcuri",df2, table="Repeated_evaluation_indicator",SaveMode.Append)
// println("写入完成")
GmeiConfig
.
writeToJDBCTable
(
"jdbc:mysql://172.16.40.158:4000/jerry_prod?user=root&password=3SYz54LS9#^9sBvC&rewriteBatchedStatements=true"
,
df2
,
table
=
"Repeated_evaluation_indicator"
,
SaveMode
.
Append
)
println
(
"写入完成"
)
// val exp_diary_old = sc.sql(
...
...
@@ -474,7 +480,10 @@ object Repeated_content_recommendation_moreday {
// GmeiConfig.writeToJDBCTable("jdbc:mysql://152.136.44.138:4000/jerry_prod?user=root&password=3SYz54LS9#^9sBvC&rewriteBatchedStatements=true",df_result, table="Repeated_content_recommendation_moreday",SaveMode.Append)
println
(
"开始写入"
)
GmeiConfig
.
writeToJDBCTable
(
"jerry.jdbcuri"
,
df_result
,
table
=
"Repeated_content_recommendation_moreday"
,
SaveMode
.
Append
)
// GmeiConfig.writeToJDBCTable("jerry.jdbcuri",df_result, table="Repeated_content_recommendation_moreday",SaveMode.Append)
// println("写入完成")
GmeiConfig
.
writeToJDBCTable
(
"jdbc:mysql://172.16.40.158:4000/jerry_prod?user=root&password=3SYz54LS9#^9sBvC&rewriteBatchedStatements=true"
,
df_result
,
table
=
"Repeated_content_recommendation_moreday"
,
SaveMode
.
Append
)
println
(
"写入完成"
)
...
...
@@ -648,7 +657,10 @@ object GetHiveSearchData {
// GmeiConfig.writeToJDBCTable("jdbc:mysql://152.136.44.138:4000/jerry_prod?user=root&password=3SYz54LS9#^9sBvC&rewriteBatchedStatements=true",df_result, table="GetHiveSearchData_CTR",SaveMode.Append)
println
(
"开始写入"
)
GmeiConfig
.
writeToJDBCTable
(
"jerry.jdbcuri"
,
df_result
,
table
=
"GetHiveSearchData_CTR"
,
SaveMode
.
Append
)
// GmeiConfig.writeToJDBCTable("jerry.jdbcuri",df_result, table="GetHiveSearchData_CTR",SaveMode.Append)
// println("写入完成")
GmeiConfig
.
writeToJDBCTable
(
"jdbc:mysql://172.16.40.158:4000/jerry_prod?user=root&password=3SYz54LS9#^9sBvC&rewriteBatchedStatements=true"
,
df_result
,
table
=
"GetHiveSearchData_CTR"
,
SaveMode
.
Append
)
println
(
"写入完成"
)
...
...
eda/feededa/src/main/scala/com/gmei/testt.scala
View file @
7ee9d665
...
...
@@ -62,10 +62,6 @@ object testt {
)
blacklist
.
createOrReplaceTempView
(
"blacklist"
)
// sc.sql("ADD JAR hdfs:///user/hive/share/lib/udf/brickhouse-0.7.1-SNAPSHOT.jar")
// sc.sql("ADD JAR hdfs:///user/hive/share/lib/udf/hive-udf-1.0-SNAPSHOT.jar")
// sc.sql("CREATE TEMPORARY FUNCTION json_map AS 'brickhouse.udf.json.JsonMapUDF'")
// sc.sql("CREATE TEMPORARY FUNCTION is_json AS 'com.gmei.hive.common.udf.UDFJsonFormatCheck'")
val
agency_id
=
sc
.
sql
(
...
...
@@ -83,7 +79,7 @@ object testt {
|AND pv_ratio >= 0.95
"""
.
stripMargin
)
//
agency_id.show()
agency_id
.
show
()
agency_id
.
createOrReplaceTempView
(
"agency_id"
)
//每日新用户
val
device_id_newUser
=
sc
.
sql
(
...
...
@@ -124,7 +120,7 @@ object testt {
|and blacklist.device_id is null
"""
.
stripMargin
)
//
device_id_oldUser.show()
device_id_oldUser
.
show
()
device_id_oldUser
.
createOrReplaceTempView
(
"device_id_old"
)
...
...
@@ -142,6 +138,9 @@ object testt {
"""
.
stripMargin
)
diary_meigou_temp
.
createOrReplaceTempView
(
"diary_meigou_temp"
)
diary_meigou_temp
.
show
()
val
diary_meigou_device
=
sc
.
sql
(
s
"""
|select dt.device_id
...
...
@@ -151,6 +150,8 @@ object testt {
"""
.
stripMargin
)
diary_meigou_device
.
createOrReplaceTempView
(
"diary_meigou_device"
)
diary_meigou_device
.
show
()
//新用户到美购详情页的转化
val
diary_meigou_newUser
=
sc
.
sql
(
s
"""
...
...
@@ -160,6 +161,7 @@ object testt {
|where device_id_new.device_id is not null
"""
.
stripMargin
)
diary_meigou_newUser
.
show
()
//老用户到美购详情页的转化
val
diary_meigou_oldUser
=
sc
.
sql
(
s
"""
...
...
@@ -169,20 +171,9 @@ object testt {
|where device_id_old.device_id is not null
"""
.
stripMargin
)
diary_meigou_oldUser
.
show
()
// val diary_meigou_count = sc.sql(
// s"""
// |select '${stat_date}' as stat_date, count(page_name) as diary_meigou_count
// |from online.bl_hdfs_page_view_updates ou left join agency_id
// |on ou.cl_id = agency_id.device_id
// |where ou.partition_date = '${partition_date}'
// |and ou.page_name='welfare_detail'
// |and ou.referrer='diary_detail'
// |and agency_id.device_id is null
// |and ou.cl_id not in (select device_id from blacklist)
// """.stripMargin
// )
// diary_meigou_count.show()
//2.日记本点击数
val
diary_clk_temp
=
sc
.
sql
(
...
...
@@ -197,6 +188,7 @@ object testt {
"""
.
stripMargin
)
diary_clk_temp
.
createOrReplaceTempView
(
"diary_clk_temp"
)
diary_clk_temp
.
show
()
val
diary_clk_device
=
sc
.
sql
(
s
"""
...
...
@@ -208,6 +200,7 @@ object testt {
)
diary_clk_device
.
createOrReplaceTempView
(
"diary_clk_device"
)
diary_clk_device
.
show
()
//新用户日记本点击
val
diary_clk_newUser
=
sc
.
sql
(
...
...
@@ -218,6 +211,7 @@ object testt {
|where device_id_new.device_id is not null
"""
.
stripMargin
)
diary_clk_newUser
.
show
()
//老用户日记本点击
val
diary_clk_oldUser
=
sc
.
sql
(
s
"""
...
...
@@ -227,19 +221,8 @@ object testt {
|where device_id_old.device_id is not null
"""
.
stripMargin
)
// val diary_clk = sc.sql(
// s"""
// |select '${stat_date}' as stat_date,count(cl_id) as diary_clk
// |from online.tl_hdfs_maidian_view ov left join agency_id
// |on ov.cl_id = agency_id.device_id
// |where ov.action = 'on_click_diary_card'
// |and ov.cl_id != "NULL"
// |and ov.partition_date='${partition_date}'
// |and agency_id.device_id is null
// |and ov.cl_id not in (select device_id from blacklist)
// """.stripMargin
// )
// diary_clk.show()
diary_clk_oldUser
.
show
()
//3.日记本曝光数
val
diary_expoure_temp
=
sc
.
sql
(
...
...
@@ -254,6 +237,7 @@ object testt {
"""
.
stripMargin
)
diary_expoure_temp
.
createOrReplaceTempView
(
"diary_expoure_temp"
)
diary_expoure_temp
.
show
()
val
diary_expoure_device
=
sc
.
sql
(
s
"""
...
...
@@ -263,6 +247,7 @@ object testt {
|where blacklist.device_id is null
"""
.
stripMargin
)
diary_expoure_device
.
show
()
diary_expoure_device
.
createOrReplaceTempView
(
"diary_expoure_device"
)
//新用户日记本曝光
...
...
@@ -274,6 +259,8 @@ object testt {
|where device_id_new.device_id is not null
"""
.
stripMargin
)
diary_exp_newUser
.
show
()
//老用户日记本曝光
val
diary_exp_oldUser
=
sc
.
sql
(
s
"""
...
...
@@ -283,20 +270,9 @@ object testt {
|where device_id_old.device_id is not null
"""
.
stripMargin
)
diary_exp_oldUser
.
show
()
// val diary_expoure=sc.sql(
// s"""
// |select '${stat_date}' as stat_date,count(cl_id) as diary_expoure
// |from online.ml_community_exposure_detail od left join agency_id
// |on od.cl_id = agency_id.device_id
// |where od.business_type = "diary"
// |and od.cl_id != "NULL"
// |and od.partition_date='${partition_date}'
// |and agency_id.device_id is null
// |and od.cl_id not in (select device_id from blacklist)
// """.stripMargin
// )
// diary_expoure.show()
//4.搜索次数统计
val
search_device_temp
=
sc
.
sql
(
...
...
@@ -310,6 +286,7 @@ object testt {
"""
.
stripMargin
)
search_device_temp
.
createOrReplaceTempView
(
"search_device_temp"
)
search_device_temp
.
show
()
val
search_device
=
sc
.
sql
(
s
"""
...
...
@@ -320,6 +297,7 @@ object testt {
"""
.
stripMargin
)
search_device
.
createOrReplaceTempView
(
"search_device"
)
search_device
.
show
()
//新用户搜索次数
val
search_newUser
=
sc
.
sql
(
s
"""
...
...
@@ -329,6 +307,7 @@ object testt {
|where device_id_new.device_id is not null
"""
.
stripMargin
)
search_newUser
.
show
()
//老用户日搜索次数
val
search_oldUser
=
sc
.
sql
(
s
"""
...
...
@@ -338,6 +317,7 @@ object testt {
|where device_id_old.device_id is not null
"""
.
stripMargin
)
search_oldUser
.
show
()
//5.登录人数
val
log_device_temp
=
sc
.
sql
(
...
...
@@ -350,6 +330,7 @@ object testt {
"""
.
stripMargin
)
log_device_temp
.
createOrReplaceTempView
(
"log_device_temp"
)
log_device_temp
.
show
()
val
log_device
=
sc
.
sql
(
s
"""
...
...
@@ -360,6 +341,7 @@ object testt {
"""
.
stripMargin
)
log_device
.
createOrReplaceTempView
(
"log_device"
)
log_device
.
show
()
//新用户登录人数
val
log_newUser
=
sc
.
sql
(
...
...
@@ -370,6 +352,7 @@ object testt {
|where device_id_new.device_id is not null
"""
.
stripMargin
)
log_newUser
.
show
()
//老用户登录人数
val
log_oldUser
=
sc
.
sql
(
s
"""
...
...
@@ -379,17 +362,8 @@ object testt {
|where device_id_old.device_id is not null
"""
.
stripMargin
)
// val log_num = sc.sql(
// s"""
// |select '${stat_date}' as stat_date,count(distinct(device_id)) as log_num
// |from data_feed_exposure oe left join agency_id
// |on oe.device_id = agency_id.device_id
// |and oe.stat_date ='${stat_date}'
// |and agency_id.device_id is null
// |and oe.device_id not in (select device_id from blacklist)
// """.stripMargin
// )
// log_num.show()
log_oldUser
.
show
()
val
result
=
diary_meigou_newUser
.
join
(
diary_meigou_oldUser
,
"stat_date"
)
.
join
(
diary_clk_newUser
,
"stat_date"
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment