Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in
Toggle navigation
F
ffm-baseline
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
ML
ffm-baseline
Commits
4637e2bc
Commit
4637e2bc
authored
Jan 16, 2019
by
王志伟
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
计算多天的推荐重复问题
parent
c593113d
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
14 additions
and
22 deletions
+14
-22
temp_count.scala
eda/feededa/src/main/scala/com/gmei/temp_count.scala
+14
-22
No files found.
eda/feededa/src/main/scala/com/gmei/temp_count.scala
View file @
4637e2bc
package
com.gmei
package
com.gmei
import
java.io.Serializable
import
java.io.Serializable
import
java.text.SimpleDateFormat
import
com.gmei.WeafareStat.
{
defaultParams
,
parser
}
import
com.gmei.WeafareStat.
{
defaultParams
,
parser
}
import
org.apache.spark.sql.
{
SaveMode
,
TiContext
}
import
org.apache.spark.sql.
{
SaveMode
,
TiContext
}
import
org.apache.log4j.
{
Level
,
Logger
}
import
org.apache.log4j.
{
Level
,
Logger
}
import
scopt.OptionParser
import
scopt.OptionParser
import
com.gmei.lib.AbstractParams
import
com.gmei.lib.AbstractParams
import
com.google.
`type`
.
Date
object
temp_count
{
object
temp_count
{
...
@@ -298,40 +300,30 @@ object Repeated_content_recommendation_moreday {
...
@@ -298,40 +300,30 @@ object Repeated_content_recommendation_moreday {
ti
.
tidbMapTable
(
dbName
=
"jerry_prod"
,
tableName
=
"merge_queue_table"
)
ti
.
tidbMapTable
(
dbName
=
"jerry_prod"
,
tableName
=
"merge_queue_table"
)
val
stat_date
=
GmeiConfig
.
getMinusNDate
(
1
)
// val stat_date = GmeiConfig.getMinusNDate(1)
// val stat_date = param.date
val
stat_date
=
param
.
date
val
partition_date
=
stat_date
.
replace
(
"-"
,
""
)
// val partition_date = stat_date.replace("-","")
// val now: Date = new Date()
// val dateFormat: SimpleDateFormat = new SimpleDateFormat("yyyy-MM-dd")
// val date = dateFormat.format(now.getTime - 86400000L * 18)
val
exp_diary
=
sc
.
sql
(
val
exp_diary
=
sc
.
sql
(
s
"""
s
"""
|select
concat_ws('|',device_id,cid_id
)
|select
stat_date,device_id,group_concat(distinct cid_id SEPARATOR ','
)
|from data_feed_exposure
|from data_feed_exposure
_precise
|where cid_type = 'diary'
|where cid_type = 'diary'
|and device_id not in (select device_id from blacklist)
|and device_id not in (select device_id from blacklist)
|and stat_date ='${stat_date}'
|and stat_date
>
='${stat_date}'
"""
.
stripMargin
"""
.
stripMargin
)
)
exp_diary
.
show
()
exp_diary
.
show
()
val
get_result
=
exp_diary
.
rdd
.
map
((
_
,
1
)).
reduceByKey
(
_
+
_
)
.
sortBy
(
_
.
_2
,
false
)
val
more_than2
=
get_result
.
filter
(
_
.
_2
>=
2
).
map
(
_
.
_2
).
reduce
((
x
,
y
)
=>
x
+
y
)
println
(
more_than2
)
val
all
=
get_result
.
map
(
_
.
_2
).
reduce
((
x
,
y
)
=>
x
+
y
)
println
(
all
)
val
repeated_rate
=
more_than2
/
all
.
toDouble
println
(
repeated_rate
)
val
test
=
List
((
stat_date
,
repeated_rate
))
val
df
=
sc
.
createDataFrame
(
test
)
GmeiConfig
.
writeToJDBCTable
(
df
,
table
=
"Repeated_evaluation_indicator_moreday"
,
SaveMode
.
Append
)
//
GmeiConfig.writeToJDBCTable(df, table = "Repeated_evaluation_indicator_moreday", SaveMode.Append)
// val temp=get_result.collect()
// for (i <- 0 until 30 ) {
// println(temp(i))
// }
}
}
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment