Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in
Toggle navigation
F
ffm-baseline
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
ML
ffm-baseline
Commits
c278309b
Commit
c278309b
authored
Jan 17, 2019
by
王志伟
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
统计多天重复日记数量
parent
851c7a59
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
22 additions
and
8 deletions
+22
-8
temp_count.scala
eda/feededa/src/main/scala/com/gmei/temp_count.scala
+22
-8
No files found.
eda/feededa/src/main/scala/com/gmei/temp_count.scala
View file @
c278309b
...
...
@@ -9,7 +9,9 @@ import org.apache.spark.sql.{SaveMode, TiContext}
import
org.apache.log4j.
{
Level
,
Logger
}
import
scopt.OptionParser
import
com.gmei.lib.AbstractParams
import
com.google.
`type`
.
Date
import
com.github.nscala_time.time.Imports._
import
java.text.SimpleDateFormat
import
java.util.Date
object
temp_count
{
...
...
@@ -302,13 +304,13 @@ object Repeated_content_recommendation_moreday {
// val stat_date = GmeiConfig.getMinusNDate(1)
val
stat_date
=
param
.
date
val
stat_date
=
"2019-01-05"
// val partition_date = stat_date.replace("-","")
// val now: Date
= new Date()
// val dateFormat: SimpleD
ateFormat = new SimpleDateFormat("yyyy-MM-dd")
// val date = dateFormat.format(now.getTime - 86400000L * 18
)
val
now
=
new
Date
()
val
d
ateFormat
=
new
SimpleDateFormat
(
"yyyy-MM-dd"
)
val
date
=
dateFormat
.
format
(
now
.
getTime
-
86400000L
*
15
)
val
exp_diary
=
sc
.
sql
(
...
...
@@ -316,7 +318,8 @@ object Repeated_content_recommendation_moreday {
|select stat_date,device_id,concat_ws(',',collect_set(distinct cid_id)) as expoure_diary
|from data_feed_exposure_precise
|where cid_type = 'diary'
|and stat_date >='${stat_date}'
|and stat_date >='2018-12-20'
|and stat_date <'2019-01-05'
|group by device_id,stat_date
"""
.
stripMargin
).
rdd
.
map
(
row
=>(
row
(
0
).
toString
,
row
(
1
).
toString
,
row
(
2
).
toString
)).
map
(
row
=>(
row
.
_2
,
row
.
_3
)).
groupByKey
()
...
...
@@ -325,9 +328,20 @@ object Repeated_content_recommendation_moreday {
//打印结果
// val temp=exp_diary.take(10).foreach(println)
// val count_imp=exp_diary.map(_._2).map(row=>row.flatMap(x=>x.split(",")).toArray)
// .map(x => (x,x)).map(x => (x._1.distinct.size,x._2.size)).map(x => (x._2-x._1,x._2))
//统计每个用户重复日记个数
val
count_imp
=
exp_diary
.
map
(
_
.
_2
).
map
(
row
=>
row
.
map
(
word
=>(
word
,
1
)))
val
temp
=
count_imp
.
take
(
10
).
foreach
(
println
)
val
count_imp
=
exp_diary
.
map
(
_
.
_2
).
map
(
row
=>
row
.
flatMap
(
x
=>
x
.
split
(
","
)).
toArray
)
.
map
(
x
=>
(
x
,
x
)).
map
(
x
=>
(
x
.
_1
.
distinct
.
size
,
x
.
_2
.
size
)).
map
(
x
=>
(
x
.
_2
-
x
.
_1
,
x
.
_2
)).
collect
()
val
fenmu
=
count_imp
.
map
(
x
=>
x
.
_1
).
reduce
((
x
,
y
)
=>
x
+
y
)
val
fenzi
=
count_imp
.
map
(
x
=>
x
.
_2
).
reduce
((
x
,
y
)
=>
x
+
y
)
val
repeated_rate
=
fenmu
/
fenzi
.
toDouble
val
result
=
List
((
stat_date
,
repeated_rate
))
val
df_result
=
sc
.
createDataFrame
(
result
)
GmeiConfig
.
writeToJDBCTable
(
df_result
,
table
=
"Repeated_content_recommendation_moreday"
,
SaveMode
.
Append
)
// exp_diary.show()
// exp_diary.createOrReplaceTempView("exp_diary")
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment