Commit c278309b authored by 王志伟

Count the number of repeated diaries over multiple days

parent 851c7a59
@@ -9,7 +9,9 @@ import org.apache.spark.sql.{SaveMode, TiContext}
import org.apache.log4j.{Level, Logger}
import scopt.OptionParser
import com.gmei.lib.AbstractParams
import com.google.`type`.Date
import com.github.nscala_time.time.Imports._
import java.text.SimpleDateFormat
import java.util.Date
object temp_count {
@@ -302,13 +304,13 @@ object Repeated_content_recommendation_moreday {
// val stat_date = GmeiConfig.getMinusNDate(1)
val stat_date = param.date
val stat_date = "2019-01-05"
// val partition_date = stat_date.replace("-","")
// val now: Date = new Date()
// val dateFormat: SimpleDateFormat = new SimpleDateFormat("yyyy-MM-dd")
// val date = dateFormat.format(now.getTime - 86400000L * 18)
    val now = new Date()
    val dateFormat = new SimpleDateFormat("yyyy-MM-dd")
    val date = dateFormat.format(now.getTime - 86400000L * 15)  // date string for 15 days before now
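// Not part of the commit — a minimal standalone sketch of the date-window arithmetic used
// above (shifting the current time back N whole days of epoch milliseconds and formatting it);
// the object name and the printed sample value are illustrative only.
import java.text.SimpleDateFormat
import java.util.Date

object DateWindowSketch {
  def main(args: Array[String]): Unit = {
    val dayMillis = 86400000L                                // milliseconds in one day
    val dateFormat = new SimpleDateFormat("yyyy-MM-dd")
    val now = new Date()
    // Same arithmetic as the code above: format a Date shifted back 15 days.
    val fifteenDaysAgo = dateFormat.format(new Date(now.getTime - dayMillis * 15))
    println(fifteenDaysAgo)                                  // e.g. "2018-12-21" if run on 2019-01-05
  }
}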
val exp_diary = sc.sql(
@@ -316,7 +318,8 @@ object Repeated_content_recommendation_moreday {
|select stat_date,device_id,concat_ws(',',collect_set(distinct cid_id)) as expoure_diary
|from data_feed_exposure_precise
|where cid_type = 'diary'
|and stat_date >='${stat_date}'
|and stat_date >='2018-12-20'
|and stat_date <'2019-01-05'
|group by device_id,stat_date
""".stripMargin
).rdd.map(row=>(row(0).toString,row(1).toString,row(2).toString)).map(row=>(row._2,row._3)).groupByKey()
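// Not part of the commit — a local-collections stand-in for the RDD pipeline above, showing
// what the (device_id, daily exposure string) pairs look like after groupByKey; the sample
// rows are hypothetical.
object ExposureGroupingSketch {
  def main(args: Array[String]): Unit = {
    // (stat_date, device_id, comma-joined distinct diary ids), one row per device per day
    val rows = Seq(
      ("2018-12-20", "device_a", "d1,d2,d3"),
      ("2018-12-21", "device_a", "d2,d4"),
      ("2018-12-20", "device_b", "d5")
    )
    // Mirrors .map(row => (row._2, row._3)).groupByKey(): device_id -> daily exposure strings
    val grouped: Map[String, Seq[String]] =
      rows.map(r => (r._2, r._3)).groupBy(_._1).map { case (k, v) => (k, v.map(_._2)) }
    grouped.foreach { case (device, days) => println(s"$device -> ${days.mkString(" | ")}") }
  }
}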
@@ -325,9 +328,20 @@ object Repeated_content_recommendation_moreday {
    // Print the results
// val temp=exp_diary.take(10).foreach(println)
// val count_imp=exp_diary.map(_._2).map(row=>row.flatMap(x=>x.split(",")).toArray)
// .map(x => (x,x)).map(x => (x._1.distinct.size,x._2.size)).map(x => (x._2-x._1,x._2))
    // Count the number of repeated diaries per user
val count_imp=exp_diary.map(_._2).map(row=>row.map(word=>(word,1)))
val temp=count_imp.take(10).foreach(println)
val count_imp=exp_diary.map(_._2).map(row=>row.flatMap(x=>x.split(",")).toArray)
.map(x => (x,x)).map(x => (x._1.distinct.size,x._2.size)).map(x => (x._2-x._1,x._2)).collect()
    val fenmu = count_imp.map(x => x._1).reduce((x, y) => x + y)   // sum of repeated exposures per device
    val fenzi = count_imp.map(x => x._2).reduce((x, y) => x + y)   // sum of total exposures per device
    val repeated_rate = fenmu / fenzi.toDouble
    val result = List((stat_date, repeated_rate))
    val df_result = sc.createDataFrame(result)
    GmeiConfig.writeToJDBCTable(df_result, table = "Repeated_content_recommendation_moreday", SaveMode.Append)
// exp_diary.show()
// exp_diary.createOrReplaceTempView("exp_diary")
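// Not part of the commit — a self-contained rework of the duplicate-exposure arithmetic above:
// per device, total exposures minus distinct exposures counts the repeated diary impressions,
// and the sums across devices give the overall repeat rate. Sample data is hypothetical.
object RepeatRateSketch {
  def main(args: Array[String]): Unit = {
    val exposuresByDevice: Seq[Seq[String]] = Seq(
      Seq("d1,d2,d3", "d2,d4"),   // device A: diary d2 exposed twice across the window
      Seq("d5", "d5,d6")          // device B: diary d5 exposed twice
    )
    val perDevice = exposuresByDevice
      .map(days => days.flatMap(_.split(",")))
      .map(ids => (ids.size - ids.distinct.size, ids.size))   // (repeated, total) per device
    val repeated = perDevice.map(_._1).sum                    // 2
    val total    = perDevice.map(_._2).sum                    // 8
    val repeatedRate = repeated / total.toDouble              // 0.25
    println(s"repeated=$repeated total=$total rate=$repeatedRate")
  }
}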