Commit 17b1f603 authored by 王志伟

核对日记本PV

parent 54bac5be
...@@ -206,6 +206,7 @@ object WeafareStat { ...@@ -206,6 +206,7 @@ object WeafareStat {
val result5 =result4.withColumn("diary_expoure_meigou_rate",result.col("diary_meigou_count")/result.col("diary_expoure")) val result5 =result4.withColumn("diary_expoure_meigou_rate",result.col("diary_meigou_count")/result.col("diary_expoure"))
result5.show() result5.show()
GmeiConfig.writeToJDBCTable(result5, "diary_meigou_cvr", SaveMode.Append)
sc.stop() sc.stop()
......
...@@ -44,27 +44,47 @@ object testt { ...@@ -44,27 +44,47 @@ object testt {
ti.tidbMapTable(dbName = "jerry_prod", tableName = "diary_video") ti.tidbMapTable(dbName = "jerry_prod", tableName = "diary_video")
ti.tidbMapTable(dbName = "jerry_prod", tableName = "data_feed_click") ti.tidbMapTable(dbName = "jerry_prod", tableName = "data_feed_click")
ti.tidbMapTable(dbName = "jerry_prod", tableName = "blacklist") ti.tidbMapTable(dbName = "jerry_prod", tableName = "blacklist")
ti.tidbMapTable(dbName = "jerry_test", tableName = "bl_device_list")
ti.tidbMapTable(dbName = "jerry_prod", tableName = "data_feed_exposure") ti.tidbMapTable(dbName = "jerry_prod", tableName = "data_feed_exposure")
val view_count = sc.sql( //机构id
s""" val agency_id = sc.sql(
|select params["business_id"] as diary_id,(params["out"]-params["in"]) as dur_time s"""
|from online.tl_hdfs_maidian_view |SELECT DISTINCT(cl_id) as device_id
|where action="page_view" |FROM online.ml_hospital_spam_pv_day
|and params["page_name"]="diary_detail" |WHERE partition_date >= '20180402'
|and partition_date >='20180901' |AND partition_date <= '20181203'
|AND pv_ratio >= 0.95
|UNION ALL
|SELECT DISTINCT(cl_id) as device_id
|FROM online.ml_hospital_spam_pv_month
|WHERE partition_date >= '20171101'
|AND partition_date <= '20181203'
|AND pv_ratio >= 0.95
""".stripMargin """.stripMargin
) )
view_count.show() // agency_id.show()
view_count.createOrReplaceTempView("temp") agency_id.createOrReplaceTempView("agency_id")
val diary_pv = sc.sql(
s"""
|select ov.partition_date, count(params["business_id"]) as diary_pv
|from online.tl_hdfs_maidian_view ov left join agency_id
|on ov.cl_id = agency_id.device_id
|where ov.action="page_view"
|and ov.params["page_name"]="diary_detail"
|and (ov.params["out"]-ov.params["in"])<7200
|and agency_id.device_id is null
|and ov.partition_date >='20181120'
|and ov.cl_id not in (select distinct(device_id) from blacklist)
|group by ov.partition_date order by ov.partition_date
""".stripMargin
)
diary_pv.show()
GmeiConfig.writeToJDBCTable(view_count, "avg", SaveMode.Overwrite)
// GmeiConfig.writeToJDBCTable(view_count, "avg", SaveMode.Overwrite)
val result = view_count
result.show()
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment