Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in
Toggle navigation
F
ffm-baseline
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
ML
ffm-baseline
Commits
8f5c89a0
Commit
8f5c89a0
authored
Jan 08, 2019
by
王志伟
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
增加文件统计精准曝光
parent
4fd140f9
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
418 additions
and
0 deletions
+418
-0
data_feed_exposure_precise.scala
.../src/main/scala/com/gmei/data_feed_exposure_precise.scala
+418
-0
No files found.
eda/feededa/src/main/scala/com/gmei/data_feed_exposure_precise.scala
0 → 100644
View file @
8f5c89a0
package
com.gmei
import
java.io.Serializable
import
com.gmei.WeafareStat.
{
defaultParams
,
parser
}
import
org.apache.spark.sql.
{
SaveMode
,
TiContext
}
import
org.apache.log4j.
{
Level
,
Logger
}
import
scopt.OptionParser
import
com.gmei.lib.AbstractParams
object
data_feed_exposure_precise
{
// Adjust log4j verbosity once, when this object is initialised: the Spark
// logger is capped at WARN and the second logger is switched off entirely.
// NOTE(review): "org.apache.eclipse.jetty.server" does not look like a real
// Jetty package name (Jetty normally lives under "org.eclipse.jetty"), so this
// entry may have no effect — confirm the intended logger name before changing it.
Seq(
  "org.apache.spark" -> Level.WARN,
  "org.apache.eclipse.jetty.server" -> Level.OFF
).foreach { case (loggerName, level) =>
  Logger.getLogger(loggerName).setLevel(level)
}
/**
  * Command-line settings for this job.
  *
  * @param env  environment name handed to `GmeiConfig.setup`; defaults to "dev"
  * @param date day to process; defaults to "2018-08-01". The job removes the
  *             dashes from this value to build the partition key it queries.
  */
case class Params(
  env: String = "dev",
  date: String = "2018-08-01"
) extends AbstractParams[Params]
  with Serializable
/** Baseline settings (every field at its declared default); used as the starting value when parsing arguments. */
val defaultParams: Params = Params()
/**
  * scopt parser for this job's command line.
  *
  * Recognised options:
  *  - `--env`  : copied into `Params.env`
  *  - `--date` : copied into `Params.date`
  *
  * The header and the usage example name this job
  * (`com.gmei.data_feed_exposure_precise`) rather than `WeafareStat`, which the
  * text had been copied from.
  */
val parser: OptionParser[Params] = new OptionParser[Params]("Feed_EDA") {
  head("data_feed_exposure_precise")

  opt[String]("env")
    .text("the databases environment you used")
    .action((x, c) => c.copy(env = x))

  opt[String]("date")
    .text("the date you used")
    .action((x, c) => c.copy(date = x))

  // The whole example is a single string so that stripMargin sees every line.
  // Previously the last line was appended with `+` after stripMargin had run,
  // which left a literal '|' in the printed usage text.
  // `\\` is needed because the s-interpolator processes escapes even inside
  // triple quotes; it prints as one trailing backslash (shell line continuation).
  note(
    s"""
       |For example, the following command runs this app on a tidb dataset:
       |
       | spark-submit --class com.gmei.data_feed_exposure_precise ./target/scala-2.11/feededa-assembly-0.1.jar \\
       |   --env ${defaultParams.env} --date ${defaultParams.date}
     """.stripMargin
  )
}
/**
  * Entry point: parses the command line and runs the exposure job.
  *
  * When the arguments cannot be parsed the process exits with status 1
  * instead of returning normally, so a scheduler does not record a run that
  * did nothing as a success.
  *
  * @param args raw command-line arguments (`--env`, `--date`)
  */
def main(args: Array[String]): Unit = {
  parser.parse(args, defaultParams) match {
    case Some(param) => run(param)
    // parse() yielded no Params, i.e. the arguments were rejected.
    case None => sys.exit(1)
  }
}

/**
  * Builds the precise-exposure feature rows for one day and appends them to
  * the JDBC table "data_feed_exposure_precise".
  *
  * Steps, each registered as a temp view that the next step reads:
  *  1. temp_result     - home-page / "精选"-tab exposures for the partition
  *  2. temp_result02   - step 1 minus excluded channels and spam devices
  *  3. result_dairy    - left join with diary statistics
  *  4. result_answer   - left join with answer statistics
  *  5. result_article  - left join with article statistics
  *  6. result_question - left join with question statistics
  *  7. final frame     - left join with live statistics, then written out
  *
  * @param param parsed settings; `param.date` with dashes removed must be 8 digits
  * @throws IllegalArgumentException if `param.date` does not have that shape
  */
private def run(param: Params): Unit = {
  // The date arrives from --date (an earlier variant used GmeiConfig.getMinusNDate(1)).
  val stat_date = param.date
  val partition_date = stat_date.replace("-", "")

  // The value is spliced verbatim into every SQL statement below, so reject
  // anything that is not a plain yyyyMMdd key before touching Spark.
  require(
    partition_date.matches("\\d{8}"),
    s"--date must look like yyyy-MM-dd (or yyyyMMdd), got: '$stat_date'"
  )

  GmeiConfig.setup(param.env)
  val sc = GmeiConfig.getSparkSession()._2
  val ti = new TiContext(sc)

  // NOTE(review): none of these mapped tables is referenced by the queries
  // below; the calls are kept because they are side-effecting — confirm
  // whether they are still needed.
  Seq("diary_video", "data_feed_click", "blacklist", "data_feed_exposure", "merge_queue_table")
    .foreach(name => ti.tidbMapTable(dbName = "jerry_prod", tableName = name))

  // Runs one SQL statement and exposes its result under `viewName`.
  def registerView(viewName: String, query: String): Unit =
    sc.sql(query).createOrReplaceTempView(viewName)

  // Step 1: raw exposures of the five supported card types.
  registerView(
    "temp_result",
    s"""
       |select
       | from_unixtime(unix_timestamp('${partition_date}' ,'yyyyMMdd'), 'yyyy-MM-dd') as stat_date,
       | time_stamp as time,
       | cl_id as device_id,
       | channel as device_type,
       | card_content_type as card_content_type ,
       | business_id as business_id,
       | if(card_content_type="diary", concat("diary|", business_id),
       | if(card_content_type="live", concat("live|", business_id),
       | if(card_content_type="question", concat("question|", business_id),
       | if(card_content_type="answer", concat("answer|", business_id),
       | if(card_content_type="article", concat("article|", business_id), null
       | ))))) as cid,
       | city_id as city_id
       |from online.ml_community_precise_exposure_detail
       |where card_content_type in ("article", "diary", "live", "answer", "question")
       | and page_name='home'
       | and tab_name="精选"
       | and cl_id != "NULL"
       | and partition_date='${partition_date}'
     """.stripMargin
  )

  // Step 2: drop excluded channels and devices listed in the spam tables.
  // NOTE(review): `NOT IN (subquery)` yields no rows at all if the subquery
  // returns a NULL cl_id — confirm the spam tables never contain NULL there.
  registerView(
    "temp_result02",
    s"""
       |select * from temp_result
       |where device_type not in ('yqxiu1','yqxiu2','yqxiu3','yqxiu4','yqxiu5','mxyc1','mxyc2','mxyc3'
       | ,'wanpu','jinshan','jx','maimai','zhuoyi','huatian','suopingjingling','mocha','mizhe','meika','lamabang'
       | ,'js-az1','js-az2','js-az3','js-az4','js-az5','jfq-az1','jfq-az2','jfq-az3','jfq-az4','jfq-az5','toufang1'
       | ,'toufang2','toufang3','toufang4','toufang5','toufang6','TF-toufang1','TF-toufang2','TF-toufang3','TF-toufang4'
       | ,'TF-toufang5','tf-toufang1','tf-toufang2','tf-toufang3','tf-toufang4','tf-toufang5','benzhan','promotion_aso100'
       | ,'promotion_qianka','promotion_xiaoyu','promotion_dianru','promotion_malioaso','promotion_malioaso-shequ'
       | ,'promotion_shike','promotion_julang_jl03','','unknown')
       |and device_id not in
       | (SELECT cl_id
       | FROM online.ml_hospital_spam_pv_day
       | WHERE partition_date>='20180402' AND partition_date<'${partition_date}'
       | AND pv_ratio>=0.95
       | UNION ALL
       | SELECT cl_id
       | FROM online.ml_hospital_spam_pv_month
       | WHERE partition_date>='20171101' AND partition_date<'${partition_date}'
       | AND pv_ratio>=0.95
       | )
     """.stripMargin
  )

  // Step 3: attach diary statistics (only rows whose card type is "diary" match).
  registerView(
    "result_dairy",
    s"""
       |select
       | re.stat_date as stat_date,
       | re.time as time,
       | re.device_id as device_id,
       | re.device_type as device_type,
       | re.card_content_type as cid_type,
       | re.business_id as cid_id,
       | re.cid as cid,
       | re.city_id as city_id,
       | da.content_level as diary_content_level,
       | da.created_time as diary_created_time,
       | da.updated_time as diary_updated_time,
       | da.service_id as diary_service_id,
       | da.doctor_id as diary_doctor_id,
       | da.new_topics as diary_new_topics,
       | da.new_votes as diary_new_votes,
       | da.new_topic_votes as diary_new_topic_votes,
       | da.new_replies as diary_new_replies,
       | da.new_topic_replies as diary_new_topic_replies,
       | da.new_favor as diary_new_favor,
       | da.show_count_choice as diary_show_count_choice,
       | da.show_count as diary_show_count,
       | da.click_count_choice as diary_click_count_choice,
       | da.page_view as diary_page_view,
       | da.user_view as diary_user_view,
       | da.device_view as diary_device_view
       |from temp_result02 re
       |left outer join
       |(
       | select
       | diary_id,
       | content_level,
       | unix_timestamp(created_time) as created_time,
       | unix_timestamp(last_topic_add_time) as updated_time,
       | service_id as service_id,
       | doctor_id as doctor_id,
       | new_topics as new_topics,
       | new_votes as new_votes,
       | new_topic_votes as new_topic_votes,
       | new_replies as new_replies,
       | new_topic_replies as new_topic_replies,
       | new_favor as new_favor,
       | show_count_choice as show_count_choice,
       | show_count as show_count,
       | click_count_choice as click_count_choice,
       | page_view as page_view,
       | user_view as user_view,
       | device_view
       | from online.ml_community_diary_updates
       | where partition_date='${partition_date}'
       |)da
       |on re.business_id = da.diary_id and re.card_content_type = "diary"
     """.stripMargin
  )

  // Step 4: attach answer statistics; shared columns fall back to the answer
  // value only where the diary value is null.
  registerView(
    "result_answer",
    s"""
       |select
       | rd.stat_date as stat_date,
       | rd.time as time,
       | rd.device_id as device_id,
       | rd.device_type as device_type,
       | rd.cid_type as cid_type,
       | rd.cid_id as cid_id,
       | rd.cid as cid,
       | rd.city_id as city_id,
       | an.is_recommend as is_recommend,
       | if(rd.diary_new_votes is Null, an.new_votes, rd.diary_new_votes) as new_votes,
       | if(rd.diary_new_replies is Null, an.reply_num, rd.diary_new_replies) as reply_num,
       | if(rd.diary_content_level is Null, an.content_level, rd.diary_content_level) as content_level,
       | if(rd.diary_created_time is Null, an.created_time, rd.diary_created_time) as created_time,
       | rd.diary_updated_time as diary_updated_time,
       | rd.diary_service_id as diary_service_id,
       | rd.diary_doctor_id as diary_doctor_id,
       | rd.diary_new_topics as diary_new_topics,
       | rd.diary_new_topic_votes as diary_new_topic_votes,
       | rd.diary_new_replies as diary_new_replies,
       | rd.diary_new_topic_replies as diary_new_topic_replies,
       | rd.diary_new_favor as diary_new_favor,
       |
       | an.reply_vote_num as answer_reply_vote_num,
       |
       | if(rd.diary_show_count_choice is Null, an.show_count_choice, rd.diary_show_count_choice ) as show_count_choice,
       | if(rd.diary_show_count is Null, an.show_count, rd.diary_show_count ) as show_count,
       | if(rd.diary_click_count_choice is Null, an.click_count_choice, rd.diary_click_count_choice) as click_count_choice,
       | if(rd.diary_page_view is Null, an.page_view, rd.diary_page_view ) as page_view,
       | if(rd.diary_user_view is Null, an.user_view, rd.diary_user_view ) as user_view,
       | if(rd.diary_device_view is Null, an.device_view, rd.diary_device_view) as device_view
       |
       |from result_dairy rd
       |left outer join
       |(
       | select
       | answer_id,
       | is_recommend,
       | unix_timestamp(created_time) as created_time,
       | content_level as content_level,
       | vote_num as new_votes,
       | reply_vote_num as reply_vote_num,
       | reply_num as reply_num,
       | show_count_choice as show_count_choice,
       | show_count as show_count,
       | click_count_choice as click_count_choice,
       | page_view as page_view,
       | user_view as user_view,
       | device_view as device_view
       | from online.ml_community_answer_updates
       | where partition_date='${partition_date}'
       |)an
       |on rd.cid_id = an.answer_id and rd.cid_type = "answer"
     """.stripMargin
  )

  // Step 5: attach article statistics, again filling only null columns.
  registerView(
    "result_article",
    s"""
       |select
       | ra.stat_date as stat_date,
       | ra.time as time,
       | ra.device_id as device_id,
       | ra.device_type as device_type,
       | ra.cid_type as cid_type,
       | ra.cid_id as cid_id,
       | ra.cid as cid,
       | ra.city_id as city_id,
       | if(ra.is_recommend is Null, ar.is_push, ra.is_recommend) as is_recommend,
       | ar.article_type as article_type,
       | if(ra.new_votes is Null, ar.vote_num, ra.new_votes) as new_votes,
       | if(ra.reply_num is Null, ar.reply_num, ra.reply_num) as reply_num,
       | ra.content_level as content_level,
       | if(ra.created_time is Null, ar.created_time, ra.created_time) as created_time,
       | ra.diary_updated_time as diary_updated_time,
       | ra.diary_service_id as diary_service_id,
       | ra.diary_doctor_id as diary_doctor_id,
       | ra.diary_new_topics as diary_new_topics,
       | ra.diary_new_replies as diary_new_replies,
       | ra.diary_new_topic_votes as diary_new_topic_votes,
       | ra.diary_new_topic_replies as diary_new_topic_replies,
       | ra.diary_new_favor as diary_new_favor,
       |
       | if(ra.answer_reply_vote_num is Null, ar.reply_vote_num, ra.answer_reply_vote_num) as reply_vote_num,
       |
       | if(ra.show_count_choice is Null, ar.show_count_choice, ra.show_count_choice ) as show_count_choice,
       | if(ra.show_count is Null, ar.show_count, ra.show_count ) as show_count,
       | if(ra.click_count_choice is Null, ar.click_count_choice, ra.click_count_choice) as click_count_choice,
       | if(ra.page_view is Null, ar.page_view, ra.page_view ) as page_view,
       | if(ra.user_view is Null, ar.user_view, ra.user_view ) as user_view,
       | if(ra.device_view is Null, ar.device_view, ra.device_view) as device_view
       |from result_answer ra
       |left outer join
       |(
       | select
       | article_id,
       | unix_timestamp(created_time) as created_time,
       | article_type as article_type,
       | is_push,
       | vote_num,
       | reply_vote_num,
       | reply_num,
       | show_count_choice,
       | show_count,
       | click_count_choice,
       | page_view,
       | user_view,
       | device_view
       | from online.ml_community_article_updates
       | where partition_date='${partition_date}'
       |)ar
       |on ra.cid_id = ar.article_id and ra.cid_type="article"
     """.stripMargin
  )

  // Step 6: attach question statistics.
  registerView(
    "result_question",
    s"""
       |select
       | ra.stat_date as stat_date,
       | ra.time as time,
       | ra.device_id as device_id,
       | ra.device_type as device_type,
       | ra.cid_type as cid_type,
       | ra.cid_id as cid_id,
       | ra.cid as cid,
       | ra.city_id as city_id,
       | if(ra.is_recommend is Null, qu.is_recommend, ra.is_recommend) as is_recommend,
       | ra.article_type as article_type,
       | if(ra.new_votes is Null, qu.vote_num, ra.new_votes) as new_votes,
       | if(ra.reply_num is Null, qu.reply_num, ra.reply_num) as reply_num,
       | ra.content_level as content_level,
       | if(ra.created_time is Null, qu.created_time, ra.created_time) as created_time,
       | ra.diary_updated_time as diary_updated_time,
       | ra.diary_service_id as diary_service_id,
       | ra.diary_doctor_id as diary_doctor_id,
       | ra.diary_new_topics as diary_new_topics,
       | ra.diary_new_replies as diary_new_replies,
       | ra.diary_new_topic_votes as diary_new_topic_votes,
       | ra.diary_new_topic_replies as diary_new_topic_replies,
       | ra.diary_new_favor as diary_new_favor,
       |
       | ra.reply_vote_num as reply_vote_num,
       | qu.answer_reply_num as question_answer_reply_num,
       |
       | if(ra.show_count_choice is Null, qu.show_count_choice, ra.show_count_choice ) as show_count_choice,
       | if(ra.show_count is Null, qu.show_count, ra.show_count ) as show_count,
       | if(ra.click_count_choice is Null, qu.click_count_choice, ra.click_count_choice) as click_count_choice,
       | if(ra.page_view is Null, qu.page_view, ra.page_view ) as page_view,
       | if(ra.user_view is Null, qu.user_view, ra.user_view ) as user_view,
       | if(ra.device_view is Null, qu.device_view, ra.device_view) as device_view
       |from result_article ra
       |left outer join
       |(
       | select
       | question_id,
       | unix_timestamp(created_time) as created_time,
       | is_recommend,
       | answer_num as reply_num,
       | vote_num,
       | reply_num as answer_reply_num,
       | show_count_choice,
       | show_count,
       | click_count_choice,
       | page_view,
       | user_view,
       | device_view
       | from online.ml_community_question_updates
       | where partition_date='${partition_date}'
       |)qu
       |on ra.cid_id = qu.question_id and ra.cid_type="question"
     """.stripMargin
  )

  // Step 7: attach live-stream statistics; this frame is written, not registered.
  val result = sc.sql(
    s"""
       |select
       | rq.stat_date as stat_date,
       | rq.time as time,
       | rq.device_id as device_id,
       | rq.device_type as device_type,
       | rq.cid_type as cid_type,
       | rq.cid_id as cid_id,
       | rq.cid as cid,
       | rq.city_id as city_id,
       | rq.is_recommend,
       | rq.article_type,
       | rq.new_votes,
       | rq.reply_num,
       | rq.content_level,
       | if(rq.created_time is Null, li.created_time, rq.created_time) as created_time,
       | if(rq.diary_updated_time is Null, li.updated_time, rq.diary_updated_time) as updated_time,
       | rq.diary_service_id as diary_service_id,
       | rq.diary_doctor_id as diary_doctor_id,
       | rq.diary_new_topics as diary_new_topics,
       | rq.diary_new_replies as diary_new_replies,
       | rq.diary_new_topic_votes as diary_new_topic_votes,
       | rq.diary_new_topic_replies as diary_new_topic_replies,
       | rq.diary_new_favor as diary_new_favor,
       |
       | rq.reply_vote_num as reply_vote_num,
       | rq.question_answer_reply_num,
       |
       | rq.show_count_choice,
       | if(rq.show_count is Null, li.show_count, rq.show_count ) as show_count,
       | rq.click_count_choice,
       | rq.page_view,
       | rq.user_view,
       | rq.device_view,
       |
       | li.fake_max_num as live_fake_max_num,
       | li.topic_id as live_topic_id,
       | li.max_view_num as live_max_view_num,
       | li.is_finish as live_is_finish
       |from result_question rq
       |left outer join
       |(
       | select
       | channel_id,
       | unix_timestamp(created_time) as created_time,
       | unix_timestamp(updated_time) as updated_time,
       | pv as show_count,
       | fake_max_num,
       | topic_id,
       | max_view_num,
       | replay_danmu,
       | is_finish
       | from online.ml_community_live_updates
       | where partition_date='${partition_date}'
       |)li
       |on rq.cid_id=li.channel_id and rq.cid_type="live"
     """.stripMargin
  )

  // Append mode: rows are added to whatever the target table already holds.
  GmeiConfig.writeToJDBCTable(result, table = "data_feed_exposure_precise", SaveMode.Append)
}
}
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment