Commit 1d686061 authored by Pengfei Xue

compute pv percent

parent 5365f98c
...@@ -6,6 +6,37 @@ object pvCheker { ...@@ -6,6 +6,37 @@ object pvCheker {
/**
 * Prints, for every (page_name, cl_type) pair, the fraction of that page's
 * `page_view` rows in the requested partition that came from that cl_type.
 *
 * Steps:
 *  1. Switch to the `online` database.
 *  2. Read `page_view` rows from `tl_hdfs_maidian_materialized` and turn the
 *     presence of each tracked `params` key (extra_param, referrer, is_push,
 *     in, out, referrer_id, referrer_tab_name, business_id, fake) into a 0/1
 *     flag; every row also carries `pv = 1`.
 *  3. Register the result as the temp view `maidian_pv`.
 *  4. Divide the row count of each (page_name, cl_type) group by the page's
 *     total row count and `show()` the result.
 *
 * @param sc             Spark session used to run the queries
 * @param partition_date value spliced into the `partition_date = ...` predicate
 */
def check(sc: SparkSession, partition_date: String): Unit = {
  sc.sql("use online")

  // NOTE(review): partition_date is interpolated into the SQL text verbatim and
  // unquoted, so the caller must pass something that is already a valid SQL
  // literal (e.g. 20180101 or '2018-01-01'). A bare 2018-01-01 would be parsed
  // as integer subtraction. Confirm the format callers use before quoting here.
  val pageViews = sc.sql(s"""
    select
      cl_id, action, cl_type, app_version,
      params['page_name'] as page_name,
      case when params['extra_param'] is null then 0 else 1 end as extra,
      case when params['referrer'] is null or params['referrer'] = '' then 0 else 1 end as referrer,
      case when params['is_push'] is null then 0 else 1 end as is_push,
      case when params['in'] is null then 0 else 1 end as `in`,
      case when params['out'] is null then 0 else 1 end as `out`,
      case when params['referrer_id'] is null then 0 else 1 end as referrer_id,
      case when params['referrer_tab_name'] is null then 0 else 1 end as referrer_tab_name,
      case when params['business_id'] is null then 0 else 1 end as bz_id,
      case when params['fake'] is null then 0 else 1 end as fake,
      1 as pv
    from tl_hdfs_maidian_materialized
    where partition_date=$partition_date and action = 'page_view'
  """)
  // page_name must be aliased above: the query below refers to it by that name.
  pageViews.createOrReplaceTempView("maidian_pv")

  // t.pv is the per-page total (windowed sum of the constant 1), so it is the
  // same on every row of a page. It is added to GROUP BY only so the select
  // list is valid; it does not split any (page_name, cl_type) group.
  val percentByClient = sc.sql("""
    select t.page_name, t.cl_type, 1.0 * count(1) / t.pv as percent
    from (
      select page_name, cl_type, sum(pv) over (partition by page_name) as pv
      from maidian_pv
    ) t
    group by t.page_name, t.cl_type, t.pv
  """)
  percentByClient.show()
}
} }
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment