Commit a59627db authored by Pengfei Xue's avatar Pengfei Xue

f

parent 5ccb22d4
-- Page-view event counts for partition 20190617, one row per
-- (page_name, referrer_is_empty, cl_type). Groups with 50 or fewer events are dropped.
--
-- referrer_is_empty is 1 only when params['referrer'] is exactly ''. When the key is
-- missing the comparison yields NULL, so the row falls into the else branch and is 0.
--
-- The flag is aliased so it can be referenced by name, and the sort now includes
-- cl_type and the flag: ordering by page_name alone left rows of the same page in
-- arbitrary order, which made two runs hard to compare.
select
    page_stats.page_name,
    page_stats.referrer_is_empty,
    page_stats.cl_type,
    page_stats.c
from (
    select
        params['page_name'] as page_name,
        case when params['referrer'] = '' then 1 else 0 end as referrer_is_empty,
        cl_type,
        count(1) as c
    from tl_hdfs_maidian_materialized
    where partition_date = '20190617'
        and action = 'page_view'
    group by
        params['page_name'],
        case when params['referrer'] = '' then 1 else 0 end,
        cl_type
    having count(1) > 50
) page_stats
order by
    page_stats.page_name,
    page_stats.cl_type,
    page_stats.referrer_is_empty
about_me_message_list 0 ios 335
about_me_message_list 1 android 185
activity_list 0 ios 1077
activity_list 0 android 502
all_case_service_comment 0 ios 13163
all_case_service_comment 1 ios 75
all_case_service_comment 0 android 8115
all_cases 0 ios 221
all_cases 0 android 179
all_sort 0 android 639
all_wiki 0 android 249
all_wiki 0 ios 563
answer_detail 0 ios 4815
answer_detail 1 android 707
answer_detail 0 android 789
article_detail 0 ios 1075
article_detail 0 android 264
bind_phone 1 ios 1306
bind_phone 1 android 790
category 0 ios 13484
category 0 android 10260
common_webview 0 ios 2400
common_webview 0 android 1428
community_home 1 ios 10776
community_home 0 android 3074
conversation_detail 1 android 956
conversation_detail 0 android 9109
conversation_detail 1 ios 235
conversation_detail 0 ios 18417
coupon_welfare_list 0 ios 515
coupon_welfare_list 0 android 277
create_answer 0 android 1355
create_diary_title 0 android 76
create_diary_title 0 ios 92
create_question 0 ios 64
create_question 0 android 73
create_report 0 android 249
create_report 0 ios 1704
csc_group 0 ios 151
csc_group 0 android 78
csc_refund 1 ios 149
csc_refund 1 android 62
diary_book_add_info 0 ios 179
diary_book_add_info 0 android 134
diary_book_topic_create 0 android 1061
diary_book_topic_create 1 android 87
diary_book_topic_create 1 ios 135
diary_book_topic_create 0 ios 953
diary_detail 1 ios 75
diary_detail 0 ios 44730
diary_detail 1 android 5748
diary_detail 0 android 19718
diary_list 0 android 287
diary_list 0 ios 205
expert_detail 0 ios 3234
expert_detail 0 android 1923
expert_infomation_detail 0 ios 152
expert_infomation_detail 0 android 74
expert_list 0 ios 62
expert_related_welfare 0 android 223
expert_related_welfare 0 ios 182
face_aim 0 ios 1871
face_aim 0 android 349
face_analyze 0 android 387
face_analyze 0 ios 2064
face_analyze_result 0 android 279
face_analyze_result 0 ios 1211
face_get_picture 0 android 658
face_get_picture 0 ios 2565
face_index 0 ios 700
face_index 0 android 225
face_my_report 0 ios 554
face_my_report 0 android 147
face_scan 0 ios 2769
face_scan 0 android 426
face_select_gender_birthday 0 ios 571
face_select_gender_birthday 0 android 132
face_select_tags 0 android 135
face_select_tags 0 ios 601
face_simulator 0 ios 256
face_simulator_result 0 ios 67
free_activity_detail 0 android 501
free_activity_detail 0 ios 836
home 0 android 10804
home 1 android 46101
home 1 ios 93629
ing 0 android 247
ing 0 ios 517
insurance_information 0 android 77
insurance_information 0 ios 90
items_select 0 android 107
items_select 0 ios 753
kefu_conversation_detail 0 ios 134
live_audience 0 android 81
message_home 0 android 10696
message_home 0 ios 2425
message_home 1 ios 18506
msg_collection_list 1 android 172
my_cart 0 android 1762
my_cart 0 ios 4133
my_coupon 0 android 594
my_coupon 1 android 104
my_coupon 1 ios 112
my_coupon 0 ios 977
my_coupon_available 0 ios 813
my_coupon_available 0 android 442
my_diary 1 android 139
my_diary 0 android 991
my_diary 0 ios 1382
my_fans 0 ios 333
my_fans 0 android 111
my_favor 0 ios 166
my_favor 0 android 90
my_focus 0 android 129
my_focus 0 ios 223
my_order 0 ios 4787
my_order 0 android 2721
my_topic 0 android 81
my_topic 0 ios 84
mytopic 1 android 446
mytopic 0 ios 636
new_sign 0 android 778
new_sign 0 ios 1416
newt_welfare_special 0 ios 5609
newt_welfare_special 1 android 70
newt_welfare_special 0 android 3693
order_detail 0 android 1791
order_detail 1 android 151
order_detail 0 ios 4036
order_pay 0 ios 1129
order_pay 0 android 897
organization_coupons 0 ios 186
organization_detail 0 ios 9753
organization_detail 0 android 5018
organization_detail_album 0 ios 142
organization_detail_album 0 android 68
organization_related_welfare 0 android 137
organization_related_welfare 0 ios 406
other_fans_list 0 android 53
other_homepage 0 android 602
other_homepage 0 ios 1076
over 0 ios 116
personal_home 0 android 9264
personal_home 1 ios 14184
personal_more 0 ios 1200
personal_more 0 android 1356
personal_profile 0 android 297
personal_profile 0 ios 334
points_shopping_mall 0 android 198
points_shopping_mall 0 ios 286
post_add_pic 0 ios 2350
post_add_pic 1 ios 293
post_create 1 android 1586
post_create 0 ios 1423
post_detail 1 android 1160
post_detail 0 android 1571
post_detail 0 ios 4732
product_detail 0 android 127
product_detail 0 ios 333
question_detail 1 android 1576
question_detail 0 android 1168
question_detail 0 ios 9162
received_answer 0 ios 127
received_answer 0 android 106
received_votes 0 android 178
received_votes 0 ios 300
register_get_coupon 0 android 318
register_get_coupon 0 ios 852
report_result 1 android 289
report_result 1 ios 1834
reset_password 0 ios 97
reset_password 0 android 63
search_home 0 android 8515
search_home 0 ios 15401
search_result 0 android 14994
search_result_diary 0 ios 3770
search_result_diary 0 android 1342
search_result_doctor 0 android 464
search_result_doctor 0 ios 1133
search_result_hospital 0 android 1049
search_result_hospital 0 ios 3042
search_result_more 0 ios 31276
search_result_more 0 android 6691
search_result_post 0 android 776
search_result_post 0 ios 1305
search_result_welfare 0 android 7891
search_result_welfare 0 ios 16681
search_result_wiki 0 android 593
search_result_wiki 0 ios 1679
select_city 0 android 954
service_comment 0 ios 105
service_explain 0 ios 56
settlement_detail 0 android 65
settlement_detail 0 ios 111
settlement_preview 0 android 2136
settlement_preview 0 ios 4198
sevencow 1 ios 135
sevencow 0 android 54
sevencow 0 ios 887
sevencow 1 android 478
share_coupon_gift 0 ios 107
share_coupon_gift 0 android 111
sign_calendar 0 ios 249
sign_calendar 0 android 226
time_album 0 android 457
time_album 0 ios 545
topic_comment_list 0 android 76
topic_comment_list 0 ios 153
topic_detail 0 ios 1309
topic_detail 1 android 231
topic_detail 0 android 1317
upcoming 0 android 70
upcoming 0 ios 175
user_task_center 0 android 305
user_task_center 0 ios 383
votecollect 0 ios 262
welfare_attribute 1 ios 13541
welfare_attribute 0 android 313
welfare_attribute 1 android 7308
welfare_detail 0 android 27893
welfare_detail 1 android 116
welfare_detail 0 ios 50680
welfare_home 1 ios 13240
welfare_home 0 android 6587
welfare_list 0 ios 7685
welfare_list 0 android 3781
welfare_list 1 android 1205
welfare_special 1 android 218
welfare_special 0 ios 1877
welfare_special 0 android 1020
wiki_brand 0 ios 54
wiki_collect 0 android 295
wiki_collect 0 ios 674
wiki_detail 0 ios 928
wiki_detail 0 android 459
zone_v3 0 ios 1866
zone_v3 1 android 503
zone_v3 0 android 596
package com.gmei.data.dq package com.gmei.data.dq
import org.apache.spark.sql.{Row, SparkSession} import org.apache.spark.sql.{Row, SparkSession}
import org.apache.spark.mllib.linalg.Vectors
import org.apache.spark.mllib.stat.{MultivariateStatisticalSummary, Statistics}
// [41BB2B9E-C72C-4124-9650-FDBAEEEE05F8,page_view,ios,7.9.3,bind_phone,1,0,1,1,1,0,0,1,1,1] import scala.collection.mutable
// [B6DA0665-621A-40F1-8823-0B72FAEF3A48,page_view,ios,7.9.3,message_home,1,1,1,1,1,0,1,1,1,1]
// df.printSchema case class Record(page_name: String, flag: Int, cl_type: String, count: Int)
/*
|-- cl_id: string (nullable = true)
|-- action: string (nullable = true)
|-- cl_type: string (nullable = true)
|-- app_version: string (nullable = true)
|-- page_name: string (nullable = true)
|-- extra: integer (nullable = false)
|-- referrer: integer (nullable = false)
|-- is_push: integer (nullable = false)
|-- in: integer (nullable = false)
|-- out: integer (nullable = false)
|-- referrer_id: integer (nullable = false)
|-- referrer_tab_name: integer (nullable = false)
|-- bz_id: integer (nullable = false)
|-- fake: integer (nullable = false)
|-- pv: integer (nullable = false)
case class Record(
cl_id: String, action: String, cl_type: String, app_version: String, page_name: String,
extra: Int, referrer: Int, is_push: Int, in: Int, out: Int,
referrer_id: Int, referrer_tab_name: Int, bz_id: Int, fake: Int, pv: Int
)
*/
object pvCheker { object pvCheker {
def check(sc: SparkSession, partition_date: String) = { def check(sc: SparkSession, partition_date: String) = {
sc.sql("use online") import sc.implicits._
val vault: Int = 0
// Page-view event counts for the requested partition, one row per
// (page_name, referrer_is_empty, cl_type), keeping groups with more than `vault` events.
// referrer_is_empty is 1 only when params['referrer'] is exactly ''; a missing key makes
// the comparison NULL, so the row falls into the else branch and is 0.
// The flag is aliased and the sort includes cl_type and the flag, so rows of the same
// page come back in a stable order instead of an arbitrary one.
// NOTE(review): if this result is later mapped onto Record(page_name, flag, cl_type, count),
// the column aliases will have to match those field names — confirm against the consumer.
val x = sc.sql(
s"""
  |select
  |  page_stats.page_name,
  |  page_stats.referrer_is_empty,
  |  page_stats.cl_type,
  |  page_stats.c
  |from (
  |  select
  |    params['page_name'] as page_name,
  |    case when params['referrer'] = '' then 1 else 0 end as referrer_is_empty,
  |    cl_type,
  |    count(1) as c
  |  from online.tl_hdfs_maidian_view
  |  where partition_date = '$partition_date'
  |    and action = 'page_view'
  |  group by
  |    params['page_name'],
  |    case when params['referrer'] = '' then 1 else 0 end,
  |    cl_type
  |  having count(1) > $vault
  |) page_stats
  |order by
  |  page_stats.page_name,
  |  page_stats.cl_type,
  |  page_stats.referrer_is_empty
""".stripMargin)
// demo data
/* /*
* extra_param referrer is_push in out referrer_id referrer_tab_name business_id fake page_name about_me_message_list 0 ios 335
*/ about_me_message_list 1 android 185
// One row per page_view event of the requested partition, with each tracked param
// reduced to an integer flag:
//   extra, is_push, in, out, fake                    -> 1 when the key is present, else 0
//   referrer, referrer_id, referrer_tab_name, bz_id  -> -1 when the key is missing,
//                                                        0 when it is '', 1 otherwise
// page_name falls back to the literal string 'NULL' when the key is missing; pv is always 1.
//
// Two fixes compared with the previous text of this query:
//   * bz_id null-checked params['business_id'] but then compared the misspelled key
//     params['bussiness_id']; that lookup is always NULL, so bz_id could never be 0.
//   * partition_date is now compared against a quoted string literal, matching the other
//     queries on this table, instead of an unquoted number.
val df = sc.sql(s"""
select
  cl_id, action, cl_type, app_version,
  case when params['page_name'] is null then 'NULL' else params['page_name'] end as page_name,
  case when params['extra_param'] is null then 0 else 1 end as extra,
  case
    when params['referrer'] is null then -1
    when params['referrer'] = '' then 0
    else 1
  end as referrer,
  case when params['is_push'] is null then 0 else 1 end as is_push,
  case when params['in'] is null then 0 else 1 end as in,
  case when params['out'] is null then 0 else 1 end as out,
  case
    when params['referrer_id'] is null then -1
    when params['referrer_id'] = '' then 0
    else 1
  end as referrer_id,
  case
    when params['referrer_tab_name'] is null then -1
    when params['referrer_tab_name'] = '' then 0
    else 1
  end as referrer_tab_name,
  case
    when params['business_id'] is null then -1
    when params['business_id'] = '' then 0
    else 1
  end as bz_id,
  case when params['fake'] is null then 0 else 1 end as fake,
  1 as pv
from tl_hdfs_maidian_materialized
where partition_date = '$partition_date' and action = 'page_view'
""")
import sc.implicits._ activity_list 0 ios 1077
import sc.sqlContext.implicits._ activity_list 0 android 502
val y = df.rdd.map { all_case_service_comment 0 ios 13163
case Row( all_case_service_comment 1 ios 75
cl_id: String, action: String, cl_type: String, app_version: String, page_name: String, all_case_service_comment 0 android 8115
extra: Int, referrer: Int, is_push: Int, in: Int, out: Int,
referrer_id: Int, referrer_tab_name: Int, bz_id: Int, fake: Int, pv: Int
) => Seq(
extra, referrer, is_push, in,
out, referrer_id, referrer_tab_name, bz_id
).map(_ * 1.0)
}
val z = y map {i => Vectors.dense(i.toArray[Double])} all_cases 0 ios 221
val summary: MultivariateStatisticalSummary = Statistics.colStats(z) all_cases 0 android 179
println(summary.mean) // a dense vector containing the mean value for each column
println(summary.variance) // column-wise variance
println(summary.numNonzeros) // number of nonzeros in each column
println("Coefficient of Variation,CV") all_sort 0 android 639
for (i <- 1 to summary.mean.size) {
println(summary.variance(i) / summary.mean(i)) all_wiki 0 android 249
} all_wiki 0 ios 563
*/
val m = collection.mutable.Map[String, Double]()
} }
} }
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment