From 164a6a7d93b178cdf542d76c514aba474d845c63 Mon Sep 17 00:00:00 2001 From: litaolemo <593516104@qq.com> Date: Tue, 17 Nov 2020 14:45:16 +0800 Subject: [PATCH] update --- task/spark_test.py | 65 ---------------------------------------------- 1 file changed, 65 deletions(-) diff --git a/task/spark_test.py b/task/spark_test.py index 2fc882f..9a12c57 100644 --- a/task/spark_test.py +++ b/task/spark_test.py @@ -196,72 +196,7 @@ FROM ( AND action = 'on_click_card' AND params['card_type'] = 'highlight_word' ) click - LEFT JOIN - ( - select distinct device_id - from ml.ml_d_ct_dv_devicespam_d --去除机构刷å•è®¾å¤‡ï¼Œå³ä½œå¼Šè®¾å¤‡ï¼ˆæµè§ˆå’Œæ›å…‰äº‹ä»¶åŽ»é™¤ï¼‰ - WHERE partition_day='{start_date}' - union all - - select distinct device_id - from dim.dim_device_user_staff --去除内网用户 - )spam_pv - on spam_pv.device_id=click.cl_id - LEFT JOIN - ( - SELECT partition_date,device_id - FROM - (--找出user_id当天活跃的第一个设备id - SELECT user_id,partition_date, - if(size(device_list) > 0, device_list [ 0 ], '') AS device_id - FROM online.ml_user_updates - WHERE partition_date='{start_date}' - )t1 - JOIN - ( --åŒ»ç”Ÿè´¦å· - SELECT distinct user_id - FROM online.tl_hdfs_doctor_view - WHERE partition_date = '{start_date}' - - --马甲账å·/模特用户 - UNION ALL - SELECT user_id - FROM ml.ml_c_ct_ui_user_dimen_d - WHERE partition_day = '{start_date}' - AND (is_puppet = 'true' or is_classifyuser = 'true') - - UNION ALL - --å…¬å¸å†…网覆盖用户 - select distinct user_id - from dim.dim_device_user_staff - - UNION ALL - --登陆过医生设备 - SELECT distinct t1.user_id - FROM - ( - SELECT user_id, v.device_id as device_id - FROM online.ml_user_history_detail - LATERAL VIEW EXPLODE(device_history_list) v AS device_id - WHERE partition_date = '{start_date}' - ) t1 - JOIN - ( - SELECT device_id - FROM online.ml_device_history_detail - WHERE partition_date = '{start_date}' - AND is_login_doctor = '1' - ) t2 - ON t1.device_id = t2.device_id - )t2 - on t1.user_id=t2.user_id - group by partition_date,device_id - )dev - on click.partition_date=dev.partition_date and click.cl_id=dev.device_id - WHERE spam_pv.device_id IS NULL - and dev.device_id is null - GROUP BY click.partition_date, query ) t3 order by all_search_uv asc -- 2.18.0