diff --git a/new_user_analysis/user_behavior_path.py b/new_user_analysis/user_behavior_path.py index d058f83c4e633bec28361dadebd954999706ef6b..be237ff6836347eef72ae62135519bce38b5f731 100644 --- a/new_user_analysis/user_behavior_path.py +++ b/new_user_analysis/user_behavior_path.py @@ -99,71 +99,7 @@ for t in range(1, task_days): new_urser_device_id_sql = r""" select t2.device_id from (select distinct(first_device) as device_id from online.ml_user_history_detail where partition_date = '{today_str}' and date_joined >= '{yesterday_str}') t2 - LEFT JOIN - ( - select distinct device_id - from ml.ml_d_ct_dv_devicespam_d --去除机构刷单设备,即作弊设备(浏览和曝光事件去除) - WHERE partition_day='{today_str}' - - union all - - select distinct device_id - from dim.dim_device_user_staff --去除内网用户 - )spam_pv - on spam_pv.device_id=t2.device_id - LEFT JOIN - ( - SELECT partition_date,device_id - FROM - (--找出user_id当天活跃的第一个设备id - SELECT user_id,partition_date, - if(size(device_list) > 0, device_list [ 0 ], '') AS device_id - FROM online.ml_user_updates - WHERE partition_date='{today_str}' - )t1 - JOIN - ( --医生账号 - SELECT distinct user_id - FROM online.tl_hdfs_doctor_view - WHERE partition_date = '{today_str}' - - --马甲账号/模特用户 - UNION ALL - SELECT user_id - FROM ml.ml_c_ct_ui_user_dimen_d - WHERE partition_day = '{today_str}' - AND (is_puppet = 'true' or is_classifyuser = 'true') - - UNION ALL - --公司内网覆盖用户 - select distinct user_id - from dim.dim_device_user_staff - - UNION ALL - --登陆过医生设备 - SELECT distinct t1.user_id - FROM - ( - SELECT user_id, v.device_id as device_id - FROM online.ml_user_history_detail - LATERAL VIEW EXPLODE(device_history_list) v AS device_id - WHERE partition_date = '{today_str}' - ) t1 - JOIN - ( - SELECT device_id - FROM online.ml_device_history_detail - WHERE partition_date = '{today_str}' - AND is_login_doctor = '1' - ) t2 - ON t1.device_id = t2.device_id - )t2 - on t1.user_id=t2.user_id - group by partition_date,device_id - )dev - on 
t2.device_id=dev.device_id -WHERE spam_pv.device_id IS NULL - and dev.device_id is null + """.format(today_str=today_str,yesterday_str=yesterday_str) print(new_urser_device_id_sql)