diff --git a/task/search_strategy_d.py b/task/search_strategy_d.py index c1b58ff9b0d8aecee8a6da74a17e11e2a761a92b..ccdaa29ffbd5091ffbce6dc6fc602d83110d775e 100644 --- a/task/search_strategy_d.py +++ b/task/search_strategy_d.py @@ -72,13 +72,6 @@ for t in range(0, task_days): yesterday_str = (now + datetime.timedelta(days=-1)).strftime("%Y%m%d") one_week_age_str = (now + datetime.timedelta(days=-7)).strftime("%Y%m%d") sql_distinct_device_id = r"""SELECT partition_date,device_id FROM - ( - SELECT user_id,partition_date, - if(size(device_list) > 0, device_list [0], '') AS device_id - FROM online.ml_user_updates - WHERE partition_date>='{yesterday_str}' AND partition_date<'{today_str}' - )t1 - JOIN ( SELECT distinct user_id FROM online.tl_hdfs_doctor_view @@ -93,9 +86,25 @@ for t in range(0, task_days): UNION ALL select distinct user_id from dim.dim_device_user_staff - - )t3 - on t1.user_id=t3.user_id group by partition_date,device_id + UNION ALL + + SELECT distinct t1.user_id + FROM + ( + SELECT user_id, v.device_id as device_id + FROM online.ml_user_history_detail + LATERAL VIEW EXPLODE(device_history_list) v AS device_id + WHERE partition_date = '{yesterday_str}' + )t1 + JOIN + ( + SELECT device_id + FROM online.ml_device_history_detail + WHERE partition_date = '{yesterday_str}' + AND is_login_doctor = '1' + )t2 + ON t1.device_id = t2.device_id + ) """.format(today_str=today_str, yesterday_str=yesterday_str) print(sql_distinct_device_id) distinct_device_id_df = spark.sql(sql_distinct_device_id)