Commit eae1757a authored by 宋柯

模型调试

parent 5098389a
@@ -554,102 +554,103 @@ def getExposureSql(start, end):
def getItemStatisticSql(start, end):
    """Build the SQL that counts labelled events per card and partition date.

    The query unions two event sets and then groups them by
    (card_id, partition_date, label):

    * label = 1: ``page_view`` rows on the ``welfare_detail`` page read from
      ``online.bl_hdfs_maidian_updates``.
    * label = 0: exposure rows read from
      ``online.ml_community_precise_exposure_detail``.

    In both branches the device must appear in
    ``online.ml_device_day_active_status`` (after the channel filter) and must
    not be flagged ``is_abnormal_device = 'true'`` in
    ``ML.ML_D_CT_DV_DEVICECLEAN_DIMEN_D``.

    Args:
        start: Inclusive lower bound compared against ``partition_date``.
        end: Inclusive upper bound compared against ``partition_date``.
            NOTE(review): both are presumably date strings in the same format
            as the ``partition_date`` column -- confirm with the caller.

    Returns:
        The formatted SQL text as a ``str``.

    Side effects:
        Prints the generated SQL to stdout.

    NOTE(review): ``start`` and ``end`` are interpolated straight into the SQL
    text, so they must come from trusted code, never from user input.
    """
    # The outer SELECT needs an explicit FROM before the derived table TT;
    # without it the statement does not parse.
    #
    # The LIKE pattern must reach the SQL engine as  promotion\_jf\_%  (escaped
    # underscores). The backslashes are doubled here because "\_" is not a
    # valid escape sequence in a regular Python string literal.
    sql = """
    SELECT TT.card_id, TT.partition_date, TT.label, count(1) as label_count
    FROM
    (
        SELECT T.partition_date, T.card_id, T.label
        FROM
        (
            SELECT DISTINCT t1.partition_date, t1.cl_id device_id, t1.card_id,t1.time_stamp,t1.cl_type as os,t1.city_id as user_city_id, 1 as label
            FROM
            (
                select partition_date,city_id,cl_id,business_id as card_id,time_stamp,page_stay,cl_type
                from online.bl_hdfs_maidian_updates
                where action = 'page_view'
                AND partition_date>='{startDay}' and partition_date<='{endDay}'
                AND page_name='welfare_detail'
                -- AND page_stay>=1
                AND cl_id is not null
                AND cl_id != ''
                AND business_id is not null
                AND business_id != ''
                group by partition_date,city_id,cl_id,business_id,time_stamp,page_stay,cl_type
            ) AS t1
            join
            ( --渠道,新老
                SELECT distinct device_id
                FROM online.ml_device_day_active_status
                where partition_date>='{startDay}' and partition_date<='{endDay}'
                AND active_type in ('1','2','4')
                and first_channel_source_type not in ('yqxiu1','yqxiu2','yqxiu3','yqxiu4','yqxiu5','mxyc1','mxyc2','mxyc3'
                ,'wanpu','jinshan','jx','maimai','zhuoyi','huatian','suopingjingling','mocha','mizhe','meika','lamabang'
                ,'js-az1','js-az2','js-az3','js-az4','js-az5','jfq-az1','jfq-az2','jfq-az3','jfq-az4','jfq-az5','toufang1'
                ,'toufang2','toufang3','toufang4','toufang5','toufang6','TF-toufang1','TF-toufang2','TF-toufang3','TF-toufang4'
                ,'TF-toufang5','tf-toufang1','tf-toufang2','tf-toufang3','tf-toufang4','tf-toufang5','benzhan','promotion_aso100'
                ,'promotion_qianka','promotion_xiaoyu','promotion_dianru','promotion_malioaso','promotion_malioaso-shequ'
                ,'promotion_shike','promotion_julang_jl03','promotion_zuimei','','unknown')
                AND first_channel_source_type not like 'promotion\\_jf\\_%'
            ) t2
            on t1.cl_id = t2.device_id

            LEFT JOIN
            ( --去除黑名单
                select distinct device_id
                from ML.ML_D_CT_DV_DEVICECLEAN_DIMEN_D
                where PARTITION_DAY =regexp_replace(DATE_SUB(current_date,1) ,'-','')
                AND is_abnormal_device = 'true'
            )t3
            on t3.device_id=t2.device_id
            WHERE t3.device_id is null
            UNION
            SELECT DISTINCT t1.partition_date,t1.cl_id device_id,t1.card_id,t1.time_stamp,cl_type as os,t1.city_id as user_city_id, 0 as label
            from
            ( --新首页卡片曝光
                SELECT partition_date,city_id,cl_type,cl_id,card_id,max(time_stamp) as time_stamp
                FROM online.ml_community_precise_exposure_detail
                where partition_date>='{startDay}' and partition_date<='{endDay}'
                and action in ('page_precise_exposure','home_choiceness_card_exposure')
                and cl_id IS NOT NULL
                and card_id IS NOT NULL
                and is_exposure='1'
                --and page_name='home'
                --and tab_name='精选'
                --and page_name in ('home','search_result_more')
                --and ((page_name='home' and tab_name='精选') or (page_name='category' and tab_name = '商品'))
                and card_type in ('card','video')
                and card_content_type in ('service')
                and (get_json_object(exposure_card,'$.in_page_pos') is null or get_json_object(exposure_card,'$.in_page_pos') != 'seckill')
                group by partition_date,city_id,cl_type,cl_id,card_id,app_session_id

            ) t1
            join
            ( --渠道,新老
                SELECT distinct device_id
                FROM online.ml_device_day_active_status
                where partition_date>='{startDay}' and partition_date<='{endDay}'
                AND active_type in ('1','2','4')
                and first_channel_source_type not in ('yqxiu1','yqxiu2','yqxiu3','yqxiu4','yqxiu5','mxyc1','mxyc2','mxyc3'
                ,'wanpu','jinshan','jx','maimai','zhuoyi','huatian','suopingjingling','mocha','mizhe','meika','lamabang'
                ,'js-az1','js-az2','js-az3','js-az4','js-az5','jfq-az1','jfq-az2','jfq-az3','jfq-az4','jfq-az5','toufang1'
                ,'toufang2','toufang3','toufang4','toufang5','toufang6','TF-toufang1','TF-toufang2','TF-toufang3','TF-toufang4'
                ,'TF-toufang5','tf-toufang1','tf-toufang2','tf-toufang3','tf-toufang4','tf-toufang5','benzhan','promotion_aso100'
                ,'promotion_qianka','promotion_xiaoyu','promotion_dianru','promotion_malioaso','promotion_malioaso-shequ'
                ,'promotion_shike','promotion_julang_jl03','promotion_zuimei','','unknown')
                AND first_channel_source_type not like 'promotion\\_jf\\_%'
            ) t2
            on t1.cl_id = t2.device_id

            LEFT JOIN
            ( --去除黑名单
                select distinct device_id
                from ML.ML_D_CT_DV_DEVICECLEAN_DIMEN_D
                where PARTITION_DAY =regexp_replace(DATE_SUB(current_date,1) ,'-','')
                AND is_abnormal_device = 'true'
            )t3
            on t3.device_id=t2.device_id
            WHERE t3.device_id is null
        ) T
    ) TT
    GROUP BY TT.card_id, TT.partition_date, TT.label
    """.format(startDay=start,endDay=end)
    print(sql)
    return sql
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment