Commit e350d105 authored by 宋柯

模型上线

parent 33149f28
...@@ -588,7 +588,18 @@ def getClickSql(start, end): ...@@ -588,7 +588,18 @@ def getClickSql(start, end):
group by partition_date, city_id, cl_id, business_id, cl_type group by partition_date, city_id, cl_id, business_id, cl_type
) AS t1 ) AS t1
join join
( --渠道,新老 device_filter_df t2
on t1.cl_id = t2.device_id
LEFT JOIN
black_device_df t3
on t3.device_id=t2.device_id
WHERE t3.device_id is null
""".format(startDay=start,endDay=end)
print(sql)
return sql
def get_device_filter(spark, start, end):
sql = """
SELECT distinct device_id SELECT distinct device_id
FROM online.ml_device_day_active_status FROM online.ml_device_day_active_status
where partition_date>='{startDay}' and partition_date<='{endDay}' where partition_date>='{startDay}' and partition_date<='{endDay}'
...@@ -601,20 +612,25 @@ def getClickSql(start, end): ...@@ -601,20 +612,25 @@ def getClickSql(start, end):
,'promotion_qianka','promotion_xiaoyu','promotion_dianru','promotion_malioaso','promotion_malioaso-shequ' ,'promotion_qianka','promotion_xiaoyu','promotion_dianru','promotion_malioaso','promotion_malioaso-shequ'
,'promotion_shike','promotion_julang_jl03','promotion_zuimei','','unknown') ,'promotion_shike','promotion_julang_jl03','promotion_zuimei','','unknown')
AND first_channel_source_type not like 'promotion\_jf\_%' AND first_channel_source_type not like 'promotion\_jf\_%'
) t2 """.format(startDay=start,endDay=end)
on t1.cl_id = t2.device_id df = spark.sql(sql)
LEFT JOIN df.createOrReplaceTempView('device_filter_df')
( --去除黑名单 df.cache()
df.show(20, False)
return df
def get_black_device(spark):
sql = """
select distinct device_id select distinct device_id
from ML.ML_D_CT_DV_DEVICECLEAN_DIMEN_D from ML.ML_D_CT_DV_DEVICECLEAN_DIMEN_D
where PARTITION_DAY =regexp_replace(DATE_SUB(current_date,1) ,'-','') where PARTITION_DAY =regexp_replace(DATE_SUB(current_date,1) ,'-','')
AND is_abnormal_device = 'true' AND is_abnormal_device = 'true'
)t3 """
on t3.device_id=t2.device_id df = spark.sql(sql)
WHERE t3.device_id is null df.createOrReplaceTempView('black_device_df')
""".format(startDay=start,endDay=end) df.cache()
print(sql) df.show(20, False)
return sql return df
def getExposureSql(start, end): def getExposureSql(start, end):
# t1.partition_date, t1.cl_id device_id, t1.card_id, t1.time_stamp, t1.cl_type as os, t1.city_id as user_city_id # t1.partition_date, t1.cl_id device_id, t1.card_id, t1.time_stamp, t1.cl_type as os, t1.city_id as user_city_id
...@@ -639,28 +655,10 @@ def getExposureSql(start, end): ...@@ -639,28 +655,10 @@ def getExposureSql(start, end):
group by partition_date, city_id, cl_type, cl_id, card_id, app_session_id group by partition_date, city_id, cl_type, cl_id, card_id, app_session_id
) t1 ) t1
join join
( --渠道,新老 device_filter_df t2
SELECT distinct device_id
FROM online.ml_device_day_active_status
where partition_date>='{startDay}' and partition_date<='{endDay}'
AND active_type in ('1','2','4')
and first_channel_source_type not in ('yqxiu1','yqxiu2','yqxiu3','yqxiu4','yqxiu5','mxyc1','mxyc2','mxyc3'
,'wanpu','jinshan','jx','maimai','zhuoyi','huatian','suopingjingling','mocha','mizhe','meika','lamabang'
,'js-az1','js-az2','js-az3','js-az4','js-az5','jfq-az1','jfq-az2','jfq-az3','jfq-az4','jfq-az5','toufang1'
,'toufang2','toufang3','toufang4','toufang5','toufang6','TF-toufang1','TF-toufang2','TF-toufang3','TF-toufang4'
,'TF-toufang5','tf-toufang1','tf-toufang2','tf-toufang3','tf-toufang4','tf-toufang5','benzhan','promotion_aso100'
,'promotion_qianka','promotion_xiaoyu','promotion_dianru','promotion_malioaso','promotion_malioaso-shequ'
,'promotion_shike','promotion_julang_jl03','promotion_zuimei','','unknown')
AND first_channel_source_type not like 'promotion\_jf\_%'
) t2
on t1.cl_id = t2.device_id on t1.cl_id = t2.device_id
LEFT JOIN LEFT JOIN
( --去除黑名单 black_device_df t3
select distinct device_id
from ML.ML_D_CT_DV_DEVICECLEAN_DIMEN_D
where PARTITION_DAY =regexp_replace(DATE_SUB(current_date,1) ,'-','')
AND is_abnormal_device = 'true'
)t3
on t3.device_id=t2.device_id on t3.device_id=t2.device_id
WHERE t3.device_id is null WHERE t3.device_id is null
""".format(startDay=start,endDay=end) """.format(startDay=start,endDay=end)
...@@ -1108,19 +1106,16 @@ def get_and_save_device_feature(spark, fields_na_value_dict, days = 180): ...@@ -1108,19 +1106,16 @@ def get_and_save_device_feature(spark, fields_na_value_dict, days = 180):
) t2 ) t2
on t1.cl_id = t2.device_id on t1.cl_id = t2.device_id
LEFT JOIN LEFT JOIN
( --去除黑名单 black_device_df t3
select distinct device_id
from ML.ML_D_CT_DV_DEVICECLEAN_DIMEN_D
where PARTITION_DAY =regexp_replace(DATE_SUB(current_date,1) ,'-','')
AND is_abnormal_device = 'true'
)t3
on t3.device_id=t2.device_id on t3.device_id=t2.device_id
WHERE t3.device_id is null WHERE t3.device_id is null
""".format(startDay=start,endDay=end) """.format(startDay=start,endDay=end)
print(sql) print(sql)
device_feature_df = spark.sql(sql).toPandas() device_feature_df = spark.sql(sql)
device_feature_df = device_feature_df.na.fill(fields_na_value_dict) device_feature_df = device_feature_df.na.fill(fields_na_value_dict)
device_feature_df.printSchema()
device_feature_df = device_feature_df.toPandas()
conn = getRedisConn() conn = getRedisConn()
BATCH = 5000 BATCH = 5000
Key_TMP = 'strategy:model:rank:widedeep:device:feature:tmp' Key_TMP = 'strategy:model:rank:widedeep:device:feature:tmp'
...@@ -1136,6 +1131,8 @@ def get_click_exp_rating_df(trainDays, spark): ...@@ -1136,6 +1131,8 @@ def get_click_exp_rating_df(trainDays, spark):
startDay, endDay = get_click_exp_start_end_time(trainDays) startDay, endDay = get_click_exp_start_end_time(trainDays)
#获取曝光和点击行为数据 #获取曝光和点击行为数据
get_device_filter(spark, startDay, endDay)
get_black_device(spark)
clickSql = getClickSql(startDay,endDay) clickSql = getClickSql(startDay,endDay)
expSql = getExposureSql(startDay,endDay) expSql = getExposureSql(startDay,endDay)
clickDF = spark.sql(clickSql) clickDF = spark.sql(clickSql)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment