Commit 35f1f8a0 authored by 郭羽

service embedding

parent a1d73464
......@@ -37,7 +37,7 @@ def getClickSql(start, end):
( --渠道,新老
SELECT distinct device_id
FROM online.ml_device_day_active_status
where partition_date>='{startDay}' and partition_date<'{endDay}'
where partition_date>='{startDay}' and partition_date<='{endDay}'
AND active_type in ('1','2','4')
and first_channel_source_type not in ('yqxiu1','yqxiu2','yqxiu3','yqxiu4','yqxiu5','mxyc1','mxyc2','mxyc3'
,'wanpu','jinshan','jx','maimai','zhuoyi','huatian','suopingjingling','mocha','mizhe','meika','lamabang'
......@@ -260,18 +260,18 @@ if __name__ == '__main__':
start = time.time()
# 入参
trainDays = int(sys.argv[1])
# spark = get_spark("embedding")
spark = get_spark("embedding")
print('trainDays:{}'.format(trainDays), flush=True)
endDay = addDays(-1)
startDay = addDays(-(1 + int(trainDays)))
endDay = addDays(0)
startDay = addDays(-int(trainDays))
print("train_data start:{} end:{}".format(startDay, endDay))
conf = SparkConf().setAppName('embedding').setMaster('local')
spark = SparkSession.builder.config(conf=conf).getOrCreate()
spark.sparkContext.setLogLevel("ERROR")
# conf = SparkConf().setAppName('embedding').setMaster('local')
# spark = SparkSession.builder.config(conf=conf).getOrCreate()
# spark.sparkContext.setLogLevel("ERROR")
# 行为数据
clickSql = getClickSql(startDay, endDay)
......
......@@ -405,7 +405,7 @@ def getClickSql(start, end):
( --渠道,新老
SELECT distinct device_id
FROM online.ml_device_day_active_status
where partition_date>='{startDay}' and partition_date<'{endDay}'
where partition_date>='{startDay}' and partition_date<='{endDay}'
AND active_type in ('1','2','4')
and first_channel_source_type not in ('yqxiu1','yqxiu2','yqxiu3','yqxiu4','yqxiu5','mxyc1','mxyc2','mxyc3'
,'wanpu','jinshan','jx','maimai','zhuoyi','huatian','suopingjingling','mocha','mizhe','meika','lamabang'
......@@ -456,7 +456,7 @@ def getExposureSql(start, end):
( --渠道,新老
SELECT distinct device_id
FROM online.ml_device_day_active_status
where partition_date>='{startDay}' and partition_date<'{endDay}'
where partition_date>='{startDay}' and partition_date<='{endDay}'
AND active_type in ('1','2','4')
and first_channel_source_type not in ('yqxiu1','yqxiu2','yqxiu3','yqxiu4','yqxiu5','mxyc1','mxyc2','mxyc3'
,'wanpu','jinshan','jx','maimai','zhuoyi','huatian','suopingjingling','mocha','mizhe','meika','lamabang'
......@@ -760,8 +760,8 @@ if __name__ == '__main__':
trainDays = int(sys.argv[1])
print('trainDays:{}'.format(trainDays),flush=True)
endDay = addDays(-1)
startDay = addDays(-(1 + int(trainDays)))
endDay = addDays(0)
startDay = addDays(-int(trainDays))
print("train_data start:{} end:{}".format(startDay,endDay))
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment