Commit 35f1f8a0 authored by 郭羽

service embedding

parent a1d73464
...@@ -37,7 +37,7 @@ def getClickSql(start, end): ...@@ -37,7 +37,7 @@ def getClickSql(start, end):
( --渠道,新老 ( --渠道,新老
SELECT distinct device_id SELECT distinct device_id
FROM online.ml_device_day_active_status FROM online.ml_device_day_active_status
where partition_date>='{startDay}' and partition_date<'{endDay}' where partition_date>='{startDay}' and partition_date<='{endDay}'
AND active_type in ('1','2','4') AND active_type in ('1','2','4')
and first_channel_source_type not in ('yqxiu1','yqxiu2','yqxiu3','yqxiu4','yqxiu5','mxyc1','mxyc2','mxyc3' and first_channel_source_type not in ('yqxiu1','yqxiu2','yqxiu3','yqxiu4','yqxiu5','mxyc1','mxyc2','mxyc3'
,'wanpu','jinshan','jx','maimai','zhuoyi','huatian','suopingjingling','mocha','mizhe','meika','lamabang' ,'wanpu','jinshan','jx','maimai','zhuoyi','huatian','suopingjingling','mocha','mizhe','meika','lamabang'
...@@ -260,18 +260,18 @@ if __name__ == '__main__': ...@@ -260,18 +260,18 @@ if __name__ == '__main__':
start = time.time() start = time.time()
# 入参 # 入参
trainDays = int(sys.argv[1]) trainDays = int(sys.argv[1])
# spark = get_spark("embedding") spark = get_spark("embedding")
print('trainDays:{}'.format(trainDays), flush=True) print('trainDays:{}'.format(trainDays), flush=True)
endDay = addDays(-1) endDay = addDays(0)
startDay = addDays(-(1 + int(trainDays))) startDay = addDays(-int(trainDays))
print("train_data start:{} end:{}".format(startDay, endDay)) print("train_data start:{} end:{}".format(startDay, endDay))
conf = SparkConf().setAppName('embedding').setMaster('local') # conf = SparkConf().setAppName('embedding').setMaster('local')
spark = SparkSession.builder.config(conf=conf).getOrCreate() # spark = SparkSession.builder.config(conf=conf).getOrCreate()
spark.sparkContext.setLogLevel("ERROR") # spark.sparkContext.setLogLevel("ERROR")
# 行为数据 # 行为数据
clickSql = getClickSql(startDay, endDay) clickSql = getClickSql(startDay, endDay)
......
...@@ -405,7 +405,7 @@ def getClickSql(start, end): ...@@ -405,7 +405,7 @@ def getClickSql(start, end):
( --渠道,新老 ( --渠道,新老
SELECT distinct device_id SELECT distinct device_id
FROM online.ml_device_day_active_status FROM online.ml_device_day_active_status
where partition_date>='{startDay}' and partition_date<'{endDay}' where partition_date>='{startDay}' and partition_date<='{endDay}'
AND active_type in ('1','2','4') AND active_type in ('1','2','4')
and first_channel_source_type not in ('yqxiu1','yqxiu2','yqxiu3','yqxiu4','yqxiu5','mxyc1','mxyc2','mxyc3' and first_channel_source_type not in ('yqxiu1','yqxiu2','yqxiu3','yqxiu4','yqxiu5','mxyc1','mxyc2','mxyc3'
,'wanpu','jinshan','jx','maimai','zhuoyi','huatian','suopingjingling','mocha','mizhe','meika','lamabang' ,'wanpu','jinshan','jx','maimai','zhuoyi','huatian','suopingjingling','mocha','mizhe','meika','lamabang'
...@@ -456,7 +456,7 @@ def getExposureSql(start, end): ...@@ -456,7 +456,7 @@ def getExposureSql(start, end):
( --渠道,新老 ( --渠道,新老
SELECT distinct device_id SELECT distinct device_id
FROM online.ml_device_day_active_status FROM online.ml_device_day_active_status
where partition_date>='{startDay}' and partition_date<'{endDay}' where partition_date>='{startDay}' and partition_date<='{endDay}'
AND active_type in ('1','2','4') AND active_type in ('1','2','4')
and first_channel_source_type not in ('yqxiu1','yqxiu2','yqxiu3','yqxiu4','yqxiu5','mxyc1','mxyc2','mxyc3' and first_channel_source_type not in ('yqxiu1','yqxiu2','yqxiu3','yqxiu4','yqxiu5','mxyc1','mxyc2','mxyc3'
,'wanpu','jinshan','jx','maimai','zhuoyi','huatian','suopingjingling','mocha','mizhe','meika','lamabang' ,'wanpu','jinshan','jx','maimai','zhuoyi','huatian','suopingjingling','mocha','mizhe','meika','lamabang'
...@@ -760,8 +760,8 @@ if __name__ == '__main__': ...@@ -760,8 +760,8 @@ if __name__ == '__main__':
trainDays = int(sys.argv[1]) trainDays = int(sys.argv[1])
print('trainDays:{}'.format(trainDays),flush=True) print('trainDays:{}'.format(trainDays),flush=True)
endDay = addDays(-1) endDay = addDays(0)
startDay = addDays(-(1 + int(trainDays))) startDay = addDays(-int(trainDays))
print("train_data start:{} end:{}".format(startDay,endDay)) print("train_data start:{} end:{}".format(startDay,endDay))
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment