Commit 7d47408e authored by 宋柯

模型bug修复

parent be1db75b
...@@ -927,8 +927,8 @@ def get_service_feature_df(): ...@@ -927,8 +927,8 @@ def get_service_feature_df():
def addDays(n, format="%Y%m%d"): def addDays(n, format="%Y%m%d"):
return (date.today() + timedelta(days=n)).strftime(format) return (date.today() + timedelta(days=n)).strftime(format)
def generatePartitionDates(trainDays): def generatePartitionDates(partitionDates):
return [addDays(-trainDay) for trainDay in range(trainDays)] return [addDays(-trainDay) for trainDay in range(partitionDates)]
#显示所有列 #显示所有列
pd.set_option('display.max_columns', None) pd.set_option('display.max_columns', None)
...@@ -942,11 +942,12 @@ if __name__ == '__main__': ...@@ -942,11 +942,12 @@ if __name__ == '__main__':
start = time.time() start = time.time()
#入参 #入参
trainDays = int(sys.argv[1]) trainDays = int(sys.argv[1])
itemStatisticStartDays = int(sys.argv[2])
print('trainDays:{}'.format(trainDays),flush=True) print('trainDays:{}'.format(trainDays),flush=True)
endDay = addDays(0) endDay = addDays(0)
startDay = addDays(-int(trainDays)) startDay = addDays(-int(trainDays))
itemStatisticStartDay = addDays(-int(trainDays + 31)) itemStatisticStartDay = addDays(-int(trainDays + itemStatisticStartDays))
print("train_data start:{} end:{}".format(startDay,endDay)) print("train_data start:{} end:{}".format(startDay,endDay))
...@@ -968,7 +969,7 @@ if __name__ == '__main__': ...@@ -968,7 +969,7 @@ if __name__ == '__main__':
itemStatisticDF = spark.sql(itemStatisticSql) itemStatisticDF = spark.sql(itemStatisticSql)
itemStatisticDF.show(100, False) itemStatisticDF.show(100, False)
partitionDatas = generatePartitionDates(trainDays) partitionDatas = generatePartitionDates(trainDays + itemStatisticStartDays)
partitionDatasBC = spark.sparkContext.broadcast(partitionDatas) partitionDatasBC = spark.sparkContext.broadcast(partitionDatas)
def splitPatitionDatasFlatMapFunc(row): def splitPatitionDatasFlatMapFunc(row):
...@@ -1064,7 +1065,7 @@ if __name__ == '__main__': ...@@ -1064,7 +1065,7 @@ if __name__ == '__main__':
# item统计特征处理 # item统计特征处理
itemStaticDF = addItemStaticFeatures(ratingDF,itemDF_spark,dataVocab) itemStaticDF = addItemStaticFeatures(ratingDF,itemDF_spark,dataVocab)
sys.exit(1) # sys.exit(1)
# 统计数据处理 # 统计数据处理
# ratingSamplesWithLabel = addStaticsFeatures(ratingSamplesWithLabel,dataVocab) # ratingSamplesWithLabel = addStaticsFeatures(ratingSamplesWithLabel,dataVocab)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment