Commit 7d47408e authored by 宋柯

模型bug修复

parent be1db75b
......@@ -927,8 +927,8 @@ def get_service_feature_df():
def addDays(n, format="%Y%m%d"):
    """Return today's date shifted by ``n`` days, rendered as a string.

    Args:
        n: Number of days to add to ``date.today()``; a negative value
            yields a date in the past.
        format: ``strftime`` pattern used for the result. Defaults to
            ``"%Y%m%d"``. The name shadows the ``format`` builtin inside
            this function; it is kept so keyword callers keep working.

    Returns:
        The shifted date formatted with ``format``.
    """
    shifted = date.today() + timedelta(days=n)
    return shifted.strftime(format)
def generatePartitionDates(trainDays):
    """Return the date strings for the last ``trainDays`` days, newest first.

    Args:
        trainDays: Number of days to generate; passed to ``range()``.

    Returns:
        A list of ``addDays`` results for offsets ``0, -1, ...,
        -(trainDays - 1)``, i.e. starting at today and walking backwards.
        Empty when ``trainDays`` is 0 or negative.
    """
    return [addDays(-daysBack) for daysBack in range(trainDays)]
def generatePartitionDates(partitionDates):
    """Return the date strings for the last ``partitionDates`` days, newest first.

    Args:
        partitionDates: Despite its name, an integer count of days (it is
            passed straight to ``range()``), not a collection of dates.

    Returns:
        A list of ``addDays`` results for offsets ``0, -1, ...,
        -(partitionDates - 1)``, i.e. starting at today and walking
        backwards. Empty when the count is 0 or negative.
    """
    return [addDays(-daysBack) for daysBack in range(partitionDates)]
#显示所有列
pd.set_option('display.max_columns', None)
......@@ -942,11 +942,12 @@ if __name__ == '__main__':
start = time.time()
#入参
trainDays = int(sys.argv[1])
itemStatisticStartDays = int(sys.argv[2])
print('trainDays:{}'.format(trainDays),flush=True)
endDay = addDays(0)
startDay = addDays(-int(trainDays))
itemStatisticStartDay = addDays(-int(trainDays + 31))
itemStatisticStartDay = addDays(-int(trainDays + itemStatisticStartDays))
print("train_data start:{} end:{}".format(startDay,endDay))
......@@ -968,7 +969,7 @@ if __name__ == '__main__':
itemStatisticDF = spark.sql(itemStatisticSql)
itemStatisticDF.show(100, False)
partitionDatas = generatePartitionDates(trainDays)
partitionDatas = generatePartitionDates(trainDays + itemStatisticStartDays)
partitionDatasBC = spark.sparkContext.broadcast(partitionDatas)
def splitPatitionDatasFlatMapFunc(row):
......@@ -1064,7 +1065,7 @@ if __name__ == '__main__':
# item统计特征处理
itemStaticDF = addItemStaticFeatures(ratingDF,itemDF_spark,dataVocab)
sys.exit(1)
# sys.exit(1)
# 统计数据处理
# ratingSamplesWithLabel = addStaticsFeatures(ratingSamplesWithLabel,dataVocab)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment