Commit c020f632 authored by 宋柯

模型bug修复

parent 0c4bd1db
......@@ -967,7 +967,7 @@ if __name__ == '__main__':
# Build the item-statistics SQL from itemStatisticStartDay and endDay
# (getItemStatisticSql is defined outside this view) and run it.
itemStatisticSql = getItemStatisticSql(itemStatisticStartDay, endDay)
itemStatisticDF = spark.sql(itemStatisticSql)
# Debug output: print up to 100 rows without truncating column values.
itemStatisticDF.show(100, False)
# itemStatisticDF.show(100, False)
# Generate the partition dates for (trainDays + itemStatisticStartDays) and
# broadcast them to the executors.
# NOTE(review): the shape of generatePartitionDates' return value is not
# visible here -- presumably a list of date strings; confirm.
partitionDatas = generatePartitionDates(trainDays + itemStatisticStartDays)
partitionDatasBC = spark.sparkContext.broadcast(partitionDatas)
......@@ -983,7 +983,18 @@ if __name__ == '__main__':
return res
# Expand each row through splitPatitionDatasFlatMapFunc and rebuild a DataFrame
# with the columns card_id, partition_date, label and label_count.
itemStatisticDF = itemStatisticDF.rdd.flatMap(splitPatitionDatasFlatMapFunc).toDF(["card_id", "partition_date", "label", "label_count"])
# Debug output: print up to 1000 rows, ordered by card_id, label, partition_date.
itemStatisticDF.orderBy(['card_id', 'label', 'partition_date']).show(1000, False)
# NOTE(review): the DataFrame returned by this orderBy is neither assigned nor
# acted on, so this statement has no effect on itemStatisticDF.
itemStatisticDF.orderBy(['card_id', 'label', 'partition_date'])
# Register the DataFrame as a temp view so it can be queried through spark.sql.
itemStatisticDF.createOrReplaceTempView("itemStatisticDF")
# Rolling sum of label_count per card_id, ordered by partition_date, over the
# itemStatisticStartDays rows that precede the current row (the current row is
# excluded by the "1 PRECEDING" upper bound).
#
# The placeholder must be a plain str.format field, {itemStatisticStartDays}.
# The previous shell-style "${...}" form left a literal "$" in front of the
# substituted number, producing "ROWS BETWEEN $<n> PRECEDING", which is not a
# valid window frame bound.
#
# NOTE(review): the window partitions by card_id only, while the view holds one
# row per (card_id, partition_date, label). If the sum is meant to be per
# label, the clause should be PARTITION BY card_id, label -- confirm intent.
itemStatisticSql = """
SELECT
card_id,
partition_date,
label,
SUM(label_count) OVER(PARTITION BY card_id ORDER BY partition_date ROWS BETWEEN {itemStatisticStartDays} PRECEDING AND 1 PRECEDING) label_count_sum
FROM
itemStatisticDF
""".format(itemStatisticStartDays=itemStatisticStartDays)
# Debug output: run the query held in itemStatisticSql and print up to 100
# rows without truncating column values.
spark.sql(itemStatisticSql).show(100, False)
# NOTE(review): the script exits with status 1 here, so the statements below
# never run. This looks like a temporary debugging stop -- confirm before
# relying on this code path.
sys.exit(1)
# Run the click query (clickSql is defined outside this view) and register the
# result as the temp view "clickDF".
clickDF = spark.sql(clickSql)
clickDF.createOrReplaceTempView("clickDF")
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment