Commit 6f0f4ded authored by 宋柯

模型bug修复

parent c020f632
......@@ -985,12 +985,25 @@ if __name__ == '__main__':
# Expand each row through splitPatitionDatasFlatMapFunc (defined elsewhere in
# this file) and name the resulting columns.
itemStatisticDF = itemStatisticDF.rdd.flatMap(splitPatitionDatasFlatMapFunc).toDF(
    ["card_id", "partition_date", "label", "label_count"])
# The former bare `itemStatisticDF.orderBy([...])` call was dropped:
# DataFrame.orderBy returns a new DataFrame, and its result was discarded, so
# nothing was ever sorted. The window clauses in itemStatisticSql declare
# their own ORDER BY partition_date, so no pre-sort of the view is needed.
itemStatisticDF.createOrReplaceTempView("itemStatisticDF")
# Per-item aggregates over `samples`, grouped by item_id: rating count, mean
# and standard deviation, plus how many rows carry label == 1 (clicks) and
# label == 0 (exposures).
itemClickFlag = when(F.col('label') == 1, F.lit(1)).otherwise(F.lit(0))
itemExpFlag = when(F.col('label') == 0, F.lit(1)).otherwise(F.lit(0))
staticFeatures = (
    samples.groupBy('item_id')
    .agg(
        F.count(F.lit(1)).alias('itemRatingCount'),
        F.avg(F.col('rating')).alias('itemRatingAvg'),
        F.stddev(F.col('rating')).alias('itemRatingStddev'),
        F.sum(itemClickFlag).alias("itemClickCount"),
        F.sum(itemExpFlag).alias("itemExpCount"),
    )
    # Null aggregates are replaced with 0 before any formatting happens.
    .fillna(0)
    # NOTE(review): format_number renders a string with thousands separators,
    # so the float cast would yield null for values >= 1000; presumably these
    # statistics stay well below that -- confirm.
    .withColumn('itemRatingStddev', F.format_number(F.col('itemRatingStddev'), NUMBER_PRECISION).cast("float"))
    .withColumn('itemRatingAvg', F.format_number(F.col('itemRatingAvg'), NUMBER_PRECISION).cast("float"))
    # ctrUdf (defined elsewhere in this file) receives click count and exposure count.
    .withColumn('itemCtr', F.format_number(ctrUdf(F.col("itemClickCount"), (F.col("itemExpCount"))), NUMBER_PRECISION).cast("float"))
)
# Rolling statistics of label_count per (card_id, label), ordered by
# partition_date. Each frame spans from itemStatisticStartDays rows back up to
# the row just before the current one ("1 PRECEDING"), so the current row is
# excluded. COALESCE maps the NULL of an empty frame to 0.
#
# Two fixes relative to the previous text:
#   * The placeholder is a plain str.format field. The old "${...}" spelling
#     left a literal "$" in front of the substituted number.
#   * The SELECT list no longer carries the comma-less SUM(...) window that was
#     partitioned by card_id only, nor the duplicated partition_date column;
#     both made the statement invalid and clashed with the label_count_sum alias.
itemStatisticSql = """
    SELECT
        card_id,
        label,
        partition_date,
        COALESCE(SUM(label_count) OVER(PARTITION BY card_id, label ORDER BY partition_date ROWS BETWEEN {itemStatisticStartDays} PRECEDING AND 1 PRECEDING), 0) label_count_sum,
        COALESCE(AVG(label_count) OVER(PARTITION BY card_id, label ORDER BY partition_date ROWS BETWEEN {itemStatisticStartDays} PRECEDING AND 1 PRECEDING), 0) label_count_avg,
        COALESCE(STDDEV(label_count) OVER(PARTITION BY card_id, label ORDER BY partition_date ROWS BETWEEN {itemStatisticStartDays} PRECEDING AND 1 PRECEDING), 0) label_count_stddev
    FROM
        itemStatisticDF
""".format(itemStatisticStartDays = itemStatisticStartDays)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment