Commit 794211e6 authored by 宋柯's avatar 宋柯

模型bug修复

parent 6f0f4ded
......@@ -986,15 +986,15 @@ if __name__ == '__main__':
itemStatisticDF.orderBy(['card_id', 'label', 'partition_date'])
itemStatisticDF.createOrReplaceTempView("itemStatisticDF")
staticFeatures = samples.groupBy('item_id').agg(F.count(F.lit(1)).alias('itemRatingCount'),
F.avg(F.col('rating')).alias('itemRatingAvg'),
F.stddev(F.col('rating')).alias('itemRatingStddev'),
F.sum(when(F.col('label') == 1, F.lit(1)).otherwise(F.lit(0))).alias("itemClickCount"),
F.sum(when(F.col('label') == 0, F.lit(1)).otherwise(F.lit(0))).alias("itemExpCount")
).fillna(0) \
.withColumn('itemRatingStddev', F.format_number(F.col('itemRatingStddev'), NUMBER_PRECISION).cast("float")) \
.withColumn('itemRatingAvg', F.format_number(F.col('itemRatingAvg'), NUMBER_PRECISION).cast("float")) \
.withColumn('itemCtr',F.format_number(ctrUdf(F.col("itemClickCount"),(F.col("itemExpCount"))), NUMBER_PRECISION).cast("float"))
# staticFeatures = samples.groupBy('item_id').agg(F.count(F.lit(1)).alias('itemRatingCount'),
# F.avg(F.col('rating')).alias('itemRatingAvg'),
# F.stddev(F.col('rating')).alias('itemRatingStddev'),
# F.sum(when(F.col('label') == 1, F.lit(1)).otherwise(F.lit(0))).alias("itemClickCount"),
# F.sum(when(F.col('label') == 0, F.lit(1)).otherwise(F.lit(0))).alias("itemExpCount")
# ).fillna(0) \
# .withColumn('itemRatingStddev', F.format_number(F.col('itemRatingStddev'), NUMBER_PRECISION).cast("float")) \
# .withColumn('itemRatingAvg', F.format_number(F.col('itemRatingAvg'), NUMBER_PRECISION).cast("float")) \
# .withColumn('itemCtr',F.format_number(ctrUdf(F.col("itemClickCount"),(F.col("itemExpCount"))), NUMBER_PRECISION).cast("float"))
itemStatisticSql = """
SELECT
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment