Commit 5b8a4fe3 authored by 郭羽

特征工程优化

parent 4777e78a
......@@ -764,8 +764,6 @@ if __name__ == '__main__':
expDF = spark.sql(expSql)
# ratingDF = samplesNegAndUnion(clickDF,expDF)
ratingDF = clickDF.union(expDF)
print("pos size:"+str(clickDF.count()),"neg size:"+str(expDF.count()))
ratingDF = ratingDF.withColumnRenamed("time_stamp", "timestamp")\
.withColumnRenamed("device_id", "userid")\
.withColumnRenamed("card_id", "itemid")\
......@@ -782,6 +780,10 @@ if __name__ == '__main__':
print("添加label...")
ratingSamplesWithLabel = addSampleLabel(ratingDF)
posCount = ratingSamplesWithLabel.filter(F.col("label")==1).count()
negCount = ratingSamplesWithLabel.filter(F.col("label")==0).count()
print("pos size:"+str(posCount),"neg size:"+str(negCount))
# 数据字典
dataVocab = {}
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment