Commit 26a5919d authored by 郭羽

特征工程优化

parent 47f86736
......@@ -129,7 +129,7 @@ def addItemFeatures(samples,itemDF,dataVocab,multiVocab):
# 转string
for c in bucketColumns:
samples = samples.withColumn(c + "Bucket",F.col(c + "Bucket").cast("string"))
samples = samples.withColumn(c + "Bucket",F.col(c + "Bucket").cast("string")).drop(c)
dataVocab[c + "Bucket"] = [str(float(i)) for i in range(11)]
......@@ -854,6 +854,6 @@ if __name__ == '__main__':
dataVocabStr = json.dumps(dataVocab, ensure_ascii=False)
dataVocabToRedis(dataVocabStr)
timestmp9 = int(round(time.time()))
print("总耗时s:{}".format(timestmp9 - timestmp7))
print("总耗时m:{}".format((timestmp9 - start)/60))
spark.stop()
\ No newline at end of file
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment