Commit 26a5919d authored by 郭羽

特征工程优化

parent 47f86736
...@@ -129,7 +129,7 @@ def addItemFeatures(samples,itemDF,dataVocab,multiVocab): ...@@ -129,7 +129,7 @@ def addItemFeatures(samples,itemDF,dataVocab,multiVocab):
# 转string # 转string
for c in bucketColumns: for c in bucketColumns:
samples = samples.withColumn(c + "Bucket",F.col(c + "Bucket").cast("string")) samples = samples.withColumn(c + "Bucket",F.col(c + "Bucket").cast("string")).drop(c)
dataVocab[c + "Bucket"] = [str(float(i)) for i in range(11)] dataVocab[c + "Bucket"] = [str(float(i)) for i in range(11)]
...@@ -854,6 +854,6 @@ if __name__ == '__main__': ...@@ -854,6 +854,6 @@ if __name__ == '__main__':
dataVocabStr = json.dumps(dataVocab, ensure_ascii=False) dataVocabStr = json.dumps(dataVocab, ensure_ascii=False)
dataVocabToRedis(dataVocabStr) dataVocabToRedis(dataVocabStr)
timestmp9 = int(round(time.time())) timestmp9 = int(round(time.time()))
print("总耗时s:{}".format(timestmp9 - timestmp7)) print("总耗时m:{}".format((timestmp9 - start)/60))
spark.stop() spark.stop()
\ No newline at end of file
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment