Commit b1760038 authored by 张彦钊

从训练集中剔除预测集

parent 7c69c949
...@@ -245,7 +245,7 @@ def feature_engineer(): ...@@ -245,7 +245,7 @@ def feature_engineer():
# TODO 上线后把下面train fliter 删除,因为最近一天的数据也要作为训练集 # TODO 上线后把下面train fliter 删除,因为最近一天的数据也要作为训练集
train = rdd.map( train = rdd.filter(lambda x: x[0] != validate_date).map(
lambda x: (x[1], x[2], x[3], x[4], x[5], x[6], x[7], x[8], x[9], lambda x: (x[1], x[2], x[3], x[4], x[5], x[6], x[7], x[8], x[9],
x[10], x[11], x[12], x[13], x[14], x[15],x[16],x[17],x[18])) x[10], x[11], x[12], x[13], x[14], x[15],x[16],x[17],x[18]))
f = time.time() f = time.time()
...@@ -274,7 +274,6 @@ def feature_engineer(): ...@@ -274,7 +274,6 @@ def feature_engineer():
.repartition(1).write.format("tfrecords").save(path=path + "va/", mode="overwrite") .repartition(1).write.format("tfrecords").save(path=path + "va/", mode="overwrite")
print("va tfrecord done") print("va tfrecord done")
print("删除视频特征")
rdd.unpersist() rdd.unpersist()
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment