Commit b1760038 authored by 张彦钊's avatar 张彦钊

把训练集中剔除预测集

parent 7c69c949
......@@ -245,7 +245,7 @@ def feature_engineer():
# TODO 上线后把下面train fliter 删除,因为最近一天的数据也要作为训练集
train = rdd.map(
train = rdd.filter(lambda x: x[0] != validate_date).map(
lambda x: (x[1], x[2], x[3], x[4], x[5], x[6], x[7], x[8], x[9],
x[10], x[11], x[12], x[13], x[14], x[15],x[16],x[17],x[18]))
f = time.time()
......@@ -274,7 +274,6 @@ def feature_engineer():
.repartition(1).write.format("tfrecords").save(path=path + "va/", mode="overwrite")
print("va tfrecord done")
print("删除视频特征")
rdd.unpersist()
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment