Commit ea23b1d6 authored by 张彦钊

change test flow

parent f8359d52
@@ -215,7 +215,7 @@ def feature_engineer():
spark.createDataFrame(train).toDF("y", "z", "app_list", "level2_list", "level3_list",
"tag1_list", "tag2_list", "tag3_list", "tag4_list",
"tag5_list", "tag6_list", "tag7_list", "ids") \
-.write.format("tfrecords").save(path=path + "tr/", mode="overwrite")
+.repartition(1).write.format("tfrecords").save(path=path + "tr/", mode="overwrite")
h = time.time()
print("train tfrecord done")
print((h - f) / 60)
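The change in this hunk is the .repartition(1) added in front of the write. A minimal sketch of the effect, assuming a toy DataFrame and the spark-tensorflow-connector "tfrecords" source; the paths and columns below are illustrative, not from the repo:

from pyspark.sql import SparkSession

spark = SparkSession.builder.appName("repartition_sketch").getOrCreate()

# Toy frame standing in for the real feature DataFrame.
df = spark.createDataFrame([(1, 0.5), (0, 0.2), (1, 0.9)], ["y", "z"])

# Without repartition, the writer emits one part file per partition of df.
df.write.format("tfrecords").save(path="/tmp/tr_many_parts/", mode="overwrite")

# repartition(1) shuffles all rows into a single partition, so exactly one
# part file is written -- simpler to hand to a TensorFlow input pipeline, but
# every row now funnels through a single task, which can be slow at scale.
df.repartition(1).write.format("tfrecords").save(path="/tmp/tr_one_part/", mode="overwrite")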
@@ -230,7 +230,7 @@ def feature_engineer():
spark.createDataFrame(test).toDF("y", "z", "app_list", "level2_list", "level3_list",
"tag1_list", "tag2_list", "tag3_list", "tag4_list",
"tag5_list", "tag6_list", "tag7_list", "ids") \
-.write.format("tfrecords").save(path=path + "va/", mode="overwrite")
+.repartition(1).write.format("tfrecords").save(path=path + "va/", mode="overwrite")
print("va tfrecord done")
@@ -310,7 +310,6 @@ def get_predict(date,value_map,app_list_map,leve2_map,leve3_map):
print("nearby csv")
native_pre.toPandas().to_csv(local_path + "nearby.csv", header=True)
spark.createDataFrame(rdd.filter(lambda x: x[0] == 1)
.map(
lambda x: (x[1], x[2], x[6], x[7], x[8], x[9], x[10], x[11], x[12], x[13], x[14], x[15], x[16]))) \
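For context, the pattern in get_predict keeps all prediction rows in one RDD and tells them apart by a flag in x[0], then builds a DataFrame per flag and dumps it to a local CSV via pandas. A hedged toy sketch of that pattern (the flag meaning, column names, and paths here are assumptions, not taken from the repo):

# Toy RDD: x[0] is the flag (assumed 0 = native, 1 = nearby).
rows = spark.sparkContext.parallelize([
    (0, "u1", "c1"),
    (1, "u2", "c2"),
])

native_pre = spark.createDataFrame(rows.filter(lambda x: x[0] == 0)
                                       .map(lambda x: (x[1], x[2]))).toDF("uid", "cid")
nearby_pre = spark.createDataFrame(rows.filter(lambda x: x[0] == 1)
                                       .map(lambda x: (x[1], x[2]))).toDF("uid", "cid")

# toPandas() collects the whole DataFrame to the driver, so this style of
# local CSV export is only safe when the result fits in driver memory.
nearby_pre.toPandas().to_csv("/tmp/nearby.csv", header=True)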
@@ -159,7 +159,8 @@ def get_hdfs(dir_in):
if __name__ == '__main__':
preds = [3,4]
with open("/home/gmuser/hello.txt", "w") as fo:
with open("/home/gmuser/hel"
"lo.txt", "w") as fo:
for prob in preds:
fo.write(str(prob))
# sparkConf = SparkConf().set("spark.hive.mapred.supports.subdirectories", "true") \
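The only change in this file splits the path literal across two lines. Python joins adjacent string literals at compile time, so the split form opens exactly the same file; a minimal standalone sketch (the path is copied from the diff and may need adjusting to run locally):

single = "/home/gmuser/hello.txt"
split = ("/home/gmuser/hel"
         "lo.txt")
assert single == split  # adjacent literals are concatenated into one string

preds = [3, 4]
with open(split, "w") as fo:
    for prob in preds:
        fo.write(str(prob))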