Commit ea23b1d6 authored by 张彦钊

change test flow

parent f8359d52
@@ -215,7 +215,7 @@ def feature_engineer():
     spark.createDataFrame(train).toDF("y", "z", "app_list", "level2_list", "level3_list",
                                       "tag1_list", "tag2_list", "tag3_list", "tag4_list",
                                       "tag5_list", "tag6_list", "tag7_list", "ids") \
-        .write.format("tfrecords").save(path=path + "tr/", mode="overwrite")
+        .repartition(1).write.format("tfrecords").save(path=path + "tr/", mode="overwrite")
     h = time.time()
     print("train tfrecord done")
     print((h - f) / 60)
@@ -230,7 +230,7 @@ def feature_engineer():
     spark.createDataFrame(test).toDF("y", "z", "app_list", "level2_list", "level3_list",
                                      "tag1_list", "tag2_list", "tag3_list", "tag4_list",
                                      "tag5_list", "tag6_list", "tag7_list", "ids") \
-        .write.format("tfrecords").save(path=path + "va/", mode="overwrite")
+        .repartition(1).write.format("tfrecords").save(path=path + "va/", mode="overwrite")
     print("va tfrecord done")
@@ -310,7 +310,6 @@ def get_predict(date,value_map,app_list_map,leve2_map,leve3_map):
     print("nearby csv")
     native_pre.toPandas().to_csv(local_path + "nearby.csv", header=True)
     spark.createDataFrame(rdd.filter(lambda x: x[0] == 1)
                           .map(
         lambda x: (x[1], x[2], x[6], x[7], x[8], x[9], x[10], x[11], x[12], x[13], x[14], x[15], x[16]))) \
...
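The context lines above show the get_predict pattern: filter the RDD by the flag in the first field, project the feature columns, and wrap the result in a DataFrame. A hedged sketch of that step, assuming spark and an rdd of tuples as in the function (the shortened column list is illustrative only):

    # Keep rows flagged for the nearby queue (x[0] == 1) and project the
    # id and feature fields consumed downstream.
    nearby = spark.createDataFrame(
        rdd.filter(lambda x: x[0] == 1)
           .map(lambda x: (x[1], x[2], x[6], x[7], x[8]))
    )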
@@ -159,7 +159,8 @@ def get_hdfs(dir_in):
 if __name__ == '__main__':
     preds = [3,4]
-    with open("/home/gmuser/hello.txt", "w") as fo:
+    with open("/home/gmuser/hel"
+              "lo.txt", "w") as fo:
         for prob in preds:
             fo.write(str(prob))
     # sparkConf = SparkConf().set("spark.hive.mapred.supports.subdirectories", "true") \
...
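The rewritten open call relies on Python's compile-time concatenation of adjacent string literals, so the split path is identical to the original; this appears to be the "test flow" change the commit message refers to. A quick check:

    # Adjacent string literals are joined at compile time, so both
    # spellings name the same file.
    assert "/home/gmuser/hel" "lo.txt" == "/home/gmuser/hello.txt"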