Commit 915a608b authored by Your Name

predict add sample id

parent cc467833
...@@ -8,6 +8,7 @@ import pandas as pd ...@@ -8,6 +8,7 @@ import pandas as pd
from datetime import date, timedelta from datetime import date, timedelta
import time import time
from pyspark import StorageLevel from pyspark import StorageLevel
from pyspark.sql import Row
import os import os
def model_fn(features, labels, mode, params): def model_fn(features, labels, mode, params):
...@@ -213,7 +214,11 @@ if __name__ == "__main__": ...@@ -213,7 +214,11 @@ if __name__ == "__main__":
print(rdd_te_files.collect()) print(rdd_te_files.collect())
print("-" * 100) print("-" * 100)
indices = rdd_te_files.repartition(100).map(lambda x: main(x)) indices = rdd_te_files.repartition(100).map(lambda x: main(x))
print(indices.take(2)) print(indices.take(1))
te_result_dataframe = spark.createDataFrame(indices.flatMap(lambda x: x.split(";")).map(lambda l: Row(sample_id=l.split(":")[0],ctcvr=l.split(":")[1])))
te_result_pandas_df = te_result_dataframe.toDF("sample_id","ctcvr")
te_result_pandas_df.toPandas().to_csv("/home/gmuser/esmm/nearby/pred.txt", header=True)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment