Commit 915a608b authored by Your Name

predict add sample id

parent cc467833
......@@ -8,6 +8,7 @@ import pandas as pd
from datetime import date, timedelta
import time
from pyspark import StorageLevel
from pyspark.sql import Row
import os
def model_fn(features, labels, mode, params):
......@@ -213,7 +214,11 @@ if __name__ == "__main__":
print(rdd_te_files.collect())
print("-" * 100)
indices = rdd_te_files.repartition(100).map(lambda x: main(x))
print(indices.take(2))
print(indices.take(1))
te_result_dataframe = spark.createDataFrame(indices.flatMap(lambda x: x.split(";")).map(lambda l: Row(sample_id=l.split(":")[0],ctcvr=l.split(":")[1])))
te_result_pandas_df = te_result_dataframe.toDF("sample_id","ctcvr")
te_result_pandas_df.toPandas().to_csv("/home/gmuser/esmm/nearby/pred.txt", header=True)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment