Commit e8aa2e8d authored by Your Name

dist test

parent 3cbe1c68
......@@ -230,11 +230,11 @@ if __name__ == "__main__":
# Score the test files in parallel and decode the ";"-joined, ":"-separated
# result records ("sample_id:uid:city:cid_id:ctcvr") into a Spark DataFrame.
# Diff residue removed: the pre-image lines duplicated the print/createDataFrame
# statements (two conflicting continuation lines -> SyntaxError), and the old
# `.toDF("uid","city","cid","p_value")` passed 4 column names for a 5-field Row.
rdd_te_files = spark.sparkContext.parallelize(te_files)
print("-" * 100)
# repartition(1): run main() on a single partition — presumably so the model
# is loaded once per executor; TODO confirm this is intentional.
indices = rdd_te_files.repartition(1).map(lambda x: main(x))
print("-" * 100)

def _record_to_row(rec):
    # Split once instead of five separate rec.split(":") calls per record.
    parts = rec.split(":")
    return Row(sample_id=parts[0], uid=parts[1], city=parts[2],
               cid_id=parts[3], ctcvr=parts[4])

te_result_dataframe = spark.createDataFrame(
    indices.flatMap(lambda x: x.split(";")).map(_record_to_row))
te_result_dataframe.show()
......@@ -273,17 +273,19 @@ if __name__ == "__main__":
# Nearby scoring: parallelize test files, score with main(), decode the
# "sample_id:uid:city:cid_id:ctcvr" records into a DataFrame, then bring both
# the nearby and the previously written native results down to pandas.
# Diff residue removed (duplicated pre-image statements that made this unparseable).
rdd_te_files = spark.sparkContext.parallelize(te_files)
print("-" * 100)
indices = rdd_te_files.repartition(1).map(lambda x: main(x))
print("-" * 100)

def _nearby_row(rec):
    # Split once instead of five separate rec.split(":") calls per record.
    parts = rec.split(":")
    return Row(sample_id=parts[0], uid=parts[1], city=parts[2],
               cid_id=parts[3], ctcvr=parts[4])

te_result_dataframe = spark.createDataFrame(
    indices.flatMap(lambda x: x.split(";")).map(_nearby_row))
nearby_data = te_result_dataframe.toPandas()
# BUG FIX: pandas DataFrames have no .show(); the original
# `nearby_data.show()` would raise AttributeError. Print instead.
print(nearby_data)
# BUG FIX: `mode="overwrite"` is a *write* option; it is invalid on
# DataFrameReader.parquet and has been dropped.
native_data = spark.read.parquet(path + "native_result/")
native_data.show()
native_data_pd = native_data.toPandas()
# BUG FIX: same as above — .show() does not exist on pandas DataFrames.
print(native_data_pd)
print("耗时(秒):")
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment