Commit 5d8bfc6b authored by 张彦钊's avatar 张彦钊

add distinct

parent c96c5ebf
...@@ -198,14 +198,16 @@ def con_sql(db,sql): ...@@ -198,14 +198,16 @@ def con_sql(db,sql):
def test(): def test():
sql = "select stat_date,cid_id from esmm_train_data e where stat_date >= '{}'".format("2019-03-25") # sql = "select stat_date,cid_id from esmm_train_data e where stat_date >= '{}'".format("2019-03-25")
df = spark.createDataFrame(spark.sql(sql).rdd.map(lambda x:(x[0],x[1])).zipWithIndex() # df = spark.createDataFrame(spark.sql(sql).rdd.map(lambda x:(x[0],x[1])).zipWithIndex()
.map(lambda x:(x[1],x[0][0],x[0][1]))).toDF("ind","k","v") # .map(lambda x:(x[1],x[0][0],x[0][1]))).toDF("ind","k","v")
df.show(6) # df.show(6)
df = df.toPandas() # df = df.toPandas()
from hdfs import InsecureClient from hdfs import InsecureClient
client = InsecureClient('http://nvwa01:50070') client = InsecureClient('http://nvwa01:50070')
hdfs.ext.dataframe.write_dataframe(client,"/recommend/tr",df) df = hdfs.ext.dataframe.read_dataframe(client,"/recommend/nearby/part-00198-ad7e4833-f388-483e-8f68-c2fde8d3c4d8-c000.csv")
print(df.head())
# spark.sql("use online") # spark.sql("use online")
# spark.sql("ADD JAR /srv/apps/brickhouse-0.7.1-SNAPSHOT.jar") # spark.sql("ADD JAR /srv/apps/brickhouse-0.7.1-SNAPSHOT.jar")
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment