Commit ebb858ce authored by 张彦钊

add distinct

parent a704e2fd
......@@ -6,6 +6,8 @@ import pytispark.pytispark as pti
from pyspark.sql import SparkSession
import datetime
import pandas as pd
import hdfs
import avro
def app_list_func(x,l):
b = x.split(",")
......@@ -200,7 +202,10 @@ def test():
df = spark.createDataFrame(spark.sql(sql).rdd.map(lambda x:(x[0],x[1])).zipWithIndex()
.map(lambda x:(x[1],x[0][0],x[0][1]))).toDF("ind","k","v")
df.show(6)
df.write.csv('/recommend/test', mode='overwrite', header=True)
df = df.toPandas()
from hdfs import InsecureClient
client = InsecureClient('http://nvwa01:50070')
hdfs.ext.dataframe.write_dataframe(client,"/recommend/tr",df)
# spark.sql("use online")
# spark.sql("ADD JAR /srv/apps/brickhouse-0.7.1-SNAPSHOT.jar")
......@@ -241,7 +246,9 @@ if __name__ == '__main__':
ti.tidbMapDatabase("jerry_test")
spark.sparkContext.setLogLevel("WARN")
validate_date, value_map, app_list_map, leve2_map, leve3_map = feature_engineer()
# validate_date, value_map, app_list_map, leve2_map, leve3_map = feature_engineer()
# get_predict(validate_date, value_map, app_list_map, leve2_map, leve3_map)
test()
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment