Commit ee789e40 authored by 张彦钊

change test file

parent 2fe3223d
...@@ -36,6 +36,11 @@ def multi_hot(df,column,n): ...@@ -36,6 +36,11 @@ def multi_hot(df,column,n):
return number,app_list_map return number,app_list_map
def get_map():
    """Look up the most recent ``stat_date`` stored in ``esmm_train_data``.

    Connects to the ``jerry_test`` database, runs a ``max(stat_date)``
    query through ``con_sql`` and pulls the first value out of the result.
    The value is only bound to a local name; nothing is returned.
    """
    query = "select max(stat_date) from esmm_train_data"
    connection = pymysql.connect(host='172.16.40.158', port=4000, user='root', passwd='3SYz54LS9#^9sBvC', db='jerry_test')
    # NOTE(review): con_sql is defined elsewhere; presumably it returns a
    # pandas DataFrame, so [0] selects the first column -- confirm.
    result = con_sql(connection, query)
    first_column = result[0]
    validate_date = first_column.values.tolist()[0]
def feature_engineer(): def feature_engineer():
db = pymysql.connect(host='172.16.40.158', port=4000, user='root', passwd='3SYz54LS9#^9sBvC', db='jerry_test') db = pymysql.connect(host='172.16.40.158', port=4000, user='root', passwd='3SYz54LS9#^9sBvC', db='jerry_test')
sql = "select max(stat_date) from esmm_train_data" sql = "select max(stat_date) from esmm_train_data"
......
...@@ -150,15 +150,21 @@ if __name__ == '__main__': ...@@ -150,15 +150,21 @@ if __name__ == '__main__':
# [path + "tr/part-r-00000"] # [path + "tr/part-r-00000"]
import subprocess import subprocess
spark = SparkSession.builder.getOrCreate() # spark = SparkSession.builder.getOrCreate()
b = [("a", 1), ("a", 1), ("b", 3), ("a", 2)] # b = [("a", 1), ("a", 1), ("b", 3), ("a", 2)]
rdd = spark.sparkContext.parallelize(b) # rdd = spark.sparkContext.parallelize(b)
df = spark.createDataFrame(rdd).toDF("id", "n") # df = spark.createDataFrame(rdd).toDF("id", "n")
df.show() # df.show()
df.createOrReplaceTempView("df") # df.createOrReplaceTempView("df")
t = spark.sql("select id from df").map() # t = spark.sql("select id from df").map()
print(t) # print(t)
db = pymysql.connect(host='172.16.40.158', port=4000, user='root', passwd='3SYz54LS9#^9sBvC', db='jerry_test')
sql = "select device_id from esmm_train_data limit 10"
cursor = db.cursor()
cursor.execute(sql)
result = cursor.fetchall()
a = list(result)
print(a)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment