Commit 3f7a4574 authored by 张彦钊

增加特征

parent 5d851283
......@@ -33,6 +33,10 @@ def multi_hot(df,column,n):
def feature_engineer():
# TODO 删除下面的测试写入
df = spark.sql("select y,z from esmm_train_data limit 60")
df.write.format("avro").save(path=path + "tr", mode="overwrite")
db = pymysql.connect(host='172.16.40.158', port=4000, user='root', passwd='3SYz54LS9#^9sBvC', db='jerry_test')
sql = "select max(stat_date) from esmm_train_data"
validate_date = con_sql(db, sql)[0].values.tolist()[0]
......@@ -54,8 +58,6 @@ def feature_engineer():
df = spark.sql(sql)
df.write.format("avro").save(path=path + "tr", mode="overwrite")
url = "jdbc:mysql://172.16.30.143:3306/zhengxing"
jdbcDF = spark.read.format("jdbc").option("driver", "com.mysql.jdbc.Driver").option("url", url) \
.option("dbtable", "api_service").option("user", 'work').option("password", 'BJQaT9VzDcuPBqkd').load()
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment