Commit 41528b41 authored by 张彦钊's avatar 张彦钊

把最近一天的数据集放进训练集

parent f212a78c
......@@ -191,7 +191,7 @@ def con_sql(db,sql):
def test():
sql = "select stat_date,cid_id from esmm_train_data e where stat_date >= '{}'".format("2019-04-25")
sql = "select stat_date,cid_id from esmm_train_data e where stat_date >= '{}'".format("2019-03-25")
df = spark.createDataFrame(spark.sql(sql).rdd.map(lambda x:(x[0],x[1])).zipWithIndex()
.map(lambda x:(x[1],x[0][0],x[0][1]))).toDF("ind","k","v")
df.show(6)
......
Markdown is supported
0% Try again or attach a new file
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment