Commit ce8cfc08 authored by 张彦钊

change test file

parent 08cc464c
......@@ -176,6 +176,7 @@ def feature_engineer():
"where e.stat_date >= '{}'".format(start)
df = spark.sql(sql)
df.show(2)
df = df.drop_duplicates(["ucity_id", "level2_ids", "ccity_name", "device_type", "manufacturer",
"channel", "top", "time", "stat_date", "app_list", "hospital_id", "level3_ids",
......@@ -183,7 +184,6 @@ def feature_engineer():
df = df.na.fill(dict(zip(features, features)))
c = time.time()
rdd = df.select("stat_date","y", "z","app_list","level2_ids","level3_ids",
"tag1","tag2","tag3","tag4","tag5","tag6","tag7",
"ucity_id", "ccity_name","device_type", "manufacturer", "channel", "top", "time",
......
Markdown is supported
0% Try again or attach a new file
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment