change test file: print the training row count after de-duplication and null filling instead of before

68d51e67 · 张彦钊 · d6984584 · 68d51e67
Commit 68d51e67 authored Jun 20, 2019 by 张彦钊
Hide whitespace changes
Inline Side-by-side

Showing with 3 additions and 3 deletions

multi_hot.py tensnsorflow/multi_hot.py +3 -3

No files found.
--- a/tensnsorflow/multi_hot.py
+++ b/tensnsorflow/multi_hot.py
@@ -206,15 +206,15 @@ def feature_engineer():

    df = spark.sql(sql)

-    print("train number")
-    print(df.count())
-
    df = df.drop_duplicates(["ucity_id", "level2_ids", "ccity_name", "device_type", "manufacturer",
                             "channel", "top", "time", "stat_date", "app_list", "hospital_id", "level3_ids",
                             "tag1", "tag2", "tag3", "tag4", "tag5", "tag6", "tag7","search_tag2","search_tag3"])

    df = df.na.fill(dict(zip(features, features)))

+    print("train number")
+    print(df.count())
+
    rdd = df.select("stat_date", "y", "z", "app_list", "level2_ids", "level3_ids",
                    "tag1", "tag2", "tag3", "tag4", "tag5", "tag6", "tag7",
                    "ucity_id", "ccity_name", "device_type", "manufacturer", "channel", "top", "time",