Commit 35ee7eb2 authored by 张彦钊

change test file

parent 68d51e67
......@@ -157,7 +157,7 @@ def feature_engineer():
validate_date = con_sql(db, sql)[0].values.tolist()[0]
print("validate_date:" + validate_date)
temp = datetime.datetime.strptime(validate_date, "%Y-%m-%d")
-    start = (temp - datetime.timedelta(days=3)).strftime("%Y-%m-%d")
+    start = (temp - datetime.timedelta(days=100)).strftime("%Y-%m-%d")
print(start)
db = pymysql.connect(host='172.16.40.158', port=4000, user='root', passwd='3SYz54LS9#^9sBvC')
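The hunk above widens the training window from 3 days to 100 days of history before validate_date. A minimal sketch of that date arithmetic, assuming validate_date is a "%Y-%m-%d" string as in the hunk (the date value here is a placeholder, not taken from the database):

```python
import datetime

# Hypothetical value; in the job, validate_date comes from con_sql(db, sql).
validate_date = "2019-03-14"
temp = datetime.datetime.strptime(validate_date, "%Y-%m-%d")

# Old window: 3 days of history before validate_date; new window: 100 days.
start_old = (temp - datetime.timedelta(days=3)).strftime("%Y-%m-%d")
start_new = (temp - datetime.timedelta(days=100)).strftime("%Y-%m-%d")
print(start_old)  # 2019-03-11
print(start_new)  # 2018-12-04
```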
......@@ -212,9 +212,6 @@ def feature_engineer():
df = df.na.fill(dict(zip(features, features)))
print("train number")
print(df.count())
rdd = df.select("stat_date", "y", "z", "app_list", "level2_ids", "level3_ids",
"tag1", "tag2", "tag3", "tag4", "tag5", "tag6", "tag7",
"ucity_id", "ccity_name", "device_type", "manufacturer", "channel", "top", "time",
......@@ -303,8 +300,7 @@ def get_predict(date,value_map,app_list_map,leve2_map,leve3_map):
df = spark.sql(sql)
df = df.drop_duplicates(["ucity_id", "device_id", "cid_id"])
print("esmm pre count")
print(df.count())
df = df.na.fill(dict(zip(features, features)))
f = time.time()
rdd = df.select("label", "y", "z", "ucity_id", "device_id", "cid_id", "app_list", "level2_ids", "level3_ids",
......@@ -331,7 +327,8 @@ def get_predict(date,value_map,app_list_map,leve2_map,leve3_map):
rdd.persist(storageLevel= StorageLevel.MEMORY_ONLY_SER)
print("esmm pre count")
print(rdd.count())
# native_pre = spark.createDataFrame(rdd.filter(lambda x:x[0] == 0).map(lambda x:(x[3],x[4],x[5],x[17])))\
# .toDF("city","uid","cid_id","number")
# print("native csv")
......
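The last hunk persists the prediction RDD with a serialized in-memory storage level before counting it, and the commented-out block shows how a filtered slice was turned back into a DataFrame. A rough sketch of that persist-then-reuse pattern, assuming a tuple RDD whose first field is the label (field positions and names here are illustrative, not the job's real 18-field layout):

```python
from pyspark import StorageLevel
from pyspark.sql import SparkSession

spark = SparkSession.builder.appName("persist_sketch").getOrCreate()

# Hypothetical stand-in for the prediction RDD: (label, city, uid, cid_id).
rdd = spark.sparkContext.parallelize(
    [(0, "beijing", "dev1", "cid1"),
     (1, "shanghai", "dev2", "cid2")]
)

# The hunk uses MEMORY_ONLY_SER; newer PySpark versions drop that constant
# (Python data is always serialized), so fall back to MEMORY_ONLY if absent.
level = getattr(StorageLevel, "MEMORY_ONLY_SER", StorageLevel.MEMORY_ONLY)
rdd.persist(storageLevel=level)
print(rdd.count())

# Reuse the cached RDD, e.g. to split out the label == 0 ("native") slice,
# mirroring the commented-out createDataFrame(...).toDF(...) pattern.
native_pre = spark.createDataFrame(
    rdd.filter(lambda x: x[0] == 0).map(lambda x: (x[1], x[2], x[3]))
).toDF("city", "uid", "cid_id")
native_pre.show()
```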