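Add DataFrame shape debug prints around transform; slice chunks with iloc; comment out post-processing in transform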

12be6962 · 张彦钊 · f6badd3c · 12be6962
Commit 12be6962 authored Dec 26, 2018 by 张彦钊
Show whitespace changes
Inline Side-by-side

Showing with 23 additions and 24 deletions

ffm.py tensnsorflow/ffm.py +23 -24

No files found.
--- a/tensnsorflow/ffm.py
+++ b/tensnsorflow/ffm.py
@@ -98,12 +98,7 @@ class multiFFMFormatPandas:
        result_map = {}
        for i in data_list:
-            print("before:total")
-            print(len(result_map))
-            print(len(i.get()))
            result_map.update(i.get())
-            print("result_map")
-            print(len(result_map))
        pool.close()
        pool.join()
@@ -120,10 +115,10 @@ class multiFFMFormatPandas:
        x = 0
        while True:
            if x + step < data.__len__():
-                data_list.append(data.loc[x:x + step])
+                data_list.append(data.iloc[x:x + step])
-                x = x + step + 1
+                x = x + step
            else:
-                data_list.append(data.loc[x:data.__len__()])
+                data_list.append(data.iloc[x:data.__len__()])
                break
        return data_list
@@ -179,6 +174,8 @@ def get_data():
    ucity_id = list(set(df["ucity_id"].values.tolist()))
    manufacturer = list(set(df["manufacturer"].values.tolist()))
    channel = list(set(df["channel"].values.tolist()))
+    print("before transform")
+    print(df.shape)
    return df,validate_date,ucity_id,ccity_name,manufacturer,channel
@@ -187,22 +184,24 @@ def transform(a,validate_date):
    model = multiFFMFormatPandas()
    df = model.fit_transform(a, y="y", n=160000, processes=22)
    df = pd.DataFrame(df)
-    df["stat_date"] = df[0].apply(lambda x: x.split(",")[0])
+    print("after transform")
-    df["device_id"] = df[0].apply(lambda x: x.split(",")[1])
+    print(df.shape)
-    df["y"] = df[0].apply(lambda x: x.split(",")[2])
+    # df["stat_date"] = df[0].apply(lambda x: x.split(",")[0])
-    df["z"] = df[0].apply(lambda x: x.split(",")[3])
+    # df["device_id"] = df[0].apply(lambda x: x.split(",")[1])
-    df["number"] = np.random.randint(1, 2147483647, df.shape[0])
+    # df["y"] = df[0].apply(lambda x: x.split(",")[2])
-    df["seq"] = list(range(df.shape[0]))
+    # df["z"] = df[0].apply(lambda x: x.split(",")[3])
-    df["seq"] = df["seq"].astype("str")
+    # df["number"] = np.random.randint(1, 2147483647, df.shape[0])
-    df["data"] = df[0].apply(lambda x: ",".join(x.split(",")[2:]))
+    # df["seq"] = list(range(df.shape[0]))
-    df["data"] = df["seq"].str.cat(df["data"], sep=",")
+    # df["seq"] = df["seq"].astype("str")
-    df = df.drop([0,"seq"], axis=1)
+    # df["data"] = df[0].apply(lambda x: ",".join(x.split(",")[2:]))
-    print(df.head(2))
+    # df["data"] = df["seq"].str.cat(df["data"], sep=",")
+    # df = df.drop([0,"seq"], axis=1)
-    train = df[df["stat_date"] != validate_date]
+    # print(df.head(2))
-    train = train.drop("stat_date",axis=1)
-    test = df[df["stat_date"] == validate_date]
+    # train = df[df["stat_date"] != validate_date]
-    test = test.drop("stat_date",axis=1)
+    # train = train.drop("stat_date",axis=1)
+    # test = df[df["stat_date"] == validate_date]
+    # test = test.drop("stat_date",axis=1)
    # print("train shape")
    # print(train.shape)
    # train.to_csv(path + "tr.csv", sep="\t", index=False)