diff --git a/tensnsorflow/ffm.py b/tensnsorflow/ffm.py index 2956e0ce89dc2161db0966a1f168258616dd596a..d08355535f3946205982f90cc2274f5419c91de1 100644 --- a/tensnsorflow/ffm.py +++ b/tensnsorflow/ffm.py @@ -31,7 +31,7 @@ def get_data(): temp = temp.rename(columns={0: "device_id"}) print("click data ok") print(temp.head()) - df = pd.merge(esmm,temp,on = "device_id") + df = pd.merge(esmm,temp,on = "device_id").dropna() print(df.head()) @@ -44,6 +44,7 @@ def get_data(): df["y"] = df["device_id"].str.cat([df["ucity_id"].values.tolist(), df["cid_id"].values.tolist(), df["y"].values.tolist(),df["z"].values.tolist()], sep=",") df = df.drop("z", axis=1) + print(df.head()) df = df[df["stat_date"] != "2018-11-25"] transform(df,"train") df = df[df["stat_date"] == "2018-11-25"] @@ -61,7 +62,7 @@ def transform(df,table): df["ffm"] = df[0].apply(lambda x: x.split(",")[4]) df["seq"] = list(range(df.shape[0])) df["seq"] = df["seq"].astype("str") - df["ffm"] = df["seq"].str.cat(df[["y", "ffm"]], sep=",") + df["ffm"] = df["seq"].str.cat([df["y"].values.tolist(), df["ffm"].values.tolist()], sep=",") df["number"] = np.random.randint(1, 2147483647, df.shape[0]) df = df.drop(0, axis=1) yconnect = create_engine('mysql+pymysql://root:3SYz54LS9#^9sBvC@10.66.157.22:4000/jerry_test?charset=utf8')