Commit 9d25ea4b authored by 张彦钊

修改str cat函数

parent fb81a269
......@@ -44,32 +44,33 @@ def get_data():
df["cid_id"] = df["cid_id"].astype("str")
df["y"] = df["y"].astype("str")
df["z"] = df["z"].astype("str")
df["y"] = df["device_id"].str.cat([df["ucity_id"].values.tolist(), df["cid_id"].values.tolist(),
df["y"] = df["device_id"].str.cat([df["stat_date"].values.tolist(),df["ucity_id"].values.tolist(), df["cid_id"].values.tolist(),
df["y"].values.tolist(),df["z"].values.tolist()], sep=",")
df = df.drop("z", axis=1)
print(df.head())
train = df[df["stat_date"] != "2018-11-25"]
transform(train,"crvtrain.csv")
test = df[df["stat_date"] == "2018-11-25"]
transform(test, "crvtest.csv")
transform(df)
def transform(df,table):
def transform(df):
model = multiFFMFormatPandas()
df = model.fit_transform(df, y="y", n=80000, processes=20)
df = pd.DataFrame(df)
df["device_id"] = df[0].apply(lambda x: x.split(",")[0])
df["city_id"] = df[0].apply(lambda x: x.split(",")[1])
df["diary_id"] = df[0].apply(lambda x: x.split(",")[2])
df["stat_date"] = df[0].apply(lambda x: x.split(",")[0])
df["device_id"] = df[0].apply(lambda x: x.split(",")[1])
df["city_id"] = df[0].apply(lambda x: x.split(",")[2])
df["diary_id"] = df[0].apply(lambda x: x.split(",")[3])
df["seq"] = list(range(df.shape[0]))
df["seq"] = df["seq"].astype("str")
df["ffm"] = df[0].apply(lambda x: ",".join(x.split(",")[3:]))
df["ffm"] = df[0].apply(lambda x: ",".join(x.split(",")[4:]))
df["ffm"] = df["seq"].str.cat(df["ffm"], sep=",")
df["random"] = np.random.randint(1, 2147483647, df.shape[0])
df = df.drop(0, axis=1).drop("seq",axis=1)
print("size")
print(df.shape)
df.to_csv(path+table,index=None)
train = df[df["stat_date"] != "2018-11-25"].drop("stat_date",axis=1)
test = df[df["stat_date"] == "2018-11-25"].drop("stat_date",axis=1)
train.to_csv(path+"train.csv",index=None)
test.to_csv(path + "test.csv", index=None)
# yconnect = create_engine('mysql+pymysql://root:3SYz54LS9#^9sBvC@10.66.157.22:4000/jerry_test?charset=utf8')
# n = 100000
# for i in range(0,df.shape[0],n):
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment