Commit 2e6fd82b authored by 张彦钊

修改str cat函数

parent fe15d4ab
...@@ -24,31 +24,33 @@ def get_data(): ...@@ -24,31 +24,33 @@ def get_data():
esmm = esmm.rename(columns={0:"stat_date",1: "device_id",2:"ucity_id",3:"cid_id",4:"diary_service_id",5:"y", esmm = esmm.rename(columns={0:"stat_date",1: "device_id",2:"ucity_id",3:"cid_id",4:"diary_service_id",5:"y",
6:"z",7:"clevel1_id",8:"slevel1_id"}) 6:"z",7:"clevel1_id",8:"slevel1_id"})
print("esmm data ok") print("esmm data ok")
print(esmm.shape) esmm["x"] = esmm["y"].str.cat(esmm["z"].values.tolist(), sep=",")
db = pymysql.connect(host='10.66.157.22', port=4000, user='root', passwd='3SYz54LS9#^9sBvC', db='eagle') print(esmm.head())
sql = "select * from home_tab_click" # print(esmm.shape)
temp = con_sql(db,sql) # db = pymysql.connect(host='10.66.157.22', port=4000, user='root', passwd='3SYz54LS9#^9sBvC', db='eagle')
temp = temp.rename(columns={0: "device_id"}) # sql = "select * from home_tab_click"
print("click data ok") # temp = con_sql(db,sql)
# print(temp.head()) # temp = temp.rename(columns={0: "device_id"})
df = pd.merge(esmm,temp,on = "device_id",how='left').fillna(0) # print("click data ok")
# # print(temp.head())
print(df.shape) # df = pd.merge(esmm,temp,on = "device_id",how='left').fillna(0)
#
df["diary_service_id"] = df["diary_service_id"].astype("str") # # print(df.shape)
df["clevel1_id"] = df["clevel1_id"].astype("str") #
df["slevel1_id"] = df["slevel1_id"].astype("str") # df["diary_service_id"] = df["diary_service_id"].astype("str")
df["cid_id"] = df["cid_id"].astype("str") # df["clevel1_id"] = df["clevel1_id"].astype("str")
df["y"] = df["y"].astype("str") # df["slevel1_id"] = df["slevel1_id"].astype("str")
df["z"] = df["z"].astype("str") # df["cid_id"] = df["cid_id"].astype("str")
df["y"] = df["device_id"].str.cat([df["ucity_id"].values.tolist(), df["cid_id"].values.tolist(), # df["y"] = df["y"].astype("str")
df["y"].values.tolist(),df["z"].values.tolist()], sep=",") # df["z"] = df["z"].astype("str")
df = df.drop("z", axis=1) # df["y"] = df["device_id"].str.cat([df["ucity_id"].values.tolist(), df["cid_id"].values.tolist(),
print(df.head()) # df["y"].values.tolist(),df["z"].values.tolist()], sep=",")
train = df[df["stat_date"] != "2018-11-25"] # df = df.drop("z", axis=1)
transform(train,"crvtrain.csv") # print(df.head())
test = df[df["stat_date"] == "2018-11-25"] # train = df[df["stat_date"] != "2018-11-25"]
transform(test, "crvtest.csv") # transform(train,"crvtrain.csv")
# test = df[df["stat_date"] == "2018-11-25"]
# transform(test, "crvtest.csv")
def transform(df,table): def transform(df,table):
...@@ -62,7 +64,7 @@ def transform(df,table): ...@@ -62,7 +64,7 @@ def transform(df,table):
df["seq"] = list(range(df.shape[0])) df["seq"] = list(range(df.shape[0]))
df["seq"] = df["seq"].astype("str") df["seq"] = df["seq"].astype("str")
df["ffm"] = df[0].apply(lambda x: x.split(",")[3:]) df["ffm"] = df[0].apply(lambda x: x.split(",")[3:])
df["ffm"] = df["seq"].str.cat([df["ffm"].values.tolist()], sep=",") df["ffm"] = df["seq"].str.cat(df["ffm"].values.tolist(), sep=",")
df["random"] = np.random.randint(1, 2147483647, df.shape[0]) df["random"] = np.random.randint(1, 2147483647, df.shape[0])
df = df.drop(0, axis=1).drop("seq",axis=1) df = df.drop(0, axis=1).drop("seq",axis=1)
print("size") print("size")
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment