Commit 2e6fd82b authored by 张彦钊

修改str cat函数

parent fe15d4ab
......@@ -24,31 +24,33 @@ def get_data():
esmm = esmm.rename(columns={0:"stat_date",1: "device_id",2:"ucity_id",3:"cid_id",4:"diary_service_id",5:"y",
6:"z",7:"clevel1_id",8:"slevel1_id"})
print("esmm data ok")
print(esmm.shape)
db = pymysql.connect(host='10.66.157.22', port=4000, user='root', passwd='3SYz54LS9#^9sBvC', db='eagle')
sql = "select * from home_tab_click"
temp = con_sql(db,sql)
temp = temp.rename(columns={0: "device_id"})
print("click data ok")
# print(temp.head())
df = pd.merge(esmm,temp,on = "device_id",how='left').fillna(0)
print(df.shape)
df["diary_service_id"] = df["diary_service_id"].astype("str")
df["clevel1_id"] = df["clevel1_id"].astype("str")
df["slevel1_id"] = df["slevel1_id"].astype("str")
df["cid_id"] = df["cid_id"].astype("str")
df["y"] = df["y"].astype("str")
df["z"] = df["z"].astype("str")
df["y"] = df["device_id"].str.cat([df["ucity_id"].values.tolist(), df["cid_id"].values.tolist(),
df["y"].values.tolist(),df["z"].values.tolist()], sep=",")
df = df.drop("z", axis=1)
print(df.head())
train = df[df["stat_date"] != "2018-11-25"]
transform(train,"crvtrain.csv")
test = df[df["stat_date"] == "2018-11-25"]
transform(test, "crvtest.csv")
esmm["x"] = esmm["y"].str.cat(esmm["z"].values.tolist(), sep=",")
print(esmm.head())
# print(esmm.shape)
# db = pymysql.connect(host='10.66.157.22', port=4000, user='root', passwd='3SYz54LS9#^9sBvC', db='eagle')
# sql = "select * from home_tab_click"
# temp = con_sql(db,sql)
# temp = temp.rename(columns={0: "device_id"})
# print("click data ok")
# # print(temp.head())
# df = pd.merge(esmm,temp,on = "device_id",how='left').fillna(0)
#
# # print(df.shape)
#
# df["diary_service_id"] = df["diary_service_id"].astype("str")
# df["clevel1_id"] = df["clevel1_id"].astype("str")
# df["slevel1_id"] = df["slevel1_id"].astype("str")
# df["cid_id"] = df["cid_id"].astype("str")
# df["y"] = df["y"].astype("str")
# df["z"] = df["z"].astype("str")
# df["y"] = df["device_id"].str.cat([df["ucity_id"].values.tolist(), df["cid_id"].values.tolist(),
# df["y"].values.tolist(),df["z"].values.tolist()], sep=",")
# df = df.drop("z", axis=1)
# print(df.head())
# train = df[df["stat_date"] != "2018-11-25"]
# transform(train,"crvtrain.csv")
# test = df[df["stat_date"] == "2018-11-25"]
# transform(test, "crvtest.csv")
def transform(df,table):
......@@ -62,7 +64,7 @@ def transform(df,table):
df["seq"] = list(range(df.shape[0]))
df["seq"] = df["seq"].astype("str")
df["ffm"] = df[0].apply(lambda x: x.split(",")[3:])
df["ffm"] = df["seq"].str.cat([df["ffm"].values.tolist()], sep=",")
df["ffm"] = df["seq"].str.cat(df["ffm"].values.tolist(), sep=",")
df["random"] = np.random.randint(1, 2147483647, df.shape[0])
df = df.drop(0, axis=1).drop("seq",axis=1)
print("size")
......
Markdown is supported
0% Try again or attach a new file
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment