Commit 82ba92aa authored by 张彦钊

change transform

parent be9387e7
......@@ -143,7 +143,7 @@ def get_data():
validate_date = con_sql(db, sql)[0].values.tolist()[0]
print("validate_date:"+validate_date)
temp = datetime.datetime.strptime(validate_date, "%Y-%m-%d")
start = (temp - datetime.timedelta(days=5)).strftime("%Y-%m-%d")
start = (temp - datetime.timedelta(days=2)).strftime("%Y-%m-%d")
db = pymysql.connect(host='10.66.157.22', port=4000, user='root', passwd='3SYz54LS9#^9sBvC', db='jerry_test')
sql = "select device_id,y,z,stat_date,ucity_id,cid_id,clevel1_id,ccity_name from esmm_train_data " \
"where stat_date >= '{}'".format(start)
......@@ -180,12 +180,12 @@ def transform(a,validate_date):
df["stat_date"] = df[0].apply(lambda x: x.split(",")[0])
df["device_id"] = df[0].apply(lambda x: x.split(",")[1])
df["city_id"] = df[0].apply(lambda x: x.split(",")[2])
df["diary_id"] = df[0].apply(lambda x: x.split(",")[3])
df["cid"] = df[0].apply(lambda x: x.split(",")[3])
df["seq"] = list(range(df.shape[0]))
df["seq"] = df["seq"].astype("str")
df["ffm"] = df[0].apply(lambda x: ",".join(x.split(",")[4:]))
df["ffm"] = df["seq"].str.cat(df["ffm"], sep=",")
df["random"] = np.random.randint(1, 2147483647, df.shape[0])
df["data"] = df[0].apply(lambda x: ",".join(x.split(",")[4:]))
df["data"] = df["seq"].str.cat(df["data"], sep=",")
df["number"] = np.random.randint(1, 2147483647, df.shape[0])
df = df.drop([0,"seq"], axis=1)
print(df.head())
......@@ -241,6 +241,7 @@ def get_predict_set(ucity_id, cid,model):
df["cid_id"] = df["cid_id"].astype("str")
df["y"] = df["y"].astype("str")
df["z"] = df["z"].astype("str")
df["label"] = df["label"].astype("str")
df["y"] = df["label"].str.cat(
[df["device_id"].values.tolist(), df["ucity_id"].values.tolist(), df["cid_id"].values.tolist(),
df["y"].values.tolist(), df["z"].values.tolist()], sep=",")
......@@ -255,12 +256,12 @@ def get_predict_set(ucity_id, cid,model):
df["label"] = df[0].apply(lambda x: x.split(",")[0])
df["device_id"] = df[0].apply(lambda x: x.split(",")[1])
df["city_id"] = df[0].apply(lambda x: x.split(",")[2])
df["diary_id"] = df[0].apply(lambda x: x.split(",")[3])
df["cid"] = df[0].apply(lambda x: x.split(",")[3])
df["seq"] = list(range(df.shape[0]))
df["seq"] = df["seq"].astype("str")
df["ffm"] = df[0].apply(lambda x: ",".join(x.split(",")[4:]))
df["ffm"] = df["seq"].str.cat(df["ffm"], sep=",")
df["random"] = np.random.randint(1, 2147483647, df.shape[0])
df["data"] = df[0].apply(lambda x: ",".join(x.split(",")[4:]))
df["data"] = df["seq"].str.cat(df["data"], sep=",")
df["number"] = np.random.randint(1, 2147483647, df.shape[0])
df = df.drop([0, "seq"], axis=1)
print(df.head())
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment