Commit 67f15321 authored by 张彦钊

add user

parent 0b6f3d0a
...@@ -167,12 +167,14 @@ def get_data(): ...@@ -167,12 +167,14 @@ def get_data():
df["y"].values.tolist(),df["z"].values.tolist()], sep=",") df["y"].values.tolist(),df["z"].values.tolist()], sep=",")
df = df.drop(["z","device_id"], axis=1).fillna(0.0) df = df.drop(["z","device_id"], axis=1).fillna(0.0)
print(df.head(2)) print(df.head(2))
print("fields:{}".format(df.shape[1]-1))
print("features:{}".format(len(cid)))
return df,validate_date,ucity_id,cid return df,validate_date,ucity_id,cid
def transform(a,validate_date): def transform(a,validate_date):
model = multiFFMFormatPandas() model = multiFFMFormatPandas()
df = model.fit_transform(a, y="y", n=160000, processes=22) df = model.fit_transform(a, y="y", n=160000, processes=26)
df = pd.DataFrame(df) df = pd.DataFrame(df)
df["stat_date"] = df[0].apply(lambda x: x.split(",")[0]) df["stat_date"] = df[0].apply(lambda x: x.split(",")[0])
df["device_id"] = df[0].apply(lambda x: x.split(",")[1]) df["device_id"] = df[0].apply(lambda x: x.split(",")[1])
...@@ -209,8 +211,13 @@ def get_predict_set(ucity_id, cid,model): ...@@ -209,8 +211,13 @@ def get_predict_set(ucity_id, cid,model):
df = con_sql(db, sql) df = con_sql(db, sql)
df = df.rename(columns={0: "device_id", 1: "y", 2: "z", 3: "stat_date", 4: "ucity_id", 5: "cid_id", df = df.rename(columns={0: "device_id", 1: "y", 2: "z", 3: "stat_date", 4: "ucity_id", 5: "cid_id",
6: "clevel1_id", 7: "ccity_name",26:"label"}) 6: "clevel1_id", 7: "ccity_name",26:"label"})
print("before filter:")
print(df.shape)
df = df[df["cid_id"].isin(cid)] df = df[df["cid_id"].isin(cid)]
print("after cid filter:")
print(df.shape)
df = df[df["ucity_id"].isin(ucity_id)] df = df[df["ucity_id"].isin(ucity_id)]
print("after ucity filter:")
print(df.shape) print(df.shape)
df["clevel1_id"] = df["clevel1_id"].astype("str") df["clevel1_id"] = df["clevel1_id"].astype("str")
df["cid_id"] = df["cid_id"].astype("str") df["cid_id"] = df["cid_id"].astype("str")
...@@ -221,8 +228,6 @@ def get_predict_set(ucity_id, cid,model): ...@@ -221,8 +228,6 @@ def get_predict_set(ucity_id, cid,model):
[df["device_id"].values.tolist(), df["ucity_id"].values.tolist(), df["cid_id"].values.tolist(), [df["device_id"].values.tolist(), df["ucity_id"].values.tolist(), df["cid_id"].values.tolist(),
df["y"].values.tolist(), df["z"].values.tolist()], sep=",") df["y"].values.tolist(), df["z"].values.tolist()], sep=",")
df = df.drop(["z","label","device_id"], axis=1).fillna(0.0) df = df.drop(["z","label","device_id"], axis=1).fillna(0.0)
print("df ok")
print(df.shape)
print(df.head(2)) print(df.head(2))
df = model.transform(df,n=160000, processes=22) df = model.transform(df,n=160000, processes=22)
df = pd.DataFrame(df) df = pd.DataFrame(df)
......
Markdown is supported
0% Try again or attach a new file
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment