Commit c102f5d4 authored by 张彦钊

按照用户去重应用列表样本

parent d46aba90
......@@ -173,12 +173,12 @@ def get_data():
df["top"] = df["top"].astype("str")
df["y"] = df["stat_date"].str.cat([df["device_id"].values.tolist(),df["y"].values.tolist(),df["z"].values.tolist()], sep=",")
df = df.drop(["z","stat_date","device_id"], axis=1)
df = df.fillna(0)
print(df.head(2))
features = 0
for i in ["ucity_id","clevel1_id","ccity_name","device_type","manufacturer","channel"]:
for i in ["ucity_id","clevel1_id","ccity_name","device_type","manufacturer","channel","top"]:
features = features + len(df[i].unique())
df[i] = df[i].fillna(0)
print(df.count())
print(df.head(2))
print("fields:{}".format(df.shape[1]-1))
print("features:{}".format(features+46))
ccity_name = list(set(df["ccity_name"].values.tolist()))
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment