Commit 42f6ddfb authored by 张彦钊

把top特征和l1、l2特征合并

parent 952ee419
......@@ -50,18 +50,19 @@ def get_data():
unique_values = []
features = ["ucity_id", "clevel1_id", "ccity_name", "device_type", "manufacturer",
"channel", "top", "l1", "time", "stat_date"]
"channel", "top", "time", "stat_date"]
for i in features:
df[i] = df[i].astype("str")
df[i] = df[i].fillna("lost")
# 下面这行代码是为了区分不同的列中有相同的值
df[i] = df[i] + i
unique_values.extend(list(df[i].unique()))
df["l2"] = df["l2"].astype("str")
df["l2"] = df["l2"].fillna("lost")
# l1和l2中的值允许相同
df["l2"] = df["l2"]+"l1"
unique_values.extend(list(df["l2"].unique()))
for i in ["l1","l2"]:
df[i] = df[i].astype("str")
df[i] = df[i].fillna("lost")
# l1和l2中的值与top类别是一个类别
df[i] = df[i]+"top"
unique_values.extend(list(df[i].unique()))
print("features:")
print(len(unique_values))
......@@ -78,7 +79,6 @@ def get_data():
train[i] = train[i].map(value_map)
test[i] = test[i].map(value_map)
print("train shape")
print(train.shape)
print("test shape")
......@@ -120,14 +120,16 @@ def get_predict(date,value_map):
print(df.shape)
features = ["ucity_id", "clevel1_id", "ccity_name", "device_type", "manufacturer",
"channel", "top", "l1", "time", "stat_date"]
"channel", "top", "time", "stat_date"]
for i in features:
df[i] = df[i].astype("str")
df[i] = df[i].fillna("lost")
df[i] = df[i] + i
df["l2"] = df["l2"].astype("str")
df["l2"] = df["l2"].fillna("lost")
df["l2"] = df["l2"] + "l1"
for i in ["l1","l2"]:
df[i] = df[i].astype("str")
df[i] = df[i].fillna("lost")
# l1和l2中的值与top类别是一个类别
df[i] = df[i]+"top"
native_pre = df[df["label"] == 0]
native_pre = native_pre.drop("label", axis=1)
......@@ -147,10 +149,12 @@ def get_predict(date,value_map):
print("native")
print(native_pre.shape)
print(native_pre.head())
write_csv(native_pre, "native",200000)
print("nearby")
print(nearby_pre.shape)
print(nearby_pre.head())
write_csv(nearby_pre, "nearby", 160000)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment