Commit c5834874 authored by 王志伟
parents 7182b1ce 500b2501
......@@ -57,11 +57,12 @@ def get_data():
# print(df.head(2)
print("before")
print(df.shape)
print("after")
df = df.drop_duplicates()
df = df.drop_duplicates(["ucity_id", "clevel2_id", "ccity_name", "device_type", "manufacturer",
"channel", "top", "time", "stat_date","app_list"])
print("after")
print(df.shape)
app_list_number,app_list_map = multi_hot(df,"app_list",1)
level2_number,level2_map = multi_hot(df,"clevel2_id",1+app_list_number)
# df["app_list"] = df["app_list"].fillna("lost_na")
......
......@@ -80,10 +80,7 @@ def get_data():
print(df.shape)
df = df.drop_duplicates(["ucity_id", "clevel2_id", "ccity_name", "device_type", "manufacturer",
"channel", "top", "time", "stat_date","app_list"])
# df = df.drop_duplicates(["ucity_id", "clevel2_id", "ccity_name", "device_type", "manufacturer",
# "channel", "top", "time", "stat_date", "app_list", "hospital_id", "level3_ids"])
"channel", "top", "time", "stat_date", "app_list", "hospital_id", "level3_ids"])
print("去重后样本数量:",df.shape)
app_list_number,app_list_map = multi_hot(df,"app_list",2)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment