Commit e6eb6830 authored by 王志伟
parents 9e356afa 3a44f1b9
...@@ -69,8 +69,8 @@ def get_data(): ...@@ -69,8 +69,8 @@ def get_data():
hospital = con_sql(db, sql) hospital = con_sql(db, sql)
hospital = hospital.rename(columns={0: "service_id", 1: "hospital_id"}) hospital = hospital.rename(columns={0: "service_id", 1: "hospital_id"})
# print(hospital.head()) # print(hospital.head())
print("hospital") # print("hospital")
print(hospital.count()) # print(hospital.count())
hospital["service_id"] = hospital["service_id"].astype("str") hospital["service_id"] = hospital["service_id"].astype("str")
df = pd.merge(df, hospital, on='service_id', how='left') df = pd.merge(df, hospital, on='service_id', how='left')
df = df.drop("service_id", axis=1) df = df.drop("service_id", axis=1)
...@@ -80,7 +80,10 @@ def get_data(): ...@@ -80,7 +80,10 @@ def get_data():
print(df.shape) print(df.shape)
df = df.drop_duplicates(["ucity_id", "clevel2_id", "ccity_name", "device_type", "manufacturer", df = df.drop_duplicates(["ucity_id", "clevel2_id", "ccity_name", "device_type", "manufacturer",
"channel", "top", "time", "stat_date","app_list","hospital_id","level3_ids"]) "channel", "top", "time", "stat_date","app_list"])
# df = df.drop_duplicates(["ucity_id", "clevel2_id", "ccity_name", "device_type", "manufacturer",
# "channel", "top", "time", "stat_date", "app_list", "hospital_id", "level3_ids"])
print("去重后样本数量:",df.shape) print("去重后样本数量:",df.shape)
app_list_number,app_list_map = multi_hot(df,"app_list",2) app_list_number,app_list_map = multi_hot(df,"app_list",2)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment