Commit 3a44f1b9 authored by 张彦钊

修改esmm测试项目

parent 69eddb8e
......@@ -69,8 +69,8 @@ def get_data():
hospital = con_sql(db, sql)
hospital = hospital.rename(columns={0: "service_id", 1: "hospital_id"})
# print(hospital.head())
print("hospital")
print(hospital.count())
# print("hospital")
# print(hospital.count())
hospital["service_id"] = hospital["service_id"].astype("str")
df = pd.merge(df, hospital, on='service_id', how='left')
df = df.drop("service_id", axis=1)
......@@ -80,7 +80,10 @@ def get_data():
print(df.shape)
df = df.drop_duplicates(["ucity_id", "clevel2_id", "ccity_name", "device_type", "manufacturer",
"channel", "top", "time", "stat_date","app_list","hospital_id","level3_ids"])
"channel", "top", "time", "stat_date","app_list"])
# df = df.drop_duplicates(["ucity_id", "clevel2_id", "ccity_name", "device_type", "manufacturer",
# "channel", "top", "time", "stat_date", "app_list", "hospital_id", "level3_ids"])
print("去重后样本数量:",df.shape)
app_list_number,app_list_map = multi_hot(df,"app_list",2)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment