Commit 62ed81b4 authored by 王志伟
parents b81746a9 4a3717f3
......@@ -65,7 +65,6 @@ def sort_app():
"child": {"小伴龙", "儿歌多多", "宝宝巴士奇妙屋", "智慧树", "贝瓦儿歌", "儿歌点点", "宝贝听听", "宝宝小厨房", "宝宝游乐园", "叽里呱啦"},
"homework": {"作业帮", "小猿搜题", "一起作业学生端", "学霸君", "互动作业", "猿题库", "纳米盒", "阿凡题", "洋葱数学"},
"work": {"钉钉", "企业微信", "移动彩云", "云之家", "今目标", "口袋助理", "推事本", "奇鱼微办公", "工作圈", "明道"},
"home": {"最美装修", "齐家网", "土巴兔装修", "装修头条", "装修管家", "窝牛装修", "丽芙家居", "酷家乐装修", "惠装装修", "房天下装修"},
"job": {"智联招聘", "前程无忧", "斗米", "拉勾", "Boss直聘", "猎聘同道", "智联招聘"}
}
df["app_list"] = df["app_list"].apply(json_format)
......
......@@ -218,14 +218,23 @@ def get_predict_set(ucity_id,model,ccity_name,manufacturer,channel):
print("before filter:")
print(df.shape)
print(df.loc[df["device_id"]=="358035085192742"].shape)
df = df[df["ucity_id"].isin(ucity_id)]
print("after ucity filter:")
print(df.shape)
print(df.loc[df["device_id"] == "358035085192742"].shape)
df = df[df["ccity_name"].isin(ccity_name)]
print("after ccity_name filter:")
print(df.shape)
print(df.loc[df["device_id"] == "358035085192742"].shape)
df = df[df["manufacturer"].isin(manufacturer)]
print("after manufacturer filter:")
print(df.shape)
print(df.loc[df["device_id"] == "358035085192742"].shape)
df = df[df["channel"].isin(channel)]
print("after channel filter:")
print(df.shape)
print(df.loc[df["device_id"] == "358035085192742"].shape)
df["cid_id"] = df["cid_id"].astype("str")
df["clevel1_id"] = df["clevel1_id"].astype("str")
df["top"] = df["top"].astype("str")
......@@ -239,6 +248,8 @@ def get_predict_set(ucity_id,model,ccity_name,manufacturer,channel):
print(df.head(2))
df = model.transform(df,n=160000, processes=22)
df = pd.DataFrame(df)
print("after transform")
print(df.shape)
df["label"] = df[0].apply(lambda x: x.split(",")[0])
df["device_id"] = df[0].apply(lambda x: x.split(",")[1])
df["city_id"] = df[0].apply(lambda x: x.split(",")[2])
......@@ -251,14 +262,21 @@ def get_predict_set(ucity_id,model,ccity_name,manufacturer,channel):
df = df.drop([0, "seq"], axis=1)
print(df.head())
print(df.loc[df["device_id"] == "358035085192742"].shape)
native_pre = df[df["label"] == "0"]
native_pre = native_pre.drop("label", axis=1)
print("native")
print(native_pre.shape)
print(native_pre.loc[native_pre["device_id"] == "358035085192742"].shape)
native_pre.to_csv(path+"native.csv",sep="\t",index=False)
# print("native_pre shape")
# print(native_pre.shape)
nearby_pre = df[df["label"] == "1"]
nearby_pre = nearby_pre.drop("label", axis=1)
print("nearby")
print(nearby_pre.shape)
print(nearby_pre.loc[nearby_pre["device_id"] == "358035085192742"].shape)
nearby_pre.to_csv(path + "nearby.csv", sep="\t", index=False)
# print("nearby_pre shape")
# print(nearby_pre.shape)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment