Merge branch 'master' of http://git.wanmeizhensuo.com/ML/ffm-baseline

62ed81b4 · 王志伟 · b81746a9 · 4a3717f3 · 62ed81b4 · 62ed81b4
Commit 62ed81b4 authored Dec 26, 2018 by 王志伟
Hide whitespace changes
Inline Side-by-side

Showing 2 changed files with 18 additions and 1 deletion

applist.py tensnsorflow/applist.py +0 -1

ffm.py tensnsorflow/ffm.py +18 -0

No files found.
--- a/tensnsorflow/applist.py
+++ b/tensnsorflow/applist.py
@@ -65,7 +65,6 @@ def sort_app():
                "child": {"小伴龙", "儿歌多多", "宝宝巴士奇妙屋", "智慧树", "贝瓦儿歌", "儿歌点点", "宝贝听听", "宝宝小厨房", "宝宝游乐园", "叽里呱啦"},
                "homework": {"作业帮", "小猿搜题", "一起作业学生端", "学霸君", "互动作业", "猿题库", "纳米盒", "阿凡题", "洋葱数学"},
                "work": {"钉钉", "企业微信", "移动彩云", "云之家", "今目标", "口袋助理", "推事本", "奇鱼微办公", "工作圈", "明道"},
-                "home": {"最美装修", "齐家网", "土巴兔装修", "装修头条", "装修管家", "窝牛装修", "丽芙家居", "酷家乐装修", "惠装装修", "房天下装修"},
                "job": {"智联招聘", "前程无忧", "斗米", "拉勾", "Boss直聘", "猎聘同道", "智联招聘"}
                 }
    df["app_list"] = df["app_list"].apply(json_format)

--- a/tensnsorflow/ffm.py
+++ b/tensnsorflow/ffm.py
@@ -218,14 +218,23 @@ def get_predict_set(ucity_id,model,ccity_name,manufacturer,channel):
    print("before filter:")
    print(df.shape)
+    print(df.loc[df["device_id"]=="358035085192742"].shape)
    df = df[df["ucity_id"].isin(ucity_id)]
    print("after ucity filter:")
    print(df.shape)
+    print(df.loc[df["device_id"] == "358035085192742"].shape)
    df = df[df["ccity_name"].isin(ccity_name)]
    print("after ccity_name filter:")
+    print(df.shape)
+    print(df.loc[df["device_id"] == "358035085192742"].shape)
    df = df[df["manufacturer"].isin(manufacturer)]
+    print("after manufacturer filter:")
+    print(df.shape)
+    print(df.loc[df["device_id"] == "358035085192742"].shape)
    df = df[df["channel"].isin(channel)]
+    print("after channel filter:")
    print(df.shape)
+    print(df.loc[df["device_id"] == "358035085192742"].shape)
    df["cid_id"] = df["cid_id"].astype("str")
    df["clevel1_id"] = df["clevel1_id"].astype("str")
    df["top"] = df["top"].astype("str")
@@ -239,6 +248,8 @@ def get_predict_set(ucity_id,model,ccity_name,manufacturer,channel):
    print(df.head(2))
    df = model.transform(df,n=160000, processes=22)
    df = pd.DataFrame(df)
+    print("after transform")
+    print(df.shape)
    df["label"] = df[0].apply(lambda x: x.split(",")[0])
    df["device_id"] = df[0].apply(lambda x: x.split(",")[1])
    df["city_id"] = df[0].apply(lambda x: x.split(",")[2])
@@ -251,14 +262,21 @@ def get_predict_set(ucity_id,model,ccity_name,manufacturer,channel):
    df = df.drop([0, "seq"], axis=1)
    print(df.head())
+    print(df.loc[df["device_id"] == "358035085192742"].shape)
    native_pre = df[df["label"] == "0"]
    native_pre = native_pre.drop("label", axis=1)
+    print("native")
+    print(native_pre.shape)
+    print(native_pre.loc[native_pre["device_id"] == "358035085192742"].shape)
    native_pre.to_csv(path+"native.csv",sep="\t",index=False)
    # print("native_pre shape")
    # print(native_pre.shape)
    nearby_pre = df[df["label"] == "1"]
    nearby_pre = nearby_pre.drop("label", axis=1)
+    print("nearby")
+    print(nearby_pre.shape)
+    print(nearby_pre.loc[nearby_pre["device_id"] == "358035085192742"].shape)
    nearby_pre.to_csv(path + "nearby.csv", sep="\t", index=False)
    # print("nearby_pre shape")
    # print(nearby_pre.shape)