修改esmm写入数据库时nearby的日记队列数量

91c4f980 · 张彦钊 · c9703d99 · 91c4f980 · 91c4f980
Commit 91c4f980 authored Jun 03, 2019 by 张彦钊
Expand all Hide whitespace changes
Inline Side-by-side

Showing with 4 additions and 8 deletions

to_database.py eda/esmm/Model_pipline/to_database.py +4 -8

train.py eda/esmm/Model_pipline/train.py +0 -0

No files found.
--- a/eda/esmm/Model_pipline/to_database.py
+++ b/eda/esmm/Model_pipline/to_database.py
@@ -19,12 +19,7 @@ def con_sql(sql):
    return result


-def nearby_set_join(lst):
-    # return ','.join([str(i) for i in list(lst)])
-    return ','.join([str(i) for i in lst.unique().tolist()])
-
-
-def native_set_join(lst):
+def set_join(lst):
    l = lst.unique().tolist()
    r = [str(i) for i in l]
    r =r[:500]
@@ -40,7 +35,7 @@ def main():
    df1 = pd.read_csv(path+"/native/pred.txt",sep='\t',header=None,names=["ctr","cvr","ctcvr"])
    df2["ctr"],df2["cvr"],df2["ctcvr"] = df1["ctr"],df1["cvr"],df1["ctcvr"]
    df3 = df2.groupby(by=["uid","city"]).apply(lambda x: x.sort_values(by="ctcvr",ascending=False))\
-        .reset_index(drop=True).groupby(by=["uid","city"]).agg({'cid_id':native_set_join}).reset_index(drop=False)
+        .reset_index(drop=True).groupby(by=["uid","city"]).agg({'cid_id':set_join}).reset_index(drop=False)
    df3.columns = ["device_id","city_id","native_queue"]
    print("native_device_count",df3.shape)

@@ -51,7 +46,8 @@ def main():

    df1 = pd.read_csv(path+"/nearby/pred.txt",sep='\t',header=None,names=["ctr","cvr","ctcvr"])
    df2["ctr"], df2["cvr"], df2["ctcvr"] = df1["ctr"], df1["cvr"], df1["ctcvr"]
-    df4 = df2.groupby(by=["uid","city"]).apply(lambda x: x.sort_values(by="ctcvr",ascending=False)).reset_index(drop=True).groupby(by=["uid","city"]).agg({'cid_id':nearby_set_join}).reset_index(drop=False)
+    df4 = df2.groupby(by=["uid","city"]).apply(lambda x: x.sort_values(by="ctcvr",ascending=False))\
+        .reset_index(drop=True).groupby(by=["uid","city"]).agg({'cid_id':set_join}).reset_index(drop=False)
    df4.columns = ["device_id","city_id","nearby_queue"]
    print("nearby_device_count",df4.shape)


--- a/eda/esmm/Model_pipline/train.py
+++ b/eda/esmm/Model_pipline/train.py