try predict

f9fc2b16 · 赵威 · 68f0379a · f9fc2b16 · f9fc2b16 · f9fc2b16
Commit f9fc2b16 authored Jul 22, 2020 by 赵威
Hide whitespace changes
Inline Side-by-side

Showing with 310 additions and 151 deletions

main.py src/main.py +80 -70

fe.py src/models/esmm/fe.py +189 -77

model.py src/models/esmm/model.py +41 -4

No files found.
--- a/src/main.py
+++ b/src/main.py
@@ -12,9 +12,10 @@ import tensorflow as tf
 from sklearn.model_selection import train_test_split
 from models.esmm.fe import (click_feature_engineering, device_feature_engineering, diary_feature_engineering,
-                            get_device_df_from_redis, get_diary_df_from_redis, join_device_diary, join_features, read_csv_data)
+                            get_device_dict_from_redis, get_diary_dict_from_redis, join_device_diary, join_features,
+                            read_csv_data)
 from models.esmm.input_fn import build_features, esmm_input_fn
-from models.esmm.model import esmm_model_fn, model_export, model_predict
+from models.esmm.model import (esmm_model_fn, model_export, model_predict, model_predict2)
 # tf.compat.v1.enable_eager_execution()
@@ -66,9 +67,10 @@ def main():
    # print("save to: " + save_path)
    save_path = "/home/gmuser/data/models/1595317247"
+    # save_path = str(Path("~/Desktop/models/1595297428").expanduser())
    predict_fn = tf.contrib.predictor.from_saved_model(save_path)
-    # for i in range(10):
+    # for i in range(5):
    #     test_300 = test_df.sample(300)
    #     model_predict(test_300, predict_fn)
@@ -78,73 +80,81 @@ def main():
    #     "16195283", "16838351", "17161073", "17297878", "17307484", "17396235", "16418737", "16995481", "17312201", "12237988"
    # ]
-    df = get_device_df_from_redis()
+    # df = get_device_df_from_redis()
-    df2 = get_diary_df_from_redis()
+    # df2 = get_diary_df_from_redis()
-    redis_device_df = device_feature_engineering(df)
+    # redis_device_df = device_feature_engineering(df)
-    redis_diary_df = diary_feature_engineering(df2, from_redis=True)
+    # redis_diary_df = diary_feature_engineering(df2, from_redis=True)
-    device_ids = list(redis_device_df["device_id"].values)[:20]
+    # device_ids = list(redis_device_df["device_id"].values)[:20]
-    diary_ids = list(redis_diary_df["card_id"].values)
+    # diary_ids = list(redis_diary_df["card_id"].values)
-    def test1():
+    device_dict = get_device_dict_from_redis()
-        time_1 = timeit.default_timer()
+    diary_dict = get_diary_dict_from_redis()
-        user1 = join_device_diary(random.sample(device_ids, 1)[0], random.sample(diary_ids, 300), redis_device_df, redis_diary_df)
-        total_1 = (timeit.default_timer() - time_1)
+    device_ids = list(device_dict.keys())[:20]
-        print("join df cost {:.5f}s".format(total_1))
+    diary_ids = list(diary_dict.keys())
-        time_1 = timeit.default_timer()
+    model_predict2(random.sample(device_ids, 1)[0], random.sample(diary_ids, 300), device_dict, diary_dict, predict_fn)
-        model_predict(user1, predict_fn)
-        total_1 = (timeit.default_timer() - time_1)
+    # def test1():
-        print("total prediction cost {:.5f}s".format(total_1), "\n")
+    #     time_1 = timeit.default_timer()
+    #     user1 = join_device_diary(random.sample(device_ids, 1)[0], random.sample(diary_ids, 300), redis_device_df, redis_diary_df)
-    def test2():
+    #     total_1 = (timeit.default_timer() - time_1)
-        time_1 = timeit.default_timer()
+    #     print("join df cost {:.5f}s".format(total_1))
-        user1 = join_device_diary(random.sample(device_ids, 1)[0], random.sample(diary_ids, 300), redis_device_df, redis_diary_df)
-        total_1 = (timeit.default_timer() - time_1)
+    #     time_1 = timeit.default_timer()
-        print("join df cost {:.5f}s".format(total_1))
+    #     model_predict(user1, predict_fn)
+    #     total_1 = (timeit.default_timer() - time_1)
-        time_1 = timeit.default_timer()
+    #     print("total prediction cost {:.5f}s".format(total_1), "\n")
-        model_predict(user1, predict_fn)
-        total_1 = (timeit.default_timer() - time_1)
+    # def test2():
-        print("total prediction cost {:.5f}s".format(total_1), "\n")
+    #     time_1 = timeit.default_timer()
+    #     user1 = join_device_diary(random.sample(device_ids, 1)[0], random.sample(diary_ids, 300), redis_device_df, redis_diary_df)
-    def test3():
+    #     total_1 = (timeit.default_timer() - time_1)
-        time_1 = timeit.default_timer()
+    #     print("join df cost {:.5f}s".format(total_1))
-        user1 = join_device_diary(random.sample(device_ids, 1)[0], random.sample(diary_ids, 300), redis_device_df, redis_diary_df)
-        total_1 = (timeit.default_timer() - time_1)
+    #     time_1 = timeit.default_timer()
-        print("join df cost {:.5f}s".format(total_1))
+    #     model_predict(user1, predict_fn)
+    #     total_1 = (timeit.default_timer() - time_1)
-        time_1 = timeit.default_timer()
+    #     print("total prediction cost {:.5f}s".format(total_1), "\n")
-        model_predict(user1, predict_fn)
-        total_1 = (timeit.default_timer() - time_1)
+    # def test3():
-        print("total prediction cost {:.5f}s".format(total_1), "\n")
+    #     time_1 = timeit.default_timer()
+    #     user1 = join_device_diary(random.sample(device_ids, 1)[0], random.sample(diary_ids, 300), redis_device_df, redis_diary_df)
-    def test4():
+    #     total_1 = (timeit.default_timer() - time_1)
-        time_1 = timeit.default_timer()
+    #     print("join df cost {:.5f}s".format(total_1))
-        user1 = join_device_diary(random.sample(device_ids, 1)[0], random.sample(diary_ids, 300), redis_device_df, redis_diary_df)
-        total_1 = (timeit.default_timer() - time_1)
+    #     time_1 = timeit.default_timer()
-        print("join df cost {:.5f}s".format(total_1))
+    #     model_predict(user1, predict_fn)
+    #     total_1 = (timeit.default_timer() - time_1)
-        time_1 = timeit.default_timer()
+    #     print("total prediction cost {:.5f}s".format(total_1), "\n")
-        model_predict(user1, predict_fn)
-        total_1 = (timeit.default_timer() - time_1)
+    # def test4():
-        print("total prediction cost {:.5f}s".format(total_1), "\n")
+    #     time_1 = timeit.default_timer()
+    #     user1 = join_device_diary(random.sample(device_ids, 1)[0], random.sample(diary_ids, 300), redis_device_df, redis_diary_df)
-    def test5():
+    #     total_1 = (timeit.default_timer() - time_1)
-        time_1 = timeit.default_timer()
+    #     print("join df cost {:.5f}s".format(total_1))
-        user1 = join_device_diary(random.sample(device_ids, 1)[0], random.sample(diary_ids, 300), redis_device_df, redis_diary_df)
-        total_1 = (timeit.default_timer() - time_1)
+    #     time_1 = timeit.default_timer()
-        print("join df cost {:.5f}s".format(total_1))
+    #     model_predict(user1, predict_fn)
+    #     total_1 = (timeit.default_timer() - time_1)
-        time_1 = timeit.default_timer()
+    #     print("total prediction cost {:.5f}s".format(total_1), "\n")
-        model_predict(user1, predict_fn)
-        total_1 = (timeit.default_timer() - time_1)
+    # def test5():
-        print("total prediction cost {:.5f}s".format(total_1), "\n")
+    #     time_1 = timeit.default_timer()
+    #     user1 = join_device_diary(random.sample(device_ids, 1)[0], random.sample(diary_ids, 300), redis_device_df, redis_diary_df)
-    test1()
+    #     total_1 = (timeit.default_timer() - time_1)
-    test2()
+    #     print("join df cost {:.5f}s".format(total_1))
-    test3()
-    test4()
+    #     time_1 = timeit.default_timer()
-    test5()
+    #     model_predict(user1, predict_fn)
+    #     total_1 = (timeit.default_timer() - time_1)
+    #     print("total prediction cost {:.5f}s".format(total_1), "\n")
+    # test1()
+    # test2()
+    # test3()
+    # test4()
+    # test5()
    total_time = (time.time() - time_begin) / 60
    print("total cost {:.2f} mins at {}".format(total_time, datetime.now()))

--- a/src/models/esmm/fe.py
+++ b/src/models/esmm/fe.py
+import timeit
 import pandas as pd
 from utils.cache import redis_db_client
@@ -15,24 +17,65 @@ def read_csv_data(dataset_path):
    return device_df, diary_df, click_df, conversion_df
-def _get_data_from_redis(key):
+# def _get_data_from_redis(key):
-    column_key = key + ":column"
+#     column_key = key + ":column"
-    d = redis_db_client.hgetall(key)
+#     d = redis_db_client.hgetall(key)
-    tmp = d.values()
+#     tmp = d.values()
-    lists = []
+#     lists = []
-    for i in tmp:
+#     for i in tmp:
-        lists.append(str(i, "utf-8").split("|"))
+#         lists.append(str(i, "utf-8").split("|"))
-    columns = str(redis_db_client.get(column_key), "utf-8").split("|")
+#     columns = str(redis_db_client.get(column_key), "utf-8").split("|")
-    df = pd.DataFrame(lists, columns=columns)
+#     df = pd.DataFrame(lists, columns=columns)
-    return df
+#     return df
-def get_device_df_from_redis():
-    return _get_data_from_redis("cvr:db:device")
-def get_diary_df_from_redis():
+def get_device_dict_from_redis():
-    return _get_data_from_redis("cvr:db:content:diary")
+    """Load every device row from Redis into a dict keyed by ``device_id``.
+
+    Column names come from the ``cvr:db:device:column`` string and the rows
+    from the values of the ``cvr:db:device`` hash; both are decoded as UTF-8
+    and split on "|". The tag columns listed in ``list_columns`` are further
+    split on "," into lists; every other value is kept as a string.
+
+    Returns:
+        dict mapping each row's ``device_id`` value to a dict of
+        column name -> parsed value.
+
+    Raises:
+        KeyError: if a row has no ``device_id`` column.
+        IndexError: if a row has more fields than there are column names.
+    """
+    db_key = "cvr:db:device"
+    column_key = db_key + ":column"
+    columns = str(redis_db_client.get(column_key), "utf-8").split("|")
+    d = redis_db_client.hgetall(db_key)
+    # Columns whose raw value is a comma-separated list of tags.
+    list_columns = {
+        "first_demands", "second_demands", "first_solutions", "second_solutions", "first_positions",
+        "second_positions", "projects"
+    }
+    res = {}
+    for i in d.values():
+        row_list = str(i, "utf-8").split("|")
+        tmp = {}
+        for (index, elem) in enumerate(row_list):
+            col_name = columns[index]
+            if col_name in list_columns:
+                tmp[col_name] = elem.split(",")
+            else:
+                tmp[col_name] = elem
+        # Register the row only after all of its fields are parsed. Doing this
+        # inside the field loop raised KeyError whenever "device_id" was not
+        # the first column, and re-assigned the same entry on every field.
+        res[tmp["device_id"]] = tmp
+    return res
+def get_diary_dict_from_redis():
+    """Load every diary row from Redis into a dict keyed by ``card_id``.
+
+    Column names come from the ``cvr:db:content:diary:column`` string and the
+    rows from the values of the ``cvr:db:content:diary`` hash; both are
+    decoded as UTF-8 and split on "|". Parsing rules per column:
+
+    * tag columns (``list_columns``) are split on "," into lists;
+    * flag columns (``bool_columns``) become 1 when the raw text is exactly
+      "true" and 0 for anything else;
+    * every other value is kept as a string.
+
+    Returns:
+        dict mapping each row's ``card_id`` value to a dict of
+        column name -> parsed value.
+
+    Raises:
+        KeyError: if a row has no ``card_id`` column.
+        IndexError: if a row has more fields than there are column names.
+    """
+    db_key = "cvr:db:content:diary"
+    column_key = db_key + ":column"
+    columns = str(redis_db_client.get(column_key), "utf-8").split("|")
+    d = redis_db_client.hgetall(db_key)
+    # Columns whose raw value is a comma-separated list of tags.
+    list_columns = {
+        "first_demands", "second_demands", "first_solutions", "second_solutions", "first_positions",
+        "second_positions", "projects"
+    }
+    # Columns stored as the text "true"/"false" that are exposed as 1/0.
+    bool_columns = {"is_pure_author", "is_have_pure_reply", "is_have_reply"}
+    res = {}
+    for i in d.values():
+        row_list = str(i, "utf-8").split("|")
+        tmp = {}
+        for (index, elem) in enumerate(row_list):
+            col_name = columns[index]
+            if col_name in list_columns:
+                tmp[col_name] = elem.split(",")
+            elif col_name in bool_columns:
+                tmp[col_name] = 1 if elem == "true" else 0
+            else:
+                tmp[col_name] = elem
+        # Register the row only after all of its fields are parsed. Doing this
+        # inside the field loop raised KeyError whenever "card_id" was not the
+        # first column, and re-assigned the same entry on every field.
+        res[tmp["card_id"]] = tmp
+    return res
 def device_feature_engineering(df):
@@ -195,64 +238,133 @@ def join_features(device_df, diary_df, cc_df):
    return df
-def join_device_diary(device_id, diary_ids, device_df, diary_df):
+# def join_device_diary(device_id, diary_ids, device_df, diary_df):
-    a_df = device_df.loc[device_df["device_id"] == device_id]
+#     a_df = device_df.loc[device_df["device_id"] == device_id]
-    b_df = diary_df.loc[diary_df["card_id"].isin(diary_ids)]
+#     b_df = diary_df.loc[diary_df["card_id"].isin(diary_ids)]
-    b_df["device_id"] = device_id
+#     b_df["device_id"] = device_id
-    df = pd.merge(a_df, b_df, how="left", on="device_id")
+#     df = pd.merge(a_df, b_df, how="left", on="device_id")
-    df["first_demands"] = df[["first_demands_x", "first_demands_y"]].apply(lambda x: common_elements(*x), axis=1)
+#     df["first_demands"] = df[["first_demands_x", "first_demands_y"]].apply(lambda x: common_elements(*x), axis=1)
-    df["second_demands"] = df[["second_demands_x", "second_demands_y"]].apply(lambda x: common_elements(*x), axis=1)
+#     df["second_demands"] = df[["second_demands_x", "second_demands_y"]].apply(lambda x: common_elements(*x), axis=1)
-    df["first_solutions"] = df[["first_solutions_x", "first_solutions_y"]].apply(lambda x: common_elements(*x), axis=1)
+#     df["first_solutions"] = df[["first_solutions_x", "first_solutions_y"]].apply(lambda x: common_elements(*x), axis=1)
-    df["second_solutions"] = df[["second_solutions_x", "second_solutions_y"]].apply(lambda x: common_elements(*x), axis=1)
+#     df["second_solutions"] = df[["second_solutions_x", "second_solutions_y"]].apply(lambda x: common_elements(*x), axis=1)
-    df["first_positions"] = df[["first_positions_x", "second_positions_y"]].apply(lambda x: common_elements(*x), axis=1)
+#     df["first_positions"] = df[["first_positions_x", "second_positions_y"]].apply(lambda x: common_elements(*x), axis=1)
-    df["second_positions"] = df[["second_positions_x", "second_positions_y"]].apply(lambda x: common_elements(*x), axis=1)
+#     df["second_positions"] = df[["second_positions_x", "second_positions_y"]].apply(lambda x: common_elements(*x), axis=1)
-    df["projects"] = df[["projects_x", "projects_y"]].apply(lambda x: common_elements(*x), axis=1)
+#     df["projects"] = df[["projects_x", "projects_y"]].apply(lambda x: common_elements(*x), axis=1)
-    df["device_fd"] = df["first_demands_x"].apply(lambda x: nth_element(x, 0))
+#     df["device_fd"] = df["first_demands_x"].apply(lambda x: nth_element(x, 0))
-    df["device_sd"] = df["second_demands_x"].apply(lambda x: nth_element(x, 0))
+#     df["device_sd"] = df["second_demands_x"].apply(lambda x: nth_element(x, 0))
-    df["device_fs"] = df["first_solutions_x"].apply(lambda x: nth_element(x, 0))
+#     df["device_fs"] = df["first_solutions_x"].apply(lambda x: nth_element(x, 0))
-    df["device_ss"] = df["second_solutions_x"].apply(lambda x: nth_element(x, 0))
+#     df["device_ss"] = df["second_solutions_x"].apply(lambda x: nth_element(x, 0))
-    df["device_fp"] = df["first_positions_x"].apply(lambda x: nth_element(x, 0))
+#     df["device_fp"] = df["first_positions_x"].apply(lambda x: nth_element(x, 0))
-    df["device_sp"] = df["second_positions_x"].apply(lambda x: nth_element(x, 0))
+#     df["device_sp"] = df["second_positions_x"].apply(lambda x: nth_element(x, 0))
-    df["device_p"] = df["projects_x"].apply(lambda x: nth_element(x, 0))
+#     df["device_p"] = df["projects_x"].apply(lambda x: nth_element(x, 0))
-    df["content_fd"] = df["first_demands_y"].apply(lambda x: nth_element(x, 0))
+#     df["content_fd"] = df["first_demands_y"].apply(lambda x: nth_element(x, 0))
-    df["content_sd"] = df["second_demands_y"].apply(lambda x: nth_element(x, 0))
+#     df["content_sd"] = df["second_demands_y"].apply(lambda x: nth_element(x, 0))
-    df["content_fs"] = df["first_solutions_y"].apply(lambda x: nth_element(x, 0))
+#     df["content_fs"] = df["first_solutions_y"].apply(lambda x: nth_element(x, 0))
-    df["content_ss"] = df["second_solutions_y"].apply(lambda x: nth_element(x, 0))
+#     df["content_ss"] = df["second_solutions_y"].apply(lambda x: nth_element(x, 0))
-    df["content_fp"] = df["first_positions_y"].apply(lambda x: nth_element(x, 0))
+#     df["content_fp"] = df["first_positions_y"].apply(lambda x: nth_element(x, 0))
-    df["content_sp"] = df["second_positions_y"].apply(lambda x: nth_element(x, 0))
+#     df["content_sp"] = df["second_positions_y"].apply(lambda x: nth_element(x, 0))
-    df["content_p"] = df["projects_y"].apply(lambda x: nth_element(x, 0))
+#     df["content_p"] = df["projects_y"].apply(lambda x: nth_element(x, 0))
-    df["fd1"] = df["first_demands"].apply(lambda x: nth_element(x, 0))
+#     df["fd1"] = df["first_demands"].apply(lambda x: nth_element(x, 0))
-    df["fd2"] = df["first_demands"].apply(lambda x: nth_element(x, 1))
+#     df["fd2"] = df["first_demands"].apply(lambda x: nth_element(x, 1))
-    df["fd3"] = df["first_demands"].apply(lambda x: nth_element(x, 2))
+#     df["fd3"] = df["first_demands"].apply(lambda x: nth_element(x, 2))
-    df["sd1"] = df["second_demands"].apply(lambda x: nth_element(x, 0))
+#     df["sd1"] = df["second_demands"].apply(lambda x: nth_element(x, 0))
-    df["sd2"] = df["second_demands"].apply(lambda x: nth_element(x, 1))
+#     df["sd2"] = df["second_demands"].apply(lambda x: nth_element(x, 1))
-    df["sd3"] = df["second_demands"].apply(lambda x: nth_element(x, 2))
+#     df["sd3"] = df["second_demands"].apply(lambda x: nth_element(x, 2))
-    df["fs1"] = df["first_solutions"].apply(lambda x: nth_element(x, 0))
+#     df["fs1"] = df["first_solutions"].apply(lambda x: nth_element(x, 0))
-    df["fs2"] = df["first_solutions"].apply(lambda x: nth_element(x, 1))
+#     df["fs2"] = df["first_solutions"].apply(lambda x: nth_element(x, 1))
-    df["fs3"] = df["first_solutions"].apply(lambda x: nth_element(x, 2))
+#     df["fs3"] = df["first_solutions"].apply(lambda x: nth_element(x, 2))
-    df["ss1"] = df["second_solutions"].apply(lambda x: nth_element(x, 0))
+#     df["ss1"] = df["second_solutions"].apply(lambda x: nth_element(x, 0))
-    df["ss2"] = df["second_solutions"].apply(lambda x: nth_element(x, 1))
+#     df["ss2"] = df["second_solutions"].apply(lambda x: nth_element(x, 1))
-    df["ss3"] = df["second_solutions"].apply(lambda x: nth_element(x, 2))
+#     df["ss3"] = df["second_solutions"].apply(lambda x: nth_element(x, 2))
-    df["fp1"] = df["first_positions"].apply(lambda x: nth_element(x, 0))
+#     df["fp1"] = df["first_positions"].apply(lambda x: nth_element(x, 0))
-    df["fp2"] = df["first_positions"].apply(lambda x: nth_element(x, 1))
+#     df["fp2"] = df["first_positions"].apply(lambda x: nth_element(x, 1))
-    df["fp3"] = df["first_positions"].apply(lambda x: nth_element(x, 2))
+#     df["fp3"] = df["first_positions"].apply(lambda x: nth_element(x, 2))
-    df["sp1"] = df["second_positions"].apply(lambda x: nth_element(x, 0))
+#     df["sp1"] = df["second_positions"].apply(lambda x: nth_element(x, 0))
-    df["sp2"] = df["second_positions"].apply(lambda x: nth_element(x, 1))
+#     df["sp2"] = df["second_positions"].apply(lambda x: nth_element(x, 1))
-    df["sp3"] = df["second_positions"].apply(lambda x: nth_element(x, 2))
+#     df["sp3"] = df["second_positions"].apply(lambda x: nth_element(x, 2))
-    df["p1"] = df["projects"].apply(lambda x: nth_element(x, 0))
+#     df["p1"] = df["projects"].apply(lambda x: nth_element(x, 0))
-    df["p2"] = df["projects"].apply(lambda x: nth_element(x, 1))
+#     df["p2"] = df["projects"].apply(lambda x: nth_element(x, 1))
-    df["p3"] = df["projects"].apply(lambda x: nth_element(x, 2))
+#     df["p3"] = df["projects"].apply(lambda x: nth_element(x, 2))
-    drop_columns = [
+#     drop_columns = [
-        "first_demands_x", "first_demands_y", "first_demands", "second_demands_x", "second_demands_y", "second_demands",
+#         "first_demands_x", "first_demands_y", "first_demands", "second_demands_x", "second_demands_y", "second_demands",
-        "first_solutions_x", "first_solutions_y", "first_solutions", "second_solutions_x", "second_solutions_y",
+#         "first_solutions_x", "first_solutions_y", "first_solutions", "second_solutions_x", "second_solutions_y",
-        "second_solutions", "first_positions_x", "first_positions_y", "first_positions", "second_positions_x",
+#         "second_solutions", "first_positions_x", "first_positions_y", "first_positions", "second_positions_x",
-        "second_positions_y", "second_positions", "projects_x", "projects_y", "projects"
+#         "second_positions_y", "second_positions", "projects_x", "projects_y", "projects"
-    ]
+#     ]
-    df.drop(drop_columns, inplace=True, axis=1)
+#     df.drop(drop_columns, inplace=True, axis=1)
-    return df
+#     return df
+def device_diary_fe(device_id, diary_ids, device_dict, diary_dict):
+    """Build per-request features for one device against a list of diaries.
+
+    Args:
+        device_id: key into ``device_dict``; an unknown id yields a device
+            record that contains only the derived ``device_*`` keys.
+        diary_ids: iterable of keys into ``diary_dict``; ids that are missing
+            (or map to an empty record) are skipped.
+        device_dict: mapping of device id -> column dict.
+        diary_dict: mapping of diary id -> column dict.
+
+    Returns:
+        ``(device_info, diary_lst)`` where ``device_info`` is a shallow copy
+        of the device record plus ``device_fd`` ... ``device_p``, and
+        ``diary_lst`` holds one shallow copy per found diary, extended with
+        ``content_fd`` ... ``content_p`` (also exposed under the ``diary_*``
+        names) and the ``fd1`` ... ``p3`` keys.
+
+    The input dicts are not modified; the elapsed time is printed.
+    NOTE(review): ``nth_element`` and ``common_elements`` are defined outside
+    this view; presumably "n-th item or a default" and "items shared by both
+    lists" -- confirm against their definitions.
+    """
+    time_1 = timeit.default_timer()
+    # (source column, short suffix used in the derived feature names)
+    tag_fields = (
+        ("first_demands", "fd"),
+        ("second_demands", "sd"),
+        ("first_solutions", "fs"),
+        ("second_solutions", "ss"),
+        ("first_positions", "fp"),
+        ("second_positions", "sp"),
+        ("projects", "p"),
+    )
+
+    device_info = device_dict.get(device_id, {}).copy()
+    device_tags = {}
+    for column, suffix in tag_fields:
+        device_tags[column] = device_info.get(column, [])
+        device_info["device_" + suffix] = nth_element(device_tags[column], 0)
+
+    diary_lst = []
+    for diary_id in diary_ids:
+        diary_info = diary_dict.get(diary_id, {}).copy()
+        if not diary_info:
+            continue
+        for column, suffix in tag_fields:
+            diary_tags = diary_info.get(column, [])
+            common = common_elements(device_tags[column], diary_tags)
+            first_tag = nth_element(diary_tags, 0)
+            # model_predict2 only serializes the "content_*" names; emitting
+            # just "diary_*" meant these features were silently dropped from
+            # every Example. "diary_*" is kept for existing readers.
+            diary_info["content_" + suffix] = first_tag
+            diary_info["diary_" + suffix] = first_tag
+            # fd1..fd3, sd1..sd3, ...: first three device/diary shared tags.
+            for rank in range(3):
+                diary_info["{}{}".format(suffix, rank + 1)] = nth_element(common, rank)
+        diary_lst.append(diary_info)
+
+    total_1 = (timeit.default_timer() - time_1)
+    print("join device diary cost {:.5f}s".format(total_1))
+    return device_info, diary_lst
--- a/src/models/esmm/model.py
+++ b/src/models/esmm/model.py
 import timeit
-import numba
 import tensorflow as tf
 from tensorflow import feature_column as fc
 from tensorflow.python.estimator.canned import head as head_lib
 from tensorflow.python.ops.losses import losses
+from .fe import device_diary_fe
+from .utils import common_elements, nth_element
 def build_deep_layer(net, params):
    for num_hidden_units in params["hidden_units"]:
@@ -92,6 +94,41 @@ def _bytes_feature(value):
    return tf.train.Feature(bytes_list=tf.train.BytesList(value=[value]))
+def model_predict2(device_id, diary_ids, device_dict, diary_dict, predict_fn):
+    """Score one device against a batch of diaries with a loaded predictor.
+
+    Features come from ``device_diary_fe``. For every diary returned, the
+    device and diary dicts are merged (diary values win on key clashes) and
+    turned into a serialized ``tf.train.Example``. Only columns named in the
+    three lists below are serialized; any other key is ignored.
+
+    Args:
+        device_id: id of the device to score.
+        diary_ids: ids of the candidate diaries.
+        device_dict: mapping of device id -> column dict.
+        diary_dict: mapping of diary id -> column dict.
+        predict_fn: callable invoked as ``predict_fn({"examples": [...]})``.
+
+    Returns:
+        Whatever ``predict_fn`` returns. The elapsed time is printed.
+    """
+    started_at = timeit.default_timer()
+    device_info, diary_lst = device_diary_fe(device_id, diary_ids, device_dict, diary_dict)
+
+    int_columns = [
+        "active_type", "active_days", "card_id", "is_pure_author", "is_have_reply", "is_have_pure_reply", "content_level",
+        "topic_num", "favor_num", "vote_num"
+    ]
+    float_columns = ["one_ctr", "three_ctr", "seven_ctr", "fifteen_ctr"]
+    str_columns = [
+        "device_id", "past_consume_ability_history", "potential_consume_ability_history", "price_sensitive_history", "device_fd",
+        "device_sd", "device_fs", "device_ss", "device_fp", "device_sp", "device_p", "content_fd", "content_sd", "content_fs",
+        "content_ss", "content_fp", "content_sp", "content_p", "fd1", "fd2", "fd3", "sd1", "sd2", "sd3", "fs1", "fs2", "fs3",
+        "ss1", "ss2", "ss3", "fp1", "fp2", "fp3", "sp1", "sp2", "sp3", "p1", "p2", "p3"
+    ]
+
+    examples = []
+    for diary_info in diary_lst:
+        # Device columns first, then diary columns on top of them.
+        merged = dict(device_info)
+        merged.update(diary_info)
+
+        features = {}
+        for col, value in merged.items():
+            if col in int_columns:
+                features[col] = _int64_feature(int(value))
+                continue
+            if col in float_columns:
+                features[col] = _float_feature(float(value))
+                continue
+            if col in str_columns:
+                features[col] = _bytes_feature(str(value).encode(encoding="utf-8"))
+
+        feature_set = tf.train.Features(feature=features)
+        examples.append(tf.train.Example(features=feature_set).SerializeToString())
+
+    predictions = predict_fn({"examples": examples})
+    elapsed = timeit.default_timer() - started_at
+    print("prediction cost {:.5f}s".format(elapsed))
+    return predictions
 def model_predict(inputs, predict_fn):
    time_1 = timeit.default_timer()
    int_columns = [
@@ -106,11 +143,11 @@ def model_predict(inputs, predict_fn):
            if col in ["click_label", "conversion_label"]:
                pass
            elif col in int_columns:
-                features[col] = tf.train.Feature(int64_list=tf.train.Int64List(value=[int(value)]))
+                features[col] = _int64_feature(int(value))
            elif col in float_columns:
-                features[col] = tf.train.Feature(float_list=tf.train.FloatList(value=[float(value)]))
+                features[col] = _float_feature(float(value))
            else:
-                features[col] = tf.train.Feature(bytes_list=tf.train.BytesList(value=[str(value).encode(encoding="utf-8")]))
+                features[col] = _bytes_feature(str(value).encode(encoding="utf-8"))
        example = tf.train.Example(features=tf.train.Features(feature=features))
        examples.append(example.SerializeToString())
    total_1 = (timeit.default_timer() - time_1)