predict

757dd647 · 赵威 · 7389f013 · 757dd647
Commit 757dd647 authored Jul 22, 2020 by 赵威
Hide whitespace changes
Inline Side-by-side

Showing 1 changed file with 15 additions and 89 deletions

model.py src/models/esmm/model.py +15 -89

No files found.
--- a/src/models/esmm/model.py
+++ b/src/models/esmm/model.py
@@ -97,6 +97,7 @@ def _bytes_feature(value):

 def model_predict2(device_id, diary_ids, device_dict, diary_dict, predict_fn):
    time_1 = timeit.default_timer()
+    device_info, diary_lst = device_diary_fe(device_id, diary_ids, device_dict, diary_dict)
    int_columns = [
        "active_type", "active_days", "card_id", "is_pure_author", "is_have_reply", "is_have_pure_reply", "content_level",
        "topic_num", "favor_num", "vote_num"
@@ -108,96 +109,21 @@ def model_predict2(device_id, diary_ids, device_dict, diary_dict, predict_fn):
        "content_ss", "content_fp", "content_sp", "content_p", "fd1", "fd2", "fd3", "sd1", "sd2", "sd3", "fs1", "fs2", "fs3",
        "ss1", "ss2", "ss3", "fp1", "fp2", "fp3", "sp1", "sp2", "sp3", "p1", "p2", "p3"
    ]
-
-    device_info = device_dict.get(device_id, {}).copy()
-    device_fd = device_info.get("first_demands", [])
-    device_sd = device_info.get("second_demands", [])
-    device_fs = device_info.get("first_solutions", [])
-    device_ss = device_info.get("second_solutions", [])
-    device_fp = device_info.get("first_positions", [])
-    device_sp = device_info.get("second_positions", [])
-    device_p = device_info.get("projects", [])
-    device_info["device_fd"] = nth_element(device_fd, 0)
-    device_info["device_sd"] = nth_element(device_sd, 0)
-    device_info["device_fs"] = nth_element(device_fs, 0)
-    device_info["device_ss"] = nth_element(device_ss, 0)
-    device_info["device_fp"] = nth_element(device_fp, 0)
-    device_info["device_sp"] = nth_element(device_sp, 0)
-    device_info["device_p"] = nth_element(device_p, 0)
-    # diary_lst = []
-
    examples = []
-    for id in diary_ids:
-        diary_info = diary_dict.get(id, {}).copy()
-        if diary_info:
-            diary_fd = diary_info.get("first_demands", [])
-            diary_sd = diary_info.get("second_demands", [])
-            diary_fs = diary_info.get("first_solutions", [])
-            diary_ss = diary_info.get("second_solutions", [])
-            diary_fp = diary_info.get("first_positions", [])
-            diary_sp = diary_info.get("second_positions", [])
-            diary_p = diary_info.get("projects", [])
-            common_fd = common_elements(device_fd, diary_fd)
-            common_sd = common_elements(device_sd, diary_sd)
-            common_fs = common_elements(device_fs, diary_fs)
-            common_ss = common_elements(device_ss, diary_ss)
-            common_fp = common_elements(device_fp, diary_fp)
-            common_sp = common_elements(device_sp, diary_sp)
-            common_p = common_elements(device_p, diary_p)
-            diary_info["content_fd"] = nth_element(diary_fd, 0)
-            diary_info["content_sd"] = nth_element(diary_sd, 0)
-            diary_info["content_fs"] = nth_element(diary_fs, 0)
-            diary_info["content_ss"] = nth_element(diary_ss, 0)
-            diary_info["content_fp"] = nth_element(diary_fp, 0)
-            diary_info["content_sp"] = nth_element(diary_sp, 0)
-            diary_info["content_p"] = nth_element(diary_p, 0)
-            diary_info["fd1"] = nth_element(common_fd, 0)
-            diary_info["fd2"] = nth_element(common_fd, 1)
-            diary_info["fd3"] = nth_element(common_fd, 2)
-            diary_info["sd1"] = nth_element(common_sd, 0)
-            diary_info["sd2"] = nth_element(common_sd, 1)
-            diary_info["sd3"] = nth_element(common_sd, 2)
-            diary_info["fs1"] = nth_element(common_fs, 0)
-            diary_info["fs2"] = nth_element(common_fs, 1)
-            diary_info["fs3"] = nth_element(common_fs, 2)
-            diary_info["ss1"] = nth_element(common_ss, 0)
-            diary_info["ss2"] = nth_element(common_ss, 1)
-            diary_info["ss3"] = nth_element(common_ss, 2)
-            diary_info["fp1"] = nth_element(common_fp, 0)
-            diary_info["fp2"] = nth_element(common_fp, 1)
-            diary_info["fp3"] = nth_element(common_fp, 2)
-            diary_info["sp1"] = nth_element(common_sp, 0)
-            diary_info["sp2"] = nth_element(common_sp, 1)
-            diary_info["sp3"] = nth_element(common_sp, 2)
-            diary_info["p1"] = nth_element(common_p, 0)
-            diary_info["p2"] = nth_element(common_p, 1)
-            diary_info["p3"] = nth_element(common_p, 2)
-            # diary_lst.append(diary_info)
-            tmp = {}
-            tmp.update(device_info)
-            tmp.update(diary_info)
-            features = {}
-            for col in int_columns:
-                features[col] = _int64_feature(int(tmp[col]))
-            for col in float_columns:
-                features[col] = _float_feature(float(tmp[col]))
-            for col in str_columns:
-                features[col] = _bytes_feature(str(tmp[col]).encode(encoding="utf-8"))
-            example = tf.train.Example(features=tf.train.Features(feature=features))
-            examples.append(example.SerializeToString())
-
-    #     tmp = {}
-    #     tmp.update(device_info)
-    #     tmp.update(diary_info)
-    #     features = {}
-    #     for col in int_columns:
-    #         features[col] = _int64_feature(int(tmp[col]))
-    #     for col in float_columns:
-    #         features[col] = _float_feature(float(tmp[col]))
-    #     for col in str_columns:
-    #         features[col] = _bytes_feature(tmp[col])
-    #     example = tf.train.Example(features=tf.train.Features(feature=features))
-    #     examples.append(example.SerializeToString())
+    for diary_info in diary_lst:
+        tmp = {}
+        tmp.update(device_info)
+        tmp.update(diary_info)
+        features = {}
+        for col in int_columns:
+            features[col] = _int64_feature(int(tmp[col]))
+        for col in float_columns:
+            features[col] = _float_feature(float(tmp[col]))
+        for col in str_columns:
+            features[col] = _bytes_feature(str(tmp[col]).encode(encoding="utf-8"))
+        example = tf.train.Example(features=tf.train.Features(feature=features))
+        examples.append(example.SerializeToString())
+
    total_1 = (timeit.default_timer() - time_1)
    print("make example cost {:.5f}s".format(total_1))