tag number for tractate

732bf4b4 · 赵威 · a522c51d · 732bf4b4 · 732bf4b4
Commit 732bf4b4 authored Aug 17, 2020 by 赵威
Hide whitespace changes
Inline Side-by-side

Showing with 29 additions and 4 deletions

tractate_fe.py src/models/esmm/fe/tractate_fe.py +23 -0

train_tractate.py src/train_tractate.py +6 -4

No files found.
--- a/src/models/esmm/fe/tractate_fe.py
+++ b/src/models/esmm/fe/tractate_fe.py
@@ -93,6 +93,13 @@ TRACTATE_COLUMNS = [
    "first_positions",
    "second_positions",
    "projects",
+    "first_demands_num",
+    "second_demands_num",
+    "first_solutions_num",
+    "second_solutions_num",
+    "first_positions_num",
+    "second_positions_num",
+    "projects_num",
 ]

 INT_COLUMNS = [
@@ -147,6 +154,13 @@ INT_COLUMNS = [
    "sixty_browse_user_num",
    "ninety_browse_user_num",
    "history_browse_user_num",
+    "first_demands_num",
+    "second_demands_num",
+    "first_solutions_num",
+    "second_solutions_num",
+    "first_positions_num",
+    "second_positions_num",
+    "projects_num",
 ]
 FLOAT_COLUMNS = [
    "one_ctr",
@@ -210,6 +224,7 @@ def get_tractate_dict_from_redis():
                    "second_positions", "projects"
            ]:
                tmp[col_name] = elem.split(",")
+                tmp[col_name + "_num"] = len(tmp[col_name])
            elif col_name in ["is_pure_author", "is_have_pure_reply", "is_have_reply"]:
                if elem == "true":
                    tmp[col_name] = 1
@@ -240,6 +255,14 @@ def tractate_feature_engineering(tractate_df):
    df["second_positions"] = df["second_positions"].apply(lambda d: d if isinstance(d, list) else [])
    df["projects"] = df["projects"].apply(lambda d: d if isinstance(d, list) else [])

+    df["first_demands_num"] = df["first_demands"].apply(lambda d: len(d))
+    df["second_demands_num"] = df["second_demands"].apply(lambda d: len(d))
+    df["first_solutions_num"] = df["first_solutions"].apply(lambda d: len(d))
+    df["second_solutions_num"] = df["second_solutions"].apply(lambda d: len(d))
+    df["first_positions_num"] = df["first_positions"].apply(lambda d: len(d))
+    df["second_positions_num"] = df["second_positions"].apply(lambda d: len(d))
+    df["projects_num"] = df["projects"].apply(lambda d: len(d))
+
    df["is_pure_author"] = df["is_pure_author"].astype(int)
    df["is_have_pure_reply"] = df["is_have_pure_reply"].astype(int)
    df["is_have_reply"] = df["is_have_reply"].astype(int)

--- a/src/train_tractate.py
+++ b/src/train_tractate.py
@@ -58,7 +58,8 @@ def main():
    estimator_config = tf.estimator.RunConfig(session_config=session_config)

    model = tf.estimator.Estimator(model_fn=esmm_model_fn, params=params, model_dir=model_path, config=estimator_config)
-    train_spec = tf.estimator.TrainSpec(input_fn=lambda: esmm_input_fn(train_df, shuffle=True), max_steps=50000)
+    # TODO: max_steps is temporarily lowered to 20000; restore it to 50000.
+    train_spec = tf.estimator.TrainSpec(input_fn=lambda: esmm_input_fn(train_df, shuffle=True), max_steps=20000)
    eval_spec = tf.estimator.EvalSpec(input_fn=lambda: esmm_input_fn(val_df, shuffle=False))
    res = tf.estimator.train_and_evaluate(model, train_spec, eval_spec)
    print("@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@")
@@ -70,7 +71,8 @@ def main():
    model_export_path = str(Path("/data/files/models/tractate/").expanduser())
    save_path = model_export(model, all_features, model_export_path)
    print("save to: " + save_path)
-    set_essm_model_save_path("tractate", save_path)
+    # TODO: re-enable the set_essm_model_save_path call below (temporarily disabled).
+    # set_essm_model_save_path("tractate", save_path)
    print("============================================================")

    # # save_path = str(Path("~/data/models/tractate/1596089465").expanduser())  # local
@@ -84,9 +86,9 @@ def main():
    device_ids = list(device_dict.keys())[:20]
    tractate_ids = list(tractate_dict.keys())

-    # print(len(device_dict), len(tractate_dict), "\n")
+    # TODO: debug printing; presumably the tractate_dict print below should be disabled again.
    # print(device_dict[device_ids[0]], "\n")
-    # print(tractate_dict[tractate_ids[0]], "\n")
+    print(tractate_dict[tractate_ids[0]], "\n")

    for i in range(5):
        time_1 = timeit.default_timer()