add ctr feature for diary

34560c08 · 赵威 · 8ee5f546 · 34560c08 · 34560c08 · 34560c08
Commit 34560c08 authored Aug 13, 2020 by 赵威
Show whitespace changes
Inline Side-by-side

Showing 3 changed files with 15 additions and 12 deletions

diary_model.py src/models/esmm/diary_model.py +2 -2

diary_fe.py src/models/esmm/fe/diary_fe.py +5 -4

train_diary.py src/train_diary.py +8 -6

No files found.
--- a/src/models/esmm/diary_model.py
+++ b/src/models/esmm/diary_model.py
@@ -7,9 +7,9 @@ from .model import _bytes_feature, _float_feature, _int64_feature

 _int_columns = [
    "active_type", "active_days", "card_id", "is_pure_author", "is_have_reply", "is_have_pure_reply", "content_level",
-    "topic_num", "favor_num", "vote_num"
+    "topic_num", "favor_num", "vote_num", "topic_seven_click_num", "topic_thirty_click_num"
 ]
-_float_columns = ["one_ctr", "three_ctr", "seven_ctr", "fifteen_ctr"]
+_float_columns = ["one_ctr", "three_ctr", "seven_ctr", "fifteen_ctr", "thirty_ctr", "sixty_ctr", "ninety_ctr", "history_ctr"]
 _categorical_columns = [
    "device_id", "past_consume_ability_history", "potential_consume_ability_history", "price_sensitive_history", "device_fd",
    "device_sd", "device_fs", "device_ss", "device_fp", "device_sp", "device_p", "content_fd", "content_sd", "content_fs",

--- a/src/models/esmm/fe/diary_fe.py
+++ b/src/models/esmm/fe/diary_fe.py
@@ -6,12 +6,13 @@ from utils.cache import redis_db_client
 from ..utils import common_elements, nth_element

 DIARY_COLUMNS = [
-    "card_id", "is_pure_author", "is_have_reply", "is_have_pure_reply", "content_level", "topic_num", "favor_num", "vote_num",
-    "one_ctr", "three_ctr", "seven_ctr", "fifteen_ctr", "first_demands", "second_demands", "first_solutions", "second_solutions",
+    "card_id", "is_pure_author", "is_have_reply", "is_have_pure_reply", "content_level", "topic_num", "topic_seven_click_num",
+    "topic_thirty_click_num", "favor_num", "vote_num", "one_ctr", "three_ctr", "seven_ctr", "fifteen_ctr", "thirty_ctr",
+    "sixty_ctr", "ninety_ctr", "history_ctr", "first_demands", "second_demands", "first_solutions", "second_solutions",
    "first_positions", "second_positions", "projects"
 ]
-INT_COLUMNS = ["active_days", "topic_num", "favor_num", "vote_num"]
-FLOAT_COLUMNS = ["one_ctr", "three_ctr", "seven_ctr", "fifteen_ctr"]
+INT_COLUMNS = ["active_days", "topic_num", "favor_num", "vote_num", "topic_seven_click_num", "topic_thirty_click_num"]
+FLOAT_COLUMNS = ["one_ctr", "three_ctr", "seven_ctr", "fifteen_ctr", "thirty_ctr", "sixty_ctr", "ninety_ctr", "history_ctr"]
 CATEGORICAL_COLUMNS = [
    "device_id", "active_type", "past_consume_ability_history", "potential_consume_ability_history", "price_sensitive_history",
    "card_id", "is_pure_author", "is_have_reply", "is_have_pure_reply", "content_level", "device_fd", "content_fd", "fd1", "fd2",

--- a/src/train_diary.py
+++ b/src/train_diary.py
@@ -23,13 +23,13 @@ def main():

    # os.environ["CUDA_VISIBLE_DEVICES"] = "-1"

-    # data_path = Path("~/data/cvr_data").expanduser()  # local
-    data_path = Path("/srv/apps/node2vec_git/cvr_data/")  # server
-    diary_df, diary_click_df, diary_conversion_df = diary_fe.read_csv_data(data_path)
+    # dataset_path = Path("~/data/cvr_data").expanduser()  # local
+    dataset_path = Path("/srv/apps/node2vec_git/cvr_data/")  # server
+    diary_df, diary_click_df, diary_conversion_df = diary_fe.read_csv_data(dataset_path)
    # print(diary_df.sample(1))
    diary_df = diary_fe.diary_feature_engineering(diary_df)
    # print(diary_df.sample(1))
-    device_df = device_fe.read_csv_data(data_path)
+    device_df = device_fe.read_csv_data(dataset_path)
    # print(diary_df.sample(1))
    device_df = device_fe.device_feature_engineering(device_df, "diary")
    # print(device_df.sample(1))
@@ -54,14 +54,16 @@ def main():
    estimator_config = tf.estimator.RunConfig(session_config=session_config)

    model = tf.estimator.Estimator(model_fn=esmm_model_fn, params=params, model_dir=model_path, config=estimator_config)
-    train_spec = tf.estimator.TrainSpec(input_fn=lambda: esmm_input_fn(train_df, shuffle=True), max_steps=50000)
+    # TODO 50000
+    train_spec = tf.estimator.TrainSpec(input_fn=lambda: esmm_input_fn(train_df, shuffle=True), max_steps=20000)
    eval_spec = tf.estimator.EvalSpec(input_fn=lambda: esmm_input_fn(val_df, shuffle=False))
    tf.estimator.train_and_evaluate(model, train_spec, eval_spec)

    model_export_path = str(Path("~/data/models/diary").expanduser())
    save_path = model_export(model, all_features, model_export_path)
    print("save to: " + save_path)
-    set_essm_model_save_path("diary", save_path)
+    # TODO save model
+    # set_essm_model_save_path("diary", save_path)

    diary_train_columns = set(diary_fe.INT_COLUMNS + diary_fe.FLOAT_COLUMNS + diary_fe.CATEGORICAL_COLUMNS)
    diary_predict_columns = set(PREDICTION_ALL_COLUMNS)