Commit f86662b3 authored by 赵威

try diary cross feature

parent 084806b0
@@ -33,5 +33,11 @@ def build_features(df, int_columns, float_columns, categorical_columns):
         categorical_features.append(
             fc.indicator_column(fc.categorical_column_with_vocabulary_list(col, create_vocabulary_list(df, col))))
+    # TODO try cross feature
+    a = fc.categorical_column_with_vocabulary_list("device_fd", create_vocabulary_list(df, "device_fd"))
+    b = fc.categorical_column_with_vocabulary_list("content_fd", create_vocabulary_list(df, "content_fd"))
+    c = tf.feature_column.crossed_column([a, b], hash_bucket_size=10)
+    categorical_features.append(tf.feature_column.indicator_column(c))
     all_features = (numeric_features + categorical_features)
     return all_features
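The new block crosses the device and content ID columns: fc.crossed_column hashes each (device_fd, content_fd) pair into one of hash_bucket_size buckets, and the indicator_column wrapper one-hot encodes the bucket so it can feed the dense tower like the other categorical features. With only 10 buckets most distinct pairs will collide, which is presumably acceptable for this first experiment. A minimal, self-contained sketch of the same construction on toy data (the vocabularies and values below are made up, not from the repo):

import tensorflow as tf
from tensorflow import feature_column as fc

# Toy batch with the two ID features being crossed (values are illustrative).
features = {
    "device_fd": tf.constant(["android", "ios", "android"]),
    "content_fd": tf.constant(["c1", "c2", "c1"]),
}

a = fc.categorical_column_with_vocabulary_list("device_fd", ["android", "ios"])
b = fc.categorical_column_with_vocabulary_list("content_fd", ["c1", "c2", "c3"])

# Each (device_fd, content_fd) pair is hashed into one of 10 buckets,
# then one-hot encoded so a DNN input layer can consume it.
cross = fc.indicator_column(fc.crossed_column([a, b], hash_bucket_size=10))

dense = tf.keras.layers.DenseFeatures([cross])(features)
print(dense.shape)  # (3, 10): one bucket-indicator vector per example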
@@ -50,7 +50,7 @@ def main():
     train_df, val_df = train_test_split(train_df, test_size=0.2)
     all_features = fe.build_features(df, diary_fe.INT_COLUMNS, diary_fe.FLOAT_COLUMNS, diary_fe.CATEGORICAL_COLUMNS)
-    params = {"feature_columns": all_features, "hidden_units": [200, 80, 2], "learning_rate": 0.2}
+    params = {"feature_columns": all_features, "hidden_units": [360, 200, 80, 2], "learning_rate": 0.2}
     model_path = str(Path("/data/files/model_tmp/diary/").expanduser())
     if os.path.exists(model_path):
         shutil.rmtree(model_path)
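hidden_units grows from [200, 80, 2] to [360, 200, 80, 2], i.e. one extra 360-unit layer in front of the existing tower; learning_rate stays at 0.2. esmm_model_fn itself is not part of this diff, so the following is only a hypothetical, single-head illustration of how an Estimator model_fn typically consumes these three params (the real ESMM model presumably builds separate CTR and CVR towers):

import tensorflow as tf

def toy_model_fn(features, labels, mode, params):
    # Dense input assembled from the columns produced by build_features().
    net = tf.compat.v1.feature_column.input_layer(features, params["feature_columns"])
    # One ReLU layer per entry of hidden_units, e.g. [360, 200, 80, 2].
    for units in params["hidden_units"]:
        net = tf.compat.v1.layers.dense(net, units=units, activation=tf.nn.relu)
    logits = tf.compat.v1.layers.dense(net, units=1)

    if mode == tf.estimator.ModeKeys.PREDICT:
        return tf.estimator.EstimatorSpec(mode, predictions={"prob": tf.sigmoid(logits)})

    labels = tf.reshape(tf.cast(labels, tf.float32), [-1, 1])
    loss = tf.compat.v1.losses.sigmoid_cross_entropy(labels, logits)
    if mode == tf.estimator.ModeKeys.EVAL:
        return tf.estimator.EstimatorSpec(mode, loss=loss)

    optimizer = tf.compat.v1.train.AdagradOptimizer(params["learning_rate"])
    train_op = optimizer.minimize(loss, global_step=tf.compat.v1.train.get_global_step())
    return tf.estimator.EstimatorSpec(mode, loss=loss, train_op=train_op)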
@@ -61,7 +61,8 @@ def main():
     estimator_config = tf.estimator.RunConfig(session_config=session_config)
     model = tf.estimator.Estimator(model_fn=esmm_model_fn, params=params, model_dir=model_path, config=estimator_config)
-    train_spec = tf.estimator.TrainSpec(input_fn=lambda: esmm_input_fn(train_df, shuffle=True), max_steps=50000)
+    # TODO 50000
+    train_spec = tf.estimator.TrainSpec(input_fn=lambda: esmm_input_fn(train_df, shuffle=True), max_steps=15000)
     eval_spec = tf.estimator.EvalSpec(input_fn=lambda: esmm_input_fn(val_df, shuffle=False))
     res = tf.estimator.train_and_evaluate(model, train_spec, eval_spec)
     print("@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@")
@@ -73,7 +74,8 @@ def main():
     model_export_path = str(Path("/data/files/models/diary").expanduser())
     save_path = model_export(model, all_features, model_export_path)
     print("save to: " + save_path)
-    set_essm_model_save_path("diary", save_path)
+    # TODO save model
+    # set_essm_model_save_path("diary", save_path)
     print("============================================================")
     # save_path = str(Path("~/Desktop/models/1596012827").expanduser()) # local
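The call that registered the export path, set_essm_model_save_path, is commented out, so per the "# TODO save model" note the new SavedModel is written to disk but not yet published to whatever serving reads that path. To sanity-check an export by hand, something along these lines works under TF 2.x (the directory below is only an example; use the timestamped path printed as "save to: ..."):

import tensorflow as tf

export_dir = "/data/files/models/diary/1596012827"  # example timestamp only
loaded = tf.saved_model.load(export_dir)
infer = loaded.signatures["serving_default"]
print(infer.structured_input_signature)  # what the serving signature expects
print(infer.structured_outputs)          # e.g. the predicted probabilities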