Commit 34560c08 authored by 赵威

add ctr feature for diary

parent 8ee5f546
......@@ -7,9 +7,9 @@ from .model import _bytes_feature, _float_feature, _int64_feature
_int_columns = [
"active_type", "active_days", "card_id", "is_pure_author", "is_have_reply", "is_have_pure_reply", "content_level",
"topic_num", "favor_num", "vote_num"
"topic_num", "favor_num", "vote_num", "topic_seven_click_num", "topic_thirty_click_num"
]
_float_columns = ["one_ctr", "three_ctr", "seven_ctr", "fifteen_ctr"]
_float_columns = ["one_ctr", "three_ctr", "seven_ctr", "fifteen_ctr", "thirty_ctr", "sixty_ctr", "ninety_ctr", "history_ctr"]
_categorical_columns = [
"device_id", "past_consume_ability_history", "potential_consume_ability_history", "price_sensitive_history", "device_fd",
"device_sd", "device_fs", "device_ss", "device_fp", "device_sp", "device_p", "content_fd", "content_sd", "content_fs",
......
......@@ -6,12 +6,13 @@ from utils.cache import redis_db_client
from ..utils import common_elements, nth_element
DIARY_COLUMNS = [
"card_id", "is_pure_author", "is_have_reply", "is_have_pure_reply", "content_level", "topic_num", "favor_num", "vote_num",
"one_ctr", "three_ctr", "seven_ctr", "fifteen_ctr", "first_demands", "second_demands", "first_solutions", "second_solutions",
"card_id", "is_pure_author", "is_have_reply", "is_have_pure_reply", "content_level", "topic_num", "topic_seven_click_num",
"topic_thirty_click_num", "favor_num", "vote_num", "one_ctr", "three_ctr", "seven_ctr", "fifteen_ctr", "thirty_ctr",
"sixty_ctr", "ninety_ctr", "history_ctr", "first_demands", "second_demands", "first_solutions", "second_solutions",
"first_positions", "second_positions", "projects"
]
INT_COLUMNS = ["active_days", "topic_num", "favor_num", "vote_num"]
FLOAT_COLUMNS = ["one_ctr", "three_ctr", "seven_ctr", "fifteen_ctr"]
INT_COLUMNS = ["active_days", "topic_num", "favor_num", "vote_num", "topic_seven_click_num", "topic_thirty_click_num"]
FLOAT_COLUMNS = ["one_ctr", "three_ctr", "seven_ctr", "fifteen_ctr", "thirty_ctr", "sixty_ctr", "ninety_ctr", "history_ctr"]
CATEGORICAL_COLUMNS = [
"device_id", "active_type", "past_consume_ability_history", "potential_consume_ability_history", "price_sensitive_history",
"card_id", "is_pure_author", "is_have_reply", "is_have_pure_reply", "content_level", "device_fd", "content_fd", "fd1", "fd2",
......
......@@ -23,13 +23,13 @@ def main():
# os.environ["CUDA_VISIBLE_DEVICES"] = "-1"
# data_path = Path("~/data/cvr_data").expanduser() # local
data_path = Path("/srv/apps/node2vec_git/cvr_data/") # server
diary_df, diary_click_df, diary_conversion_df = diary_fe.read_csv_data(data_path)
# dataset_path = Path("~/data/cvr_data").expanduser() # local
dataset_path = Path("/srv/apps/node2vec_git/cvr_data/") # server
diary_df, diary_click_df, diary_conversion_df = diary_fe.read_csv_data(dataset_path)
# print(diary_df.sample(1))
diary_df = diary_fe.diary_feature_engineering(diary_df)
# print(diary_df.sample(1))
device_df = device_fe.read_csv_data(data_path)
device_df = device_fe.read_csv_data(dataset_path)
# print(diary_df.sample(1))
device_df = device_fe.device_feature_engineering(device_df, "diary")
# print(device_df.sample(1))
......@@ -54,14 +54,16 @@ def main():
estimator_config = tf.estimator.RunConfig(session_config=session_config)
model = tf.estimator.Estimator(model_fn=esmm_model_fn, params=params, model_dir=model_path, config=estimator_config)
train_spec = tf.estimator.TrainSpec(input_fn=lambda: esmm_input_fn(train_df, shuffle=True), max_steps=50000)
# TODO 50000
train_spec = tf.estimator.TrainSpec(input_fn=lambda: esmm_input_fn(train_df, shuffle=True), max_steps=20000)
eval_spec = tf.estimator.EvalSpec(input_fn=lambda: esmm_input_fn(val_df, shuffle=False))
tf.estimator.train_and_evaluate(model, train_spec, eval_spec)
model_export_path = str(Path("~/data/models/diary").expanduser())
save_path = model_export(model, all_features, model_export_path)
print("save to: " + save_path)
set_essm_model_save_path("diary", save_path)
# TODO save model
# set_essm_model_save_path("diary", save_path)
diary_train_columns = set(diary_fe.INT_COLUMNS + diary_fe.FLOAT_COLUMNS + diary_fe.CATEGORICAL_COLUMNS)
diary_predict_columns = set(PREDICTION_ALL_COLUMNS)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment