Commit aeba770f authored by 赵威

add reply number

parent bee3dbb8
...@@ -7,7 +7,10 @@ from .model import _bytes_feature, _float_feature, _int64_feature ...@@ -7,7 +7,10 @@ from .model import _bytes_feature, _float_feature, _int64_feature
_int_columns = [ _int_columns = [
"active_type", "active_days", "card_id", "is_pure_author", "is_have_reply", "is_have_pure_reply", "content_level", "active_type", "active_days", "card_id", "is_pure_author", "is_have_reply", "is_have_pure_reply", "content_level",
"topic_num", "favor_num", "vote_num", "topic_seven_click_num", "topic_thirty_click_num" "topic_num", "favor_num", "favor_pure_num", "vote_num", "reply_num", "reply_pure_num", "one_reply_user_num",
"three_reply_user_num", "seven_reply_user_num", "fifteen_reply_user_num", "thirty_reply_user_num", "sixty_reply_user_num",
"ninety_reply_user_num", "history_reply_user_num", "topic_seven_click_num", "topic_thirty_click_num", "seven_transform_num",
"thirty_transform_num"
] ]
_float_columns = ["one_ctr", "three_ctr", "seven_ctr", "fifteen_ctr", "thirty_ctr", "sixty_ctr", "ninety_ctr", "history_ctr"] _float_columns = ["one_ctr", "three_ctr", "seven_ctr", "fifteen_ctr", "thirty_ctr", "sixty_ctr", "ninety_ctr", "history_ctr"]
_categorical_columns = [ _categorical_columns = [
......
...@@ -7,11 +7,18 @@ from ..utils import common_elements, nth_element ...@@ -7,11 +7,18 @@ from ..utils import common_elements, nth_element
DIARY_COLUMNS = [ DIARY_COLUMNS = [
"card_id", "is_pure_author", "is_have_reply", "is_have_pure_reply", "content_level", "topic_num", "topic_seven_click_num", "card_id", "is_pure_author", "is_have_reply", "is_have_pure_reply", "content_level", "topic_num", "topic_seven_click_num",
"topic_thirty_click_num", "favor_num", "vote_num", "one_ctr", "three_ctr", "seven_ctr", "fifteen_ctr", "thirty_ctr", "topic_thirty_click_num", "seven_transform_num", "thirty_transform_num", "favor_num", "favor_pure_num", "vote_num",
"sixty_ctr", "ninety_ctr", "history_ctr", "first_demands", "second_demands", "first_solutions", "second_solutions", "reply_num", "reply_pure_num", "one_reply_user_num", "three_reply_user_num", "seven_reply_user_num", "fifteen_reply_user_num",
"first_positions", "second_positions", "projects" "thirty_reply_user_num", "sixty_reply_user_num", "ninety_reply_user_num", "history_reply_user_num", "one_ctr", "three_ctr",
"seven_ctr", "fifteen_ctr", "thirty_ctr", "sixty_ctr", "ninety_ctr", "history_ctr", "first_demands", "second_demands",
"first_solutions", "second_solutions", "first_positions", "second_positions", "projects"
]
INT_COLUMNS = [
"active_days", "topic_num", "favor_num", "favor_pure_num", "vote_num", "reply_num", "reply_pure_num", "one_reply_user_num",
"three_reply_user_num", "seven_reply_user_num", "fifteen_reply_user_num", "thirty_reply_user_num", "sixty_reply_user_num",
"ninety_reply_user_num", "history_reply_user_num", "topic_seven_click_num", "topic_thirty_click_num", "seven_transform_num",
"thirty_transform_num"
] ]
INT_COLUMNS = ["active_days", "topic_num", "favor_num", "vote_num", "topic_seven_click_num", "topic_thirty_click_num"]
FLOAT_COLUMNS = ["one_ctr", "three_ctr", "seven_ctr", "fifteen_ctr", "thirty_ctr", "sixty_ctr", "ninety_ctr", "history_ctr"] FLOAT_COLUMNS = ["one_ctr", "three_ctr", "seven_ctr", "fifteen_ctr", "thirty_ctr", "sixty_ctr", "ninety_ctr", "history_ctr"]
CATEGORICAL_COLUMNS = [ CATEGORICAL_COLUMNS = [
"device_id", "active_type", "past_consume_ability_history", "potential_consume_ability_history", "price_sensitive_history", "device_id", "active_type", "past_consume_ability_history", "potential_consume_ability_history", "price_sensitive_history",
......
...@@ -23,6 +23,12 @@ def main(): ...@@ -23,6 +23,12 @@ def main():
# os.environ["CUDA_VISIBLE_DEVICES"] = "-1" # os.environ["CUDA_VISIBLE_DEVICES"] = "-1"
diary_train_columns = set(diary_fe.INT_COLUMNS + diary_fe.FLOAT_COLUMNS + diary_fe.CATEGORICAL_COLUMNS)
diary_predict_columns = set(PREDICTION_ALL_COLUMNS)
print(diary_predict_columns.difference(diary_train_columns))
print(diary_train_columns.difference(diary_predict_columns))
assert diary_predict_columns == diary_train_columns
# dataset_path = Path("~/data/cvr_data").expanduser() # local # dataset_path = Path("~/data/cvr_data").expanduser() # local
dataset_path = Path("/srv/apps/node2vec_git/cvr_data/") # server dataset_path = Path("/srv/apps/node2vec_git/cvr_data/") # server
diary_df, diary_click_df, diary_conversion_df = diary_fe.read_csv_data(dataset_path) diary_df, diary_click_df, diary_conversion_df = diary_fe.read_csv_data(dataset_path)
...@@ -57,7 +63,10 @@ def main(): ...@@ -57,7 +63,10 @@ def main():
# TODO 50000 # TODO 50000
train_spec = tf.estimator.TrainSpec(input_fn=lambda: esmm_input_fn(train_df, shuffle=True), max_steps=20000) train_spec = tf.estimator.TrainSpec(input_fn=lambda: esmm_input_fn(train_df, shuffle=True), max_steps=20000)
eval_spec = tf.estimator.EvalSpec(input_fn=lambda: esmm_input_fn(val_df, shuffle=False)) eval_spec = tf.estimator.EvalSpec(input_fn=lambda: esmm_input_fn(val_df, shuffle=False))
tf.estimator.train_and_evaluate(model, train_spec, eval_spec) res = tf.estimator.train_and_evaluate(model, train_spec, eval_spec)
print("@@@@@@@@@@")
print(res)
print("@@@@@@@@@@")
model_export_path = str(Path("~/data/models/diary").expanduser()) model_export_path = str(Path("~/data/models/diary").expanduser())
save_path = model_export(model, all_features, model_export_path) save_path = model_export(model, all_features, model_export_path)
...@@ -65,12 +74,6 @@ def main(): ...@@ -65,12 +74,6 @@ def main():
# TODO save model # TODO save model
# set_essm_model_save_path("diary", save_path) # set_essm_model_save_path("diary", save_path)
diary_train_columns = set(diary_fe.INT_COLUMNS + diary_fe.FLOAT_COLUMNS + diary_fe.CATEGORICAL_COLUMNS)
diary_predict_columns = set(PREDICTION_ALL_COLUMNS)
print(diary_predict_columns.difference(diary_train_columns))
print(diary_train_columns.difference(diary_predict_columns))
assert diary_predict_columns == diary_train_columns
print("============================================================") print("============================================================")
# save_path = str(Path("~/Desktop/models/1596012827").expanduser()) # local # save_path = str(Path("~/Desktop/models/1596012827").expanduser()) # local
# save_path = "/home/gmuser/data/models/diary/1596083349" # server # save_path = "/home/gmuser/data/models/diary/1596083349" # server
......
...@@ -21,11 +21,17 @@ def main(): ...@@ -21,11 +21,17 @@ def main():
tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.INFO) tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.INFO)
# data_path = Path("~/data/cvr_data").expanduser() # local tractate_train_columns = set(tractate_fe.INT_COLUMNS + tractate_fe.FLOAT_COLUMNS + tractate_fe.CATEGORICAL_COLUMNS)
data_path = Path("/srv/apps/node2vec_git/cvr_data/") # server tractate_predict_columns = set(PREDICTION_ALL_COLUMNS)
tractate_df, tractate_click_df, tractate_conversion_df = tractate_fe.read_csv_data(data_path) print(tractate_predict_columns.difference(tractate_train_columns))
print(tractate_train_columns.difference(tractate_predict_columns))
assert tractate_predict_columns == tractate_train_columns
# dataset_path = Path("~/data/cvr_data").expanduser() # local
dataset_path = Path("/srv/apps/node2vec_git/cvr_data/") # server
tractate_df, tractate_click_df, tractate_conversion_df = tractate_fe.read_csv_data(dataset_path)
tractate_df = tractate_fe.tractate_feature_engineering(tractate_df) tractate_df = tractate_fe.tractate_feature_engineering(tractate_df)
device_df = device_fe.read_csv_data(data_path) device_df = device_fe.read_csv_data(dataset_path)
device_df = device_fe.device_feature_engineering(device_df, "tractate") device_df = device_fe.device_feature_engineering(device_df, "tractate")
# print(device_df.columns) # print(device_df.columns)
# print(device_df.dtypes, "\n") # print(device_df.dtypes, "\n")
...@@ -60,12 +66,6 @@ def main(): ...@@ -60,12 +66,6 @@ def main():
print("save to: " + save_path) print("save to: " + save_path)
set_essm_model_save_path("tractate", save_path) set_essm_model_save_path("tractate", save_path)
tractate_train_columns = set(tractate_fe.INT_COLUMNS + tractate_fe.FLOAT_COLUMNS + tractate_fe.CATEGORICAL_COLUMNS)
tractate_predict_columns = set(PREDICTION_ALL_COLUMNS)
print(tractate_predict_columns.difference(tractate_train_columns))
print(tractate_train_columns.difference(tractate_predict_columns))
assert tractate_predict_columns == tractate_train_columns
print("============================================================") print("============================================================")
# # save_path = str(Path("~/data/models/tractate/1596089465").expanduser()) # local # # save_path = str(Path("~/data/models/tractate/1596089465").expanduser()) # local
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment