Commit 250f1b3f authored by 赵威

train tractate

parent 34646d86
...@@ -19,63 +19,61 @@ from utils.cache import get_essm_model_save_path, set_essm_model_save_path ...@@ -19,63 +19,61 @@ from utils.cache import get_essm_model_save_path, set_essm_model_save_path
def main(): def main():
time_begin = time.time() time_begin = time.time()
# tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.INFO) tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.INFO)
# tractate_train_columns = set(tractate_fe.INT_COLUMNS + tractate_fe.FLOAT_COLUMNS + tractate_fe.CATEGORICAL_COLUMNS) tractate_train_columns = set(tractate_fe.INT_COLUMNS + tractate_fe.FLOAT_COLUMNS + tractate_fe.CATEGORICAL_COLUMNS)
# print("features: " + str(len(tractate_train_columns))) print("features: " + str(len(tractate_train_columns)))
# tractate_predict_columns = set(PREDICTION_ALL_COLUMNS) tractate_predict_columns = set(PREDICTION_ALL_COLUMNS)
# print(tractate_predict_columns.difference(tractate_train_columns)) print(tractate_predict_columns.difference(tractate_train_columns))
# print(tractate_train_columns.difference(tractate_predict_columns)) print(tractate_train_columns.difference(tractate_predict_columns))
# assert tractate_predict_columns == tractate_train_columns assert tractate_predict_columns == tractate_train_columns
# # dataset_path = Path("~/data/cvr_data").expanduser() # local # dataset_path = Path("~/data/cvr_data").expanduser() # local
# dataset_path = Path("/srv/apps/node2vec_git/cvr_data/") # server dataset_path = Path("/srv/apps/node2vec_git/cvr_data/") # server
# tractate_df, tractate_click_df, tractate_conversion_df = tractate_fe.read_csv_data(dataset_path) tractate_df, tractate_click_df, tractate_conversion_df = tractate_fe.read_csv_data(dataset_path)
# tractate_df = tractate_fe.tractate_feature_engineering(tractate_df) tractate_df = tractate_fe.tractate_feature_engineering(tractate_df)
# device_df = device_fe.read_csv_data(dataset_path) device_df = device_fe.read_csv_data(dataset_path)
# device_df = device_fe.device_feature_engineering(device_df, "tractate") device_df = device_fe.device_feature_engineering(device_df, "tractate")
# # print(device_df.columns) # print(device_df.columns)
# # print(device_df.dtypes, "\n") # print(device_df.dtypes, "\n")
# cc_df = click_fe.click_feature_engineering(tractate_click_df, tractate_conversion_df) cc_df = click_fe.click_feature_engineering(tractate_click_df, tractate_conversion_df)
# df = tractate_fe.join_features(device_df, tractate_df, cc_df) df = tractate_fe.join_features(device_df, tractate_df, cc_df)
# # for i in df.columns: # for i in df.columns:
# # print(i) # print(i)
# # print(df.dtypes) # print(df.dtypes)
# train_df, test_df = train_test_split(df, test_size=0.2) train_df, test_df = train_test_split(df, test_size=0.2)
# train_df, val_df = train_test_split(train_df, test_size=0.2) train_df, val_df = train_test_split(train_df, test_size=0.2)
# all_features = fe.build_features(df, tractate_fe.INT_COLUMNS, tractate_fe.FLOAT_COLUMNS, tractate_fe.CATEGORICAL_COLUMNS) all_features = fe.build_features(df, tractate_fe.INT_COLUMNS, tractate_fe.FLOAT_COLUMNS, tractate_fe.CATEGORICAL_COLUMNS)
# params = {"feature_columns": all_features, "hidden_units": [64, 32], "learning_rate": 0.1} params = {"feature_columns": all_features, "hidden_units": [64, 32], "learning_rate": 0.1}
# model_path = str(Path("/data/files/model_tmp/tractate/").expanduser()) model_path = str(Path("/data/files/model_tmp/tractate/").expanduser())
# if os.path.exists(model_path): if os.path.exists(model_path):
# shutil.rmtree(model_path) shutil.rmtree(model_path)
# session_config = tf.compat.v1.ConfigProto() session_config = tf.compat.v1.ConfigProto()
# session_config.gpu_options.allow_growth = True session_config.gpu_options.allow_growth = True
# session_config.gpu_options.per_process_gpu_memory_fraction = 0.9 session_config.gpu_options.per_process_gpu_memory_fraction = 0.9
# estimator_config = tf.estimator.RunConfig(session_config=session_config) estimator_config = tf.estimator.RunConfig(session_config=session_config)
# model = tf.estimator.Estimator(model_fn=esmm_model_fn, params=params, model_dir=model_path, config=estimator_config) model = tf.estimator.Estimator(model_fn=esmm_model_fn, params=params, model_dir=model_path, config=estimator_config)
# # TODO 50000 train_spec = tf.estimator.TrainSpec(input_fn=lambda: esmm_input_fn(train_df, shuffle=True), max_steps=50000)
# train_spec = tf.estimator.TrainSpec(input_fn=lambda: esmm_input_fn(train_df, shuffle=True), max_steps=20000) eval_spec = tf.estimator.EvalSpec(input_fn=lambda: esmm_input_fn(val_df, shuffle=False))
# eval_spec = tf.estimator.EvalSpec(input_fn=lambda: esmm_input_fn(val_df, shuffle=False)) res = tf.estimator.train_and_evaluate(model, train_spec, eval_spec)
# res = tf.estimator.train_and_evaluate(model, train_spec, eval_spec) print("@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@")
# print("@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@") print(res[0])
# print(res[0]) print("ctr_auc: " + str(res[0]["ctr_auc"]))
# print("ctr_auc: " + str(res[0]["ctr_auc"])) print("ctcvr_auc: " + str(res[0]["ctcvr_auc"]))
# print("ctcvr_auc: " + str(res[0]["ctcvr_auc"])) print("@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@")
# print("@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@")
model_export_path = str(Path("/data/files/models/tractate/").expanduser())
# model_export_path = str(Path("/data/files/models/tractate/").expanduser()) save_path = model_export(model, all_features, model_export_path)
# save_path = model_export(model, all_features, model_export_path) print("save to: " + save_path)
# print("save to: " + save_path) set_essm_model_save_path("tractate", save_path)
# # TODO save model print("============================================================")
# # set_essm_model_save_path("tractate", save_path)
# print("============================================================") save_path = get_essm_model_save_path("tractate")
save_path = get_essm_model_save_path("diary")
print("load path: " + save_path) print("load path: " + save_path)
# # save_path = str(Path("~/data/models/tractate/1596089465").expanduser()) # local # # save_path = str(Path("~/data/models/tractate/1596089465").expanduser()) # local
predict_fn = tf.contrib.predictor.from_saved_model(save_path) predict_fn = tf.contrib.predictor.from_saved_model(save_path)
...@@ -87,9 +85,8 @@ def main(): ...@@ -87,9 +85,8 @@ def main():
device_ids = list(device_dict.keys())[:20] device_ids = list(device_dict.keys())[:20]
tractate_ids = list(tractate_dict.keys()) tractate_ids = list(tractate_dict.keys())
# TODO printer
# print(device_dict[device_ids[0]], "\n") # print(device_dict[device_ids[0]], "\n")
print(tractate_dict[tractate_ids[0]], "\n") # print(tractate_dict[tractate_ids[0]], "\n")
for i in range(5): for i in range(5):
time_1 = timeit.default_timer() time_1 = timeit.default_timer()
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment