Commit c7296fd5 authored by 赵威

update tractate price type

parent 11181e9c
...@@ -92,6 +92,7 @@ _int_columns = [ ...@@ -92,6 +92,7 @@ _int_columns = [
"business_second_skip_num", "business_second_skip_num",
"service_price", "service_price",
"service_sold_num", "service_sold_num",
"recommend_service_price",
] ]
_float_columns = [ _float_columns = [
"one_ctr", "one_ctr",
...@@ -178,7 +179,6 @@ _categorical_columns = [ ...@@ -178,7 +179,6 @@ _categorical_columns = [
"service_city", "service_city",
"recommend_service_id", "recommend_service_id",
"recommend_service_city", "recommend_service_city",
"recommend_service_price",
"device_fd2", "device_fd2",
"device_sd2", "device_sd2",
"device_fs2", "device_fs2",
......
...@@ -425,7 +425,7 @@ def diary_feature_engineering(df): ...@@ -425,7 +425,7 @@ def diary_feature_engineering(df):
diary_df["is_related_service"] = diary_df["is_related_service"].astype(int) diary_df["is_related_service"] = diary_df["is_related_service"].astype(int)
diary_df["service_id"] = diary_df["service_id"].astype(str) diary_df["service_id"] = diary_df["service_id"].astype(str)
diary_df["recommend_service_id"] = diary_df["recommend_service_id"].astype(str) diary_df["recommend_service_id"] = diary_df["recommend_service_id"].astype(str)
diary_df["recommend_service_price"] = diary_df["recommend_service_price"].astype(int) diary_df["recommend_service_price"] = diary_df["recommend_service_price"].astype(str)
diary_df["service_id"] = diary_df["service_city"].fillna("-1") diary_df["service_id"] = diary_df["service_city"].fillna("-1")
diary_df["service_city"] = diary_df["service_city"].fillna("") diary_df["service_city"] = diary_df["service_city"].fillna("")
......
...@@ -411,7 +411,7 @@ def tractate_feature_engineering(tractate_df): ...@@ -411,7 +411,7 @@ def tractate_feature_engineering(tractate_df):
df["is_related_service"] = df["is_related_service"].astype(int) df["is_related_service"] = df["is_related_service"].astype(int)
df["service_id"] = df["service_id"].astype(str) df["service_id"] = df["service_id"].astype(str)
df["recommend_service_id"] = df["recommend_service_id"].astype(str) df["recommend_service_id"] = df["recommend_service_id"].astype(str)
df["recommend_service_price"] = df["recommend_service_price"].astype(int) df["recommend_service_price"] = df["recommend_service_price"].astype(str)
df["service_id"] = df["service_city"].fillna("-1") df["service_id"] = df["service_city"].fillna("-1")
df["service_city"] = df["service_city"].fillna("") df["service_city"] = df["service_city"].fillna("")
......
...@@ -83,6 +83,7 @@ _int_columns = [ ...@@ -83,6 +83,7 @@ _int_columns = [
"business_second_skip_num", "business_second_skip_num",
"service_price", "service_price",
"service_sold_num", "service_sold_num",
"recommend_service_price",
] ]
_float_columns = [ _float_columns = [
"one_ctr", "one_ctr",
...@@ -170,7 +171,6 @@ _categorical_columns = [ ...@@ -170,7 +171,6 @@ _categorical_columns = [
"service_city", "service_city",
"recommend_service_id", "recommend_service_id",
"recommend_service_city", "recommend_service_city",
"recommend_service_price",
# "device_fd2", # "device_fd2",
# "device_sd2", # "device_sd2",
# "device_fs2", # "device_fs2",
......
...@@ -22,72 +22,72 @@ def main(): ...@@ -22,72 +22,72 @@ def main():
tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.INFO) tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.INFO)
tractate_train_columns = set(tractate_fe.INT_COLUMNS + tractate_fe.FLOAT_COLUMNS + tractate_fe.CATEGORICAL_COLUMNS) # tractate_train_columns = set(tractate_fe.INT_COLUMNS + tractate_fe.FLOAT_COLUMNS + tractate_fe.CATEGORICAL_COLUMNS)
print("features: " + str(len(tractate_train_columns))) # print("features: " + str(len(tractate_train_columns)))
tractate_predict_columns = set(PREDICTION_ALL_COLUMNS) # tractate_predict_columns = set(PREDICTION_ALL_COLUMNS)
print(tractate_predict_columns.difference(tractate_train_columns)) # print(tractate_predict_columns.difference(tractate_train_columns))
print(tractate_train_columns.difference(tractate_predict_columns)) # print(tractate_train_columns.difference(tractate_predict_columns))
assert tractate_predict_columns == tractate_train_columns # assert tractate_predict_columns == tractate_train_columns
# dataset_path = Path("~/data/cvr_data").expanduser() # local # # dataset_path = Path("~/data/cvr_data").expanduser() # local
dataset_path = Path("/srv/apps/node2vec_git/cvr_data/") # server # dataset_path = Path("/srv/apps/node2vec_git/cvr_data/") # server
tractate_df, tractate_click_df, tractate_conversion_df = tractate_fe.read_csv_data(dataset_path) # tractate_df, tractate_click_df, tractate_conversion_df = tractate_fe.read_csv_data(dataset_path)
tractate_df = tractate_fe.tractate_feature_engineering(tractate_df) # tractate_df = tractate_fe.tractate_feature_engineering(tractate_df)
device_df = device_fe.read_csv_data(dataset_path) # device_df = device_fe.read_csv_data(dataset_path)
device_df = device_fe.device_feature_engineering(device_df, "tractate") # device_df = device_fe.device_feature_engineering(device_df, "tractate")
# print(device_df.columns) # # print(device_df.columns)
# print(device_df.dtypes, "\n") # # print(device_df.dtypes, "\n")
cc_df = click_fe.click_feature_engineering(tractate_click_df, tractate_conversion_df) # cc_df = click_fe.click_feature_engineering(tractate_click_df, tractate_conversion_df)
df = tractate_fe.join_features(device_df, tractate_df, cc_df) # df = tractate_fe.join_features(device_df, tractate_df, cc_df)
# for i in df.columns: # # for i in df.columns:
# print(i) # # print(i)
# print(df.dtypes) # # print(df.dtypes)
train_df, test_df = train_test_split(df, test_size=0.2) # train_df, test_df = train_test_split(df, test_size=0.2)
train_df, val_df = train_test_split(train_df, test_size=0.2) # train_df, val_df = train_test_split(train_df, test_size=0.2)
all_features = fe.build_features(df, tractate_fe.INT_COLUMNS, tractate_fe.FLOAT_COLUMNS, tractate_fe.CATEGORICAL_COLUMNS, # all_features = fe.build_features(df, tractate_fe.INT_COLUMNS, tractate_fe.FLOAT_COLUMNS, tractate_fe.CATEGORICAL_COLUMNS,
tractate_fe.CROSS_COLUMNS) # tractate_fe.CROSS_COLUMNS)
params = {"feature_columns": all_features, "hidden_units": [360, 200, 80, 2], "learning_rate": 0.2} # params = {"feature_columns": all_features, "hidden_units": [360, 200, 80, 2], "learning_rate": 0.2}
model_path = str(Path("/data/files/model_tmp/tractate/").expanduser()) # model_path = str(Path("/data/files/model_tmp/tractate/").expanduser())
if os.path.exists(model_path): # if os.path.exists(model_path):
shutil.rmtree(model_path) # shutil.rmtree(model_path)
session_config = tf.compat.v1.ConfigProto() # session_config = tf.compat.v1.ConfigProto()
session_config.gpu_options.allow_growth = True # session_config.gpu_options.allow_growth = True
session_config.gpu_options.per_process_gpu_memory_fraction = 0.7 # session_config.gpu_options.per_process_gpu_memory_fraction = 0.7
# session_config.inter_op_parallelism_threads = 1 # # session_config.inter_op_parallelism_threads = 1
# session_config.intra_op_parallelism_threads = 1 # # session_config.intra_op_parallelism_threads = 1
estimator_config = tf.estimator.RunConfig(session_config=session_config) # estimator_config = tf.estimator.RunConfig(session_config=session_config)
model = tf.estimator.Estimator(model_fn=esmm_model_fn, params=params, model_dir=model_path, config=estimator_config) # model = tf.estimator.Estimator(model_fn=esmm_model_fn, params=params, model_dir=model_path, config=estimator_config)
# TODO 50000 # # TODO 50000
train_spec = tf.estimator.TrainSpec(input_fn=lambda: esmm_input_fn(train_df, shuffle=True), max_steps=12000) # train_spec = tf.estimator.TrainSpec(input_fn=lambda: esmm_input_fn(train_df, shuffle=True), max_steps=12000)
eval_spec = tf.estimator.EvalSpec(input_fn=lambda: esmm_input_fn(val_df, shuffle=False)) # eval_spec = tf.estimator.EvalSpec(input_fn=lambda: esmm_input_fn(val_df, shuffle=False))
res = tf.estimator.train_and_evaluate(model, train_spec, eval_spec) # res = tf.estimator.train_and_evaluate(model, train_spec, eval_spec)
print("@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@") # print("@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@")
print(res[0]) # print(res[0])
ctr_auc = str(res[0]["ctr_auc"]) # ctr_auc = str(res[0]["ctr_auc"])
ctcvr_auc = str(res[0]["ctcvr_auc"]) # ctcvr_auc = str(res[0]["ctcvr_auc"])
print("ctr_auc: " + ctr_auc) # print("ctr_auc: " + ctr_auc)
print("ctcvr_auc: " + ctcvr_auc) # print("ctcvr_auc: " + ctcvr_auc)
print("@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@") # print("@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@")
total_time = "{:.2f}".format((time.time() - time_begin) / 60) # total_time = "{:.2f}".format((time.time() - time_begin) / 60)
model_export_path = str(Path("/data/files/models/tractate/").expanduser()) # model_export_path = str(Path("/data/files/models/tractate/").expanduser())
save_path = model_export(model, all_features, model_export_path) # save_path = model_export(model, all_features, model_export_path)
print("save to: " + save_path) # print("save to: " + save_path)
# TODO save model # # TODO save model
# set_essm_model_save_path("tractate", save_path) # # set_essm_model_save_path("tractate", save_path)
# record_esmm_auc_to_db("tractate", ctr_auc, ctcvr_auc, total_time, save_path) # # record_esmm_auc_to_db("tractate", ctr_auc, ctcvr_auc, total_time, save_path)
print("============================================================") # print("============================================================")
# save_path = get_essm_model_save_path("tractate") # save_path = get_essm_model_save_path("tractate")
# print("load path: " + save_path) # print("load path: " + save_path)
# save_path = str(Path("~/data/models/tractate/1598236893").expanduser()) # local # save_path = str(Path("~/data/models/tractate/1598236893").expanduser()) # local
save_path = "/data/files/models/tractate/1599123885" # server save_path = "/data/files/models/tractate/1599128140" # server
predict_fn = tf.contrib.predictor.from_saved_model(save_path) predict_fn = tf.contrib.predictor.from_saved_model(save_path)
device_dict = device_fe.get_device_dict_from_redis() device_dict = device_fe.get_device_dict_from_redis()
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment