Commit b49d7e6a authored by 赵威

try cross features for tractate

parent d8478ce8
......@@ -34,7 +34,7 @@ def user_portrait_scan_info(device_dict, diary_dict, tractate_dict, diary_predic
all_count += 1
print(str(all_count) + ": " + device_id)
offline_predict_diary(device_id, device_dict, diary_dict, diary_predict_fn, size=predict_size)
print("---------------------")
print("-------------------")
offline_predict_tractate(device_id, device_dict, tractate_dict, tractate_predict_fn, size=predict_size)
print("=========================================\n")
......@@ -61,7 +61,7 @@ def user_portrait_increment_scan_info(device_dict, diary_dict, tractate_dict, di
all_count += 1
print(str(all_count) + ": " + device_id)
offline_predict_diary(device_id, device_dict, diary_dict, diary_predict_fn, size=200)
print("---------------------")
print("-------------------")
offline_predict_tractate(device_id, device_dict, tractate_dict, tractate_predict_fn, size=200)
print("=========================================\n")
......
......@@ -175,20 +175,20 @@ _categorical_columns = [
"click_diary_id4",
"click_diary_id5",
"service_city",
# "device_fd2",
# "device_sd2",
# "device_fs2",
# "device_ss2",
# "device_fp2",
# "device_sp2",
# "device_p2",
# "device_fd3",
# "device_sd3",
# "device_fs3",
# "device_ss3",
# "device_fp3",
# "device_sp3",
# "device_p3",
"device_fd2",
"device_sd2",
"device_fs2",
"device_ss2",
"device_fp2",
"device_sp2",
"device_p2",
"device_fd3",
"device_sd3",
"device_fs3",
"device_ss3",
"device_fp3",
"device_sp3",
"device_p3",
]
PREDICTION_ALL_COLUMNS = _int_columns + _float_columns + _categorical_columns
......
......@@ -303,20 +303,20 @@ CATEGORICAL_COLUMNS = [
"click_diary_id5",
"is_related_service",
"service_city",
# "device_fd2",
# "device_sd2",
# "device_fs2",
# "device_ss2",
# "device_fp2",
# "device_sp2",
# "device_p2",
# "device_fd3",
# "device_sd3",
# "device_fs3",
# "device_ss3",
# "device_fp3",
# "device_sp3",
# "device_p3",
"device_fd2",
"device_sd2",
"device_fs2",
"device_ss2",
"device_fp2",
"device_sp2",
"device_p2",
"device_fd3",
"device_sd3",
"device_fs3",
"device_ss3",
"device_fp3",
"device_sp3",
"device_p3",
]
CROSS_COLUMNS = [
["device_fd", "content_fd"],
......@@ -326,20 +326,20 @@ CROSS_COLUMNS = [
["device_fp", "content_fp"],
["device_sp", "content_sp"],
["device_p", "content_p"],
# ["device_fd2", "content_fd"],
# ["device_sd2", "content_sd"],
# ["device_fs2", "content_fs"],
# ["device_ss2", "content_ss"],
# ["device_fp2", "content_fp"],
# ["device_sp2", "content_sp"],
# ["device_p2", "content_p"],
# ["device_fd3", "content_fd"],
# ["device_sd3", "content_sd"],
# ["device_fs3", "content_fs"],
# ["device_ss3", "content_ss"],
# ["device_fp3", "content_fp"],
# ["device_sp3", "content_sp"],
# ["device_p3", "content_p"],
["device_fd2", "content_fd"],
["device_sd2", "content_sd"],
["device_fs2", "content_fs"],
["device_ss2", "content_ss"],
["device_fp2", "content_fp"],
["device_sp2", "content_sp"],
["device_p2", "content_p"],
["device_fd3", "content_fd"],
["device_sd3", "content_sd"],
["device_fs3", "content_fs"],
["device_ss3", "content_ss"],
["device_fp3", "content_fp"],
["device_sp3", "content_sp"],
["device_p3", "content_p"],
]
......@@ -448,21 +448,21 @@ def join_features(device_df, diary_df, cc_df):
df["device_sp"] = df["second_positions_x"].apply(lambda x: nth_element(x, 0))
df["device_p"] = df["projects_x"].apply(lambda x: nth_element(x, 0))
# df["device_fd2"] = df["first_demands_x"].apply(lambda x: nth_element(x, 1))
# df["device_sd2"] = df["second_demands_x"].apply(lambda x: nth_element(x, 1))
# df["device_fs2"] = df["first_solutions_x"].apply(lambda x: nth_element(x, 1))
# df["device_ss2"] = df["second_solutions_x"].apply(lambda x: nth_element(x, 1))
# df["device_fp2"] = df["first_positions_x"].apply(lambda x: nth_element(x, 1))
# df["device_sp2"] = df["second_positions_x"].apply(lambda x: nth_element(x, 1))
# df["device_p2"] = df["projects_x"].apply(lambda x: nth_element(x, 1))
df["device_fd2"] = df["first_demands_x"].apply(lambda x: nth_element(x, 1))
df["device_sd2"] = df["second_demands_x"].apply(lambda x: nth_element(x, 1))
df["device_fs2"] = df["first_solutions_x"].apply(lambda x: nth_element(x, 1))
df["device_ss2"] = df["second_solutions_x"].apply(lambda x: nth_element(x, 1))
df["device_fp2"] = df["first_positions_x"].apply(lambda x: nth_element(x, 1))
df["device_sp2"] = df["second_positions_x"].apply(lambda x: nth_element(x, 1))
df["device_p2"] = df["projects_x"].apply(lambda x: nth_element(x, 1))
# df["device_fd3"] = df["first_demands_x"].apply(lambda x: nth_element(x, 2))
# df["device_sd3"] = df["second_demands_x"].apply(lambda x: nth_element(x, 2))
# df["device_fs3"] = df["first_solutions_x"].apply(lambda x: nth_element(x, 2))
# df["device_ss3"] = df["second_solutions_x"].apply(lambda x: nth_element(x, 2))
# df["device_fp3"] = df["first_positions_x"].apply(lambda x: nth_element(x, 2))
# df["device_sp3"] = df["second_positions_x"].apply(lambda x: nth_element(x, 2))
# df["device_p3"] = df["projects_x"].apply(lambda x: nth_element(x, 2))
df["device_fd3"] = df["first_demands_x"].apply(lambda x: nth_element(x, 2))
df["device_sd3"] = df["second_demands_x"].apply(lambda x: nth_element(x, 2))
df["device_fs3"] = df["first_solutions_x"].apply(lambda x: nth_element(x, 2))
df["device_ss3"] = df["second_solutions_x"].apply(lambda x: nth_element(x, 2))
df["device_fp3"] = df["first_positions_x"].apply(lambda x: nth_element(x, 2))
df["device_sp3"] = df["second_positions_x"].apply(lambda x: nth_element(x, 2))
df["device_p3"] = df["projects_x"].apply(lambda x: nth_element(x, 2))
df["content_fd"] = df["first_demands_y"].apply(lambda x: nth_element(x, 0))
df["content_sd"] = df["second_demands_y"].apply(lambda x: nth_element(x, 0))
......@@ -551,20 +551,20 @@ def device_diary_fe(device_id, diary_ids, device_dict, diary_dict):
device_info["device_fp"] = nth_element(device_fp, 0)
device_info["device_sp"] = nth_element(device_sp, 0)
device_info["device_p"] = nth_element(device_p, 0)
# device_info["device_fd2"] = nth_element(device_fd, 1)
# device_info["device_sd2"] = nth_element(device_sd, 1)
# device_info["device_fs2"] = nth_element(device_fs, 1)
# device_info["device_ss2"] = nth_element(device_ss, 1)
# device_info["device_fp2"] = nth_element(device_fp, 1)
# device_info["device_sp2"] = nth_element(device_sp, 1)
# device_info["device_p2"] = nth_element(device_p, 1)
# device_info["device_fd3"] = nth_element(device_fd, 2)
# device_info["device_sd3"] = nth_element(device_sd, 2)
# device_info["device_fs3"] = nth_element(device_fs, 2)
# device_info["device_ss3"] = nth_element(device_ss, 2)
# device_info["device_fp3"] = nth_element(device_fp, 2)
# device_info["device_sp3"] = nth_element(device_sp, 2)
# device_info["device_p3"] = nth_element(device_p, 2)
device_info["device_fd2"] = nth_element(device_fd, 1)
device_info["device_sd2"] = nth_element(device_sd, 1)
device_info["device_fs2"] = nth_element(device_fs, 1)
device_info["device_ss2"] = nth_element(device_ss, 1)
device_info["device_fp2"] = nth_element(device_fp, 1)
device_info["device_sp2"] = nth_element(device_sp, 1)
device_info["device_p2"] = nth_element(device_p, 1)
device_info["device_fd3"] = nth_element(device_fd, 2)
device_info["device_sd3"] = nth_element(device_sd, 2)
device_info["device_fs3"] = nth_element(device_fs, 2)
device_info["device_ss3"] = nth_element(device_ss, 2)
device_info["device_fp3"] = nth_element(device_fp, 2)
device_info["device_sp3"] = nth_element(device_sp, 2)
device_info["device_p3"] = nth_element(device_p, 2)
diary_lst = []
diary_ids_res = []
for id in diary_ids:
......
......@@ -288,6 +288,20 @@ CATEGORICAL_COLUMNS = [
"click_tractate_id5",
"is_related_service",
"service_city",
"device_fd2",
"device_sd2",
"device_fs2",
"device_ss2",
"device_fp2",
"device_sp2",
"device_p2",
"device_fd3",
"device_sd3",
"device_fs3",
"device_ss3",
"device_fp3",
"device_sp3",
"device_p3",
]
CROSS_COLUMNS = [
["device_fd", "content_fd"],
......@@ -297,6 +311,20 @@ CROSS_COLUMNS = [
["device_fp", "content_fp"],
["device_sp", "content_sp"],
["device_p", "content_p"],
["device_fd2", "content_fd"],
["device_sd2", "content_sd"],
["device_fs2", "content_fs"],
["device_ss2", "content_ss"],
["device_fp2", "content_fp"],
["device_sp2", "content_sp"],
["device_p2", "content_p"],
["device_fd3", "content_fd"],
["device_sd3", "content_sd"],
["device_fs3", "content_fs"],
["device_ss3", "content_ss"],
["device_fp3", "content_fp"],
["device_sp3", "content_sp"],
["device_p3", "content_p"],
]
......@@ -406,6 +434,22 @@ def join_features(device_df, tractate_df, cc_df):
df["device_sp"] = df["second_positions_x"].apply(lambda x: nth_element(x, 0))
df["device_p"] = df["projects_x"].apply(lambda x: nth_element(x, 0))
df["device_fd2"] = df["first_demands_x"].apply(lambda x: nth_element(x, 1))
df["device_sd2"] = df["second_demands_x"].apply(lambda x: nth_element(x, 1))
df["device_fs2"] = df["first_solutions_x"].apply(lambda x: nth_element(x, 1))
df["device_ss2"] = df["second_solutions_x"].apply(lambda x: nth_element(x, 1))
df["device_fp2"] = df["first_positions_x"].apply(lambda x: nth_element(x, 1))
df["device_sp2"] = df["second_positions_x"].apply(lambda x: nth_element(x, 1))
df["device_p2"] = df["projects_x"].apply(lambda x: nth_element(x, 1))
df["device_fd3"] = df["first_demands_x"].apply(lambda x: nth_element(x, 2))
df["device_sd3"] = df["second_demands_x"].apply(lambda x: nth_element(x, 2))
df["device_fs3"] = df["first_solutions_x"].apply(lambda x: nth_element(x, 2))
df["device_ss3"] = df["second_solutions_x"].apply(lambda x: nth_element(x, 2))
df["device_fp3"] = df["first_positions_x"].apply(lambda x: nth_element(x, 2))
df["device_sp3"] = df["second_positions_x"].apply(lambda x: nth_element(x, 2))
df["device_p3"] = df["projects_x"].apply(lambda x: nth_element(x, 2))
df["content_fd"] = df["first_demands_y"].apply(lambda x: nth_element(x, 0))
df["content_sd"] = df["second_demands_y"].apply(lambda x: nth_element(x, 0))
df["content_fs"] = df["first_solutions_y"].apply(lambda x: nth_element(x, 0))
......@@ -494,6 +538,20 @@ def device_tractate_fe(device_id, tractate_ids, device_dict, tractate_dict):
device_info["device_fp"] = nth_element(device_fp, 0)
device_info["device_sp"] = nth_element(device_sp, 0)
device_info["device_p"] = nth_element(device_p, 0)
device_info["device_fd2"] = nth_element(device_fd, 1)
device_info["device_sd2"] = nth_element(device_sd, 1)
device_info["device_fs2"] = nth_element(device_fs, 1)
device_info["device_ss2"] = nth_element(device_ss, 1)
device_info["device_fp2"] = nth_element(device_fp, 1)
device_info["device_sp2"] = nth_element(device_sp, 1)
device_info["device_p2"] = nth_element(device_p, 1)
device_info["device_fd3"] = nth_element(device_fd, 2)
device_info["device_sd3"] = nth_element(device_sd, 2)
device_info["device_fs3"] = nth_element(device_fs, 2)
device_info["device_ss3"] = nth_element(device_ss, 2)
device_info["device_fp3"] = nth_element(device_fp, 2)
device_info["device_sp3"] = nth_element(device_sp, 2)
device_info["device_p3"] = nth_element(device_p, 2)
tractate_lst = []
tractate_ids_res = []
for id in tractate_ids:
......
......@@ -167,6 +167,20 @@ _categorical_columns = [
"click_tractate_id4",
"click_tractate_id5",
"service_city",
"device_fd2",
"device_sd2",
"device_fs2",
"device_ss2",
"device_fp2",
"device_sp2",
"device_p2",
"device_fd3",
"device_sd3",
"device_fs3",
"device_ss3",
"device_fp3",
"device_sp3",
"device_p3",
]
PREDICTION_ALL_COLUMNS = _int_columns + _float_columns + _categorical_columns
......
......@@ -59,7 +59,9 @@ def main():
session_config = tf.compat.v1.ConfigProto()
session_config.gpu_options.allow_growth = True
session_config.gpu_options.per_process_gpu_memory_fraction = 0.9
session_config.gpu_options.per_process_gpu_memory_fraction = 0.7
session_config.inter_op_parallelism_threads = 1
session_config.intra_op_parallelism_threads = 1
estimator_config = tf.estimator.RunConfig(session_config=session_config)
model = tf.estimator.Estimator(model_fn=esmm_model_fn, params=params, model_dir=model_path, config=estimator_config)
......
......@@ -56,11 +56,14 @@ def main():
session_config = tf.compat.v1.ConfigProto()
session_config.gpu_options.allow_growth = True
session_config.gpu_options.per_process_gpu_memory_fraction = 0.9
session_config.gpu_options.per_process_gpu_memory_fraction = 0.7
session_config.inter_op_parallelism_threads = 1
session_config.intra_op_parallelism_threads = 1
estimator_config = tf.estimator.RunConfig(session_config=session_config)
model = tf.estimator.Estimator(model_fn=esmm_model_fn, params=params, model_dir=model_path, config=estimator_config)
train_spec = tf.estimator.TrainSpec(input_fn=lambda: esmm_input_fn(train_df, shuffle=True), max_steps=50000)
# TODO: restore max_steps to 50000 (presumably lowered to 15000 only for this experiment; confirm)
train_spec = tf.estimator.TrainSpec(input_fn=lambda: esmm_input_fn(train_df, shuffle=True), max_steps=15000)
eval_spec = tf.estimator.EvalSpec(input_fn=lambda: esmm_input_fn(val_df, shuffle=False))
res = tf.estimator.train_and_evaluate(model, train_spec, eval_spec)
print("@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@")
......@@ -75,8 +78,9 @@ def main():
model_export_path = str(Path("/data/files/models/tractate/").expanduser())
save_path = model_export(model, all_features, model_export_path)
print("save to: " + save_path)
set_essm_model_save_path("tractate", save_path)
record_esmm_auc_to_db("tractate", ctr_auc, ctcvr_auc, total_time, save_path)
# TODO: re-enable saving the model path and recording AUC (the two calls below are disabled for now)
# set_essm_model_save_path("tractate", save_path)
# record_esmm_auc_to_db("tractate", ctr_auc, ctcvr_auc, total_time, save_path)
print("============================================================")
# save_path = get_essm_model_save_path("tractate")
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment