Commit b49d7e6a authored by 赵威

try cross features for tractate

parent d8478ce8
...@@ -34,7 +34,7 @@ def user_portrait_scan_info(device_dict, diary_dict, tractate_dict, diary_predic ...@@ -34,7 +34,7 @@ def user_portrait_scan_info(device_dict, diary_dict, tractate_dict, diary_predic
all_count += 1 all_count += 1
print(str(all_count) + ": " + device_id) print(str(all_count) + ": " + device_id)
offline_predict_diary(device_id, device_dict, diary_dict, diary_predict_fn, size=predict_size) offline_predict_diary(device_id, device_dict, diary_dict, diary_predict_fn, size=predict_size)
print("---------------------") print("-------------------")
offline_predict_tractate(device_id, device_dict, tractate_dict, tractate_predict_fn, size=predict_size) offline_predict_tractate(device_id, device_dict, tractate_dict, tractate_predict_fn, size=predict_size)
print("=========================================\n") print("=========================================\n")
...@@ -61,7 +61,7 @@ def user_portrait_increment_scan_info(device_dict, diary_dict, tractate_dict, di ...@@ -61,7 +61,7 @@ def user_portrait_increment_scan_info(device_dict, diary_dict, tractate_dict, di
all_count += 1 all_count += 1
print(str(all_count) + ": " + device_id) print(str(all_count) + ": " + device_id)
offline_predict_diary(device_id, device_dict, diary_dict, diary_predict_fn, size=200) offline_predict_diary(device_id, device_dict, diary_dict, diary_predict_fn, size=200)
print("---------------------") print("-------------------")
offline_predict_tractate(device_id, device_dict, tractate_dict, tractate_predict_fn, size=200) offline_predict_tractate(device_id, device_dict, tractate_dict, tractate_predict_fn, size=200)
print("=========================================\n") print("=========================================\n")
......
...@@ -175,20 +175,20 @@ _categorical_columns = [ ...@@ -175,20 +175,20 @@ _categorical_columns = [
"click_diary_id4", "click_diary_id4",
"click_diary_id5", "click_diary_id5",
"service_city", "service_city",
# "device_fd2", "device_fd2",
# "device_sd2", "device_sd2",
# "device_fs2", "device_fs2",
# "device_ss2", "device_ss2",
# "device_fp2", "device_fp2",
# "device_sp2", "device_sp2",
# "device_p2", "device_p2",
# "device_fd3", "device_fd3",
# "device_sd3", "device_sd3",
# "device_fs3", "device_fs3",
# "device_ss3", "device_ss3",
# "device_fp3", "device_fp3",
# "device_sp3", "device_sp3",
# "device_p3", "device_p3",
] ]
PREDICTION_ALL_COLUMNS = _int_columns + _float_columns + _categorical_columns PREDICTION_ALL_COLUMNS = _int_columns + _float_columns + _categorical_columns
......
...@@ -303,20 +303,20 @@ CATEGORICAL_COLUMNS = [ ...@@ -303,20 +303,20 @@ CATEGORICAL_COLUMNS = [
"click_diary_id5", "click_diary_id5",
"is_related_service", "is_related_service",
"service_city", "service_city",
# "device_fd2", "device_fd2",
# "device_sd2", "device_sd2",
# "device_fs2", "device_fs2",
# "device_ss2", "device_ss2",
# "device_fp2", "device_fp2",
# "device_sp2", "device_sp2",
# "device_p2", "device_p2",
# "device_fd3", "device_fd3",
# "device_sd3", "device_sd3",
# "device_fs3", "device_fs3",
# "device_ss3", "device_ss3",
# "device_fp3", "device_fp3",
# "device_sp3", "device_sp3",
# "device_p3", "device_p3",
] ]
CROSS_COLUMNS = [ CROSS_COLUMNS = [
["device_fd", "content_fd"], ["device_fd", "content_fd"],
...@@ -326,20 +326,20 @@ CROSS_COLUMNS = [ ...@@ -326,20 +326,20 @@ CROSS_COLUMNS = [
["device_fp", "content_fp"], ["device_fp", "content_fp"],
["device_sp", "content_sp"], ["device_sp", "content_sp"],
["device_p", "content_p"], ["device_p", "content_p"],
# ["device_fd2", "content_fd"], ["device_fd2", "content_fd"],
# ["device_sd2", "content_sd"], ["device_sd2", "content_sd"],
# ["device_fs2", "content_fs"], ["device_fs2", "content_fs"],
# ["device_ss2", "content_ss"], ["device_ss2", "content_ss"],
# ["device_fp2", "content_fp"], ["device_fp2", "content_fp"],
# ["device_sp2", "content_sp"], ["device_sp2", "content_sp"],
# ["device_p2", "content_p"], ["device_p2", "content_p"],
# ["device_fd3", "content_fd"], ["device_fd3", "content_fd"],
# ["device_sd3", "content_sd"], ["device_sd3", "content_sd"],
# ["device_fs3", "content_fs"], ["device_fs3", "content_fs"],
# ["device_ss3", "content_ss"], ["device_ss3", "content_ss"],
# ["device_fp3", "content_fp"], ["device_fp3", "content_fp"],
# ["device_sp3", "content_sp"], ["device_sp3", "content_sp"],
# ["device_p3", "content_p"], ["device_p3", "content_p"],
] ]
...@@ -448,21 +448,21 @@ def join_features(device_df, diary_df, cc_df): ...@@ -448,21 +448,21 @@ def join_features(device_df, diary_df, cc_df):
df["device_sp"] = df["second_positions_x"].apply(lambda x: nth_element(x, 0)) df["device_sp"] = df["second_positions_x"].apply(lambda x: nth_element(x, 0))
df["device_p"] = df["projects_x"].apply(lambda x: nth_element(x, 0)) df["device_p"] = df["projects_x"].apply(lambda x: nth_element(x, 0))
# df["device_fd2"] = df["first_demands_x"].apply(lambda x: nth_element(x, 1)) df["device_fd2"] = df["first_demands_x"].apply(lambda x: nth_element(x, 1))
# df["device_sd2"] = df["second_demands_x"].apply(lambda x: nth_element(x, 1)) df["device_sd2"] = df["second_demands_x"].apply(lambda x: nth_element(x, 1))
# df["device_fs2"] = df["first_solutions_x"].apply(lambda x: nth_element(x, 1)) df["device_fs2"] = df["first_solutions_x"].apply(lambda x: nth_element(x, 1))
# df["device_ss2"] = df["second_solutions_x"].apply(lambda x: nth_element(x, 1)) df["device_ss2"] = df["second_solutions_x"].apply(lambda x: nth_element(x, 1))
# df["device_fp2"] = df["first_positions_x"].apply(lambda x: nth_element(x, 1)) df["device_fp2"] = df["first_positions_x"].apply(lambda x: nth_element(x, 1))
# df["device_sp2"] = df["second_positions_x"].apply(lambda x: nth_element(x, 1)) df["device_sp2"] = df["second_positions_x"].apply(lambda x: nth_element(x, 1))
# df["device_p2"] = df["projects_x"].apply(lambda x: nth_element(x, 1)) df["device_p2"] = df["projects_x"].apply(lambda x: nth_element(x, 1))
# df["device_fd3"] = df["first_demands_x"].apply(lambda x: nth_element(x, 2)) df["device_fd3"] = df["first_demands_x"].apply(lambda x: nth_element(x, 2))
# df["device_sd3"] = df["second_demands_x"].apply(lambda x: nth_element(x, 2)) df["device_sd3"] = df["second_demands_x"].apply(lambda x: nth_element(x, 2))
# df["device_fs3"] = df["first_solutions_x"].apply(lambda x: nth_element(x, 2)) df["device_fs3"] = df["first_solutions_x"].apply(lambda x: nth_element(x, 2))
# df["device_ss3"] = df["second_solutions_x"].apply(lambda x: nth_element(x, 2)) df["device_ss3"] = df["second_solutions_x"].apply(lambda x: nth_element(x, 2))
# df["device_fp3"] = df["first_positions_x"].apply(lambda x: nth_element(x, 2)) df["device_fp3"] = df["first_positions_x"].apply(lambda x: nth_element(x, 2))
# df["device_sp3"] = df["second_positions_x"].apply(lambda x: nth_element(x, 2)) df["device_sp3"] = df["second_positions_x"].apply(lambda x: nth_element(x, 2))
# df["device_p3"] = df["projects_x"].apply(lambda x: nth_element(x, 2)) df["device_p3"] = df["projects_x"].apply(lambda x: nth_element(x, 2))
df["content_fd"] = df["first_demands_y"].apply(lambda x: nth_element(x, 0)) df["content_fd"] = df["first_demands_y"].apply(lambda x: nth_element(x, 0))
df["content_sd"] = df["second_demands_y"].apply(lambda x: nth_element(x, 0)) df["content_sd"] = df["second_demands_y"].apply(lambda x: nth_element(x, 0))
...@@ -551,20 +551,20 @@ def device_diary_fe(device_id, diary_ids, device_dict, diary_dict): ...@@ -551,20 +551,20 @@ def device_diary_fe(device_id, diary_ids, device_dict, diary_dict):
device_info["device_fp"] = nth_element(device_fp, 0) device_info["device_fp"] = nth_element(device_fp, 0)
device_info["device_sp"] = nth_element(device_sp, 0) device_info["device_sp"] = nth_element(device_sp, 0)
device_info["device_p"] = nth_element(device_p, 0) device_info["device_p"] = nth_element(device_p, 0)
# device_info["device_fd2"] = nth_element(device_fd, 1) device_info["device_fd2"] = nth_element(device_fd, 1)
# device_info["device_sd2"] = nth_element(device_sd, 1) device_info["device_sd2"] = nth_element(device_sd, 1)
# device_info["device_fs2"] = nth_element(device_fs, 1) device_info["device_fs2"] = nth_element(device_fs, 1)
# device_info["device_ss2"] = nth_element(device_ss, 1) device_info["device_ss2"] = nth_element(device_ss, 1)
# device_info["device_fp2"] = nth_element(device_fp, 1) device_info["device_fp2"] = nth_element(device_fp, 1)
# device_info["device_sp2"] = nth_element(device_sp, 1) device_info["device_sp2"] = nth_element(device_sp, 1)
# device_info["device_p2"] = nth_element(device_p, 1) device_info["device_p2"] = nth_element(device_p, 1)
# device_info["device_fd3"] = nth_element(device_fd, 2) device_info["device_fd3"] = nth_element(device_fd, 2)
# device_info["device_sd3"] = nth_element(device_sd, 2) device_info["device_sd3"] = nth_element(device_sd, 2)
# device_info["device_fs3"] = nth_element(device_fs, 2) device_info["device_fs3"] = nth_element(device_fs, 2)
# device_info["device_ss3"] = nth_element(device_ss, 2) device_info["device_ss3"] = nth_element(device_ss, 2)
# device_info["device_fp3"] = nth_element(device_fp, 2) device_info["device_fp3"] = nth_element(device_fp, 2)
# device_info["device_sp3"] = nth_element(device_sp, 2) device_info["device_sp3"] = nth_element(device_sp, 2)
# device_info["device_p3"] = nth_element(device_p, 2) device_info["device_p3"] = nth_element(device_p, 2)
diary_lst = [] diary_lst = []
diary_ids_res = [] diary_ids_res = []
for id in diary_ids: for id in diary_ids:
......
...@@ -288,6 +288,20 @@ CATEGORICAL_COLUMNS = [ ...@@ -288,6 +288,20 @@ CATEGORICAL_COLUMNS = [
"click_tractate_id5", "click_tractate_id5",
"is_related_service", "is_related_service",
"service_city", "service_city",
"device_fd2",
"device_sd2",
"device_fs2",
"device_ss2",
"device_fp2",
"device_sp2",
"device_p2",
"device_fd3",
"device_sd3",
"device_fs3",
"device_ss3",
"device_fp3",
"device_sp3",
"device_p3",
] ]
CROSS_COLUMNS = [ CROSS_COLUMNS = [
["device_fd", "content_fd"], ["device_fd", "content_fd"],
...@@ -297,6 +311,20 @@ CROSS_COLUMNS = [ ...@@ -297,6 +311,20 @@ CROSS_COLUMNS = [
["device_fp", "content_fp"], ["device_fp", "content_fp"],
["device_sp", "content_sp"], ["device_sp", "content_sp"],
["device_p", "content_p"], ["device_p", "content_p"],
["device_fd2", "content_fd"],
["device_sd2", "content_sd"],
["device_fs2", "content_fs"],
["device_ss2", "content_ss"],
["device_fp2", "content_fp"],
["device_sp2", "content_sp"],
["device_p2", "content_p"],
["device_fd3", "content_fd"],
["device_sd3", "content_sd"],
["device_fs3", "content_fs"],
["device_ss3", "content_ss"],
["device_fp3", "content_fp"],
["device_sp3", "content_sp"],
["device_p3", "content_p"],
] ]
...@@ -406,6 +434,22 @@ def join_features(device_df, tractate_df, cc_df): ...@@ -406,6 +434,22 @@ def join_features(device_df, tractate_df, cc_df):
df["device_sp"] = df["second_positions_x"].apply(lambda x: nth_element(x, 0)) df["device_sp"] = df["second_positions_x"].apply(lambda x: nth_element(x, 0))
df["device_p"] = df["projects_x"].apply(lambda x: nth_element(x, 0)) df["device_p"] = df["projects_x"].apply(lambda x: nth_element(x, 0))
df["device_fd2"] = df["first_demands_x"].apply(lambda x: nth_element(x, 1))
df["device_sd2"] = df["second_demands_x"].apply(lambda x: nth_element(x, 1))
df["device_fs2"] = df["first_solutions_x"].apply(lambda x: nth_element(x, 1))
df["device_ss2"] = df["second_solutions_x"].apply(lambda x: nth_element(x, 1))
df["device_fp2"] = df["first_positions_x"].apply(lambda x: nth_element(x, 1))
df["device_sp2"] = df["second_positions_x"].apply(lambda x: nth_element(x, 1))
df["device_p2"] = df["projects_x"].apply(lambda x: nth_element(x, 1))
df["device_fd3"] = df["first_demands_x"].apply(lambda x: nth_element(x, 2))
df["device_sd3"] = df["second_demands_x"].apply(lambda x: nth_element(x, 2))
df["device_fs3"] = df["first_solutions_x"].apply(lambda x: nth_element(x, 2))
df["device_ss3"] = df["second_solutions_x"].apply(lambda x: nth_element(x, 2))
df["device_fp3"] = df["first_positions_x"].apply(lambda x: nth_element(x, 2))
df["device_sp3"] = df["second_positions_x"].apply(lambda x: nth_element(x, 2))
df["device_p3"] = df["projects_x"].apply(lambda x: nth_element(x, 2))
df["content_fd"] = df["first_demands_y"].apply(lambda x: nth_element(x, 0)) df["content_fd"] = df["first_demands_y"].apply(lambda x: nth_element(x, 0))
df["content_sd"] = df["second_demands_y"].apply(lambda x: nth_element(x, 0)) df["content_sd"] = df["second_demands_y"].apply(lambda x: nth_element(x, 0))
df["content_fs"] = df["first_solutions_y"].apply(lambda x: nth_element(x, 0)) df["content_fs"] = df["first_solutions_y"].apply(lambda x: nth_element(x, 0))
...@@ -494,6 +538,20 @@ def device_tractate_fe(device_id, tractate_ids, device_dict, tractate_dict): ...@@ -494,6 +538,20 @@ def device_tractate_fe(device_id, tractate_ids, device_dict, tractate_dict):
device_info["device_fp"] = nth_element(device_fp, 0) device_info["device_fp"] = nth_element(device_fp, 0)
device_info["device_sp"] = nth_element(device_sp, 0) device_info["device_sp"] = nth_element(device_sp, 0)
device_info["device_p"] = nth_element(device_p, 0) device_info["device_p"] = nth_element(device_p, 0)
device_info["device_fd2"] = nth_element(device_fd, 1)
device_info["device_sd2"] = nth_element(device_sd, 1)
device_info["device_fs2"] = nth_element(device_fs, 1)
device_info["device_ss2"] = nth_element(device_ss, 1)
device_info["device_fp2"] = nth_element(device_fp, 1)
device_info["device_sp2"] = nth_element(device_sp, 1)
device_info["device_p2"] = nth_element(device_p, 1)
device_info["device_fd3"] = nth_element(device_fd, 2)
device_info["device_sd3"] = nth_element(device_sd, 2)
device_info["device_fs3"] = nth_element(device_fs, 2)
device_info["device_ss3"] = nth_element(device_ss, 2)
device_info["device_fp3"] = nth_element(device_fp, 2)
device_info["device_sp3"] = nth_element(device_sp, 2)
device_info["device_p3"] = nth_element(device_p, 2)
tractate_lst = [] tractate_lst = []
tractate_ids_res = [] tractate_ids_res = []
for id in tractate_ids: for id in tractate_ids:
......
...@@ -167,6 +167,20 @@ _categorical_columns = [ ...@@ -167,6 +167,20 @@ _categorical_columns = [
"click_tractate_id4", "click_tractate_id4",
"click_tractate_id5", "click_tractate_id5",
"service_city", "service_city",
"device_fd2",
"device_sd2",
"device_fs2",
"device_ss2",
"device_fp2",
"device_sp2",
"device_p2",
"device_fd3",
"device_sd3",
"device_fs3",
"device_ss3",
"device_fp3",
"device_sp3",
"device_p3",
] ]
PREDICTION_ALL_COLUMNS = _int_columns + _float_columns + _categorical_columns PREDICTION_ALL_COLUMNS = _int_columns + _float_columns + _categorical_columns
......
...@@ -59,7 +59,9 @@ def main(): ...@@ -59,7 +59,9 @@ def main():
session_config = tf.compat.v1.ConfigProto() session_config = tf.compat.v1.ConfigProto()
session_config.gpu_options.allow_growth = True session_config.gpu_options.allow_growth = True
session_config.gpu_options.per_process_gpu_memory_fraction = 0.9 session_config.gpu_options.per_process_gpu_memory_fraction = 0.7
session_config.inter_op_parallelism_threads = 1
session_config.intra_op_parallelism_threads = 1
estimator_config = tf.estimator.RunConfig(session_config=session_config) estimator_config = tf.estimator.RunConfig(session_config=session_config)
model = tf.estimator.Estimator(model_fn=esmm_model_fn, params=params, model_dir=model_path, config=estimator_config) model = tf.estimator.Estimator(model_fn=esmm_model_fn, params=params, model_dir=model_path, config=estimator_config)
......
...@@ -56,11 +56,14 @@ def main(): ...@@ -56,11 +56,14 @@ def main():
session_config = tf.compat.v1.ConfigProto() session_config = tf.compat.v1.ConfigProto()
session_config.gpu_options.allow_growth = True session_config.gpu_options.allow_growth = True
session_config.gpu_options.per_process_gpu_memory_fraction = 0.9 session_config.gpu_options.per_process_gpu_memory_fraction = 0.7
session_config.inter_op_parallelism_threads = 1
session_config.intra_op_parallelism_threads = 1
estimator_config = tf.estimator.RunConfig(session_config=session_config) estimator_config = tf.estimator.RunConfig(session_config=session_config)
model = tf.estimator.Estimator(model_fn=esmm_model_fn, params=params, model_dir=model_path, config=estimator_config) model = tf.estimator.Estimator(model_fn=esmm_model_fn, params=params, model_dir=model_path, config=estimator_config)
train_spec = tf.estimator.TrainSpec(input_fn=lambda: esmm_input_fn(train_df, shuffle=True), max_steps=50000) # TODO 50000
train_spec = tf.estimator.TrainSpec(input_fn=lambda: esmm_input_fn(train_df, shuffle=True), max_steps=15000)
eval_spec = tf.estimator.EvalSpec(input_fn=lambda: esmm_input_fn(val_df, shuffle=False)) eval_spec = tf.estimator.EvalSpec(input_fn=lambda: esmm_input_fn(val_df, shuffle=False))
res = tf.estimator.train_and_evaluate(model, train_spec, eval_spec) res = tf.estimator.train_and_evaluate(model, train_spec, eval_spec)
print("@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@") print("@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@")
...@@ -75,8 +78,9 @@ def main(): ...@@ -75,8 +78,9 @@ def main():
model_export_path = str(Path("/data/files/models/tractate/").expanduser()) model_export_path = str(Path("/data/files/models/tractate/").expanduser())
save_path = model_export(model, all_features, model_export_path) save_path = model_export(model, all_features, model_export_path)
print("save to: " + save_path) print("save to: " + save_path)
set_essm_model_save_path("tractate", save_path) # TODO save model
record_esmm_auc_to_db("tractate", ctr_auc, ctcvr_auc, total_time, save_path) # set_essm_model_save_path("tractate", save_path)
# record_esmm_auc_to_db("tractate", ctr_auc, ctcvr_auc, total_time, save_path)
print("============================================================") print("============================================================")
# save_path = get_essm_model_save_path("tractate") # save_path = get_essm_model_save_path("tractate")
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment