Commit 1a32da74 authored by 赵威

retrain

parent 883c544f
...@@ -72,22 +72,6 @@ _int_columns = [ ...@@ -72,22 +72,6 @@ _int_columns = [
"sixty_browse_user_num", "sixty_browse_user_num",
"ninety_browse_user_num", "ninety_browse_user_num",
"history_browse_user_num", "history_browse_user_num",
"one_vote_pure_rate",
"three_vote_pure_rate",
"seven_vote_pure_rate",
"fifteen_vote_pure_rate",
"thirty_vote_pure_rate",
"sixty_vote_pure_rate",
"ninety_vote_pure_rate",
"history_vote_pure_rate",
"one_reply_pure_rate",
"three_reply_pure_rate",
"seven_reply_pure_rate",
"fifteen_reply_pure_rate",
"thirty_reply_pure_rate",
"sixty_reply_pure_rate",
"ninety_reply_pure_rate",
"history_reply_pure_rate",
] ]
_float_columns = [ _float_columns = [
"one_ctr", "one_ctr",
...@@ -98,6 +82,22 @@ _float_columns = [ ...@@ -98,6 +82,22 @@ _float_columns = [
"sixty_ctr", "sixty_ctr",
"ninety_ctr", "ninety_ctr",
"history_ctr", "history_ctr",
# "one_vote_pure_rate",
# "three_vote_pure_rate",
# "seven_vote_pure_rate",
# "fifteen_vote_pure_rate",
# "thirty_vote_pure_rate",
# "sixty_vote_pure_rate",
# "ninety_vote_pure_rate",
# "history_vote_pure_rate",
# "one_reply_pure_rate",
# "three_reply_pure_rate",
# "seven_reply_pure_rate",
# "fifteen_reply_pure_rate",
# "thirty_reply_pure_rate",
# "sixty_reply_pure_rate",
# "ninety_reply_pure_rate",
# "history_reply_pure_rate",
] ]
_categorical_columns = [ _categorical_columns = [
"device_id", "past_consume_ability_history", "potential_consume_ability_history", "price_sensitive_history", "device_fd", "device_id", "past_consume_ability_history", "potential_consume_ability_history", "price_sensitive_history", "device_fd",
......
...@@ -70,22 +70,22 @@ DIARY_COLUMNS = [ ...@@ -70,22 +70,22 @@ DIARY_COLUMNS = [
"sixty_browse_user_num", "sixty_browse_user_num",
"ninety_browse_user_num", "ninety_browse_user_num",
"history_browse_user_num", "history_browse_user_num",
"one_vote_pure_rate", # "one_vote_pure_rate",
"three_vote_pure_rate", # "three_vote_pure_rate",
"seven_vote_pure_rate", # "seven_vote_pure_rate",
"fifteen_vote_pure_rate", # "fifteen_vote_pure_rate",
"thirty_vote_pure_rate", # "thirty_vote_pure_rate",
"sixty_vote_pure_rate", # "sixty_vote_pure_rate",
"ninety_vote_pure_rate", # "ninety_vote_pure_rate",
"history_vote_pure_rate", # "history_vote_pure_rate",
"one_reply_pure_rate", # "one_reply_pure_rate",
"three_reply_pure_rate", # "three_reply_pure_rate",
"seven_reply_pure_rate", # "seven_reply_pure_rate",
"fifteen_reply_pure_rate", # "fifteen_reply_pure_rate",
"thirty_reply_pure_rate", # "thirty_reply_pure_rate",
"sixty_reply_pure_rate", # "sixty_reply_pure_rate",
"ninety_reply_pure_rate", # "ninety_reply_pure_rate",
"history_reply_pure_rate", # "history_reply_pure_rate",
"one_ctr", "one_ctr",
"three_ctr", "three_ctr",
"seven_ctr", "seven_ctr",
...@@ -173,22 +173,22 @@ FLOAT_COLUMNS = [ ...@@ -173,22 +173,22 @@ FLOAT_COLUMNS = [
"sixty_ctr", "sixty_ctr",
"ninety_ctr", "ninety_ctr",
"history_ctr", "history_ctr",
"one_vote_pure_rate", # "one_vote_pure_rate",
"three_vote_pure_rate", # "three_vote_pure_rate",
"seven_vote_pure_rate", # "seven_vote_pure_rate",
"fifteen_vote_pure_rate", # "fifteen_vote_pure_rate",
"thirty_vote_pure_rate", # "thirty_vote_pure_rate",
"sixty_vote_pure_rate", # "sixty_vote_pure_rate",
"ninety_vote_pure_rate", # "ninety_vote_pure_rate",
"history_vote_pure_rate", # "history_vote_pure_rate",
"one_reply_pure_rate", # "one_reply_pure_rate",
"three_reply_pure_rate", # "three_reply_pure_rate",
"seven_reply_pure_rate", # "seven_reply_pure_rate",
"fifteen_reply_pure_rate", # "fifteen_reply_pure_rate",
"thirty_reply_pure_rate", # "thirty_reply_pure_rate",
"sixty_reply_pure_rate", # "sixty_reply_pure_rate",
"ninety_reply_pure_rate", # "ninety_reply_pure_rate",
"history_reply_pure_rate", # "history_reply_pure_rate",
] ]
CATEGORICAL_COLUMNS = [ CATEGORICAL_COLUMNS = [
"device_id", "active_type", "past_consume_ability_history", "potential_consume_ability_history", "price_sensitive_history", "device_id", "active_type", "past_consume_ability_history", "potential_consume_ability_history", "price_sensitive_history",
......
...@@ -62,22 +62,22 @@ TRACTATE_COLUMNS = [ ...@@ -62,22 +62,22 @@ TRACTATE_COLUMNS = [
"sixty_browse_user_num", "sixty_browse_user_num",
"ninety_browse_user_num", "ninety_browse_user_num",
"history_browse_user_num", "history_browse_user_num",
"one_vote_pure_rate", # "one_vote_pure_rate",
"three_vote_pure_rate", # "three_vote_pure_rate",
"seven_vote_pure_rate", # "seven_vote_pure_rate",
"fifteen_vote_pure_rate", # "fifteen_vote_pure_rate",
"thirty_vote_pure_rate", # "thirty_vote_pure_rate",
"sixty_vote_pure_rate", # "sixty_vote_pure_rate",
"ninety_vote_pure_rate", # "ninety_vote_pure_rate",
"history_vote_pure_rate", # "history_vote_pure_rate",
"one_reply_pure_rate", # "one_reply_pure_rate",
"three_reply_pure_rate", # "three_reply_pure_rate",
"seven_reply_pure_rate", # "seven_reply_pure_rate",
"fifteen_reply_pure_rate", # "fifteen_reply_pure_rate",
"thirty_reply_pure_rate", # "thirty_reply_pure_rate",
"sixty_reply_pure_rate", # "sixty_reply_pure_rate",
"ninety_reply_pure_rate", # "ninety_reply_pure_rate",
"history_reply_pure_rate", # "history_reply_pure_rate",
"one_ctr", "one_ctr",
"three_ctr", "three_ctr",
"seven_ctr", "seven_ctr",
...@@ -157,22 +157,22 @@ FLOAT_COLUMNS = [ ...@@ -157,22 +157,22 @@ FLOAT_COLUMNS = [
"sixty_ctr", "sixty_ctr",
"ninety_ctr", "ninety_ctr",
"history_ctr", "history_ctr",
"one_vote_pure_rate", # "one_vote_pure_rate",
"three_vote_pure_rate", # "three_vote_pure_rate",
"seven_vote_pure_rate", # "seven_vote_pure_rate",
"fifteen_vote_pure_rate", # "fifteen_vote_pure_rate",
"thirty_vote_pure_rate", # "thirty_vote_pure_rate",
"sixty_vote_pure_rate", # "sixty_vote_pure_rate",
"ninety_vote_pure_rate", # "ninety_vote_pure_rate",
"history_vote_pure_rate", # "history_vote_pure_rate",
"one_reply_pure_rate", # "one_reply_pure_rate",
"three_reply_pure_rate", # "three_reply_pure_rate",
"seven_reply_pure_rate", # "seven_reply_pure_rate",
"fifteen_reply_pure_rate", # "fifteen_reply_pure_rate",
"thirty_reply_pure_rate", # "thirty_reply_pure_rate",
"sixty_reply_pure_rate", # "sixty_reply_pure_rate",
"ninety_reply_pure_rate", # "ninety_reply_pure_rate",
"history_reply_pure_rate", # "history_reply_pure_rate",
] ]
CATEGORICAL_COLUMNS = [ CATEGORICAL_COLUMNS = [
"device_id", "active_type", "past_consume_ability_history", "potential_consume_ability_history", "price_sensitive_history", "device_id", "active_type", "past_consume_ability_history", "potential_consume_ability_history", "price_sensitive_history",
......
...@@ -73,22 +73,22 @@ _float_columns = [ ...@@ -73,22 +73,22 @@ _float_columns = [
"sixty_ctr", "sixty_ctr",
"ninety_ctr", "ninety_ctr",
"history_ctr", "history_ctr",
"one_vote_pure_rate", # "one_vote_pure_rate",
"three_vote_pure_rate", # "three_vote_pure_rate",
"seven_vote_pure_rate", # "seven_vote_pure_rate",
"fifteen_vote_pure_rate", # "fifteen_vote_pure_rate",
"thirty_vote_pure_rate", # "thirty_vote_pure_rate",
"sixty_vote_pure_rate", # "sixty_vote_pure_rate",
"ninety_vote_pure_rate", # "ninety_vote_pure_rate",
"history_vote_pure_rate", # "history_vote_pure_rate",
"one_reply_pure_rate", # "one_reply_pure_rate",
"three_reply_pure_rate", # "three_reply_pure_rate",
"seven_reply_pure_rate", # "seven_reply_pure_rate",
"fifteen_reply_pure_rate", # "fifteen_reply_pure_rate",
"thirty_reply_pure_rate", # "thirty_reply_pure_rate",
"sixty_reply_pure_rate", # "sixty_reply_pure_rate",
"ninety_reply_pure_rate", # "ninety_reply_pure_rate",
"history_reply_pure_rate", # "history_reply_pure_rate",
] ]
_categorical_columns = [ _categorical_columns = [
"device_id", "past_consume_ability_history", "potential_consume_ability_history", "price_sensitive_history", "show_tag_id", "device_id", "past_consume_ability_history", "potential_consume_ability_history", "price_sensitive_history", "show_tag_id",
......
...@@ -45,13 +45,13 @@ if __name__ == "__main__": ...@@ -45,13 +45,13 @@ if __name__ == "__main__":
diary_save_path = get_essm_model_save_path("diary") diary_save_path = get_essm_model_save_path("diary")
if not diary_save_path: if not diary_save_path:
diary_save_path = "/home/gmuser/data/models/diary/1597050209" diary_save_path = "/home/gmuser/data/models/diary/1597379800"
print(diary_save_path + "!!!!!!!!!!!!!!!!!!!!!!!!!!!") print(diary_save_path + "!!!!!!!!!!!!!!!!!!!!!!!!!!!")
diary_predict_fn = tf.contrib.predictor.from_saved_model(diary_save_path) diary_predict_fn = tf.contrib.predictor.from_saved_model(diary_save_path)
tractate_save_path = get_essm_model_save_path("tractate") tractate_save_path = get_essm_model_save_path("tractate")
if not tractate_save_path: if not tractate_save_path:
tractate_save_path = "/home/gmuser/data/models/tractate/1596509299" tractate_save_path = "/home/gmuser/data/models/tractate/1597378202"
print(tractate_save_path + "!!!!!!!!!!!!!!!!!!!!!!!!!!!") print(tractate_save_path + "!!!!!!!!!!!!!!!!!!!!!!!!!!!")
tractate_predict_fn = tf.contrib.predictor.from_saved_model(tractate_save_path) tractate_predict_fn = tf.contrib.predictor.from_saved_model(tractate_save_path)
......
...@@ -9,8 +9,7 @@ from pathlib import Path ...@@ -9,8 +9,7 @@ from pathlib import Path
import tensorflow as tf import tensorflow as tf
from sklearn.model_selection import train_test_split from sklearn.model_selection import train_test_split
from models.esmm.diary_model import (PREDICTION_ALL_COLUMNS, from models.esmm.diary_model import PREDICTION_ALL_COLUMNS, model_predict_diary
model_predict_diary)
from models.esmm.fe import click_fe, device_fe, diary_fe, fe from models.esmm.fe import click_fe, device_fe, diary_fe, fe
from models.esmm.input_fn import esmm_input_fn from models.esmm.input_fn import esmm_input_fn
from models.esmm.model import esmm_model_fn, model_export from models.esmm.model import esmm_model_fn, model_export
...@@ -24,70 +23,71 @@ def main(): ...@@ -24,70 +23,71 @@ def main():
# os.environ["CUDA_VISIBLE_DEVICES"] = "-1" # os.environ["CUDA_VISIBLE_DEVICES"] = "-1"
# diary_train_columns = set(diary_fe.INT_COLUMNS + diary_fe.FLOAT_COLUMNS + diary_fe.CATEGORICAL_COLUMNS) diary_train_columns = set(diary_fe.INT_COLUMNS + diary_fe.FLOAT_COLUMNS + diary_fe.CATEGORICAL_COLUMNS)
# print("features: " + str(len(diary_train_columns))) print("features: " + str(len(diary_train_columns)))
# diary_predict_columns = set(PREDICTION_ALL_COLUMNS) diary_predict_columns = set(PREDICTION_ALL_COLUMNS)
# print(diary_predict_columns.difference(diary_train_columns)) print(diary_predict_columns.difference(diary_train_columns))
# print(diary_train_columns.difference(diary_predict_columns)) print(diary_train_columns.difference(diary_predict_columns))
# assert diary_predict_columns == diary_train_columns assert diary_predict_columns == diary_train_columns
# # dataset_path = Path("~/data/cvr_data").expanduser() # local # dataset_path = Path("~/data/cvr_data").expanduser() # local
# dataset_path = Path("/srv/apps/node2vec_git/cvr_data/") # server dataset_path = Path("/srv/apps/node2vec_git/cvr_data/") # server
# diary_df, diary_click_df, diary_conversion_df = diary_fe.read_csv_data(dataset_path) diary_df, diary_click_df, diary_conversion_df = diary_fe.read_csv_data(dataset_path)
# # print(diary_df.sample(1)) # print(diary_df.sample(1))
# diary_df = diary_fe.diary_feature_engineering(diary_df) diary_df = diary_fe.diary_feature_engineering(diary_df)
# # print(diary_df.sample(1)) # print(diary_df.sample(1))
# device_df = device_fe.read_csv_data(dataset_path) device_df = device_fe.read_csv_data(dataset_path)
# # print(diary_df.sample(1)) # print(diary_df.sample(1))
# device_df = device_fe.device_feature_engineering(device_df, "diary") device_df = device_fe.device_feature_engineering(device_df, "diary")
# # print(device_df.sample(1)) # print(device_df.sample(1))
# cc_df = click_fe.click_feature_engineering(diary_click_df, diary_conversion_df) cc_df = click_fe.click_feature_engineering(diary_click_df, diary_conversion_df)
# # print(cc_df.sample(1)) # print(cc_df.sample(1))
# df = diary_fe.join_features(device_df, diary_df, cc_df) df = diary_fe.join_features(device_df, diary_df, cc_df)
# # print(df.sample(1)) # print(df.sample(1))
# # print(df.dtypes) # print(df.dtypes)
# train_df, test_df = train_test_split(df, test_size=0.2) train_df, test_df = train_test_split(df, test_size=0.2)
# train_df, val_df = train_test_split(train_df, test_size=0.2) train_df, val_df = train_test_split(train_df, test_size=0.2)
# all_features = fe.build_features(df, diary_fe.INT_COLUMNS, diary_fe.FLOAT_COLUMNS, diary_fe.CATEGORICAL_COLUMNS) all_features = fe.build_features(df, diary_fe.INT_COLUMNS, diary_fe.FLOAT_COLUMNS, diary_fe.CATEGORICAL_COLUMNS)
# params = {"feature_columns": all_features, "hidden_units": [64, 32], "learning_rate": 0.1} params = {"feature_columns": all_features, "hidden_units": [64, 32], "learning_rate": 0.1}
# model_path = str(Path("~/data/model_tmp/diary/").expanduser()) model_path = str(Path("~/data/model_tmp/diary/").expanduser())
# if os.path.exists(model_path): if os.path.exists(model_path):
# shutil.rmtree(model_path) shutil.rmtree(model_path)
# session_config = tf.compat.v1.ConfigProto() session_config = tf.compat.v1.ConfigProto()
# session_config.gpu_options.allow_growth = True session_config.gpu_options.allow_growth = True
# session_config.gpu_options.per_process_gpu_memory_fraction = 0.9 session_config.gpu_options.per_process_gpu_memory_fraction = 0.9
# estimator_config = tf.estimator.RunConfig(session_config=session_config) estimator_config = tf.estimator.RunConfig(session_config=session_config)
# model = tf.estimator.Estimator(model_fn=esmm_model_fn, params=params, model_dir=model_path, config=estimator_config) model = tf.estimator.Estimator(model_fn=esmm_model_fn, params=params, model_dir=model_path, config=estimator_config)
# train_spec = tf.estimator.TrainSpec(input_fn=lambda: esmm_input_fn(train_df, shuffle=True), max_steps=50000) train_spec = tf.estimator.TrainSpec(input_fn=lambda: esmm_input_fn(train_df, shuffle=True), max_steps=50000)
# eval_spec = tf.estimator.EvalSpec(input_fn=lambda: esmm_input_fn(val_df, shuffle=False)) eval_spec = tf.estimator.EvalSpec(input_fn=lambda: esmm_input_fn(val_df, shuffle=False))
# res = tf.estimator.train_and_evaluate(model, train_spec, eval_spec) res = tf.estimator.train_and_evaluate(model, train_spec, eval_spec)
# print("@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@") print("@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@")
# print(res[0]) print(res[0])
# print("ctr_auc: " + str(res[0]["ctr_auc"])) print("ctr_auc: " + str(res[0]["ctr_auc"]))
# print("ctcvr_auc: " + str(res[0]["ctcvr_auc"])) print("ctcvr_auc: " + str(res[0]["ctcvr_auc"]))
# print("@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@") print("@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@")
# model_export_path = str(Path("~/data/models/diary").expanduser()) model_export_path = str(Path("~/data/models/diary").expanduser())
# save_path = model_export(model, all_features, model_export_path) save_path = model_export(model, all_features, model_export_path)
# print("save to: " + save_path) print("save to: " + save_path)
# set_essm_model_save_path("diary", save_path) set_essm_model_save_path("diary", save_path)
# print("============================================================") print("============================================================")
# save_path = str(Path("~/Desktop/models/1596012827").expanduser()) # local # save_path = str(Path("~/Desktop/models/1596012827").expanduser()) # local
# save_path = "/home/gmuser/data/models/diary/1596083349" # server # save_path = "/home/gmuser/data/models/diary/1596083349" # server
# tf.saved_model.load # tf.saved_model.load
save_path = get_essm_model_save_path("diary") # save_path = get_essm_model_save_path("diary")
# print("load path: " + save_path)
predict_fn = tf.contrib.predictor.from_saved_model(save_path) predict_fn = tf.contrib.predictor.from_saved_model(save_path)
device_dict = device_fe.get_device_dict_from_redis() device_dict = device_fe.get_device_dict_from_redis()
diary_dict = diary_fe.get_diary_dict_from_redis() diary_dict = diary_fe.get_diary_dict_from_redis()
print("redis data: " + str(len(device_dict)) + " " + str(len(diary_dict))) print("redis data: " + str(len(device_dict)) + " " + str(len(diary_dict)))
device_ids = list(device_dict.keys())[:20] device_ids = list(device_dict.keys())[:20]
diary_ids = list(diary_dict.keys()) diary_ids = list(diary_dict.keys())
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment