Commit 1d9857f8 authored by 赵威

update steps

parent 932e5762
...@@ -29,14 +29,14 @@ def main(): ...@@ -29,14 +29,14 @@ def main():
device_df, diary_df, click_df, conversion_df = diary_fe.read_csv_data(Path("/srv/apps/node2vec_git/cvr_data/")) device_df, diary_df, click_df, conversion_df = diary_fe.read_csv_data(Path("/srv/apps/node2vec_git/cvr_data/"))
# print(diary_df.sample(1)) # print(diary_df.sample(1))
device_df = device_fe.device_feature_engineering(device_df) device_df = device_fe.device_feature_engineering(device_df)
print(device_df.sample(1)) # print(device_df.sample(1))
diary_df = diary_fe.diary_feature_engineering(diary_df) diary_df = diary_fe.diary_feature_engineering(diary_df)
print(diary_df.sample(1)) # print(diary_df.sample(1))
cc_df = diary_fe.click_feature_engineering(click_df, conversion_df) cc_df = diary_fe.click_feature_engineering(click_df, conversion_df)
print(cc_df.sample(1)) # print(cc_df.sample(1))
df = diary_fe.join_features(device_df, diary_df, cc_df) df = diary_fe.join_features(device_df, diary_df, cc_df)
print(df.sample(1)) # print(df.sample(1))
print(df.dtypes) # print(df.dtypes)
train_df, test_df = train_test_split(df, test_size=0.2) train_df, test_df = train_test_split(df, test_size=0.2)
train_df, val_df = train_test_split(train_df, test_size=0.2) train_df, val_df = train_test_split(train_df, test_size=0.2)
...@@ -53,8 +53,7 @@ def main(): ...@@ -53,8 +53,7 @@ def main():
estimator_config = tf.estimator.RunConfig(session_config=session_config) estimator_config = tf.estimator.RunConfig(session_config=session_config)
model = tf.estimator.Estimator(model_fn=esmm_model_fn, params=params, model_dir=model_path, config=estimator_config) model = tf.estimator.Estimator(model_fn=esmm_model_fn, params=params, model_dir=model_path, config=estimator_config)
# TODO 50000 train_spec = tf.estimator.TrainSpec(input_fn=lambda: esmm_input_fn(train_df, shuffle=True), max_steps=50000)
train_spec = tf.estimator.TrainSpec(input_fn=lambda: esmm_input_fn(train_df, shuffle=True), max_steps=20000)
eval_spec = tf.estimator.EvalSpec(input_fn=lambda: esmm_input_fn(val_df, shuffle=False)) eval_spec = tf.estimator.EvalSpec(input_fn=lambda: esmm_input_fn(val_df, shuffle=False))
tf.estimator.train_and_evaluate(model, train_spec, eval_spec) tf.estimator.train_and_evaluate(model, train_spec, eval_spec)
...@@ -74,7 +73,7 @@ def main(): ...@@ -74,7 +73,7 @@ def main():
predict_fn = tf.contrib.predictor.from_saved_model(save_path) predict_fn = tf.contrib.predictor.from_saved_model(save_path)
print("==============================") print("============================================================")
# device_id = "861601036552944" # device_id = "861601036552944"
# diary_ids = [ # diary_ids = [
# "16195283", "16838351", "17161073", "17297878", "17307484", "17396235", "16418737", "16995481", "17312201", "12237988" # "16195283", "16838351", "17161073", "17297878", "17307484", "17396235", "16418737", "16995481", "17312201", "12237988"
......
...@@ -5,6 +5,7 @@ from .utils import create_boundaries, create_vocabulary_list ...@@ -5,6 +5,7 @@ from .utils import create_boundaries, create_vocabulary_list
def build_features(df): def build_features(df):
# TODO
int_columns = ["active_days", "topic_num", "favor_num", "vote_num"] int_columns = ["active_days", "topic_num", "favor_num", "vote_num"]
float_columns = ["one_ctr", "three_ctr", "seven_ctr", "fifteen_ctr"] float_columns = ["one_ctr", "three_ctr", "seven_ctr", "fifteen_ctr"]
numeric_features = [] numeric_features = []
...@@ -14,6 +15,7 @@ def build_features(df): ...@@ -14,6 +15,7 @@ def build_features(df):
else: else:
numeric_features.append(fc.bucketized_column(fc.numeric_column(col), boundaries=create_boundaries(df, col))) numeric_features.append(fc.bucketized_column(fc.numeric_column(col), boundaries=create_boundaries(df, col)))
# TODO
categorical_columns = [ categorical_columns = [
"device_id", "active_type", "past_consume_ability_history", "potential_consume_ability_history", "device_id", "active_type", "past_consume_ability_history", "potential_consume_ability_history",
"price_sensitive_history", "card_id", "is_pure_author", "is_have_reply", "is_have_pure_reply", "content_level", "price_sensitive_history", "card_id", "is_pure_author", "is_have_reply", "is_have_pure_reply", "content_level",
......
...@@ -99,6 +99,7 @@ def model_predict_diary(device_id, diary_ids, device_dict, diary_dict, predict_f ...@@ -99,6 +99,7 @@ def model_predict_diary(device_id, diary_ids, device_dict, diary_dict, predict_f
time_1 = timeit.default_timer() time_1 = timeit.default_timer()
device_info, diary_lst, diary_ids_res = device_diary_fe(device_id, diary_ids, device_dict, diary_dict) device_info, diary_lst, diary_ids_res = device_diary_fe(device_id, diary_ids, device_dict, diary_dict)
print("predict check: " + str(len(diary_lst)) + " " + str(len(diary_ids_res))) print("predict check: " + str(len(diary_lst)) + " " + str(len(diary_ids_res)))
# TODO
int_columns = [ int_columns = [
"active_type", "active_days", "card_id", "is_pure_author", "is_have_reply", "is_have_pure_reply", "content_level", "active_type", "active_days", "card_id", "is_pure_author", "is_have_reply", "is_have_pure_reply", "content_level",
"topic_num", "favor_num", "vote_num" "topic_num", "favor_num", "vote_num"
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment