Commit 72ecf50f authored by 赵威

add printer

parent a08d32a5
...@@ -20,36 +20,36 @@ from models.esmm.model import esmm_model_fn, model_export, model_predict ...@@ -20,36 +20,36 @@ from models.esmm.model import esmm_model_fn, model_export, model_predict
def main(): def main():
time_begin = time.time() time_begin = time.time()
device_df, diary_df, click_df, conversion_df = read_csv_data(Path("~/data/cvr_data/")) # device_df, diary_df, click_df, conversion_df = read_csv_data(Path("~/data/cvr_data/"))
# print(diary_df.sample(1)) # # print(diary_df.sample(1))
device_df = device_feature_engineering(device_df) # device_df = device_feature_engineering(device_df)
# print(device_df.sample(1)) # # print(device_df.sample(1))
diary_df = diary_feature_engineering(diary_df) # diary_df = diary_feature_engineering(diary_df)
# print(diary_df.sample(1)) # # print(diary_df.sample(1))
cc_df = click_feature_engineering(click_df, conversion_df) # cc_df = click_feature_engineering(click_df, conversion_df)
df = join_features(device_df, diary_df, cc_df) # df = join_features(device_df, diary_df, cc_df)
train_df, test_df = train_test_split(df, test_size=0.2) # train_df, test_df = train_test_split(df, test_size=0.2)
train_df, val_df = train_test_split(train_df, test_size=0.2) # train_df, val_df = train_test_split(train_df, test_size=0.2)
all_features = build_features(df) # all_features = build_features(df)
params = {"feature_columns": all_features, "hidden_units": [64, 32], "learning_rate": 0.1} # params = {"feature_columns": all_features, "hidden_units": [64, 32], "learning_rate": 0.1}
model_path = str(Path("~/data/model_tmp/").expanduser()) # model_path = str(Path("~/data/model_tmp/").expanduser())
if os.path.exists(model_path): # if os.path.exists(model_path):
shutil.rmtree(model_path) # shutil.rmtree(model_path)
model = tf.estimator.Estimator(model_fn=esmm_model_fn, params=params, model_dir=model_path) # model = tf.estimator.Estimator(model_fn=esmm_model_fn, params=params, model_dir=model_path)
print("train") # print("train")
model.train(input_fn=lambda: esmm_input_fn(train_df, shuffle=True), steps=5000) # model.train(input_fn=lambda: esmm_input_fn(train_df, shuffle=True), steps=5000)
metrics = model.evaluate(input_fn=lambda: esmm_input_fn(val_df, False), steps=5000) # metrics = model.evaluate(input_fn=lambda: esmm_input_fn(val_df, False), steps=5000)
print("metrics: " + str(metrics)) # print("metrics: " + str(metrics))
model_export_path = str(Path("~/data/models/").expanduser()) # model_export_path = str(Path("~/data/models/").expanduser())
save_path = model_export(model, all_features, model_export_path) # save_path = model_export(model, all_features, model_export_path)
print("save to: " + save_path) # print("save to: " + save_path)
predict_fn = tf.contrib.predictor.from_saved_model(save_path) # predict_fn = tf.contrib.predictor.from_saved_model(save_path)
# for i in range(10): # for i in range(10):
# test_300 = test_df.sample(300) # test_300 = test_df.sample(300)
...@@ -63,13 +63,18 @@ def main(): ...@@ -63,13 +63,18 @@ def main():
df = get_device_df_from_redis() df = get_device_df_from_redis()
df2 = get_diary_df_from_redis() df2 = get_diary_df_from_redis()
redis_device_df = device_feature_engineering(df) redis_device_df = device_feature_engineering(df)
redis_diary_df = diary_feature_engineering(df2, from_redis=True) redis_diary_df = diary_feature_engineering(df2, from_redis=True)
time_1 = timeit.default_timer()
res = join_device_diary(device_id, diary_ids, redis_device_df, redis_diary_df) res = join_device_diary(device_id, diary_ids, redis_device_df, redis_diary_df)
print(len(res)) print(len(res))
print(res.sample(1), "\n")
print(res.sample(1))
model_predict(res, predict_fn) # model_predict(res, predict_fn)
# total_1 = (timeit.default_timer() - time_1)
# print("prediction total cost {:.5f}s".format(total_1))
total_time = (time.time() - time_begin) / 60 total_time = (time.time() - time_begin) / 60
print("cost {:.2f} mins at {}".format(total_time, datetime.now())) print("cost {:.2f} mins at {}".format(total_time, datetime.now()))
......
...@@ -113,6 +113,6 @@ def model_predict(inputs, predict_fn): ...@@ -113,6 +113,6 @@ def model_predict(inputs, predict_fn):
time_1 = timeit.default_timer() time_1 = timeit.default_timer()
predictions = predict_fn({"examples": examples}) predictions = predict_fn({"examples": examples})
total_1 = (timeit.default_timer() - time_1) total_1 = (timeit.default_timer() - time_1)
print("prediction cost {:.5f} s".format(total_1)) print("prediction cost {:.5f}s".format(total_1))
# print(predictions) # print(predictions)
return predictions return predictions
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment