Commit 72ecf50f authored by 赵威

add printer

parent a08d32a5
......@@ -20,36 +20,36 @@ from models.esmm.model import esmm_model_fn, model_export, model_predict
def main():
time_begin = time.time()
device_df, diary_df, click_df, conversion_df = read_csv_data(Path("~/data/cvr_data/"))
# print(diary_df.sample(1))
device_df = device_feature_engineering(device_df)
# print(device_df.sample(1))
diary_df = diary_feature_engineering(diary_df)
# print(diary_df.sample(1))
cc_df = click_feature_engineering(click_df, conversion_df)
df = join_features(device_df, diary_df, cc_df)
# device_df, diary_df, click_df, conversion_df = read_csv_data(Path("~/data/cvr_data/"))
# # print(diary_df.sample(1))
# device_df = device_feature_engineering(device_df)
# # print(device_df.sample(1))
# diary_df = diary_feature_engineering(diary_df)
# # print(diary_df.sample(1))
# cc_df = click_feature_engineering(click_df, conversion_df)
# df = join_features(device_df, diary_df, cc_df)
train_df, test_df = train_test_split(df, test_size=0.2)
train_df, val_df = train_test_split(train_df, test_size=0.2)
# train_df, test_df = train_test_split(df, test_size=0.2)
# train_df, val_df = train_test_split(train_df, test_size=0.2)
all_features = build_features(df)
# all_features = build_features(df)
params = {"feature_columns": all_features, "hidden_units": [64, 32], "learning_rate": 0.1}
model_path = str(Path("~/data/model_tmp/").expanduser())
if os.path.exists(model_path):
shutil.rmtree(model_path)
model = tf.estimator.Estimator(model_fn=esmm_model_fn, params=params, model_dir=model_path)
# params = {"feature_columns": all_features, "hidden_units": [64, 32], "learning_rate": 0.1}
# model_path = str(Path("~/data/model_tmp/").expanduser())
# if os.path.exists(model_path):
# shutil.rmtree(model_path)
# model = tf.estimator.Estimator(model_fn=esmm_model_fn, params=params, model_dir=model_path)
print("train")
model.train(input_fn=lambda: esmm_input_fn(train_df, shuffle=True), steps=5000)
metrics = model.evaluate(input_fn=lambda: esmm_input_fn(val_df, False), steps=5000)
print("metrics: " + str(metrics))
# print("train")
# model.train(input_fn=lambda: esmm_input_fn(train_df, shuffle=True), steps=5000)
# metrics = model.evaluate(input_fn=lambda: esmm_input_fn(val_df, False), steps=5000)
# print("metrics: " + str(metrics))
model_export_path = str(Path("~/data/models/").expanduser())
save_path = model_export(model, all_features, model_export_path)
print("save to: " + save_path)
# model_export_path = str(Path("~/data/models/").expanduser())
# save_path = model_export(model, all_features, model_export_path)
# print("save to: " + save_path)
predict_fn = tf.contrib.predictor.from_saved_model(save_path)
# predict_fn = tf.contrib.predictor.from_saved_model(save_path)
# for i in range(10):
# test_300 = test_df.sample(300)
......@@ -63,13 +63,18 @@ def main():
df = get_device_df_from_redis()
df2 = get_diary_df_from_redis()
redis_device_df = device_feature_engineering(df)
redis_diary_df = diary_feature_engineering(df2, from_redis=True)
time_1 = timeit.default_timer()
res = join_device_diary(device_id, diary_ids, redis_device_df, redis_diary_df)
print(len(res))
print(res.sample(1), "\n")
print(res.sample(1))
model_predict(res, predict_fn)
# model_predict(res, predict_fn)
# total_1 = (timeit.default_timer() - time_1)
# print("prediction total cost {:.5f}s".format(total_1))
total_time = (time.time() - time_begin) / 60
print("cost {:.2f} mins at {}".format(total_time, datetime.now()))
......
......@@ -113,6 +113,6 @@ def model_predict(inputs, predict_fn):
time_1 = timeit.default_timer()
predictions = predict_fn({"examples": examples})
total_1 = (timeit.default_timer() - time_1)
print("prediction cost {:.5f} s".format(total_1))
print("prediction cost {:.5f}s".format(total_1))
# print(predictions)
return predictions
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment