Commit 70e96a9b authored by 赵威

train

parent a0f81d00
import os
import pickle
import random
import shutil
import time
import timeit
from datetime import datetime
from functools import wraps
from pathlib import Path
import pandas as pd
......@@ -20,61 +18,43 @@ from models.esmm.model import esmm_model_fn, model_export, model_predict_diary
# tf.compat.v1.enable_eager_execution()
def time_cost(func):
    """Decorator that prints how long each call to ``func`` takes.

    The elapsed time is measured with ``timeit.default_timer`` around the
    call and printed as ``cost <seconds>s`` (five decimal places) once
    ``func`` has returned.

    Args:
        func: The callable to time.

    Returns:
        A wrapper that forwards all positional and keyword arguments to
        ``func`` and returns its result unchanged. ``functools.wraps``
        copies ``func``'s metadata (name, docstring) onto the wrapper.

    Note:
        If ``func`` raises, the exception propagates and no timing line
        is printed.
    """
    @wraps(func)
    def wrapper(*args, **kwargs):
        started = timeit.default_timer()
        ret = func(*args, **kwargs)
        elapsed = timeit.default_timer() - started
        print("cost {:.5f}s".format(elapsed))
        return ret

    return wrapper
def main():
time_begin = time.time()
# device_df, diary_df, click_df, conversion_df = read_csv_data(Path("~/data/cvr_data/"))
# # print(diary_df.sample(1))
# device_df = device_feature_engineering(device_df)
# # print(device_df.sample(1))
# diary_df = diary_feature_engineering(diary_df)
# # print(diary_df.sample(1))
# cc_df = click_feature_engineering(click_df, conversion_df)
# df = join_features(device_df, diary_df, cc_df)
device_df, diary_df, click_df, conversion_df = read_csv_data(Path("~/data/cvr_data/"))
# print(diary_df.sample(1))
device_df = device_feature_engineering(device_df)
# print(device_df.sample(1))
diary_df = diary_feature_engineering(diary_df)
# print(diary_df.sample(1))
cc_df = click_feature_engineering(click_df, conversion_df)
df = join_features(device_df, diary_df, cc_df)
# train_df, test_df = train_test_split(df, test_size=0.2)
# train_df, val_df = train_test_split(train_df, test_size=0.2)
train_df, test_df = train_test_split(df, test_size=0.2)
train_df, val_df = train_test_split(train_df, test_size=0.2)
# all_features = build_features(df)
# params = {"feature_columns": all_features, "hidden_units": [64, 32], "learning_rate": 0.1}
# model_path = str(Path("~/data/model_tmp/").expanduser())
# if os.path.exists(model_path):
# shutil.rmtree(model_path)
# model = tf.estimator.Estimator(model_fn=esmm_model_fn, params=params, model_dir=model_path)
all_features = build_features(df)
params = {"feature_columns": all_features, "hidden_units": [64, 32], "learning_rate": 0.1}
model_path = str(Path("~/data/model_tmp/").expanduser())
if os.path.exists(model_path):
shutil.rmtree(model_path)
# print("train")
# model.train(input_fn=lambda: esmm_input_fn(train_df, shuffle=True), steps=5000)
# metrics = model.evaluate(input_fn=lambda: esmm_input_fn(val_df, False), steps=5000)
# print("metrics: " + str(metrics))
model = tf.estimator.Estimator(model_fn=esmm_model_fn, params=params, model_dir=model_path)
model.train(input_fn=lambda: esmm_input_fn(train_df, shuffle=True))
metrics = model.evaluate(input_fn=lambda: esmm_input_fn(val_df, False))
print("metrics: " + str(metrics))
# model_export_path = str(Path("~/data/models/").expanduser())
# save_path = model_export(model, all_features, model_export_path)
# print("save to: " + save_path)
model_export_path = str(Path("~/data/models/").expanduser())
save_path = model_export(model, all_features, model_export_path)
print("save to: " + save_path)
save_path = "/home/gmuser/data/models/1595317247"
# save_path = "/home/gmuser/data/models/1595317247"
# save_path = str(Path("~/Desktop/models/1595297428").expanduser())
filename = save_path
# tf.saved_model.load
predict_fn = tf.contrib.predictor.from_saved_model(save_path)
res = pickle.dumps(predict_fn)
print(res)
# for i in range(5):
# test_300 = test_df.sample(300)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment