Commit f9fc2b16 authored by 赵威

try predict

parent 68f0379a
......@@ -12,9 +12,10 @@ import tensorflow as tf
from sklearn.model_selection import train_test_split
from models.esmm.fe import (click_feature_engineering, device_feature_engineering, diary_feature_engineering,
get_device_df_from_redis, get_diary_df_from_redis, join_device_diary, join_features, read_csv_data)
get_device_dict_from_redis, get_diary_dict_from_redis, join_device_diary, join_features,
read_csv_data)
from models.esmm.input_fn import build_features, esmm_input_fn
from models.esmm.model import esmm_model_fn, model_export, model_predict
from models.esmm.model import (esmm_model_fn, model_export, model_predict, model_predict2)
# tf.compat.v1.enable_eager_execution()
......@@ -66,9 +67,10 @@ def main():
# print("save to: " + save_path)
save_path = "/home/gmuser/data/models/1595317247"
# save_path = str(Path("~/Desktop/models/1595297428").expanduser())
predict_fn = tf.contrib.predictor.from_saved_model(save_path)
# for i in range(10):
# for i in range(5):
# test_300 = test_df.sample(300)
# model_predict(test_300, predict_fn)
......@@ -78,73 +80,81 @@ def main():
# "16195283", "16838351", "17161073", "17297878", "17307484", "17396235", "16418737", "16995481", "17312201", "12237988"
# ]
df = get_device_df_from_redis()
df2 = get_diary_df_from_redis()
redis_device_df = device_feature_engineering(df)
redis_diary_df = diary_feature_engineering(df2, from_redis=True)
device_ids = list(redis_device_df["device_id"].values)[:20]
diary_ids = list(redis_diary_df["card_id"].values)
def test1():
time_1 = timeit.default_timer()
user1 = join_device_diary(random.sample(device_ids, 1)[0], random.sample(diary_ids, 300), redis_device_df, redis_diary_df)
total_1 = (timeit.default_timer() - time_1)
print("join df cost {:.5f}s".format(total_1))
time_1 = timeit.default_timer()
model_predict(user1, predict_fn)
total_1 = (timeit.default_timer() - time_1)
print("total prediction cost {:.5f}s".format(total_1), "\n")
def test2():
time_1 = timeit.default_timer()
user1 = join_device_diary(random.sample(device_ids, 1)[0], random.sample(diary_ids, 300), redis_device_df, redis_diary_df)
total_1 = (timeit.default_timer() - time_1)
print("join df cost {:.5f}s".format(total_1))
time_1 = timeit.default_timer()
model_predict(user1, predict_fn)
total_1 = (timeit.default_timer() - time_1)
print("total prediction cost {:.5f}s".format(total_1), "\n")
def test3():
time_1 = timeit.default_timer()
user1 = join_device_diary(random.sample(device_ids, 1)[0], random.sample(diary_ids, 300), redis_device_df, redis_diary_df)
total_1 = (timeit.default_timer() - time_1)
print("join df cost {:.5f}s".format(total_1))
time_1 = timeit.default_timer()
model_predict(user1, predict_fn)
total_1 = (timeit.default_timer() - time_1)
print("total prediction cost {:.5f}s".format(total_1), "\n")
def test4():
time_1 = timeit.default_timer()
user1 = join_device_diary(random.sample(device_ids, 1)[0], random.sample(diary_ids, 300), redis_device_df, redis_diary_df)
total_1 = (timeit.default_timer() - time_1)
print("join df cost {:.5f}s".format(total_1))
time_1 = timeit.default_timer()
model_predict(user1, predict_fn)
total_1 = (timeit.default_timer() - time_1)
print("total prediction cost {:.5f}s".format(total_1), "\n")
def test5():
time_1 = timeit.default_timer()
user1 = join_device_diary(random.sample(device_ids, 1)[0], random.sample(diary_ids, 300), redis_device_df, redis_diary_df)
total_1 = (timeit.default_timer() - time_1)
print("join df cost {:.5f}s".format(total_1))
time_1 = timeit.default_timer()
model_predict(user1, predict_fn)
total_1 = (timeit.default_timer() - time_1)
print("total prediction cost {:.5f}s".format(total_1), "\n")
test1()
test2()
test3()
test4()
test5()
# df = get_device_df_from_redis()
# df2 = get_diary_df_from_redis()
# redis_device_df = device_feature_engineering(df)
# redis_diary_df = diary_feature_engineering(df2, from_redis=True)
# device_ids = list(redis_device_df["device_id"].values)[:20]
# diary_ids = list(redis_diary_df["card_id"].values)
device_dict = get_device_dict_from_redis()
diary_dict = get_diary_dict_from_redis()
device_ids = list(device_dict.keys())[:20]
diary_ids = list(diary_dict.keys())
model_predict2(random.sample(device_ids, 1)[0], random.sample(diary_ids, 300), device_dict, diary_dict, predict_fn)
# def test1():
# time_1 = timeit.default_timer()
# user1 = join_device_diary(random.sample(device_ids, 1)[0], random.sample(diary_ids, 300), redis_device_df, redis_diary_df)
# total_1 = (timeit.default_timer() - time_1)
# print("join df cost {:.5f}s".format(total_1))
# time_1 = timeit.default_timer()
# model_predict(user1, predict_fn)
# total_1 = (timeit.default_timer() - time_1)
# print("total prediction cost {:.5f}s".format(total_1), "\n")
# def test2():
# time_1 = timeit.default_timer()
# user1 = join_device_diary(random.sample(device_ids, 1)[0], random.sample(diary_ids, 300), redis_device_df, redis_diary_df)
# total_1 = (timeit.default_timer() - time_1)
# print("join df cost {:.5f}s".format(total_1))
# time_1 = timeit.default_timer()
# model_predict(user1, predict_fn)
# total_1 = (timeit.default_timer() - time_1)
# print("total prediction cost {:.5f}s".format(total_1), "\n")
# def test3():
# time_1 = timeit.default_timer()
# user1 = join_device_diary(random.sample(device_ids, 1)[0], random.sample(diary_ids, 300), redis_device_df, redis_diary_df)
# total_1 = (timeit.default_timer() - time_1)
# print("join df cost {:.5f}s".format(total_1))
# time_1 = timeit.default_timer()
# model_predict(user1, predict_fn)
# total_1 = (timeit.default_timer() - time_1)
# print("total prediction cost {:.5f}s".format(total_1), "\n")
# def test4():
# time_1 = timeit.default_timer()
# user1 = join_device_diary(random.sample(device_ids, 1)[0], random.sample(diary_ids, 300), redis_device_df, redis_diary_df)
# total_1 = (timeit.default_timer() - time_1)
# print("join df cost {:.5f}s".format(total_1))
# time_1 = timeit.default_timer()
# model_predict(user1, predict_fn)
# total_1 = (timeit.default_timer() - time_1)
# print("total prediction cost {:.5f}s".format(total_1), "\n")
# def test5():
# time_1 = timeit.default_timer()
# user1 = join_device_diary(random.sample(device_ids, 1)[0], random.sample(diary_ids, 300), redis_device_df, redis_diary_df)
# total_1 = (timeit.default_timer() - time_1)
# print("join df cost {:.5f}s".format(total_1))
# time_1 = timeit.default_timer()
# model_predict(user1, predict_fn)
# total_1 = (timeit.default_timer() - time_1)
# print("total prediction cost {:.5f}s".format(total_1), "\n")
# test1()
# test2()
# test3()
# test4()
# test5()
total_time = (time.time() - time_begin) / 60
print("total cost {:.2f} mins at {}".format(total_time, datetime.now()))
......
This diff is collapsed.
import timeit
import numba
import tensorflow as tf
from tensorflow import feature_column as fc
from tensorflow.python.estimator.canned import head as head_lib
from tensorflow.python.ops.losses import losses
from .fe import device_diary_fe
from .utils import common_elements, nth_element
def build_deep_layer(net, params):
for num_hidden_units in params["hidden_units"]:
......@@ -92,6 +94,41 @@ def _bytes_feature(value):
return tf.train.Feature(bytes_list=tf.train.BytesList(value=[value]))
# Column groups that decide how each merged feature is encoded. They are
# frozensets built once at import time so the per-feature membership tests in
# the serialization loop are O(1) instead of a linear scan of a freshly built
# list on every call.
_PREDICT2_INT_COLUMNS = frozenset([
    "active_type", "active_days", "card_id", "is_pure_author", "is_have_reply", "is_have_pure_reply", "content_level",
    "topic_num", "favor_num", "vote_num",
])
_PREDICT2_FLOAT_COLUMNS = frozenset(["one_ctr", "three_ctr", "seven_ctr", "fifteen_ctr"])
_PREDICT2_STR_COLUMNS = frozenset([
    "device_id", "past_consume_ability_history", "potential_consume_ability_history", "price_sensitive_history", "device_fd",
    "device_sd", "device_fs", "device_ss", "device_fp", "device_sp", "device_p", "content_fd", "content_sd", "content_fs",
    "content_ss", "content_fp", "content_sp", "content_p", "fd1", "fd2", "fd3", "sd1", "sd2", "sd3", "fs1", "fs2", "fs3",
    "ss1", "ss2", "ss3", "fp1", "fp2", "fp3", "sp1", "sp2", "sp3", "p1", "p2", "p3",
])


def _serialize_predict2_row(row):
    """Encode one merged device+diary row as a serialized ``tf.train.Example``.

    Columns that belong to none of the three column groups are skipped.
    """
    features = {}
    for col, value in row.items():
        if col in _PREDICT2_INT_COLUMNS:
            features[col] = _int64_feature(int(value))
        elif col in _PREDICT2_FLOAT_COLUMNS:
            features[col] = _float_feature(float(value))
        elif col in _PREDICT2_STR_COLUMNS:
            features[col] = _bytes_feature(str(value).encode(encoding="utf-8"))
    example = tf.train.Example(features=tf.train.Features(feature=features))
    return example.SerializeToString()


def model_predict2(device_id, diary_ids, device_dict, diary_dict, predict_fn):
    """Score a set of diaries for one device and print the elapsed time.

    Args:
        device_id: Identifier of the device to score for.
        diary_ids: Identifiers of the candidate diaries.
        device_dict: Device feature lookup handed to ``device_diary_fe``.
        diary_dict: Diary feature lookup handed to ``device_diary_fe``.
        predict_fn: Callable invoked as ``predict_fn({"examples": [...]})``
            with the serialized examples.

    Returns:
        Whatever ``predict_fn`` returns for the batch of serialized examples.

    Note:
        ``device_diary_fe`` is expected to return ``(device_info, diary_lst)``.
        Presumably ``device_info`` and each item of ``diary_lst`` map column
        name to value -- TODO confirm against ``fe.device_diary_fe``.
    """
    started = timeit.default_timer()
    device_info, diary_lst = device_diary_fe(device_id, diary_ids, device_dict, diary_dict)

    examples = []
    for diary_info in diary_lst:
        # Diary values are applied second, so they win on any shared column.
        row = {}
        row.update(device_info)
        row.update(diary_info)
        examples.append(_serialize_predict2_row(row))

    predictions = predict_fn({"examples": examples})
    # The reported time covers feature engineering, serialization and inference.
    elapsed = timeit.default_timer() - started
    print("prediction cost {:.5f}s".format(elapsed))
    return predictions
def model_predict(inputs, predict_fn):
time_1 = timeit.default_timer()
int_columns = [
......@@ -106,11 +143,11 @@ def model_predict(inputs, predict_fn):
if col in ["click_label", "conversion_label"]:
pass
elif col in int_columns:
features[col] = tf.train.Feature(int64_list=tf.train.Int64List(value=[int(value)]))
features[col] = _int64_feature(int(value))
elif col in float_columns:
features[col] = tf.train.Feature(float_list=tf.train.FloatList(value=[float(value)]))
features[col] = _float_feature(float(value))
else:
features[col] = tf.train.Feature(bytes_list=tf.train.BytesList(value=[str(value).encode(encoding="utf-8")]))
features[col] = _bytes_feature(str(value).encode(encoding="utf-8"))
example = tf.train.Example(features=tf.train.Features(feature=features))
examples.append(example.SerializeToString())
total_1 = (timeit.default_timer() - time_1)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment