Commit f9fc2b16 authored by 赵威

try predict

parent 68f0379a
...@@ -12,9 +12,10 @@ import tensorflow as tf ...@@ -12,9 +12,10 @@ import tensorflow as tf
from sklearn.model_selection import train_test_split from sklearn.model_selection import train_test_split
from models.esmm.fe import (click_feature_engineering, device_feature_engineering, diary_feature_engineering, from models.esmm.fe import (click_feature_engineering, device_feature_engineering, diary_feature_engineering,
get_device_df_from_redis, get_diary_df_from_redis, join_device_diary, join_features, read_csv_data) get_device_dict_from_redis, get_diary_dict_from_redis, join_device_diary, join_features,
read_csv_data)
from models.esmm.input_fn import build_features, esmm_input_fn from models.esmm.input_fn import build_features, esmm_input_fn
from models.esmm.model import esmm_model_fn, model_export, model_predict from models.esmm.model import (esmm_model_fn, model_export, model_predict, model_predict2)
# tf.compat.v1.enable_eager_execution() # tf.compat.v1.enable_eager_execution()
...@@ -66,9 +67,10 @@ def main(): ...@@ -66,9 +67,10 @@ def main():
# print("save to: " + save_path) # print("save to: " + save_path)
save_path = "/home/gmuser/data/models/1595317247" save_path = "/home/gmuser/data/models/1595317247"
# save_path = str(Path("~/Desktop/models/1595297428").expanduser())
predict_fn = tf.contrib.predictor.from_saved_model(save_path) predict_fn = tf.contrib.predictor.from_saved_model(save_path)
# for i in range(10): # for i in range(5):
# test_300 = test_df.sample(300) # test_300 = test_df.sample(300)
# model_predict(test_300, predict_fn) # model_predict(test_300, predict_fn)
...@@ -78,73 +80,81 @@ def main(): ...@@ -78,73 +80,81 @@ def main():
# "16195283", "16838351", "17161073", "17297878", "17307484", "17396235", "16418737", "16995481", "17312201", "12237988" # "16195283", "16838351", "17161073", "17297878", "17307484", "17396235", "16418737", "16995481", "17312201", "12237988"
# ] # ]
df = get_device_df_from_redis() # df = get_device_df_from_redis()
df2 = get_diary_df_from_redis() # df2 = get_diary_df_from_redis()
redis_device_df = device_feature_engineering(df) # redis_device_df = device_feature_engineering(df)
redis_diary_df = diary_feature_engineering(df2, from_redis=True) # redis_diary_df = diary_feature_engineering(df2, from_redis=True)
device_ids = list(redis_device_df["device_id"].values)[:20] # device_ids = list(redis_device_df["device_id"].values)[:20]
diary_ids = list(redis_diary_df["card_id"].values) # diary_ids = list(redis_diary_df["card_id"].values)
def test1(): device_dict = get_device_dict_from_redis()
time_1 = timeit.default_timer() diary_dict = get_diary_dict_from_redis()
user1 = join_device_diary(random.sample(device_ids, 1)[0], random.sample(diary_ids, 300), redis_device_df, redis_diary_df)
total_1 = (timeit.default_timer() - time_1) device_ids = list(device_dict.keys())[:20]
print("join df cost {:.5f}s".format(total_1)) diary_ids = list(diary_dict.keys())
time_1 = timeit.default_timer() model_predict2(random.sample(device_ids, 1)[0], random.sample(diary_ids, 300), device_dict, diary_dict, predict_fn)
model_predict(user1, predict_fn)
total_1 = (timeit.default_timer() - time_1) # def test1():
print("total prediction cost {:.5f}s".format(total_1), "\n") # time_1 = timeit.default_timer()
# user1 = join_device_diary(random.sample(device_ids, 1)[0], random.sample(diary_ids, 300), redis_device_df, redis_diary_df)
def test2(): # total_1 = (timeit.default_timer() - time_1)
time_1 = timeit.default_timer() # print("join df cost {:.5f}s".format(total_1))
user1 = join_device_diary(random.sample(device_ids, 1)[0], random.sample(diary_ids, 300), redis_device_df, redis_diary_df)
total_1 = (timeit.default_timer() - time_1) # time_1 = timeit.default_timer()
print("join df cost {:.5f}s".format(total_1)) # model_predict(user1, predict_fn)
# total_1 = (timeit.default_timer() - time_1)
time_1 = timeit.default_timer() # print("total prediction cost {:.5f}s".format(total_1), "\n")
model_predict(user1, predict_fn)
total_1 = (timeit.default_timer() - time_1) # def test2():
print("total prediction cost {:.5f}s".format(total_1), "\n") # time_1 = timeit.default_timer()
# user1 = join_device_diary(random.sample(device_ids, 1)[0], random.sample(diary_ids, 300), redis_device_df, redis_diary_df)
def test3(): # total_1 = (timeit.default_timer() - time_1)
time_1 = timeit.default_timer() # print("join df cost {:.5f}s".format(total_1))
user1 = join_device_diary(random.sample(device_ids, 1)[0], random.sample(diary_ids, 300), redis_device_df, redis_diary_df)
total_1 = (timeit.default_timer() - time_1) # time_1 = timeit.default_timer()
print("join df cost {:.5f}s".format(total_1)) # model_predict(user1, predict_fn)
# total_1 = (timeit.default_timer() - time_1)
time_1 = timeit.default_timer() # print("total prediction cost {:.5f}s".format(total_1), "\n")
model_predict(user1, predict_fn)
total_1 = (timeit.default_timer() - time_1) # def test3():
print("total prediction cost {:.5f}s".format(total_1), "\n") # time_1 = timeit.default_timer()
# user1 = join_device_diary(random.sample(device_ids, 1)[0], random.sample(diary_ids, 300), redis_device_df, redis_diary_df)
def test4(): # total_1 = (timeit.default_timer() - time_1)
time_1 = timeit.default_timer() # print("join df cost {:.5f}s".format(total_1))
user1 = join_device_diary(random.sample(device_ids, 1)[0], random.sample(diary_ids, 300), redis_device_df, redis_diary_df)
total_1 = (timeit.default_timer() - time_1) # time_1 = timeit.default_timer()
print("join df cost {:.5f}s".format(total_1)) # model_predict(user1, predict_fn)
# total_1 = (timeit.default_timer() - time_1)
time_1 = timeit.default_timer() # print("total prediction cost {:.5f}s".format(total_1), "\n")
model_predict(user1, predict_fn)
total_1 = (timeit.default_timer() - time_1) # def test4():
print("total prediction cost {:.5f}s".format(total_1), "\n") # time_1 = timeit.default_timer()
# user1 = join_device_diary(random.sample(device_ids, 1)[0], random.sample(diary_ids, 300), redis_device_df, redis_diary_df)
def test5(): # total_1 = (timeit.default_timer() - time_1)
time_1 = timeit.default_timer() # print("join df cost {:.5f}s".format(total_1))
user1 = join_device_diary(random.sample(device_ids, 1)[0], random.sample(diary_ids, 300), redis_device_df, redis_diary_df)
total_1 = (timeit.default_timer() - time_1) # time_1 = timeit.default_timer()
print("join df cost {:.5f}s".format(total_1)) # model_predict(user1, predict_fn)
# total_1 = (timeit.default_timer() - time_1)
time_1 = timeit.default_timer() # print("total prediction cost {:.5f}s".format(total_1), "\n")
model_predict(user1, predict_fn)
total_1 = (timeit.default_timer() - time_1) # def test5():
print("total prediction cost {:.5f}s".format(total_1), "\n") # time_1 = timeit.default_timer()
# user1 = join_device_diary(random.sample(device_ids, 1)[0], random.sample(diary_ids, 300), redis_device_df, redis_diary_df)
test1() # total_1 = (timeit.default_timer() - time_1)
test2() # print("join df cost {:.5f}s".format(total_1))
test3()
test4() # time_1 = timeit.default_timer()
test5() # model_predict(user1, predict_fn)
# total_1 = (timeit.default_timer() - time_1)
# print("total prediction cost {:.5f}s".format(total_1), "\n")
# test1()
# test2()
# test3()
# test4()
# test5()
total_time = (time.time() - time_begin) / 60 total_time = (time.time() - time_begin) / 60
print("total cost {:.2f} mins at {}".format(total_time, datetime.now())) print("total cost {:.2f} mins at {}".format(total_time, datetime.now()))
......
import timeit
import pandas as pd import pandas as pd
from utils.cache import redis_db_client from utils.cache import redis_db_client
...@@ -15,24 +17,65 @@ def read_csv_data(dataset_path): ...@@ -15,24 +17,65 @@ def read_csv_data(dataset_path):
return device_df, diary_df, click_df, conversion_df return device_df, diary_df, click_df, conversion_df
def _get_data_from_redis(key): # def _get_data_from_redis(key):
column_key = key + ":column" # column_key = key + ":column"
d = redis_db_client.hgetall(key) # d = redis_db_client.hgetall(key)
tmp = d.values() # tmp = d.values()
lists = [] # lists = []
for i in tmp: # for i in tmp:
lists.append(str(i, "utf-8").split("|")) # lists.append(str(i, "utf-8").split("|"))
columns = str(redis_db_client.get(column_key), "utf-8").split("|") # columns = str(redis_db_client.get(column_key), "utf-8").split("|")
df = pd.DataFrame(lists, columns=columns) # df = pd.DataFrame(lists, columns=columns)
return df # return df
def get_device_df_from_redis():
return _get_data_from_redis("cvr:db:device")
def get_diary_df_from_redis(): def get_device_dict_from_redis():
return _get_data_from_redis("cvr:db:content:diary") db_key = "cvr:db:device"
column_key = db_key + ":column"
columns = str(redis_db_client.get(column_key), "utf-8").split("|")
d = redis_db_client.hgetall(db_key)
res = {}
for i in d.values():
row_list = str(i, "utf-8").split("|")
tmp = {}
for (index, elem) in enumerate(row_list):
col_name = columns[index]
if col_name in [
"first_demands", "second_demands", "first_solutions", "second_solutions", "first_positions",
"second_positions", "projects"
]:
tmp[col_name] = elem.split(",")
else:
tmp[col_name] = elem
res[tmp["device_id"]] = tmp
return res
def get_diary_dict_from_redis():
    """Load every diary record from Redis into a dict keyed by ``card_id``.

    The column names are read from the string key
    ``cvr:db:content:diary:column`` and the rows from the hash
    ``cvr:db:content:diary``; both are "|"-separated and decoded as UTF-8.
    Each cell is stored under the column name at the same position:

    * tag columns (demands / solutions / positions / projects) are split on
      "," into a list of strings;
    * the three ``is_*`` flag columns become 1 when the cell is exactly
      ``"true"`` and 0 otherwise;
    * every other cell is kept as the raw string.

    Returns:
        dict mapping each record's ``card_id`` value to its column dict.

    Raises:
        IndexError: if a row has more cells than there are column names.
        KeyError: if a row yields no ``card_id`` column.
    """
    list_fields = frozenset((
        "first_demands", "second_demands", "first_solutions", "second_solutions", "first_positions",
        "second_positions", "projects",
    ))
    flag_fields = frozenset(("is_pure_author", "is_have_pure_reply", "is_have_reply"))

    hash_key = "cvr:db:content:diary"
    header = str(redis_db_client.get(hash_key + ":column"), "utf-8").split("|")
    raw_rows = redis_db_client.hgetall(hash_key)

    diaries = {}
    for raw in raw_rows.values():
        record = {}
        for position, cell in enumerate(str(raw, "utf-8").split("|")):
            name = header[position]
            if name in list_fields:
                # NOTE(review): an empty cell becomes [""] rather than [] here.
                record[name] = cell.split(",")
            elif name in flag_fields:
                record[name] = int(cell == "true")
            else:
                record[name] = cell
        diaries[record["card_id"]] = record
    return diaries
def device_feature_engineering(df): def device_feature_engineering(df):
...@@ -195,64 +238,133 @@ def join_features(device_df, diary_df, cc_df): ...@@ -195,64 +238,133 @@ def join_features(device_df, diary_df, cc_df):
return df return df
def join_device_diary(device_id, diary_ids, device_df, diary_df): # def join_device_diary(device_id, diary_ids, device_df, diary_df):
a_df = device_df.loc[device_df["device_id"] == device_id] # a_df = device_df.loc[device_df["device_id"] == device_id]
b_df = diary_df.loc[diary_df["card_id"].isin(diary_ids)] # b_df = diary_df.loc[diary_df["card_id"].isin(diary_ids)]
b_df["device_id"] = device_id # b_df["device_id"] = device_id
df = pd.merge(a_df, b_df, how="left", on="device_id") # df = pd.merge(a_df, b_df, how="left", on="device_id")
df["first_demands"] = df[["first_demands_x", "first_demands_y"]].apply(lambda x: common_elements(*x), axis=1) # df["first_demands"] = df[["first_demands_x", "first_demands_y"]].apply(lambda x: common_elements(*x), axis=1)
df["second_demands"] = df[["second_demands_x", "second_demands_y"]].apply(lambda x: common_elements(*x), axis=1) # df["second_demands"] = df[["second_demands_x", "second_demands_y"]].apply(lambda x: common_elements(*x), axis=1)
df["first_solutions"] = df[["first_solutions_x", "first_solutions_y"]].apply(lambda x: common_elements(*x), axis=1) # df["first_solutions"] = df[["first_solutions_x", "first_solutions_y"]].apply(lambda x: common_elements(*x), axis=1)
df["second_solutions"] = df[["second_solutions_x", "second_solutions_y"]].apply(lambda x: common_elements(*x), axis=1) # df["second_solutions"] = df[["second_solutions_x", "second_solutions_y"]].apply(lambda x: common_elements(*x), axis=1)
df["first_positions"] = df[["first_positions_x", "second_positions_y"]].apply(lambda x: common_elements(*x), axis=1) # df["first_positions"] = df[["first_positions_x", "second_positions_y"]].apply(lambda x: common_elements(*x), axis=1)
df["second_positions"] = df[["second_positions_x", "second_positions_y"]].apply(lambda x: common_elements(*x), axis=1) # df["second_positions"] = df[["second_positions_x", "second_positions_y"]].apply(lambda x: common_elements(*x), axis=1)
df["projects"] = df[["projects_x", "projects_y"]].apply(lambda x: common_elements(*x), axis=1) # df["projects"] = df[["projects_x", "projects_y"]].apply(lambda x: common_elements(*x), axis=1)
df["device_fd"] = df["first_demands_x"].apply(lambda x: nth_element(x, 0)) # df["device_fd"] = df["first_demands_x"].apply(lambda x: nth_element(x, 0))
df["device_sd"] = df["second_demands_x"].apply(lambda x: nth_element(x, 0)) # df["device_sd"] = df["second_demands_x"].apply(lambda x: nth_element(x, 0))
df["device_fs"] = df["first_solutions_x"].apply(lambda x: nth_element(x, 0)) # df["device_fs"] = df["first_solutions_x"].apply(lambda x: nth_element(x, 0))
df["device_ss"] = df["second_solutions_x"].apply(lambda x: nth_element(x, 0)) # df["device_ss"] = df["second_solutions_x"].apply(lambda x: nth_element(x, 0))
df["device_fp"] = df["first_positions_x"].apply(lambda x: nth_element(x, 0)) # df["device_fp"] = df["first_positions_x"].apply(lambda x: nth_element(x, 0))
df["device_sp"] = df["second_positions_x"].apply(lambda x: nth_element(x, 0)) # df["device_sp"] = df["second_positions_x"].apply(lambda x: nth_element(x, 0))
df["device_p"] = df["projects_x"].apply(lambda x: nth_element(x, 0)) # df["device_p"] = df["projects_x"].apply(lambda x: nth_element(x, 0))
df["content_fd"] = df["first_demands_y"].apply(lambda x: nth_element(x, 0)) # df["content_fd"] = df["first_demands_y"].apply(lambda x: nth_element(x, 0))
df["content_sd"] = df["second_demands_y"].apply(lambda x: nth_element(x, 0)) # df["content_sd"] = df["second_demands_y"].apply(lambda x: nth_element(x, 0))
df["content_fs"] = df["first_solutions_y"].apply(lambda x: nth_element(x, 0)) # df["content_fs"] = df["first_solutions_y"].apply(lambda x: nth_element(x, 0))
df["content_ss"] = df["second_solutions_y"].apply(lambda x: nth_element(x, 0)) # df["content_ss"] = df["second_solutions_y"].apply(lambda x: nth_element(x, 0))
df["content_fp"] = df["first_positions_y"].apply(lambda x: nth_element(x, 0)) # df["content_fp"] = df["first_positions_y"].apply(lambda x: nth_element(x, 0))
df["content_sp"] = df["second_positions_y"].apply(lambda x: nth_element(x, 0)) # df["content_sp"] = df["second_positions_y"].apply(lambda x: nth_element(x, 0))
df["content_p"] = df["projects_y"].apply(lambda x: nth_element(x, 0)) # df["content_p"] = df["projects_y"].apply(lambda x: nth_element(x, 0))
df["fd1"] = df["first_demands"].apply(lambda x: nth_element(x, 0)) # df["fd1"] = df["first_demands"].apply(lambda x: nth_element(x, 0))
df["fd2"] = df["first_demands"].apply(lambda x: nth_element(x, 1)) # df["fd2"] = df["first_demands"].apply(lambda x: nth_element(x, 1))
df["fd3"] = df["first_demands"].apply(lambda x: nth_element(x, 2)) # df["fd3"] = df["first_demands"].apply(lambda x: nth_element(x, 2))
df["sd1"] = df["second_demands"].apply(lambda x: nth_element(x, 0)) # df["sd1"] = df["second_demands"].apply(lambda x: nth_element(x, 0))
df["sd2"] = df["second_demands"].apply(lambda x: nth_element(x, 1)) # df["sd2"] = df["second_demands"].apply(lambda x: nth_element(x, 1))
df["sd3"] = df["second_demands"].apply(lambda x: nth_element(x, 2)) # df["sd3"] = df["second_demands"].apply(lambda x: nth_element(x, 2))
df["fs1"] = df["first_solutions"].apply(lambda x: nth_element(x, 0)) # df["fs1"] = df["first_solutions"].apply(lambda x: nth_element(x, 0))
df["fs2"] = df["first_solutions"].apply(lambda x: nth_element(x, 1)) # df["fs2"] = df["first_solutions"].apply(lambda x: nth_element(x, 1))
df["fs3"] = df["first_solutions"].apply(lambda x: nth_element(x, 2)) # df["fs3"] = df["first_solutions"].apply(lambda x: nth_element(x, 2))
df["ss1"] = df["second_solutions"].apply(lambda x: nth_element(x, 0)) # df["ss1"] = df["second_solutions"].apply(lambda x: nth_element(x, 0))
df["ss2"] = df["second_solutions"].apply(lambda x: nth_element(x, 1)) # df["ss2"] = df["second_solutions"].apply(lambda x: nth_element(x, 1))
df["ss3"] = df["second_solutions"].apply(lambda x: nth_element(x, 2)) # df["ss3"] = df["second_solutions"].apply(lambda x: nth_element(x, 2))
df["fp1"] = df["first_positions"].apply(lambda x: nth_element(x, 0)) # df["fp1"] = df["first_positions"].apply(lambda x: nth_element(x, 0))
df["fp2"] = df["first_positions"].apply(lambda x: nth_element(x, 1)) # df["fp2"] = df["first_positions"].apply(lambda x: nth_element(x, 1))
df["fp3"] = df["first_positions"].apply(lambda x: nth_element(x, 2)) # df["fp3"] = df["first_positions"].apply(lambda x: nth_element(x, 2))
df["sp1"] = df["second_positions"].apply(lambda x: nth_element(x, 0)) # df["sp1"] = df["second_positions"].apply(lambda x: nth_element(x, 0))
df["sp2"] = df["second_positions"].apply(lambda x: nth_element(x, 1)) # df["sp2"] = df["second_positions"].apply(lambda x: nth_element(x, 1))
df["sp3"] = df["second_positions"].apply(lambda x: nth_element(x, 2)) # df["sp3"] = df["second_positions"].apply(lambda x: nth_element(x, 2))
df["p1"] = df["projects"].apply(lambda x: nth_element(x, 0)) # df["p1"] = df["projects"].apply(lambda x: nth_element(x, 0))
df["p2"] = df["projects"].apply(lambda x: nth_element(x, 1)) # df["p2"] = df["projects"].apply(lambda x: nth_element(x, 1))
df["p3"] = df["projects"].apply(lambda x: nth_element(x, 2)) # df["p3"] = df["projects"].apply(lambda x: nth_element(x, 2))
drop_columns = [ # drop_columns = [
"first_demands_x", "first_demands_y", "first_demands", "second_demands_x", "second_demands_y", "second_demands", # "first_demands_x", "first_demands_y", "first_demands", "second_demands_x", "second_demands_y", "second_demands",
"first_solutions_x", "first_solutions_y", "first_solutions", "second_solutions_x", "second_solutions_y", # "first_solutions_x", "first_solutions_y", "first_solutions", "second_solutions_x", "second_solutions_y",
"second_solutions", "first_positions_x", "first_positions_y", "first_positions", "second_positions_x", # "second_solutions", "first_positions_x", "first_positions_y", "first_positions", "second_positions_x",
"second_positions_y", "second_positions", "projects_x", "projects_y", "projects" # "second_positions_y", "second_positions", "projects_x", "projects_y", "projects"
] # ]
df.drop(drop_columns, inplace=True, axis=1) # df.drop(drop_columns, inplace=True, axis=1)
return df # return df
# (raw list-valued column, short suffix used when naming derived features)
_TAG_FIELDS = (
    ("first_demands", "fd"),
    ("second_demands", "sd"),
    ("first_solutions", "fs"),
    ("second_solutions", "ss"),
    ("first_positions", "fp"),
    ("second_positions", "sp"),
    ("projects", "p"),
)


def device_diary_fe(device_id, diary_ids, device_dict, diary_dict):
    """Build the per-device and per-diary feature dicts for one prediction request.

    For each tag column in ``_TAG_FIELDS`` (suffix ``s``):

    * the device dict gains ``device_<s>``: element 0 of the device's list;
    * every diary dict gains ``content_<s>``: element 0 of the diary's list,
      plus ``<s>1`` / ``<s>2`` / ``<s>3``: elements 0-2 of
      ``common_elements(device_list, diary_list)``.

    The diary-side feature is written as ``content_<s>`` because that is the
    name ``model_predict2`` serialises (and the name the earlier DataFrame
    join produced); under the previous ``diary_<s>`` name the value never
    reached the model. ``diary_<s>`` is still written as an alias so callers
    that read the old key keep working.

    Args:
        device_id: key looked up in ``device_dict``; a missing device yields
            an (otherwise empty) dict holding only the ``device_*`` features.
        diary_ids: iterable of keys looked up in ``diary_dict``; ids that are
            missing (or map to an empty dict) are skipped.
        device_dict: mapping of device id -> column dict.
        diary_dict: mapping of diary id -> column dict.

    Returns:
        ``(device_info, diary_lst)``: a shallow copy of the device record with
        the derived features added, and a list of shallow copies of the found
        diary records with theirs. The input dicts are not modified.

    Side effects:
        Prints the elapsed wall-clock time to stdout.
    """
    started = timeit.default_timer()

    device_info = device_dict.get(device_id, {}).copy()
    device_tags = {}
    for field, suffix in _TAG_FIELDS:
        tags = device_info.get(field, [])
        device_tags[field] = tags
        device_info["device_" + suffix] = nth_element(tags, 0)

    diary_lst = []
    for diary_id in diary_ids:
        diary_info = diary_dict.get(diary_id, {}).copy()
        if not diary_info:
            # Unknown diary id: nothing to score.
            continue
        for field, suffix in _TAG_FIELDS:
            diary_tags = diary_info.get(field, [])
            shared = common_elements(device_tags[field], diary_tags)
            first_tag = nth_element(diary_tags, 0)
            diary_info["content_" + suffix] = first_tag
            # Backward-compatible alias for the key this function used to emit.
            diary_info["diary_" + suffix] = first_tag
            for rank in range(3):
                diary_info["{}{}".format(suffix, rank + 1)] = nth_element(shared, rank)
        diary_lst.append(diary_info)

    elapsed = timeit.default_timer() - started
    print("join device diary cost {:.5f}s".format(elapsed))
    return device_info, diary_lst
import timeit import timeit
import numba
import tensorflow as tf import tensorflow as tf
from tensorflow import feature_column as fc from tensorflow import feature_column as fc
from tensorflow.python.estimator.canned import head as head_lib from tensorflow.python.estimator.canned import head as head_lib
from tensorflow.python.ops.losses import losses from tensorflow.python.ops.losses import losses
from .fe import device_diary_fe
from .utils import common_elements, nth_element
def build_deep_layer(net, params): def build_deep_layer(net, params):
for num_hidden_units in params["hidden_units"]: for num_hidden_units in params["hidden_units"]:
...@@ -92,6 +94,41 @@ def _bytes_feature(value): ...@@ -92,6 +94,41 @@ def _bytes_feature(value):
return tf.train.Feature(bytes_list=tf.train.BytesList(value=[value])) return tf.train.Feature(bytes_list=tf.train.BytesList(value=[value]))
def model_predict2(device_id, diary_ids, device_dict, diary_dict, predict_fn):
    """Score one device against a list of diaries with a loaded predictor.

    Features are built by ``device_diary_fe``; for each returned diary the
    device dict and the diary dict are merged (diary values win on a key
    clash), the known columns are encoded into a ``tf.train.Example`` and the
    serialized examples are passed to ``predict_fn`` under the ``"examples"``
    key.

    Only columns named in the three tuples below are serialised; any other
    key in the merged dict is silently ignored, so the feature-engineering
    step has to emit exactly these names (e.g. ``content_fd``).

    Args:
        device_id, diary_ids, device_dict, diary_dict: forwarded unchanged to
            ``device_diary_fe``.
        predict_fn: callable accepting ``{"examples": [serialized, ...]}``.

    Returns:
        Whatever ``predict_fn`` returns.

    Side effects:
        Prints the elapsed wall-clock time (feature building included).
    """
    started = timeit.default_timer()
    device_info, diary_lst = device_diary_fe(device_id, diary_ids, device_dict, diary_dict)

    int_columns = (
        "active_type", "active_days", "card_id", "is_pure_author", "is_have_reply", "is_have_pure_reply", "content_level",
        "topic_num", "favor_num", "vote_num",
    )
    float_columns = ("one_ctr", "three_ctr", "seven_ctr", "fifteen_ctr")
    str_columns = (
        "device_id", "past_consume_ability_history", "potential_consume_ability_history", "price_sensitive_history", "device_fd",
        "device_sd", "device_fs", "device_ss", "device_fp", "device_sp", "device_p", "content_fd", "content_sd", "content_fs",
        "content_ss", "content_fp", "content_sp", "content_p", "fd1", "fd2", "fd3", "sd1", "sd2", "sd3", "fs1", "fs2", "fs3",
        "ss1", "ss2", "ss3", "fp1", "fp2", "fp3", "sp1", "sp2", "sp3", "p1", "p2", "p3",
    )

    # Column -> encoding kind. Filled lowest priority first so that the
    # int > float > str precedence of the checks is kept on any overlap.
    column_kind = {}
    column_kind.update(dict.fromkeys(str_columns, "str"))
    column_kind.update(dict.fromkeys(float_columns, "float"))
    column_kind.update(dict.fromkeys(int_columns, "int"))

    examples = []
    for diary_info in diary_lst:
        merged = dict(device_info)
        merged.update(diary_info)

        features = {}
        for col, value in merged.items():
            kind = column_kind.get(col)
            if kind == "int":
                features[col] = _int64_feature(int(value))
            elif kind == "float":
                features[col] = _float_feature(float(value))
            elif kind == "str":
                features[col] = _bytes_feature(str(value).encode(encoding="utf-8"))

        record = tf.train.Example(features=tf.train.Features(feature=features))
        examples.append(record.SerializeToString())

    predictions = predict_fn({"examples": examples})
    elapsed = timeit.default_timer() - started
    print("prediction cost {:.5f}s".format(elapsed))
    return predictions
def model_predict(inputs, predict_fn): def model_predict(inputs, predict_fn):
time_1 = timeit.default_timer() time_1 = timeit.default_timer()
int_columns = [ int_columns = [
...@@ -106,11 +143,11 @@ def model_predict(inputs, predict_fn): ...@@ -106,11 +143,11 @@ def model_predict(inputs, predict_fn):
if col in ["click_label", "conversion_label"]: if col in ["click_label", "conversion_label"]:
pass pass
elif col in int_columns: elif col in int_columns:
features[col] = tf.train.Feature(int64_list=tf.train.Int64List(value=[int(value)])) features[col] = _int64_feature(int(value))
elif col in float_columns: elif col in float_columns:
features[col] = tf.train.Feature(float_list=tf.train.FloatList(value=[float(value)])) features[col] = _float_feature(float(value))
else: else:
features[col] = tf.train.Feature(bytes_list=tf.train.BytesList(value=[str(value).encode(encoding="utf-8")])) features[col] = _bytes_feature(str(value).encode(encoding="utf-8"))
example = tf.train.Example(features=tf.train.Features(feature=features)) example = tf.train.Example(features=tf.train.Features(feature=features))
examples.append(example.SerializeToString()) examples.append(example.SerializeToString())
total_1 = (timeit.default_timer() - time_1) total_1 = (timeit.default_timer() - time_1)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment