Commit 445b2c54 authored by 赵威's avatar 赵威

get the model

parent 2be148e6
import os
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras import activations, layers, losses, metrics, optimizers
base_dir = os.getcwd()
# base_dir = "/Users/offic/work/GM/strategy_embedding/" # TODO remove
DATA_PATH = os.path.join(base_dir, "_data")
MODEL_PATH = os.path.join(base_dir, "_models")
DEVICE_COLUMNS = [
"device_id",
"device_fd",
"device_sd",
"device_fs",
"device_ss",
"device_fp",
"device_sp",
"device_p",
"device_fd2",
"device_sd2",
"device_fs2",
"device_ss2",
"device_fp2",
"device_sp2",
"device_p2",
]
LABEL_COLUMNS = "label"
TRACTATE_COLUMNS = [
"card_id",
"is_pure_author",
"is_have_pure_reply",
"is_have_reply",
"content_level",
"topic_seven_click_num",
"topic_thirty_click_num",
"topic_num",
"seven_transform_num",
"thirty_transform_num",
"favor_num",
"favor_pure_num",
"vote_num",
"vote_display_num",
"reply_num",
"reply_pure_num",
"one_click_num",
"three_click_num",
"seven_click_num",
"fifteen_click_num",
"thirty_click_num",
"sixty_click_num",
"ninety_click_num",
"history_click_num",
"one_precise_exposure_num",
"three_precise_exposure_num",
"seven_precise_exposure_num",
"fifteen_precise_exposure_num",
"thirty_precise_exposure_num",
"sixty_precise_exposure_num",
"ninety_precise_exposure_num",
"history_precise_exposure_num",
"one_vote_user_num",
"three_vote_user_num",
"seven_vote_user_num",
"fifteen_vote_user_num",
"thirty_vote_user_num",
"sixty_vote_user_num",
"ninety_vote_user_num",
"history_vote_user_num",
"one_reply_user_num",
"three_reply_user_num",
"seven_reply_user_num",
"fifteen_reply_user_num",
"thirty_reply_user_num",
"sixty_reply_user_num",
"ninety_reply_user_num",
"history_reply_user_num",
"one_browse_user_num",
"three_browse_user_num",
"seven_browse_user_num",
"fifteen_browse_user_num",
"thirty_browse_user_num",
"sixty_browse_user_num",
"ninety_browse_user_num",
"history_browse_user_num",
"one_reply_num",
"three_reply_num",
"seven_reply_num",
"fifteen_reply_num",
"thirty_reply_num",
"sixty_reply_num",
"ninety_reply_num",
"history_reply_num",
"one_ctr",
"three_ctr",
"seven_ctr",
"fifteen_ctr",
"thirty_ctr",
"sixty_ctr",
"ninety_ctr",
"history_ctr",
"one_vote_pure_rate",
"three_vote_pure_rate",
"seven_vote_pure_rate",
"fifteen_vote_pure_rate",
"thirty_vote_pure_rate",
"sixty_vote_pure_rate",
"ninety_vote_pure_rate",
"history_vote_pure_rate",
"one_reply_pure_rate",
"three_reply_pure_rate",
"seven_reply_pure_rate",
"fifteen_reply_pure_rate",
"thirty_reply_pure_rate",
"sixty_reply_pure_rate",
"ninety_reply_pure_rate",
"history_reply_pure_rate",
"card_fd",
"card_sd",
"card_fs",
"card_ss",
"card_fp",
"card_sp",
"card_p",
"card_fd2",
"card_sd2",
"card_fs2",
"card_ss2",
"card_fp2",
"card_sp2",
"card_p2",
]
def nth_element(lst, n):
if n >= len(lst):
return ""
return lst[n]
def get_df(file):
full_path = os.path.join(DATA_PATH, file)
df = pd.read_csv(full_path, sep="|")
return df
def device_tractae_fe():
click_df = get_df("tractate_click.csv")
exposure_df = get_df("tractate_exposure.csv")
device_fe_df = get_df("device_feature.csv")
tractate_fe_df = get_df("tractate_feature.csv")
print(click_df.shape)
print(exposure_df.shape)
print(device_fe_df.shape)
print(tractate_fe_df.shape)
#
click_df.drop("partition_date", inplace=True, axis=1)
exposure_df.drop("partition_date", inplace=True, axis=1)
base_df = pd.merge(click_df, exposure_df, how="outer", indicator="Exist")
base_df["label"] = np.where(base_df["Exist"] == "right_only", 0.75, 1.0)
base_df.drop("Exist", inplace=True, axis=1)
#
device_fe_df.fillna("", inplace=True)
device_fe_df.rename(columns={"cl_id": "device_id"}, inplace=True)
device_fe_df["first_demands"] = device_fe_df["first_demands"].str.split(",").\
apply(lambda d: d if isinstance(d, list) else [])
device_fe_df["second_demands"] = device_fe_df["second_demands"].str.split(",").\
apply(lambda d: d if isinstance(d, list) else [])
device_fe_df["first_solutions"] = device_fe_df["first_solutions"].str.split(",").\
apply(lambda d: d if isinstance(d, list) else [])
device_fe_df["second_solutions"] = device_fe_df["second_solutions"].str.split(",").\
apply(lambda d: d if isinstance(d, list) else [])
device_fe_df["first_positions"] = device_fe_df["first_positions"].str.split(",").\
apply(lambda d: d if isinstance(d, list) else [])
device_fe_df["second_positions"] = device_fe_df["second_positions"].str.split(",").\
apply(lambda d: d if isinstance(d, list) else [])
device_fe_df["projects"] = device_fe_df["projects"].str.split(",").\
apply(lambda d: d if isinstance(d, list) else [])
device_fe_df["device_fd"] = device_fe_df["first_demands"].apply(lambda x: nth_element(x, 0))
device_fe_df["device_sd"] = device_fe_df["second_demands"].apply(lambda x: nth_element(x, 0))
device_fe_df["device_fs"] = device_fe_df["first_solutions"].apply(lambda x: nth_element(x, 0))
device_fe_df["device_ss"] = device_fe_df["second_solutions"].apply(lambda x: nth_element(x, 0))
device_fe_df["device_fp"] = device_fe_df["first_positions"].apply(lambda x: nth_element(x, 0))
device_fe_df["device_sp"] = device_fe_df["second_positions"].apply(lambda x: nth_element(x, 0))
device_fe_df["device_p"] = device_fe_df["projects"].apply(lambda x: nth_element(x, 0))
device_fe_df["device_fd2"] = device_fe_df["first_demands"].apply(lambda x: nth_element(x, 1))
device_fe_df["device_sd2"] = device_fe_df["second_demands"].apply(lambda x: nth_element(x, 1))
device_fe_df["device_fs2"] = device_fe_df["first_solutions"].apply(lambda x: nth_element(x, 1))
device_fe_df["device_ss2"] = device_fe_df["second_solutions"].apply(lambda x: nth_element(x, 1))
device_fe_df["device_fp2"] = device_fe_df["first_positions"].apply(lambda x: nth_element(x, 1))
device_fe_df["device_sp2"] = device_fe_df["second_positions"].apply(lambda x: nth_element(x, 1))
device_fe_df["device_p2"] = device_fe_df["projects"].apply(lambda x: nth_element(x, 1))
_drop_columns = [
"first_demands", "second_demands", "first_solutions", "second_solutions", "first_positions", "second_positions",
"projects"
]
device_fe_df.drop(columns=_drop_columns, axis=1, inplace=True)
#
_card_drop_columns = [
"card_first_demands", "card_second_demands", "card_first_solutions", "card_second_solutions", "card_first_positions",
"card_second_positions", "card_projects"
]
tractate_fe_df[_card_drop_columns].fillna("", inplace=True)
tractate_fe_df["card_first_demands"] = tractate_fe_df["card_first_demands"].str.split(",").\
apply(lambda d: d if isinstance(d, list) else [])
tractate_fe_df["card_second_demands"] = tractate_fe_df["card_second_demands"].str.split(",").\
apply(lambda d: d if isinstance(d, list) else [])
tractate_fe_df["card_first_solutions"] = tractate_fe_df["card_first_solutions"].str.split(",").\
apply(lambda d: d if isinstance(d, list) else [])
tractate_fe_df["card_second_solutions"] = tractate_fe_df["card_second_solutions"].str.split(",").\
apply(lambda d: d if isinstance(d, list) else [])
tractate_fe_df["card_first_positions"] = tractate_fe_df["card_first_positions"].str.split(",").\
apply(lambda d: d if isinstance(d, list) else [])
tractate_fe_df["card_second_positions"] = tractate_fe_df["card_second_positions"].str.split(",").\
apply(lambda d: d if isinstance(d, list) else [])
tractate_fe_df["card_projects"] = tractate_fe_df["card_projects"].str.split(",").\
apply(lambda d: d if isinstance(d, list) else [])
tractate_fe_df["card_fd"] = tractate_fe_df["card_first_demands"].apply(lambda x: nth_element(x, 0))
tractate_fe_df["card_sd"] = tractate_fe_df["card_second_demands"].apply(lambda x: nth_element(x, 0))
tractate_fe_df["card_fs"] = tractate_fe_df["card_first_solutions"].apply(lambda x: nth_element(x, 0))
tractate_fe_df["card_ss"] = tractate_fe_df["card_second_solutions"].apply(lambda x: nth_element(x, 0))
tractate_fe_df["card_fp"] = tractate_fe_df["card_first_positions"].apply(lambda x: nth_element(x, 0))
tractate_fe_df["card_sp"] = tractate_fe_df["card_second_positions"].apply(lambda x: nth_element(x, 0))
tractate_fe_df["card_p"] = tractate_fe_df["card_projects"].apply(lambda x: nth_element(x, 0))
tractate_fe_df["card_fd2"] = tractate_fe_df["card_first_demands"].apply(lambda x: nth_element(x, 1))
tractate_fe_df["card_sd2"] = tractate_fe_df["card_second_demands"].apply(lambda x: nth_element(x, 1))
tractate_fe_df["card_fs2"] = tractate_fe_df["card_first_solutions"].apply(lambda x: nth_element(x, 1))
tractate_fe_df["card_ss2"] = tractate_fe_df["card_second_solutions"].apply(lambda x: nth_element(x, 1))
tractate_fe_df["card_fp2"] = tractate_fe_df["card_first_positions"].apply(lambda x: nth_element(x, 1))
tractate_fe_df["card_sp2"] = tractate_fe_df["card_second_positions"].apply(lambda x: nth_element(x, 1))
tractate_fe_df["card_p2"] = tractate_fe_df["card_projects"].apply(lambda x: nth_element(x, 1))
tractate_fe_df.drop(columns=_card_drop_columns, axis=1, inplace=True)
#
df = pd.merge(pd.merge(base_df, device_fe_df), tractate_fe_df)
nullseries = df.isnull().sum()
nulls = nullseries[nullseries > 0]
if nulls.any():
print(nulls)
raise Exception("dataframe nulls")
return df
if __name__ == "__main__":
df = device_tractae_fe()
print(df.head(3), df.shape)
...@@ -6,7 +6,7 @@ import tensorflow as tf ...@@ -6,7 +6,7 @@ import tensorflow as tf
from tensorflow.keras import activations, layers, losses, metrics, optimizers from tensorflow.keras import activations, layers, losses, metrics, optimizers
base_dir = os.getcwd() base_dir = os.getcwd()
base_dir = "/Users/offic/work/GM/strategy_embedding/" # TODO remove # base_dir = "/Users/offic/work/GM/strategy_embedding/" # TODO remove
DATA_PATH = os.path.join(base_dir, "_data") DATA_PATH = os.path.join(base_dir, "_data")
MODEL_PATH = os.path.join(base_dir, "_models") MODEL_PATH = os.path.join(base_dir, "_models")
...@@ -28,8 +28,6 @@ DEVICE_COLUMNS = [ ...@@ -28,8 +28,6 @@ DEVICE_COLUMNS = [
"device_p2", "device_p2",
] ]
LABEL_COLUMNS = "label"
TRACTATE_COLUMNS = [ TRACTATE_COLUMNS = [
"card_id", "card_id",
"is_pure_author", "is_pure_author",
...@@ -255,6 +253,411 @@ def device_tractae_fe(): ...@@ -255,6 +253,411 @@ def device_tractae_fe():
return df return df
def get_input_dim(df, columns):
res = {}
for i in columns:
res[i] = df[i].unique().size + 1
return res
def tractate_dssm_model():
# input
device_id = layers.Input(shape=(1, ), name="device_id")
device_fd = layers.Input(shape=(1, ), name="device_fd")
device_sd = layers.Input(shape=(1, ), name="device_sd")
device_fs = layers.Input(shape=(1, ), name="device_fs")
device_ss = layers.Input(shape=(1, ), name="device_ss")
device_fp = layers.Input(shape=(1, ), name="device_fp")
device_sp = layers.Input(shape=(1, ), name="device_sp")
device_p = layers.Input(shape=(1, ), name="device_p")
device_fd2 = layers.Input(shape=(1, ), name="device_fd2")
device_sd2 = layers.Input(shape=(1, ), name="device_sd2")
device_fs2 = layers.Input(shape=(1, ), name="device_fs2")
device_ss2 = layers.Input(shape=(1, ), name="device_ss2")
device_fp2 = layers.Input(shape=(1, ), name="device_fp2")
device_sp2 = layers.Input(shape=(1, ), name="device_sp2")
device_p2 = layers.Input(shape=(1, ), name="device_p2")
card_id = layers.Input(shape=(1, ), name="card_id"),
is_pure_author = layers.Input(shape=(1, ), name="is_pure_author"),
is_have_pure_reply = layers.Input(shape=(1, ), name="is_have_pure_reply"),
is_have_reply = layers.Input(shape=(1, ), name="is_have_reply"),
content_level = layers.Input(shape=(1, ), name="content_level"),
topic_seven_click_num = layers.Input(shape=(1, ), name="topic_seven_click_num"),
topic_thirty_click_num = layers.Input(shape=(1, ), name="topic_thirty_click_num"),
topic_num = layers.Input(shape=(1, ), name="topic_num"),
seven_transform_num = layers.Input(shape=(1, ), name="seven_transform_num"),
thirty_transform_num = layers.Input(shape=(1, ), name="thirty_transform_num"),
favor_num = layers.Input(shape=(1, ), name="favor_num"),
favor_pure_num = layers.Input(shape=(1, ), name="favor_pure_num"),
vote_num = layers.Input(shape=(1, ), name="vote_num"),
vote_display_num = layers.Input(shape=(1, ), name="vote_display_num"),
reply_num = layers.Input(shape=(1, ), name="reply_num"),
reply_pure_num = layers.Input(shape=(1, ), name="reply_pure_num"),
one_click_num = layers.Input(shape=(1, ), name="one_click_num"),
three_click_num = layers.Input(shape=(1, ), name="three_click_num"),
seven_click_num = layers.Input(shape=(1, ), name="seven_click_num"),
fifteen_click_num = layers.Input(shape=(1, ), name="fifteen_click_num"),
thirty_click_num = layers.Input(shape=(1, ), name="thirty_click_num"),
sixty_click_num = layers.Input(shape=(1, ), name="sixty_click_num"),
ninety_click_num = layers.Input(shape=(1, ), name="ninety_click_num"),
history_click_num = layers.Input(shape=(1, ), name="history_click_num"),
one_precise_exposure_num = layers.Input(shape=(1, ), name="one_precise_exposure_num"),
three_precise_exposure_num = layers.Input(shape=(1, ), name="three_precise_exposure_num"),
seven_precise_exposure_num = layers.Input(shape=(1, ), name="seven_precise_exposure_num"),
fifteen_precise_exposure_num = layers.Input(shape=(1, ), name="fifteen_precise_exposure_num"),
thirty_precise_exposure_num = layers.Input(shape=(1, ), name="thirty_precise_exposure_num"),
sixty_precise_exposure_num = layers.Input(shape=(1, ), name="sixty_precise_exposure_num"),
ninety_precise_exposure_num = layers.Input(shape=(1, ), name="ninety_precise_exposure_num"),
history_precise_exposure_num = layers.Input(shape=(1, ), name="history_precise_exposure_num"),
one_vote_user_num = layers.Input(shape=(1, ), name="one_vote_user_num"),
three_vote_user_num = layers.Input(shape=(1, ), name="three_vote_user_num"),
seven_vote_user_num = layers.Input(shape=(1, ), name="seven_vote_user_num"),
fifteen_vote_user_num = layers.Input(shape=(1, ), name="fifteen_vote_user_num"),
thirty_vote_user_num = layers.Input(shape=(1, ), name="thirty_vote_user_num"),
sixty_vote_user_num = layers.Input(shape=(1, ), name="sixty_vote_user_num"),
ninety_vote_user_num = layers.Input(shape=(1, ), name="ninety_vote_user_num"),
history_vote_user_num = layers.Input(shape=(1, ), name="history_vote_user_num"),
one_reply_user_num = layers.Input(shape=(1, ), name="one_reply_user_num"),
three_reply_user_num = layers.Input(shape=(1, ), name="three_reply_user_num"),
seven_reply_user_num = layers.Input(shape=(1, ), name="seven_reply_user_num"),
fifteen_reply_user_num = layers.Input(shape=(1, ), name="fifteen_reply_user_num"),
thirty_reply_user_num = layers.Input(shape=(1, ), name="thirty_reply_user_num"),
sixty_reply_user_num = layers.Input(shape=(1, ), name="sixty_reply_user_num"),
ninety_reply_user_num = layers.Input(shape=(1, ), name="ninety_reply_user_num"),
history_reply_user_num = layers.Input(shape=(1, ), name="history_reply_user_num"),
one_browse_user_num = layers.Input(shape=(1, ), name="one_browse_user_num"),
three_browse_user_num = layers.Input(shape=(1, ), name="three_browse_user_num"),
seven_browse_user_num = layers.Input(shape=(1, ), name="seven_browse_user_num"),
fifteen_browse_user_num = layers.Input(shape=(1, ), name="fifteen_browse_user_num"),
thirty_browse_user_num = layers.Input(shape=(1, ), name="thirty_browse_user_num"),
sixty_browse_user_num = layers.Input(shape=(1, ), name="sixty_browse_user_num"),
ninety_browse_user_num = layers.Input(shape=(1, ), name="ninety_browse_user_num"),
history_browse_user_num = layers.Input(shape=(1, ), name="history_browse_user_num"),
one_reply_num = layers.Input(shape=(1, ), name="one_reply_num"),
three_reply_num = layers.Input(shape=(1, ), name="three_reply_num"),
seven_reply_num = layers.Input(shape=(1, ), name="seven_reply_num"),
fifteen_reply_num = layers.Input(shape=(1, ), name="fifteen_reply_num"),
thirty_reply_num = layers.Input(shape=(1, ), name="thirty_reply_num"),
sixty_reply_num = layers.Input(shape=(1, ), name="sixty_reply_num"),
ninety_reply_num = layers.Input(shape=(1, ), name="ninety_reply_num"),
history_reply_num = layers.Input(shape=(1, ), name="history_reply_num"),
one_ctr = layers.Input(shape=(1, ), name="one_ctr"),
three_ctr = layers.Input(shape=(1, ), name="three_ctr"),
seven_ctr = layers.Input(shape=(1, ), name="seven_ctr"),
fifteen_ctr = layers.Input(shape=(1, ), name="fifteen_ctr"),
thirty_ctr = layers.Input(shape=(1, ), name="thirty_ctr"),
sixty_ctr = layers.Input(shape=(1, ), name="sixty_ctr"),
ninety_ctr = layers.Input(shape=(1, ), name="ninety_ctr"),
history_ctr = layers.Input(shape=(1, ), name="history_ctr"),
one_vote_pure_rate = layers.Input(shape=(1, ), name="one_vote_pure_rate"),
three_vote_pure_rate = layers.Input(shape=(1, ), name="three_vote_pure_rate"),
seven_vote_pure_rate = layers.Input(shape=(1, ), name="seven_vote_pure_rate"),
fifteen_vote_pure_rate = layers.Input(shape=(1, ), name="fifteen_vote_pure_rate"),
thirty_vote_pure_rate = layers.Input(shape=(1, ), name="thirty_vote_pure_rate"),
sixty_vote_pure_rate = layers.Input(shape=(1, ), name="sixty_vote_pure_rate"),
ninety_vote_pure_rate = layers.Input(shape=(1, ), name="ninety_vote_pure_rate"),
history_vote_pure_rate = layers.Input(shape=(1, ), name="history_vote_pure_rate"),
one_reply_pure_rate = layers.Input(shape=(1, ), name="one_reply_pure_rate"),
three_reply_pure_rate = layers.Input(shape=(1, ), name="three_reply_pure_rate"),
seven_reply_pure_rate = layers.Input(shape=(1, ), name="seven_reply_pure_rate"),
fifteen_reply_pure_rate = layers.Input(shape=(1, ), name="fifteen_reply_pure_rate"),
thirty_reply_pure_rate = layers.Input(shape=(1, ), name="thirty_reply_pure_rate"),
sixty_reply_pure_rate = layers.Input(shape=(1, ), name="sixty_reply_pure_rate"),
ninety_reply_pure_rate = layers.Input(shape=(1, ), name="ninety_reply_pure_rate"),
history_reply_pure_rate = layers.Input(shape=(1, ), name="history_reply_pure_rate"),
card_fd = layers.Input(shape=(1, ), name="card_fd"),
card_sd = layers.Input(shape=(1, ), name="card_sd"),
card_fs = layers.Input(shape=(1, ), name="card_fs"),
card_ss = layers.Input(shape=(1, ), name="card_ss"),
card_fp = layers.Input(shape=(1, ), name="card_fp"),
card_sp = layers.Input(shape=(1, ), name="card_sp"),
card_p = layers.Input(shape=(1, ), name="card_p"),
card_fd2 = layers.Input(shape=(1, ), name="card_fd2"),
card_sd2 = layers.Input(shape=(1, ), name="card_sd2"),
card_fs2 = layers.Input(shape=(1, ), name="card_fs2"),
card_ss2 = layers.Input(shape=(1, ), name="card_ss2"),
card_fp2 = layers.Input(shape=(1, ), name="card_fp2"),
card_sp2 = layers.Input(shape=(1, ), name="card_sp2"),
card_p2 = layers.Input(shape=(1, ), name="card_p2"),
# user tower
device_vector = layers.concatenate([
layers.Embedding(DEVICE_DIM_DICT.get("device_id"), 1000)(device_id),
layers.Embedding(DEVICE_DIM_DICT.get("device_fd"), int(DEVICE_DIM_DICT.get("device_fd") / 10))(device_fd),
layers.Embedding(DEVICE_DIM_DICT.get("device_sd"), int(DEVICE_DIM_DICT.get("device_sd") / 10))(device_sd),
layers.Embedding(DEVICE_DIM_DICT.get("device_fs"), int(DEVICE_DIM_DICT.get("device_fs") / 10))(device_fs),
layers.Embedding(DEVICE_DIM_DICT.get("device_ss"), int(DEVICE_DIM_DICT.get("device_ss") / 10))(device_ss),
layers.Embedding(DEVICE_DIM_DICT.get("device_fp"), int(DEVICE_DIM_DICT.get("device_fp") / 10))(device_fp),
layers.Embedding(DEVICE_DIM_DICT.get("device_sp"), int(DEVICE_DIM_DICT.get("device_sp") / 10))(device_sp),
layers.Embedding(DEVICE_DIM_DICT.get("device_p"), int(DEVICE_DIM_DICT.get("device_p") / 10))(device_p),
layers.Embedding(DEVICE_DIM_DICT.get("device_fd2"), int(DEVICE_DIM_DICT.get("device_fd2") / 10))(device_fd2),
layers.Embedding(DEVICE_DIM_DICT.get("device_sd2"), int(DEVICE_DIM_DICT.get("device_sd2") / 10))(device_sd2),
layers.Embedding(DEVICE_DIM_DICT.get("device_fs2"), int(DEVICE_DIM_DICT.get("device_fs2") / 10))(device_fs2),
layers.Embedding(DEVICE_DIM_DICT.get("device_ss2"), int(DEVICE_DIM_DICT.get("device_ss2") / 10))(device_ss2),
layers.Embedding(DEVICE_DIM_DICT.get("device_fp2"), int(DEVICE_DIM_DICT.get("device_fp2") / 10))(device_fp2),
layers.Embedding(DEVICE_DIM_DICT.get("device_sp"), int(DEVICE_DIM_DICT.get("device_sp2") / 10))(device_sp2),
layers.Embedding(DEVICE_DIM_DICT.get("device_p2"), int(DEVICE_DIM_DICT.get("device_p2") / 10))(device_p2),
])
device_vector = layers.Dense(3000, activation=activations.relu)(device_vector)
device_vector = layers.Dense(
1000,
activation=activations.relu,
name="device_embedding",
kernel_regularizer="l2",
)(device_vector)
# item tower
tractate_vector = layers.concatenate(
[
layers.Embedding(TRACTATE_DIM_DICT.get("card_id"), 3000)(card_id),
layers.Embedding(TRACTATE_DIM_DICT.get("is_pure_author"),
int(TRACTATE_DIM_DICT.get("is_pure_author") / 10))(is_pure_author),
layers.Embedding(TRACTATE_DIM_DICT.get("is_have_pure_reply"),
int(TRACTATE_DIM_DICT.get("is_have_pure_reply") / 10))(is_have_pure_reply),
layers.Embedding(TRACTATE_DIM_DICT.get("is_have_reply"),
int(TRACTATE_DIM_DICT.get("is_have_reply") / 10))(is_have_reply),
layers.Embedding(TRACTATE_DIM_DICT.get("content_level"),
int(TRACTATE_DIM_DICT.get("content_level") / 10))(content_level),
layers.Embedding(TRACTATE_DIM_DICT.get("topic_seven_click_num"),
int(TRACTATE_DIM_DICT.get("topic_seven_click_num") / 10))(topic_seven_click_num),
layers.Embedding(TRACTATE_DIM_DICT.get("topic_thirty_click_num"),
int(TRACTATE_DIM_DICT.get("topic_thirty_click_num") / 10))(topic_thirty_click_num),
layers.Embedding(TRACTATE_DIM_DICT.get("topic_num"), int(TRACTATE_DIM_DICT.get("topic_num") / 10))(topic_num),
layers.Embedding(TRACTATE_DIM_DICT.get("seven_transform_num"),
int(TRACTATE_DIM_DICT.get("seven_transform_num") / 10))(seven_transform_num),
layers.Embedding(TRACTATE_DIM_DICT.get("thirty_transform_num"),
int(TRACTATE_DIM_DICT.get("thirty_transform_num") / 10))(thirty_transform_num),
layers.Embedding(TRACTATE_DIM_DICT.get("favor_num"), int(TRACTATE_DIM_DICT.get("favor_num") / 10))(favor_num),
layers.Embedding(TRACTATE_DIM_DICT.get("favor_pure_num"),
int(TRACTATE_DIM_DICT.get("favor_pure_num") / 10))(favor_pure_num),
layers.Embedding(TRACTATE_DIM_DICT.get("vote_num"), int(TRACTATE_DIM_DICT.get("vote_num") / 10))(vote_num),
layers.Embedding(TRACTATE_DIM_DICT.get("vote_display_num"),
int(TRACTATE_DIM_DICT.get("vote_display_num") / 10))(vote_display_num),
layers.Embedding(TRACTATE_DIM_DICT.get("reply_num"), int(TRACTATE_DIM_DICT.get("reply_num") / 10))(reply_num),
layers.Embedding(TRACTATE_DIM_DICT.get("reply_pure_num"),
int(TRACTATE_DIM_DICT.get("reply_pure_num") / 10))(reply_pure_num),
layers.Embedding(TRACTATE_DIM_DICT.get("one_click_num"),
int(TRACTATE_DIM_DICT.get("one_click_num") / 10))(one_click_num),
layers.Embedding(TRACTATE_DIM_DICT.get("three_click_num"),
int(TRACTATE_DIM_DICT.get("three_click_num") / 10))(three_click_num),
layers.Embedding(TRACTATE_DIM_DICT.get("seven_click_num"),
int(TRACTATE_DIM_DICT.get("seven_click_num") / 10))(seven_click_num),
layers.Embedding(TRACTATE_DIM_DICT.get("fifteen_click_num"),
int(TRACTATE_DIM_DICT.get("fifteen_click_num") / 10))(fifteen_click_num),
layers.Embedding(TRACTATE_DIM_DICT.get("thirty_click_num"),
int(TRACTATE_DIM_DICT.get("thirty_click_num") / 10))(thirty_click_num),
layers.Embedding(TRACTATE_DIM_DICT.get("sixty_click_num"),
int(TRACTATE_DIM_DICT.get("sixty_click_num") / 10))(sixty_click_num),
layers.Embedding(TRACTATE_DIM_DICT.get("ninety_click_num"),
int(TRACTATE_DIM_DICT.get("ninety_click_num") / 10))(ninety_click_num),
layers.Embedding(TRACTATE_DIM_DICT.get("history_click_num"),
int(TRACTATE_DIM_DICT.get("history_click_num") / 10))(history_click_num),
layers.Embedding(TRACTATE_DIM_DICT.get("one_precise_exposure_num"),
int(TRACTATE_DIM_DICT.get("one_precise_exposure_num") / 10))(one_precise_exposure_num),
layers.Embedding(TRACTATE_DIM_DICT.get("three_precise_exposure_num"),
int(TRACTATE_DIM_DICT.get("three_precise_exposure_num") / 10))(three_precise_exposure_num),
layers.Embedding(TRACTATE_DIM_DICT.get("seven_precise_exposure_num"),
int(TRACTATE_DIM_DICT.get("seven_precise_exposure_num") / 10))(seven_precise_exposure_num),
layers.Embedding(TRACTATE_DIM_DICT.get("fifteen_precise_exposure_num"),
int(TRACTATE_DIM_DICT.get("fifteen_precise_exposure_num") / 10))(fifteen_precise_exposure_num),
layers.Embedding(TRACTATE_DIM_DICT.get("thirty_precise_exposure_num"),
int(TRACTATE_DIM_DICT.get("thirty_precise_exposure_num") / 10))(thirty_precise_exposure_num),
layers.Embedding(TRACTATE_DIM_DICT.get("sixty_precise_exposure_num"),
int(TRACTATE_DIM_DICT.get("sixty_precise_exposure_num") / 10))(sixty_precise_exposure_num),
layers.Embedding(TRACTATE_DIM_DICT.get("ninety_precise_exposure_num"),
int(TRACTATE_DIM_DICT.get("ninety_precise_exposure_num") / 10))(ninety_precise_exposure_num),
layers.Embedding(TRACTATE_DIM_DICT.get("history_precise_exposure_num"),
int(TRACTATE_DIM_DICT.get("history_precise_exposure_num") / 10))(history_precise_exposure_num),
layers.Embedding(TRACTATE_DIM_DICT.get("one_vote_user_num"),
int(TRACTATE_DIM_DICT.get("one_vote_user_num") / 10))(one_vote_user_num),
layers.Embedding(TRACTATE_DIM_DICT.get("three_vote_user_num"),
int(TRACTATE_DIM_DICT.get("three_vote_user_num") / 10))(three_vote_user_num),
layers.Embedding(TRACTATE_DIM_DICT.get("seven_vote_user_num"),
int(TRACTATE_DIM_DICT.get("seven_vote_user_num") / 10))(seven_vote_user_num),
layers.Embedding(TRACTATE_DIM_DICT.get("fifteen_vote_user_num"),
int(TRACTATE_DIM_DICT.get("fifteen_vote_user_num") / 10))(fifteen_vote_user_num),
layers.Embedding(TRACTATE_DIM_DICT.get("thirty_vote_user_num"),
int(TRACTATE_DIM_DICT.get("thirty_vote_user_num") / 10))(thirty_vote_user_num),
layers.Embedding(TRACTATE_DIM_DICT.get("sixty_vote_user_num"),
int(TRACTATE_DIM_DICT.get("sixty_vote_user_num") / 10))(sixty_vote_user_num),
layers.Embedding(TRACTATE_DIM_DICT.get("ninety_vote_user_num"),
int(TRACTATE_DIM_DICT.get("ninety_vote_user_num") / 10))(ninety_vote_user_num),
layers.Embedding(TRACTATE_DIM_DICT.get("history_vote_user_num"),
int(TRACTATE_DIM_DICT.get("history_vote_user_num") / 10))(history_vote_user_num),
layers.Embedding(TRACTATE_DIM_DICT.get("one_reply_user_num"),
int(TRACTATE_DIM_DICT.get("one_reply_user_num") / 10))(one_reply_user_num),
layers.Embedding(TRACTATE_DIM_DICT.get("three_reply_user_num"),
int(TRACTATE_DIM_DICT.get("three_reply_user_num") / 10))(three_reply_user_num),
layers.Embedding(TRACTATE_DIM_DICT.get("seven_reply_user_num"),
int(TRACTATE_DIM_DICT.get("seven_reply_user_num") / 10))(seven_reply_user_num),
layers.Embedding(TRACTATE_DIM_DICT.get("fifteen_reply_user_num"),
int(TRACTATE_DIM_DICT.get("fifteen_reply_user_num") / 10))(fifteen_reply_user_num),
layers.Embedding(TRACTATE_DIM_DICT.get("thirty_reply_user_num"),
int(TRACTATE_DIM_DICT.get("thirty_reply_user_num") / 10))(thirty_reply_user_num),
layers.Embedding(TRACTATE_DIM_DICT.get("sixty_reply_user_num"),
int(TRACTATE_DIM_DICT.get("sixty_reply_user_num") / 10))(sixty_reply_user_num),
layers.Embedding(TRACTATE_DIM_DICT.get("ninety_reply_user_num"),
int(TRACTATE_DIM_DICT.get("ninety_reply_user_num") / 10))(ninety_reply_user_num),
layers.Embedding(TRACTATE_DIM_DICT.get("history_reply_user_num"),
int(TRACTATE_DIM_DICT.get("history_reply_user_num") / 10))(history_reply_user_num),
layers.Embedding(TRACTATE_DIM_DICT.get("one_browse_user_num"),
int(TRACTATE_DIM_DICT.get("one_browse_user_num") / 10))(one_browse_user_num),
layers.Embedding(TRACTATE_DIM_DICT.get("three_browse_user_num"),
int(TRACTATE_DIM_DICT.get("three_browse_user_num") / 10))(three_browse_user_num),
layers.Embedding(TRACTATE_DIM_DICT.get("seven_browse_user_num"),
int(TRACTATE_DIM_DICT.get("seven_browse_user_num") / 10))(seven_browse_user_num),
layers.Embedding(TRACTATE_DIM_DICT.get("fifteen_browse_user_num"),
int(TRACTATE_DIM_DICT.get("fifteen_browse_user_num") / 10))(fifteen_browse_user_num),
layers.Embedding(TRACTATE_DIM_DICT.get("thirty_browse_user_num"),
int(TRACTATE_DIM_DICT.get("thirty_browse_user_num") / 10))(thirty_browse_user_num),
layers.Embedding(TRACTATE_DIM_DICT.get("sixty_browse_user_num"),
int(TRACTATE_DIM_DICT.get("sixty_browse_user_num") / 10))(sixty_browse_user_num),
layers.Embedding(TRACTATE_DIM_DICT.get("ninety_browse_user_num"),
int(TRACTATE_DIM_DICT.get("ninety_browse_user_num") / 10))(ninety_browse_user_num),
layers.Embedding(TRACTATE_DIM_DICT.get("history_browse_user_num"),
int(TRACTATE_DIM_DICT.get("history_browse_user_num") / 10))(history_browse_user_num),
layers.Embedding(TRACTATE_DIM_DICT.get("one_reply_num"),
int(TRACTATE_DIM_DICT.get("one_reply_num") / 10))(one_reply_num),
layers.Embedding(TRACTATE_DIM_DICT.get("three_reply_num"),
int(TRACTATE_DIM_DICT.get("three_reply_num") / 10))(three_reply_num),
layers.Embedding(TRACTATE_DIM_DICT.get("seven_reply_num"),
int(TRACTATE_DIM_DICT.get("seven_reply_num") / 10))(seven_reply_num),
layers.Embedding(TRACTATE_DIM_DICT.get("fifteen_reply_num"),
int(TRACTATE_DIM_DICT.get("fifteen_reply_num") / 10))(fifteen_reply_num),
layers.Embedding(TRACTATE_DIM_DICT.get("thirty_reply_num"),
int(TRACTATE_DIM_DICT.get("thirty_reply_num") / 10))(thirty_reply_num),
layers.Embedding(TRACTATE_DIM_DICT.get("sixty_reply_num"),
int(TRACTATE_DIM_DICT.get("sixty_reply_num") / 10))(sixty_reply_num),
layers.Embedding(TRACTATE_DIM_DICT.get("ninety_reply_num"),
int(TRACTATE_DIM_DICT.get("ninety_reply_num") / 10))(ninety_reply_num),
layers.Embedding(TRACTATE_DIM_DICT.get("history_reply_num"),
int(TRACTATE_DIM_DICT.get("history_reply_num") / 10))(history_reply_num),
layers.Embedding(TRACTATE_DIM_DICT.get("one_ctr"), int(TRACTATE_DIM_DICT.get("one_ctr") / 10))(one_ctr),
layers.Embedding(TRACTATE_DIM_DICT.get("three_ctr"), int(TRACTATE_DIM_DICT.get("three_ctr") / 10))(three_ctr),
layers.Embedding(TRACTATE_DIM_DICT.get("seven_ctr"), int(TRACTATE_DIM_DICT.get("seven_ctr") / 10))(seven_ctr),
layers.Embedding(TRACTATE_DIM_DICT.get("fifteen_ctr"), int(TRACTATE_DIM_DICT.get("fifteen_ctr") / 10))(fifteen_ctr),
layers.Embedding(TRACTATE_DIM_DICT.get("thirty_ctr"), int(TRACTATE_DIM_DICT.get("thirty_ctr") / 10))(thirty_ctr),
layers.Embedding(TRACTATE_DIM_DICT.get("sixty_ctr"), int(TRACTATE_DIM_DICT.get("sixty_ctr") / 10))(sixty_ctr),
layers.Embedding(TRACTATE_DIM_DICT.get("ninety_ctr"), int(TRACTATE_DIM_DICT.get("ninety_ctr") / 10))(ninety_ctr),
layers.Embedding(TRACTATE_DIM_DICT.get("history_ctr"), int(TRACTATE_DIM_DICT.get("history_ctr") / 10))(history_ctr),
layers.Embedding(TRACTATE_DIM_DICT.get("one_vote_pure_rate"),
int(TRACTATE_DIM_DICT.get("one_vote_pure_rate") / 10))(one_vote_pure_rate),
layers.Embedding(TRACTATE_DIM_DICT.get("three_vote_pure_rate"),
int(TRACTATE_DIM_DICT.get("three_vote_pure_rate") / 10))(three_vote_pure_rate),
layers.Embedding(TRACTATE_DIM_DICT.get("seven_vote_pure_rate"),
int(TRACTATE_DIM_DICT.get("seven_vote_pure_rate") / 10))(seven_vote_pure_rate),
layers.Embedding(TRACTATE_DIM_DICT.get("fifteen_vote_pure_rate"),
int(TRACTATE_DIM_DICT.get("fifteen_vote_pure_rate") / 10))(fifteen_vote_pure_rate),
layers.Embedding(TRACTATE_DIM_DICT.get("thirty_vote_pure_rate"),
int(TRACTATE_DIM_DICT.get("thirty_vote_pure_rate") / 10))(thirty_vote_pure_rate),
layers.Embedding(TRACTATE_DIM_DICT.get("sixty_vote_pure_rate"),
int(TRACTATE_DIM_DICT.get("sixty_vote_pure_rate") / 10))(sixty_vote_pure_rate),
layers.Embedding(TRACTATE_DIM_DICT.get("ninety_vote_pure_rate"),
int(TRACTATE_DIM_DICT.get("ninety_vote_pure_rate") / 10))(ninety_vote_pure_rate),
layers.Embedding(TRACTATE_DIM_DICT.get("history_vote_pure_rate"),
int(TRACTATE_DIM_DICT.get("history_vote_pure_rate") / 10))(history_vote_pure_rate),
layers.Embedding(TRACTATE_DIM_DICT.get("one_reply_pure_rate"),
int(TRACTATE_DIM_DICT.get("one_reply_pure_rate") / 10))(one_reply_pure_rate),
layers.Embedding(TRACTATE_DIM_DICT.get("three_reply_pure_rate"),
int(TRACTATE_DIM_DICT.get("three_reply_pure_rate") / 10))(three_reply_pure_rate),
layers.Embedding(TRACTATE_DIM_DICT.get("seven_reply_pure_rate"),
int(TRACTATE_DIM_DICT.get("seven_reply_pure_rate") / 10))(seven_reply_pure_rate),
layers.Embedding(TRACTATE_DIM_DICT.get("fifteen_reply_pure_rate"),
int(TRACTATE_DIM_DICT.get("fifteen_reply_pure_rate") / 10))(fifteen_reply_pure_rate),
layers.Embedding(TRACTATE_DIM_DICT.get("thirty_reply_pure_rate"),
int(TRACTATE_DIM_DICT.get("thirty_reply_pure_rate") / 10))(thirty_reply_pure_rate),
layers.Embedding(TRACTATE_DIM_DICT.get("sixty_reply_pure_rate"),
int(TRACTATE_DIM_DICT.get("sixty_reply_pure_rate") / 10))(sixty_reply_pure_rate),
layers.Embedding(TRACTATE_DIM_DICT.get("ninety_reply_pure_rate"),
int(TRACTATE_DIM_DICT.get("ninety_reply_pure_rate") / 10))(ninety_reply_pure_rate),
layers.Embedding(TRACTATE_DIM_DICT.get("history_reply_pure_rate"),
int(TRACTATE_DIM_DICT.get("history_reply_pure_rate") / 10))(history_reply_pure_rate),
layers.Embedding(TRACTATE_DIM_DICT.get("card_fd"), int(TRACTATE_DIM_DICT.get("card_fd") / 10))(card_fd),
layers.Embedding(TRACTATE_DIM_DICT.get("card_sd"), int(TRACTATE_DIM_DICT.get("card_sd") / 10))(card_sd),
layers.Embedding(TRACTATE_DIM_DICT.get("card_fs"), int(TRACTATE_DIM_DICT.get("card_fs") / 10))(card_fs),
layers.Embedding(TRACTATE_DIM_DICT.get("card_ss"), int(TRACTATE_DIM_DICT.get("card_ss") / 10))(card_ss),
layers.Embedding(TRACTATE_DIM_DICT.get("card_fp"), int(TRACTATE_DIM_DICT.get("card_fp") / 10))(card_fp),
layers.Embedding(TRACTATE_DIM_DICT.get("card_sp"), int(TRACTATE_DIM_DICT.get("card_sp") / 10))(card_sp),
layers.Embedding(TRACTATE_DIM_DICT.get("card_p"), int(TRACTATE_DIM_DICT.get("card_p") / 10))(card_p),
layers.Embedding(TRACTATE_DIM_DICT.get("card_fd2"), int(TRACTATE_DIM_DICT.get("card_fd2") / 10))(card_fd2),
layers.Embedding(TRACTATE_DIM_DICT.get("card_sd2"), int(TRACTATE_DIM_DICT.get("card_sd2") / 10))(card_sd2),
layers.Embedding(TRACTATE_DIM_DICT.get("card_fs2"), int(TRACTATE_DIM_DICT.get("card_fs2") / 10))(card_fs2),
layers.Embedding(TRACTATE_DIM_DICT.get("card_ss2"), int(TRACTATE_DIM_DICT.get("card_ss2") / 10))(card_ss2),
layers.Embedding(TRACTATE_DIM_DICT.get("card_fp2"), int(TRACTATE_DIM_DICT.get("card_fp2") / 10))(card_fp2),
layers.Embedding(TRACTATE_DIM_DICT.get("card_sp2"), int(TRACTATE_DIM_DICT.get("card_sp2") / 10))(card_sp2),
layers.Embedding(TRACTATE_DIM_DICT.get("card_p2"), int(TRACTATE_DIM_DICT.get("card_p2") / 10))(card_p2),
])
tractate_vector = layers.Dense(3000, activation=activations.relu)(tractate_vector)
tractate_vector = layers.Dense(
1000,
activation=activations.relu,
name="tractate_embedding",
kernel_regularizer="l2",
)(tractate_vector)
device_tractate_dot = tf.reduce_sum(device_vector * tractate_vector, axis=1)
device_tractate_dot = tf.expand_dims(device_tractate_dot, 1)
output = layers.Dense(1, activation=activations.sigmoid)(device_tractate_dot)
inputs = [
device_id, device_fd, device_sd, device_fs, device_ss, device_fp, device_sp, device_p, device_fd2, device_sd2, device_fs2,
device_ss2, device_fp2, device_sp2, device_p2, card_id, is_pure_author, is_have_pure_reply, is_have_reply, content_level,
topic_seven_click_num, topic_thirty_click_num, topic_num, seven_transform_num, thirty_transform_num, favor_num,
favor_pure_num, vote_num, vote_display_num, reply_num, reply_pure_num, one_click_num, three_click_num, seven_click_num,
fifteen_click_num, thirty_click_num, sixty_click_num, ninety_click_num, history_click_num, one_precise_exposure_num,
three_precise_exposure_num, seven_precise_exposure_num, fifteen_precise_exposure_num, thirty_precise_exposure_num,
sixty_precise_exposure_num, ninety_precise_exposure_num, history_precise_exposure_num, one_vote_user_num,
three_vote_user_num, seven_vote_user_num, fifteen_vote_user_num, thirty_vote_user_num, sixty_vote_user_num,
ninety_vote_user_num, history_vote_user_num, one_reply_user_num, three_reply_user_num, seven_reply_user_num,
fifteen_reply_user_num, thirty_reply_user_num, sixty_reply_user_num, ninety_reply_user_num, history_reply_user_num,
one_browse_user_num, three_browse_user_num, seven_browse_user_num, fifteen_browse_user_num, thirty_browse_user_num,
sixty_browse_user_num, ninety_browse_user_num, history_browse_user_num, one_reply_num, three_reply_num, seven_reply_num,
fifteen_reply_num, thirty_reply_num, sixty_reply_num, ninety_reply_num, history_reply_num, one_ctr, three_ctr, seven_ctr,
fifteen_ctr, thirty_ctr, sixty_ctr, ninety_ctr, history_ctr, one_vote_pure_rate, three_vote_pure_rate,
seven_vote_pure_rate, fifteen_vote_pure_rate, thirty_vote_pure_rate, sixty_vote_pure_rate, ninety_vote_pure_rate,
history_vote_pure_rate, one_reply_pure_rate, three_reply_pure_rate, seven_reply_pure_rate, fifteen_reply_pure_rate,
thirty_reply_pure_rate, sixty_reply_pure_rate, ninety_reply_pure_rate, history_reply_pure_rate, card_fd, card_sd, card_fs,
card_ss, card_fp, card_sp, card_p, card_fd2, card_sd2, card_fs2, card_ss2, card_fp2, card_sp2, card_p2
]
model = tf.keras.Model(inputs=inputs, outputs=[output])
print(model.summary())
model.compile(
loss=losses.MeanSquaredError(),
optimizer=optimizers.RMSprop(),
metrics=[metrics.binary_accuracy],
)
return model
if __name__ == "__main__": if __name__ == "__main__":
df = device_tractae_fe() df = device_tractae_fe()
print(df.head(3), df.shape) print(df.head(3), df.shape)
y = df["label"]
# device_df = df[DEVICE_COLUMNS]
# tractate_df = df[TRACTATE_COLUMNS]
DEVICE_DIM_DICT = get_input_dim(df, DEVICE_COLUMNS)
TRACTATE_DIM_DICT = get_input_dim(df, TRACTATE_COLUMNS)
model = tractate_dssm_model()
x_train = []
for i in DEVICE_COLUMNS + TRACTATE_COLUMNS:
x_train.append(df[i])
history = model.fit(x=x_train,
y=y,
batch_size=32,
epochs=5,
verbose=1,
callbacks=[
tf.keras.callbacks.EarlyStopping(monitor="loss", patience=3),
])
history_dict = history.history
print(history_dict)
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment