Commit 4661bcdb authored by 赵威's avatar 赵威

try cross columns for diary

parent f86662b3
......@@ -304,6 +304,15 @@ CATEGORICAL_COLUMNS = [
"is_related_service",
"service_city",
]
# (device, content) feature-name pairs to be combined into crossed feature
# columns; each suffix pairs a device-side column with its content-side twin.
CROSS_COLUMNS = [["device_" + suffix, "content_" + suffix]
                 for suffix in ("fd", "sd", "fs", "ss", "fp", "sp", "p")]
def read_csv_data(dataset_path):
......
......@@ -4,7 +4,15 @@ from tensorflow import feature_column as fc
from ..utils import create_boundaries, create_vocabulary_list
def build_features(df, int_columns, float_columns, categorical_columns):
def build_features(df, int_columns, float_columns, categorical_columns, cross_columns=[]):
"""
df:
int_columns:
float_columns:
categorical_columns:
cross_columns=[]:
return:
"""
numeric_features = []
for col in (int_columns + float_columns):
if col in int_columns:
......@@ -33,11 +41,13 @@ def build_features(df, int_columns, float_columns, categorical_columns):
categorical_features.append(
fc.indicator_column(fc.categorical_column_with_vocabulary_list(col, create_vocabulary_list(df, col))))
    # Cross each configured (device, content) column pair into a hashed feature.
a = fc.categorical_column_with_vocabulary_list("device_fd", create_vocabulary_list(df, "device_fd"))
b = fc.categorical_column_with_vocabulary_list("content_fd", create_vocabulary_list(df, "content_fd"))
c = tf.feature_column.crossed_column([a, b], hash_bucket_size=10)
categorical_features.append(tf.feature_column.indicator_column(c))
for lst in cross_columns:
column_a = lst[0]
column_b = lst[1]
tmp_a = fc.categorical_column_with_vocabulary_list(column_a, create_vocabulary_list(df, column_a))
tmp_b = fc.categorical_column_with_vocabulary_list(column_b, create_vocabulary_list(df, column_b))
tmp_res = tf.feature_column.crossed_column([tmp_a, tmp_b], hash_bucket_size=10)
categorical_features.append(tf.feature_column.indicator_column(tmp_res))
all_features = (numeric_features + categorical_features)
return all_features
......@@ -49,7 +49,8 @@ def main():
train_df, test_df = train_test_split(df, test_size=0.2)
train_df, val_df = train_test_split(train_df, test_size=0.2)
all_features = fe.build_features(df, diary_fe.INT_COLUMNS, diary_fe.FLOAT_COLUMNS, diary_fe.CATEGORICAL_COLUMNS)
all_features = fe.build_features(df, diary_fe.INT_COLUMNS, diary_fe.FLOAT_COLUMNS, diary_fe.CATEGORICAL_COLUMNS,
diary_fe.CROSS_COLUMNS)
params = {"feature_columns": all_features, "hidden_units": [360, 200, 80, 2], "learning_rate": 0.2}
model_path = str(Path("/data/files/model_tmp/diary/").expanduser())
if os.path.exists(model_path):
......@@ -79,7 +80,7 @@ def main():
print("============================================================")
# save_path = str(Path("~/Desktop/models/1596012827").expanduser()) # local
# save_path = "/data/files/models/diary/1597390452" # server
# save_path = "/data/files/models/diary/1598948682" # server
# tf.saved_model.load
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment