Commit b362c073 authored by 赵威's avatar 赵威

service feature

parent 52f719f1
{
"executionEnvironments": [{ "root": "src" }]
}
......@@ -87,6 +87,11 @@ _int_columns = [
"first_positions_num",
"second_positions_num",
"projects_num",
"is_related_service",
"effect_second_skip_num",
"business_second_skip_num",
"service_price",
"service_sold_num",
]
_float_columns = [
"one_ctr",
......@@ -121,13 +126,55 @@ _float_columns = [
"sixty_browse_duration_avg",
"ninety_browse_duration_avg",
"history_browse_duration_avg",
"effect_second_skip_rate",
"business_second_skip_rate",
]
_categorical_columns = [
"device_id", "past_consume_ability_history", "potential_consume_ability_history", "price_sensitive_history", "device_fd",
"device_sd", "device_fs", "device_ss", "device_fp", "device_sp", "device_p", "content_fd", "content_sd", "content_fs",
"content_ss", "content_fp", "content_sp", "content_p", "fd1", "fd2", "fd3", "sd1", "sd2", "sd3", "fs1", "fs2", "fs3", "ss1",
"ss2", "ss3", "fp1", "fp2", "fp3", "sp1", "sp2", "sp3", "p1", "p2", "p3", "click_diary_id1", "click_diary_id2",
"click_diary_id3", "click_diary_id4", "click_diary_id5"
"device_id",
"past_consume_ability_history",
"potential_consume_ability_history",
"price_sensitive_history",
"device_fd",
"device_sd",
"device_fs",
"device_ss",
"device_fp",
"device_sp",
"device_p",
"content_fd",
"content_sd",
"content_fs",
"content_ss",
"content_fp",
"content_sp",
"content_p",
"fd1",
"fd2",
"fd3",
"sd1",
"sd2",
"sd3",
"fs1",
"fs2",
"fs3",
"ss1",
"ss2",
"ss3",
"fp1",
"fp2",
"fp3",
"sp1",
"sp2",
"sp3",
"p1",
"p2",
"p3",
"click_diary_id1",
"click_diary_id2",
"click_diary_id3",
"click_diary_id4",
"click_diary_id5",
"service_city",
]
PREDICTION_ALL_COLUMNS = _int_columns + _float_columns + _categorical_columns
......
import pandas as pd
from utils.cache import redis_db_client
# "channel_first", "city_first", "model_first",
DIARY_DEVICE_COLUMNS = [
"device_id", "active_type", "active_days", "past_consume_ability_history", "potential_consume_ability_history",
"price_sensitive_history", "first_demands", "second_demands", "first_solutions", "second_solutions", "first_positions",
"second_positions", "projects", "click_diary_id1", "click_diary_id2", "click_diary_id3", "click_diary_id4", "click_diary_id5"
"device_id",
"active_type",
"active_days",
"past_consume_ability_history",
"potential_consume_ability_history",
"price_sensitive_history",
"first_demands",
"second_demands",
"first_solutions",
"second_solutions",
"first_positions",
"second_positions",
"projects",
"click_diary_id1",
"click_diary_id2",
"click_diary_id3",
"click_diary_id4",
"click_diary_id5",
]
TRACTATE_DEVICE_COLUMNS = [
"device_id", "active_type", "active_days", "channel_first", "city_first", "model_first", "past_consume_ability_history",
"potential_consume_ability_history", "price_sensitive_history", "first_demands", "second_demands", "first_solutions",
"second_solutions", "first_positions", "second_positions", "projects", "click_tractate_id1", "click_tractate_id2",
"click_tractate_id3", "click_tractate_id4", "click_tractate_id5"
"device_id",
"active_type",
"active_days",
"channel_first",
"city_first",
"model_first",
"past_consume_ability_history",
"potential_consume_ability_history",
"price_sensitive_history",
"first_demands",
"second_demands",
"first_solutions",
"second_solutions",
"first_positions",
"second_positions",
"projects",
"click_tractate_id1",
"click_tractate_id2",
"click_tractate_id3",
"click_tractate_id4",
"click_tractate_id5",
]
......
......@@ -124,6 +124,14 @@ DIARY_COLUMNS = [
"first_positions_num",
"second_positions_num",
"projects_num",
"is_related_service",
"effect_second_skip_num",
"business_second_skip_num",
"effect_second_skip_rate",
"business_second_skip_rate",
"service_price",
"service_sold_num",
"service_city",
]
INT_COLUMNS = [
"active_days",
......@@ -201,6 +209,10 @@ INT_COLUMNS = [
"first_positions_num",
"second_positions_num",
"projects_num",
"effect_second_skip_num",
"business_second_skip_num",
"service_price",
"service_sold_num",
]
FLOAT_COLUMNS = [
"one_ctr",
......@@ -235,14 +247,62 @@ FLOAT_COLUMNS = [
"sixty_browse_duration_avg",
"ninety_browse_duration_avg",
"history_browse_duration_avg",
"effect_second_skip_rate",
"business_second_skip_rate",
]
CATEGORICAL_COLUMNS = [
"device_id", "active_type", "past_consume_ability_history", "potential_consume_ability_history", "price_sensitive_history",
"card_id", "is_pure_author", "is_have_reply", "is_have_pure_reply", "content_level", "device_fd", "content_fd", "fd1", "fd2",
"fd3", "device_sd", "content_sd", "sd1", "sd2", "sd3", "device_fs", "content_fs", "fs1", "fs2", "fs3", "device_ss",
"content_ss", "ss1", "ss2", "ss3", "device_fp", "content_fp", "fp1", "fp2", "fp3", "device_sp", "content_sp", "sp1", "sp2",
"sp3", "device_p", "content_p", "p1", "p2", "p3", "click_diary_id1", "click_diary_id2", "click_diary_id3", "click_diary_id4",
"click_diary_id5"
"device_id",
"active_type",
"past_consume_ability_history",
"potential_consume_ability_history",
"price_sensitive_history",
"card_id",
"is_pure_author",
"is_have_reply",
"is_have_pure_reply",
"content_level",
"device_fd",
"content_fd",
"fd1",
"fd2",
"fd3",
"device_sd",
"content_sd",
"sd1",
"sd2",
"sd3",
"device_fs",
"content_fs",
"fs1",
"fs2",
"fs3",
"device_ss",
"content_ss",
"ss1",
"ss2",
"ss3",
"device_fp",
"content_fp",
"fp1",
"fp2",
"fp3",
"device_sp",
"content_sp",
"sp1",
"sp2",
"sp3",
"device_p",
"content_p",
"p1",
"p2",
"p3",
"click_diary_id1",
"click_diary_id2",
"click_diary_id3",
"click_diary_id4",
"click_diary_id5",
"is_related_service",
"service_city",
]
......@@ -275,7 +335,7 @@ def get_diary_dict_from_redis():
if "" in tmp[col_name]:
tmp[col_name].remove("")
tmp[col_name + "_num"] = len(tmp[col_name])
elif col_name in ["is_pure_author", "is_have_pure_reply", "is_have_reply"]:
elif col_name in ["is_pure_author", "is_have_pure_reply", "is_have_reply", "is_related_service"]:
if elem == "true":
tmp[col_name] = 1
else:
......@@ -316,6 +376,7 @@ def diary_feature_engineering(df):
diary_df["is_pure_author"] = diary_df["is_pure_author"].astype(int)
diary_df["is_have_pure_reply"] = diary_df["is_have_pure_reply"].astype(int)
diary_df["is_have_reply"] = diary_df["is_have_reply"].astype(int)
diary_df["is_related_service"] = diary_df["is_related_service"].astype(int)
diary_df = diary_df[DIARY_COLUMNS]
......
......@@ -61,7 +61,8 @@ def main():
estimator_config = tf.estimator.RunConfig(session_config=session_config)
model = tf.estimator.Estimator(model_fn=esmm_model_fn, params=params, model_dir=model_path, config=estimator_config)
train_spec = tf.estimator.TrainSpec(input_fn=lambda: esmm_input_fn(train_df, shuffle=True), max_steps=50000)
# TODO 50000
train_spec = tf.estimator.TrainSpec(input_fn=lambda: esmm_input_fn(train_df, shuffle=True), max_steps=15000)
eval_spec = tf.estimator.EvalSpec(input_fn=lambda: esmm_input_fn(val_df, shuffle=False))
res = tf.estimator.train_and_evaluate(model, train_spec, eval_spec)
print("@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@")
......@@ -73,7 +74,8 @@ def main():
model_export_path = str(Path("/data/files/models/diary").expanduser())
save_path = model_export(model, all_features, model_export_path)
print("save to: " + save_path)
set_essm_model_save_path("diary", save_path)
# TODO save
# set_essm_model_save_path("diary", save_path)
print("============================================================")
# save_path = str(Path("~/Desktop/models/1596012827").expanduser()) # local
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment