Commit ff1b69af authored by 赵威

Merge branch 'fe' into 'offic'

Fe

See merge request !2
parents 52f719f1 679951ea
......@@ -87,6 +87,11 @@ _int_columns = [
"first_positions_num",
"second_positions_num",
"projects_num",
"is_related_service",
"effect_second_skip_num",
"business_second_skip_num",
"service_price",
"service_sold_num",
]
_float_columns = [
"one_ctr",
......@@ -121,13 +126,55 @@ _float_columns = [
"sixty_browse_duration_avg",
"ninety_browse_duration_avg",
"history_browse_duration_avg",
"effect_second_skip_rate",
"business_second_skip_rate",
]
# Categorical feature columns; concatenated into PREDICTION_ALL_COLUMNS together
# with the int and float column lists.
# Keep one name per line with a trailing comma: two adjacent string literals
# without a comma are silently joined into a single (non-existent) column name.
_categorical_columns = [
    "device_id",
    "past_consume_ability_history",
    "potential_consume_ability_history",
    "price_sensitive_history",
    "device_fd",
    "device_sd",
    "device_fs",
    "device_ss",
    "device_fp",
    "device_sp",
    "device_p",
    "content_fd",
    "content_sd",
    "content_fs",
    "content_ss",
    "content_fp",
    "content_sp",
    "content_p",
    "fd1",
    "fd2",
    "fd3",
    "sd1",
    "sd2",
    "sd3",
    "fs1",
    "fs2",
    "fs3",
    "ss1",
    "ss2",
    "ss3",
    "fp1",
    "fp2",
    "fp3",
    "sp1",
    "sp2",
    "sp3",
    "p1",
    "p2",
    "p3",
    "click_diary_id1",
    "click_diary_id2",
    "click_diary_id3",
    "click_diary_id4",
    "click_diary_id5",
    "service_city",
]
# Every prediction column, ordered as: integer, then float, then categorical.
PREDICTION_ALL_COLUMNS = [*_int_columns, *_float_columns, *_categorical_columns]
......
import pandas as pd
from utils.cache import redis_db_client
# "channel_first", "city_first", "model_first",
# Device-side column names for the diary pipeline.
# NOTE(review): presumably used to select columns from the device DataFrame;
# confirm against the caller.
# Keep one name per line with a trailing comma: two adjacent string literals
# without a comma are silently joined into a single (non-existent) column name.
DIARY_DEVICE_COLUMNS = [
    "device_id",
    "active_type",
    "active_days",
    "past_consume_ability_history",
    "potential_consume_ability_history",
    "price_sensitive_history",
    "first_demands",
    "second_demands",
    "first_solutions",
    "second_solutions",
    "first_positions",
    "second_positions",
    "projects",
    "click_diary_id1",
    "click_diary_id2",
    "click_diary_id3",
    "click_diary_id4",
    "click_diary_id5",
]
# Device-side column names for the tractate pipeline.
# NOTE(review): presumably used to select columns from the device DataFrame;
# confirm against the caller.
# Keep one name per line with a trailing comma: two adjacent string literals
# without a comma are silently joined into a single (non-existent) column name.
TRACTATE_DEVICE_COLUMNS = [
    "device_id",
    "active_type",
    "active_days",
    "channel_first",
    "city_first",
    "model_first",
    "past_consume_ability_history",
    "potential_consume_ability_history",
    "price_sensitive_history",
    "first_demands",
    "second_demands",
    "first_solutions",
    "second_solutions",
    "first_positions",
    "second_positions",
    "projects",
    "click_tractate_id1",
    "click_tractate_id2",
    "click_tractate_id3",
    "click_tractate_id4",
    "click_tractate_id5",
]
......
......@@ -124,6 +124,14 @@ DIARY_COLUMNS = [
"first_positions_num",
"second_positions_num",
"projects_num",
"is_related_service",
"effect_second_skip_num",
"business_second_skip_num",
"effect_second_skip_rate",
"business_second_skip_rate",
"service_price",
"service_sold_num",
"service_city",
]
INT_COLUMNS = [
"active_days",
......@@ -201,6 +209,10 @@ INT_COLUMNS = [
"first_positions_num",
"second_positions_num",
"projects_num",
"effect_second_skip_num",
"business_second_skip_num",
"service_price",
"service_sold_num",
]
FLOAT_COLUMNS = [
"one_ctr",
......@@ -235,14 +247,62 @@ FLOAT_COLUMNS = [
"sixty_browse_duration_avg",
"ninety_browse_duration_avg",
"history_browse_duration_avg",
"effect_second_skip_rate",
"business_second_skip_rate",
]
# Categorical feature columns for the diary model (listed separately from
# INT_COLUMNS and FLOAT_COLUMNS).
# Keep one name per line with a trailing comma: two adjacent string literals
# without a comma are silently joined into a single (non-existent) column name.
CATEGORICAL_COLUMNS = [
    "device_id",
    "active_type",
    "past_consume_ability_history",
    "potential_consume_ability_history",
    "price_sensitive_history",
    "card_id",
    "is_pure_author",
    "is_have_reply",
    "is_have_pure_reply",
    "content_level",
    "device_fd",
    "content_fd",
    "fd1",
    "fd2",
    "fd3",
    "device_sd",
    "content_sd",
    "sd1",
    "sd2",
    "sd3",
    "device_fs",
    "content_fs",
    "fs1",
    "fs2",
    "fs3",
    "device_ss",
    "content_ss",
    "ss1",
    "ss2",
    "ss3",
    "device_fp",
    "content_fp",
    "fp1",
    "fp2",
    "fp3",
    "device_sp",
    "content_sp",
    "sp1",
    "sp2",
    "sp3",
    "device_p",
    "content_p",
    "p1",
    "p2",
    "p3",
    "click_diary_id1",
    "click_diary_id2",
    "click_diary_id3",
    "click_diary_id4",
    "click_diary_id5",
    "is_related_service",
    "service_city",
]
......@@ -275,7 +335,7 @@ def get_diary_dict_from_redis():
if "" in tmp[col_name]:
tmp[col_name].remove("")
tmp[col_name + "_num"] = len(tmp[col_name])
elif col_name in ["is_pure_author", "is_have_pure_reply", "is_have_reply"]:
elif col_name in ["is_pure_author", "is_have_pure_reply", "is_have_reply", "is_related_service"]:
if elem == "true":
tmp[col_name] = 1
else:
......@@ -317,6 +377,9 @@ def diary_feature_engineering(df):
diary_df["is_have_pure_reply"] = diary_df["is_have_pure_reply"].astype(int)
diary_df["is_have_reply"] = diary_df["is_have_reply"].astype(int)
diary_df["is_related_service"] = diary_df["is_related_service"].astype(int)
diary_df["service_city"] = diary_df["service_city"].fillna("")
diary_df = diary_df[DIARY_COLUMNS]
print("diary: " + str(diary_df.shape))
......
......@@ -86,6 +86,22 @@ TRACTATE_COLUMNS = [
"sixty_ctr",
"ninety_ctr",
"history_ctr",
"one_share_num",
"three_share_num",
"seven_share_num",
"fifteen_share_num",
"thirty_share_num",
"sixty_share_num",
"ninety_share_num",
"history_share_num",
"one_browse_duration_avg",
"three_browse_duration_avg",
"seven_browse_duration_avg",
"fifteen_browse_duration_avg",
"thirty_browse_duration_avg",
"sixty_browse_duration_avg",
"ninety_browse_duration_avg",
"history_browse_duration_avg",
"first_demands",
"second_demands",
"first_solutions",
......@@ -100,6 +116,14 @@ TRACTATE_COLUMNS = [
"first_positions_num",
"second_positions_num",
"projects_num",
"is_related_service",
"effect_second_skip_num",
"business_second_skip_num",
"effect_second_skip_rate",
"business_second_skip_rate",
"service_price",
"service_sold_num",
"service_city",
]
INT_COLUMNS = [
......@@ -154,6 +178,14 @@ INT_COLUMNS = [
"sixty_browse_user_num",
"ninety_browse_user_num",
"history_browse_user_num",
"one_share_num",
"three_share_num",
"seven_share_num",
"fifteen_share_num",
"thirty_share_num",
"sixty_share_num",
"ninety_share_num",
"history_share_num",
"first_demands_num",
"second_demands_num",
"first_solutions_num",
......@@ -161,6 +193,10 @@ INT_COLUMNS = [
"first_positions_num",
"second_positions_num",
"projects_num",
"effect_second_skip_num",
"business_second_skip_num",
"service_price",
"service_sold_num",
]
FLOAT_COLUMNS = [
"one_ctr",
......@@ -187,14 +223,71 @@ FLOAT_COLUMNS = [
# "sixty_reply_pure_rate",
# "ninety_reply_pure_rate",
# "history_reply_pure_rate",
"one_browse_duration_avg",
"three_browse_duration_avg",
"seven_browse_duration_avg",
"fifteen_browse_duration_avg",
"thirty_browse_duration_avg",
"sixty_browse_duration_avg",
"ninety_browse_duration_avg",
"history_browse_duration_avg",
"effect_second_skip_rate",
"business_second_skip_rate",
]
# Categorical feature columns for the tractate model (listed separately from
# INT_COLUMNS and FLOAT_COLUMNS).
# Keep one name per line with a trailing comma: two adjacent string literals
# without a comma are silently joined into a single (non-existent) column name.
CATEGORICAL_COLUMNS = [
    "device_id",
    "active_type",
    "past_consume_ability_history",
    "potential_consume_ability_history",
    "price_sensitive_history",
    "card_id",
    "is_pure_author",
    "is_have_reply",
    "is_have_pure_reply",
    "content_level",
    "show_tag_id",
    "device_fd",
    "content_fd",
    "fd1",
    "fd2",
    "fd3",
    "device_sd",
    "content_sd",
    "sd1",
    "sd2",
    "sd3",
    "device_fs",
    "content_fs",
    "fs1",
    "fs2",
    "fs3",
    "device_ss",
    "content_ss",
    "ss1",
    "ss2",
    "ss3",
    "device_fp",
    "content_fp",
    "fp1",
    "fp2",
    "fp3",
    "device_sp",
    "content_sp",
    "sp1",
    "sp2",
    "sp3",
    "device_p",
    "content_p",
    "p1",
    "p2",
    "p3",
    "click_tractate_id1",
    "click_tractate_id2",
    "click_tractate_id3",
    "click_tractate_id4",
    "click_tractate_id5",
    "is_related_service",
    "service_city",
]
......@@ -227,7 +320,7 @@ def get_tractate_dict_from_redis():
if "" in tmp[col_name]:
tmp[col_name].remove("")
tmp[col_name + "_num"] = len(tmp[col_name])
elif col_name in ["is_pure_author", "is_have_pure_reply", "is_have_reply"]:
elif col_name in ["is_pure_author", "is_have_pure_reply", "is_have_reply", "is_related_service"]:
if elem == "true":
tmp[col_name] = 1
else:
......@@ -270,6 +363,9 @@ def tractate_feature_engineering(tractate_df):
df["is_have_reply"] = df["is_have_reply"].astype(int)
df["show_tag_id"] = df["show_tag_id"].astype(str)
df["is_related_service"] = df["is_related_service"].astype(int)
df["service_city"] = df["service_city"].fillna("")
df = df[TRACTATE_COLUMNS]
print("tractate: " + str(df.shape))
......
......@@ -63,6 +63,14 @@ _int_columns = [
"sixty_browse_user_num",
"ninety_browse_user_num",
"history_browse_user_num",
"one_share_num",
"three_share_num",
"seven_share_num",
"fifteen_share_num",
"thirty_share_num",
"sixty_share_num",
"ninety_share_num",
"history_share_num",
"first_demands_num",
"second_demands_num",
"first_solutions_num",
......@@ -70,6 +78,11 @@ _int_columns = [
"first_positions_num",
"second_positions_num",
"projects_num",
"is_related_service",
"effect_second_skip_num",
"business_second_skip_num",
"service_price",
"service_sold_num",
]
_float_columns = [
"one_ctr",
......@@ -96,13 +109,64 @@ _float_columns = [
# "sixty_reply_pure_rate",
# "ninety_reply_pure_rate",
# "history_reply_pure_rate",
"one_browse_duration_avg",
"three_browse_duration_avg",
"seven_browse_duration_avg",
"fifteen_browse_duration_avg",
"thirty_browse_duration_avg",
"sixty_browse_duration_avg",
"ninety_browse_duration_avg",
"history_browse_duration_avg",
"effect_second_skip_rate",
"business_second_skip_rate",
]
# Categorical feature columns; concatenated into PREDICTION_ALL_COLUMNS together
# with the int and float column lists.
# Keep one name per line with a trailing comma: two adjacent string literals
# without a comma are silently joined into a single (non-existent) column name.
_categorical_columns = [
    "device_id",
    "past_consume_ability_history",
    "potential_consume_ability_history",
    "price_sensitive_history",
    "show_tag_id",
    "device_fd",
    "device_sd",
    "device_fs",
    "device_ss",
    "device_fp",
    "device_sp",
    "device_p",
    "content_fd",
    "content_sd",
    "content_fs",
    "content_ss",
    "content_fp",
    "content_sp",
    "content_p",
    "fd1",
    "fd2",
    "fd3",
    "sd1",
    "sd2",
    "sd3",
    "fs1",
    "fs2",
    "fs3",
    "ss1",
    "ss2",
    "ss3",
    "fp1",
    "fp2",
    "fp3",
    "sp1",
    "sp2",
    "sp3",
    "p1",
    "p2",
    "p3",
    "click_tractate_id1",
    "click_tractate_id2",
    "click_tractate_id3",
    "click_tractate_id4",
    "click_tractate_id5",
    "service_city",
]
# Every prediction column, ordered as: integer, then float, then categorical.
PREDICTION_ALL_COLUMNS = [*_int_columns, *_float_columns, *_categorical_columns]
......
......@@ -75,7 +75,9 @@ def main():
save_path = get_essm_model_save_path("tractate")
print("load path: " + save_path)
# # save_path = str(Path("~/data/models/tractate/1596089465").expanduser()) # local
# save_path = str(Path("~/data/models/tractate/1598236893").expanduser()) # local
# save_path = "/data/files/models/tractate/1598254242" # server
predict_fn = tf.contrib.predictor.from_saved_model(save_path)
device_dict = device_fe.get_device_dict_from_redis()
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment