Commit ff1b69af authored by 赵威

Merge branch 'fe' into 'offic'

Fe

See merge request !2
parents 52f719f1 679951ea
...@@ -87,6 +87,11 @@ _int_columns = [ ...@@ -87,6 +87,11 @@ _int_columns = [
"first_positions_num", "first_positions_num",
"second_positions_num", "second_positions_num",
"projects_num", "projects_num",
"is_related_service",
"effect_second_skip_num",
"business_second_skip_num",
"service_price",
"service_sold_num",
] ]
_float_columns = [ _float_columns = [
"one_ctr", "one_ctr",
...@@ -121,13 +126,55 @@ _float_columns = [ ...@@ -121,13 +126,55 @@ _float_columns = [
"sixty_browse_duration_avg", "sixty_browse_duration_avg",
"ninety_browse_duration_avg", "ninety_browse_duration_avg",
"history_browse_duration_avg", "history_browse_duration_avg",
"effect_second_skip_rate",
"business_second_skip_rate",
] ]
_categorical_columns = [ _categorical_columns = [
"device_id", "past_consume_ability_history", "potential_consume_ability_history", "price_sensitive_history", "device_fd", "device_id",
"device_sd", "device_fs", "device_ss", "device_fp", "device_sp", "device_p", "content_fd", "content_sd", "content_fs", "past_consume_ability_history",
"content_ss", "content_fp", "content_sp", "content_p", "fd1", "fd2", "fd3", "sd1", "sd2", "sd3", "fs1", "fs2", "fs3", "ss1", "potential_consume_ability_history",
"ss2", "ss3", "fp1", "fp2", "fp3", "sp1", "sp2", "sp3", "p1", "p2", "p3", "click_diary_id1", "click_diary_id2", "price_sensitive_history",
"click_diary_id3", "click_diary_id4", "click_diary_id5" "device_fd",
"device_sd",
"device_fs",
"device_ss",
"device_fp",
"device_sp",
"device_p",
"content_fd",
"content_sd",
"content_fs",
"content_ss",
"content_fp",
"content_sp",
"content_p",
"fd1",
"fd2",
"fd3",
"sd1",
"sd2",
"sd3",
"fs1",
"fs2",
"fs3",
"ss1",
"ss2",
"ss3",
"fp1",
"fp2",
"fp3",
"sp1",
"sp2",
"sp3",
"p1",
"p2",
"p3",
"click_diary_id1",
"click_diary_id2",
"click_diary_id3",
"click_diary_id4",
"click_diary_id5",
"service_city",
] ]
PREDICTION_ALL_COLUMNS = _int_columns + _float_columns + _categorical_columns PREDICTION_ALL_COLUMNS = _int_columns + _float_columns + _categorical_columns
......
import pandas as pd import pandas as pd
from utils.cache import redis_db_client from utils.cache import redis_db_client
# "channel_first", "city_first", "model_first",
DIARY_DEVICE_COLUMNS = [ DIARY_DEVICE_COLUMNS = [
"device_id", "active_type", "active_days", "past_consume_ability_history", "potential_consume_ability_history", "device_id",
"price_sensitive_history", "first_demands", "second_demands", "first_solutions", "second_solutions", "first_positions", "active_type",
"second_positions", "projects", "click_diary_id1", "click_diary_id2", "click_diary_id3", "click_diary_id4", "click_diary_id5" "active_days",
"past_consume_ability_history",
"potential_consume_ability_history",
"price_sensitive_history",
"first_demands",
"second_demands",
"first_solutions",
"second_solutions",
"first_positions",
"second_positions",
"projects",
"click_diary_id1",
"click_diary_id2",
"click_diary_id3",
"click_diary_id4",
"click_diary_id5",
] ]
TRACTATE_DEVICE_COLUMNS = [ TRACTATE_DEVICE_COLUMNS = [
"device_id", "active_type", "active_days", "channel_first", "city_first", "model_first", "past_consume_ability_history", "device_id",
"potential_consume_ability_history", "price_sensitive_history", "first_demands", "second_demands", "first_solutions", "active_type",
"second_solutions", "first_positions", "second_positions", "projects", "click_tractate_id1", "click_tractate_id2", "active_days",
"click_tractate_id3", "click_tractate_id4", "click_tractate_id5" "channel_first",
"city_first",
"model_first",
"past_consume_ability_history",
"potential_consume_ability_history",
"price_sensitive_history",
"first_demands",
"second_demands",
"first_solutions",
"second_solutions",
"first_positions",
"second_positions",
"projects",
"click_tractate_id1",
"click_tractate_id2",
"click_tractate_id3",
"click_tractate_id4",
"click_tractate_id5",
] ]
......
...@@ -124,6 +124,14 @@ DIARY_COLUMNS = [ ...@@ -124,6 +124,14 @@ DIARY_COLUMNS = [
"first_positions_num", "first_positions_num",
"second_positions_num", "second_positions_num",
"projects_num", "projects_num",
"is_related_service",
"effect_second_skip_num",
"business_second_skip_num",
"effect_second_skip_rate",
"business_second_skip_rate",
"service_price",
"service_sold_num",
"service_city",
] ]
INT_COLUMNS = [ INT_COLUMNS = [
"active_days", "active_days",
...@@ -201,6 +209,10 @@ INT_COLUMNS = [ ...@@ -201,6 +209,10 @@ INT_COLUMNS = [
"first_positions_num", "first_positions_num",
"second_positions_num", "second_positions_num",
"projects_num", "projects_num",
"effect_second_skip_num",
"business_second_skip_num",
"service_price",
"service_sold_num",
] ]
FLOAT_COLUMNS = [ FLOAT_COLUMNS = [
"one_ctr", "one_ctr",
...@@ -235,14 +247,62 @@ FLOAT_COLUMNS = [ ...@@ -235,14 +247,62 @@ FLOAT_COLUMNS = [
"sixty_browse_duration_avg", "sixty_browse_duration_avg",
"ninety_browse_duration_avg", "ninety_browse_duration_avg",
"history_browse_duration_avg", "history_browse_duration_avg",
"effect_second_skip_rate",
"business_second_skip_rate",
] ]
CATEGORICAL_COLUMNS = [ CATEGORICAL_COLUMNS = [
"device_id", "active_type", "past_consume_ability_history", "potential_consume_ability_history", "price_sensitive_history", "device_id",
"card_id", "is_pure_author", "is_have_reply", "is_have_pure_reply", "content_level", "device_fd", "content_fd", "fd1", "fd2", "active_type",
"fd3", "device_sd", "content_sd", "sd1", "sd2", "sd3", "device_fs", "content_fs", "fs1", "fs2", "fs3", "device_ss", "past_consume_ability_history",
"content_ss", "ss1", "ss2", "ss3", "device_fp", "content_fp", "fp1", "fp2", "fp3", "device_sp", "content_sp", "sp1", "sp2", "potential_consume_ability_history",
"sp3", "device_p", "content_p", "p1", "p2", "p3", "click_diary_id1", "click_diary_id2", "click_diary_id3", "click_diary_id4", "price_sensitive_history",
"click_diary_id5" "card_id",
"is_pure_author",
"is_have_reply",
"is_have_pure_reply",
"content_level",
"device_fd",
"content_fd",
"fd1",
"fd2",
"fd3",
"device_sd",
"content_sd",
"sd1",
"sd2",
"sd3",
"device_fs",
"content_fs",
"fs1",
"fs2",
"fs3",
"device_ss",
"content_ss",
"ss1",
"ss2",
"ss3",
"device_fp",
"content_fp",
"fp1",
"fp2",
"fp3",
"device_sp",
"content_sp",
"sp1",
"sp2",
"sp3",
"device_p",
"content_p",
"p1",
"p2",
"p3",
"click_diary_id1",
"click_diary_id2",
"click_diary_id3",
"click_diary_id4",
"click_diary_id5",
"is_related_service",
"service_city",
] ]
...@@ -275,7 +335,7 @@ def get_diary_dict_from_redis(): ...@@ -275,7 +335,7 @@ def get_diary_dict_from_redis():
if "" in tmp[col_name]: if "" in tmp[col_name]:
tmp[col_name].remove("") tmp[col_name].remove("")
tmp[col_name + "_num"] = len(tmp[col_name]) tmp[col_name + "_num"] = len(tmp[col_name])
elif col_name in ["is_pure_author", "is_have_pure_reply", "is_have_reply"]: elif col_name in ["is_pure_author", "is_have_pure_reply", "is_have_reply", "is_related_service"]:
if elem == "true": if elem == "true":
tmp[col_name] = 1 tmp[col_name] = 1
else: else:
...@@ -317,6 +377,9 @@ def diary_feature_engineering(df): ...@@ -317,6 +377,9 @@ def diary_feature_engineering(df):
diary_df["is_have_pure_reply"] = diary_df["is_have_pure_reply"].astype(int) diary_df["is_have_pure_reply"] = diary_df["is_have_pure_reply"].astype(int)
diary_df["is_have_reply"] = diary_df["is_have_reply"].astype(int) diary_df["is_have_reply"] = diary_df["is_have_reply"].astype(int)
diary_df["is_related_service"] = diary_df["is_related_service"].astype(int)
diary_df["service_city"] = diary_df["service_city"].fillna("")
diary_df = diary_df[DIARY_COLUMNS] diary_df = diary_df[DIARY_COLUMNS]
print("diary: " + str(diary_df.shape)) print("diary: " + str(diary_df.shape))
......
...@@ -86,6 +86,22 @@ TRACTATE_COLUMNS = [ ...@@ -86,6 +86,22 @@ TRACTATE_COLUMNS = [
"sixty_ctr", "sixty_ctr",
"ninety_ctr", "ninety_ctr",
"history_ctr", "history_ctr",
"one_share_num",
"three_share_num",
"seven_share_num",
"fifteen_share_num",
"thirty_share_num",
"sixty_share_num",
"ninety_share_num",
"history_share_num",
"one_browse_duration_avg",
"three_browse_duration_avg",
"seven_browse_duration_avg",
"fifteen_browse_duration_avg",
"thirty_browse_duration_avg",
"sixty_browse_duration_avg",
"ninety_browse_duration_avg",
"history_browse_duration_avg",
"first_demands", "first_demands",
"second_demands", "second_demands",
"first_solutions", "first_solutions",
...@@ -100,6 +116,14 @@ TRACTATE_COLUMNS = [ ...@@ -100,6 +116,14 @@ TRACTATE_COLUMNS = [
"first_positions_num", "first_positions_num",
"second_positions_num", "second_positions_num",
"projects_num", "projects_num",
"is_related_service",
"effect_second_skip_num",
"business_second_skip_num",
"effect_second_skip_rate",
"business_second_skip_rate",
"service_price",
"service_sold_num",
"service_city",
] ]
INT_COLUMNS = [ INT_COLUMNS = [
...@@ -154,6 +178,14 @@ INT_COLUMNS = [ ...@@ -154,6 +178,14 @@ INT_COLUMNS = [
"sixty_browse_user_num", "sixty_browse_user_num",
"ninety_browse_user_num", "ninety_browse_user_num",
"history_browse_user_num", "history_browse_user_num",
"one_share_num",
"three_share_num",
"seven_share_num",
"fifteen_share_num",
"thirty_share_num",
"sixty_share_num",
"ninety_share_num",
"history_share_num",
"first_demands_num", "first_demands_num",
"second_demands_num", "second_demands_num",
"first_solutions_num", "first_solutions_num",
...@@ -161,6 +193,10 @@ INT_COLUMNS = [ ...@@ -161,6 +193,10 @@ INT_COLUMNS = [
"first_positions_num", "first_positions_num",
"second_positions_num", "second_positions_num",
"projects_num", "projects_num",
"effect_second_skip_num",
"business_second_skip_num",
"service_price",
"service_sold_num",
] ]
FLOAT_COLUMNS = [ FLOAT_COLUMNS = [
"one_ctr", "one_ctr",
...@@ -187,14 +223,71 @@ FLOAT_COLUMNS = [ ...@@ -187,14 +223,71 @@ FLOAT_COLUMNS = [
# "sixty_reply_pure_rate", # "sixty_reply_pure_rate",
# "ninety_reply_pure_rate", # "ninety_reply_pure_rate",
# "history_reply_pure_rate", # "history_reply_pure_rate",
"one_browse_duration_avg",
"three_browse_duration_avg",
"seven_browse_duration_avg",
"fifteen_browse_duration_avg",
"thirty_browse_duration_avg",
"sixty_browse_duration_avg",
"ninety_browse_duration_avg",
"history_browse_duration_avg",
"effect_second_skip_rate",
"business_second_skip_rate",
] ]
CATEGORICAL_COLUMNS = [ CATEGORICAL_COLUMNS = [
"device_id", "active_type", "past_consume_ability_history", "potential_consume_ability_history", "price_sensitive_history", "device_id",
"card_id", "is_pure_author", "is_have_reply", "is_have_pure_reply", "content_level", "show_tag_id", "device_fd", "content_fd", "active_type",
"fd1", "fd2", "fd3", "device_sd", "content_sd", "sd1", "sd2", "sd3", "device_fs", "content_fs", "fs1", "fs2", "fs3", "past_consume_ability_history",
"device_ss", "content_ss", "ss1", "ss2", "ss3", "device_fp", "content_fp", "fp1", "fp2", "fp3", "device_sp", "content_sp", "potential_consume_ability_history",
"sp1", "sp2", "sp3", "device_p", "content_p", "p1", "p2", "p3", "click_tractate_id1", "click_tractate_id2", "price_sensitive_history",
"click_tractate_id3", "click_tractate_id4", "click_tractate_id5" "card_id",
"is_pure_author",
"is_have_reply",
"is_have_pure_reply",
"content_level",
"show_tag_id",
"device_fd",
"content_fd",
"fd1",
"fd2",
"fd3",
"device_sd",
"content_sd",
"sd1",
"sd2",
"sd3",
"device_fs",
"content_fs",
"fs1",
"fs2",
"fs3",
"device_ss",
"content_ss",
"ss1",
"ss2",
"ss3",
"device_fp",
"content_fp",
"fp1",
"fp2",
"fp3",
"device_sp",
"content_sp",
"sp1",
"sp2",
"sp3",
"device_p",
"content_p",
"p1",
"p2",
"p3",
"click_tractate_id1",
"click_tractate_id2",
"click_tractate_id3",
"click_tractate_id4",
"click_tractate_id5",
"is_related_service",
"service_city",
] ]
...@@ -227,7 +320,7 @@ def get_tractate_dict_from_redis(): ...@@ -227,7 +320,7 @@ def get_tractate_dict_from_redis():
if "" in tmp[col_name]: if "" in tmp[col_name]:
tmp[col_name].remove("") tmp[col_name].remove("")
tmp[col_name + "_num"] = len(tmp[col_name]) tmp[col_name + "_num"] = len(tmp[col_name])
elif col_name in ["is_pure_author", "is_have_pure_reply", "is_have_reply"]: elif col_name in ["is_pure_author", "is_have_pure_reply", "is_have_reply", "is_related_service"]:
if elem == "true": if elem == "true":
tmp[col_name] = 1 tmp[col_name] = 1
else: else:
...@@ -270,6 +363,9 @@ def tractate_feature_engineering(tractate_df): ...@@ -270,6 +363,9 @@ def tractate_feature_engineering(tractate_df):
df["is_have_reply"] = df["is_have_reply"].astype(int) df["is_have_reply"] = df["is_have_reply"].astype(int)
df["show_tag_id"] = df["show_tag_id"].astype(str) df["show_tag_id"] = df["show_tag_id"].astype(str)
df["is_related_service"] = df["is_related_service"].astype(int)
df["service_city"] = df["service_city"].fillna("")
df = df[TRACTATE_COLUMNS] df = df[TRACTATE_COLUMNS]
print("tractate: " + str(df.shape)) print("tractate: " + str(df.shape))
......
...@@ -63,6 +63,14 @@ _int_columns = [ ...@@ -63,6 +63,14 @@ _int_columns = [
"sixty_browse_user_num", "sixty_browse_user_num",
"ninety_browse_user_num", "ninety_browse_user_num",
"history_browse_user_num", "history_browse_user_num",
"one_share_num",
"three_share_num",
"seven_share_num",
"fifteen_share_num",
"thirty_share_num",
"sixty_share_num",
"ninety_share_num",
"history_share_num",
"first_demands_num", "first_demands_num",
"second_demands_num", "second_demands_num",
"first_solutions_num", "first_solutions_num",
...@@ -70,6 +78,11 @@ _int_columns = [ ...@@ -70,6 +78,11 @@ _int_columns = [
"first_positions_num", "first_positions_num",
"second_positions_num", "second_positions_num",
"projects_num", "projects_num",
"is_related_service",
"effect_second_skip_num",
"business_second_skip_num",
"service_price",
"service_sold_num",
] ]
_float_columns = [ _float_columns = [
"one_ctr", "one_ctr",
...@@ -96,13 +109,64 @@ _float_columns = [ ...@@ -96,13 +109,64 @@ _float_columns = [
# "sixty_reply_pure_rate", # "sixty_reply_pure_rate",
# "ninety_reply_pure_rate", # "ninety_reply_pure_rate",
# "history_reply_pure_rate", # "history_reply_pure_rate",
"one_browse_duration_avg",
"three_browse_duration_avg",
"seven_browse_duration_avg",
"fifteen_browse_duration_avg",
"thirty_browse_duration_avg",
"sixty_browse_duration_avg",
"ninety_browse_duration_avg",
"history_browse_duration_avg",
"effect_second_skip_rate",
"business_second_skip_rate",
] ]
_categorical_columns = [ _categorical_columns = [
"device_id", "past_consume_ability_history", "potential_consume_ability_history", "price_sensitive_history", "show_tag_id", "device_id",
"device_fd", "device_sd", "device_fs", "device_ss", "device_fp", "device_sp", "device_p", "content_fd", "content_sd", "past_consume_ability_history",
"content_fs", "content_ss", "content_fp", "content_sp", "content_p", "fd1", "fd2", "fd3", "sd1", "sd2", "sd3", "fs1", "fs2", "potential_consume_ability_history",
"fs3", "ss1", "ss2", "ss3", "fp1", "fp2", "fp3", "sp1", "sp2", "sp3", "p1", "p2", "p3", "click_tractate_id1", "price_sensitive_history",
"click_tractate_id2", "click_tractate_id3", "click_tractate_id4", "click_tractate_id5" "show_tag_id",
"device_fd",
"device_sd",
"device_fs",
"device_ss",
"device_fp",
"device_sp",
"device_p",
"content_fd",
"content_sd",
"content_fs",
"content_ss",
"content_fp",
"content_sp",
"content_p",
"fd1",
"fd2",
"fd3",
"sd1",
"sd2",
"sd3",
"fs1",
"fs2",
"fs3",
"ss1",
"ss2",
"ss3",
"fp1",
"fp2",
"fp3",
"sp1",
"sp2",
"sp3",
"p1",
"p2",
"p3",
"click_tractate_id1",
"click_tractate_id2",
"click_tractate_id3",
"click_tractate_id4",
"click_tractate_id5",
"service_city",
] ]
PREDICTION_ALL_COLUMNS = _int_columns + _float_columns + _categorical_columns PREDICTION_ALL_COLUMNS = _int_columns + _float_columns + _categorical_columns
......
...@@ -75,7 +75,9 @@ def main(): ...@@ -75,7 +75,9 @@ def main():
save_path = get_essm_model_save_path("tractate") save_path = get_essm_model_save_path("tractate")
print("load path: " + save_path) print("load path: " + save_path)
# # save_path = str(Path("~/data/models/tractate/1596089465").expanduser()) # local
# save_path = str(Path("~/data/models/tractate/1598236893").expanduser()) # local
# save_path = "/data/files/models/tractate/1598254242" # server
predict_fn = tf.contrib.predictor.from_saved_model(save_path) predict_fn = tf.contrib.predictor.from_saved_model(save_path)
device_dict = device_fe.get_device_dict_from_redis() device_dict = device_fe.get_device_dict_from_redis()
......
Markdown is supported
0% Try again or attach a new file
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment