Commit c60da982 authored by 赵威

Merge branch 'fe' into 'offic'

Fe

See merge request !21
parents e110acf5 a2df4105
...@@ -87,11 +87,11 @@ _int_columns = [ ...@@ -87,11 +87,11 @@ _int_columns = [
"first_positions_num", "first_positions_num",
"second_positions_num", "second_positions_num",
"projects_num", "projects_num",
"is_related_service", # "is_related_service",
"effect_second_skip_num", # "effect_second_skip_num",
"business_second_skip_num", # "business_second_skip_num",
"service_price", # "service_price",
"service_sold_num", # "service_sold_num",
] ]
_float_columns = [ _float_columns = [
"one_ctr", "one_ctr",
...@@ -126,8 +126,8 @@ _float_columns = [ ...@@ -126,8 +126,8 @@ _float_columns = [
"sixty_browse_duration_avg", "sixty_browse_duration_avg",
"ninety_browse_duration_avg", "ninety_browse_duration_avg",
"history_browse_duration_avg", "history_browse_duration_avg",
"effect_second_skip_rate", # "effect_second_skip_rate",
"business_second_skip_rate", # "business_second_skip_rate",
] ]
_categorical_columns = [ _categorical_columns = [
"device_id", "device_id",
...@@ -174,25 +174,25 @@ _categorical_columns = [ ...@@ -174,25 +174,25 @@ _categorical_columns = [
"click_diary_id3", "click_diary_id3",
"click_diary_id4", "click_diary_id4",
"click_diary_id5", "click_diary_id5",
"service_id", # "service_id",
"service_city", # "service_city",
"recommend_service_id", # "recommend_service_id",
"recommend_service_city", # "recommend_service_city",
"recommend_service_price", # "recommend_service_price",
"device_fd2", # "device_fd2",
"device_sd2", # "device_sd2",
"device_fs2", # "device_fs2",
"device_ss2", # "device_ss2",
"device_fp2", # "device_fp2",
"device_sp2", # "device_sp2",
"device_p2", # "device_p2",
"device_fd3", # "device_fd3",
"device_sd3", # "device_sd3",
"device_fs3", # "device_fs3",
"device_ss3", # "device_ss3",
"device_fp3", # "device_fp3",
"device_sp3", # "device_sp3",
"device_p3", # "device_p3",
] ]
PREDICTION_ALL_COLUMNS = _int_columns + _float_columns + _categorical_columns PREDICTION_ALL_COLUMNS = _int_columns + _float_columns + _categorical_columns
......
...@@ -124,18 +124,18 @@ DIARY_COLUMNS = [ ...@@ -124,18 +124,18 @@ DIARY_COLUMNS = [
"first_positions_num", "first_positions_num",
"second_positions_num", "second_positions_num",
"projects_num", "projects_num",
"is_related_service", # "is_related_service",
"effect_second_skip_num", # "effect_second_skip_num",
"business_second_skip_num", # "business_second_skip_num",
"effect_second_skip_rate", # "effect_second_skip_rate",
"business_second_skip_rate", # "business_second_skip_rate",
"service_id", # "service_id",
"service_price", # "service_price",
"service_sold_num", # "service_sold_num",
"service_city", # "service_city",
"recommend_service_id", # "recommend_service_id",
"recommend_service_city", # "recommend_service_city",
"recommend_service_price", # "recommend_service_price",
] ]
INT_COLUMNS = [ INT_COLUMNS = [
"active_days", "active_days",
...@@ -213,10 +213,10 @@ INT_COLUMNS = [ ...@@ -213,10 +213,10 @@ INT_COLUMNS = [
"first_positions_num", "first_positions_num",
"second_positions_num", "second_positions_num",
"projects_num", "projects_num",
"effect_second_skip_num", # "effect_second_skip_num",
"business_second_skip_num", # "business_second_skip_num",
"service_price", # "service_price",
"service_sold_num", # "service_sold_num",
] ]
FLOAT_COLUMNS = [ FLOAT_COLUMNS = [
"one_ctr", "one_ctr",
...@@ -251,8 +251,8 @@ FLOAT_COLUMNS = [ ...@@ -251,8 +251,8 @@ FLOAT_COLUMNS = [
"sixty_browse_duration_avg", "sixty_browse_duration_avg",
"ninety_browse_duration_avg", "ninety_browse_duration_avg",
"history_browse_duration_avg", "history_browse_duration_avg",
"effect_second_skip_rate", # "effect_second_skip_rate",
"business_second_skip_rate", # "business_second_skip_rate",
] ]
CATEGORICAL_COLUMNS = [ CATEGORICAL_COLUMNS = [
"device_id", "device_id",
...@@ -305,26 +305,26 @@ CATEGORICAL_COLUMNS = [ ...@@ -305,26 +305,26 @@ CATEGORICAL_COLUMNS = [
"click_diary_id3", "click_diary_id3",
"click_diary_id4", "click_diary_id4",
"click_diary_id5", "click_diary_id5",
"is_related_service", # "is_related_service",
"service_id", # "service_id",
"service_city", # "service_city",
"recommend_service_id", # "recommend_service_id",
"recommend_service_city", # "recommend_service_city",
"recommend_service_price", # "recommend_service_price",
"device_fd2", # "device_fd2",
"device_sd2", # "device_sd2",
"device_fs2", # "device_fs2",
"device_ss2", # "device_ss2",
"device_fp2", # "device_fp2",
"device_sp2", # "device_sp2",
"device_p2", # "device_p2",
"device_fd3", # "device_fd3",
"device_sd3", # "device_sd3",
"device_fs3", # "device_fs3",
"device_ss3", # "device_ss3",
"device_fp3", # "device_fp3",
"device_sp3", # "device_sp3",
"device_p3", # "device_p3",
] ]
CROSS_COLUMNS = [ CROSS_COLUMNS = [
["device_fd", "content_fd"], ["device_fd", "content_fd"],
...@@ -334,20 +334,20 @@ CROSS_COLUMNS = [ ...@@ -334,20 +334,20 @@ CROSS_COLUMNS = [
["device_fp", "content_fp"], ["device_fp", "content_fp"],
["device_sp", "content_sp"], ["device_sp", "content_sp"],
["device_p", "content_p"], ["device_p", "content_p"],
["device_fd2", "content_fd"], # ["device_fd2", "content_fd"],
["device_sd2", "content_sd"], # ["device_sd2", "content_sd"],
["device_fs2", "content_fs"], # ["device_fs2", "content_fs"],
["device_ss2", "content_ss"], # ["device_ss2", "content_ss"],
["device_fp2", "content_fp"], # ["device_fp2", "content_fp"],
["device_sp2", "content_sp"], # ["device_sp2", "content_sp"],
["device_p2", "content_p"], # ["device_p2", "content_p"],
["device_fd3", "content_fd"], # ["device_fd3", "content_fd"],
["device_sd3", "content_sd"], # ["device_sd3", "content_sd"],
["device_fs3", "content_fs"], # ["device_fs3", "content_fs"],
["device_ss3", "content_ss"], # ["device_ss3", "content_ss"],
["device_fp3", "content_fp"], # ["device_fp3", "content_fp"],
["device_sp3", "content_sp"], # ["device_sp3", "content_sp"],
["device_p3", "content_p"], # ["device_p3", "content_p"],
] ]
...@@ -422,15 +422,15 @@ def diary_feature_engineering(df): ...@@ -422,15 +422,15 @@ def diary_feature_engineering(df):
diary_df["is_have_pure_reply"] = diary_df["is_have_pure_reply"].astype(int) diary_df["is_have_pure_reply"] = diary_df["is_have_pure_reply"].astype(int)
diary_df["is_have_reply"] = diary_df["is_have_reply"].astype(int) diary_df["is_have_reply"] = diary_df["is_have_reply"].astype(int)
diary_df["is_related_service"] = diary_df["is_related_service"].astype(int) # diary_df["is_related_service"] = diary_df["is_related_service"].astype(int)
diary_df["service_id"] = diary_df["service_id"].astype(str) # diary_df["service_id"] = diary_df["service_id"].astype(str)
diary_df["recommend_service_id"] = diary_df["recommend_service_id"].astype(str) # diary_df["recommend_service_id"] = diary_df["recommend_service_id"].astype(str)
diary_df["recommend_service_price"] = diary_df["recommend_service_price"].astype(str) # diary_df["recommend_service_price"] = diary_df["recommend_service_price"].astype(str)
diary_df["service_id"] = diary_df["service_id"].fillna("-1") # diary_df["service_id"] = diary_df["service_id"].fillna("-1")
diary_df["service_city"] = diary_df["service_city"].fillna("") # diary_df["service_city"] = diary_df["service_city"].fillna("")
diary_df["recommend_service_id"] = diary_df["recommend_service_id"].fillna("-1") # diary_df["recommend_service_id"] = diary_df["recommend_service_id"].fillna("-1")
diary_df["recommend_service_city"] = diary_df["recommend_service_city"].fillna("") # diary_df["recommend_service_city"] = diary_df["recommend_service_city"].fillna("")
diary_df = diary_df[DIARY_COLUMNS] diary_df = diary_df[DIARY_COLUMNS]
...@@ -463,21 +463,21 @@ def join_features(device_df, diary_df, cc_df): ...@@ -463,21 +463,21 @@ def join_features(device_df, diary_df, cc_df):
df["device_sp"] = df["second_positions_x"].apply(lambda x: nth_element(x, 0)) df["device_sp"] = df["second_positions_x"].apply(lambda x: nth_element(x, 0))
df["device_p"] = df["projects_x"].apply(lambda x: nth_element(x, 0)) df["device_p"] = df["projects_x"].apply(lambda x: nth_element(x, 0))
df["device_fd2"] = df["first_demands_x"].apply(lambda x: nth_element(x, 1)) # df["device_fd2"] = df["first_demands_x"].apply(lambda x: nth_element(x, 1))
df["device_sd2"] = df["second_demands_x"].apply(lambda x: nth_element(x, 1)) # df["device_sd2"] = df["second_demands_x"].apply(lambda x: nth_element(x, 1))
df["device_fs2"] = df["first_solutions_x"].apply(lambda x: nth_element(x, 1)) # df["device_fs2"] = df["first_solutions_x"].apply(lambda x: nth_element(x, 1))
df["device_ss2"] = df["second_solutions_x"].apply(lambda x: nth_element(x, 1)) # df["device_ss2"] = df["second_solutions_x"].apply(lambda x: nth_element(x, 1))
df["device_fp2"] = df["first_positions_x"].apply(lambda x: nth_element(x, 1)) # df["device_fp2"] = df["first_positions_x"].apply(lambda x: nth_element(x, 1))
df["device_sp2"] = df["second_positions_x"].apply(lambda x: nth_element(x, 1)) # df["device_sp2"] = df["second_positions_x"].apply(lambda x: nth_element(x, 1))
df["device_p2"] = df["projects_x"].apply(lambda x: nth_element(x, 1)) # df["device_p2"] = df["projects_x"].apply(lambda x: nth_element(x, 1))
df["device_fd3"] = df["first_demands_x"].apply(lambda x: nth_element(x, 2)) # df["device_fd3"] = df["first_demands_x"].apply(lambda x: nth_element(x, 2))
df["device_sd3"] = df["second_demands_x"].apply(lambda x: nth_element(x, 2)) # df["device_sd3"] = df["second_demands_x"].apply(lambda x: nth_element(x, 2))
df["device_fs3"] = df["first_solutions_x"].apply(lambda x: nth_element(x, 2)) # df["device_fs3"] = df["first_solutions_x"].apply(lambda x: nth_element(x, 2))
df["device_ss3"] = df["second_solutions_x"].apply(lambda x: nth_element(x, 2)) # df["device_ss3"] = df["second_solutions_x"].apply(lambda x: nth_element(x, 2))
df["device_fp3"] = df["first_positions_x"].apply(lambda x: nth_element(x, 2)) # df["device_fp3"] = df["first_positions_x"].apply(lambda x: nth_element(x, 2))
df["device_sp3"] = df["second_positions_x"].apply(lambda x: nth_element(x, 2)) # df["device_sp3"] = df["second_positions_x"].apply(lambda x: nth_element(x, 2))
df["device_p3"] = df["projects_x"].apply(lambda x: nth_element(x, 2)) # df["device_p3"] = df["projects_x"].apply(lambda x: nth_element(x, 2))
df["content_fd"] = df["first_demands_y"].apply(lambda x: nth_element(x, 0)) df["content_fd"] = df["first_demands_y"].apply(lambda x: nth_element(x, 0))
df["content_sd"] = df["second_demands_y"].apply(lambda x: nth_element(x, 0)) df["content_sd"] = df["second_demands_y"].apply(lambda x: nth_element(x, 0))
...@@ -566,20 +566,20 @@ def device_diary_fe(device_id, diary_ids, device_dict, diary_dict): ...@@ -566,20 +566,20 @@ def device_diary_fe(device_id, diary_ids, device_dict, diary_dict):
device_info["device_fp"] = nth_element(device_fp, 0) device_info["device_fp"] = nth_element(device_fp, 0)
device_info["device_sp"] = nth_element(device_sp, 0) device_info["device_sp"] = nth_element(device_sp, 0)
device_info["device_p"] = nth_element(device_p, 0) device_info["device_p"] = nth_element(device_p, 0)
device_info["device_fd2"] = nth_element(device_fd, 1) # device_info["device_fd2"] = nth_element(device_fd, 1)
device_info["device_sd2"] = nth_element(device_sd, 1) # device_info["device_sd2"] = nth_element(device_sd, 1)
device_info["device_fs2"] = nth_element(device_fs, 1) # device_info["device_fs2"] = nth_element(device_fs, 1)
device_info["device_ss2"] = nth_element(device_ss, 1) # device_info["device_ss2"] = nth_element(device_ss, 1)
device_info["device_fp2"] = nth_element(device_fp, 1) # device_info["device_fp2"] = nth_element(device_fp, 1)
device_info["device_sp2"] = nth_element(device_sp, 1) # device_info["device_sp2"] = nth_element(device_sp, 1)
device_info["device_p2"] = nth_element(device_p, 1) # device_info["device_p2"] = nth_element(device_p, 1)
device_info["device_fd3"] = nth_element(device_fd, 2) # device_info["device_fd3"] = nth_element(device_fd, 2)
device_info["device_sd3"] = nth_element(device_sd, 2) # device_info["device_sd3"] = nth_element(device_sd, 2)
device_info["device_fs3"] = nth_element(device_fs, 2) # device_info["device_fs3"] = nth_element(device_fs, 2)
device_info["device_ss3"] = nth_element(device_ss, 2) # device_info["device_ss3"] = nth_element(device_ss, 2)
device_info["device_fp3"] = nth_element(device_fp, 2) # device_info["device_fp3"] = nth_element(device_fp, 2)
device_info["device_sp3"] = nth_element(device_sp, 2) # device_info["device_sp3"] = nth_element(device_sp, 2)
device_info["device_p3"] = nth_element(device_p, 2) # device_info["device_p3"] = nth_element(device_p, 2)
diary_lst = [] diary_lst = []
diary_ids_res = [] diary_ids_res = []
for id in diary_ids: for id in diary_ids:
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment